From 43b0178eda1e7e5d1e205bbfd076ab5d6ecacc02 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Oct 2010 23:19:04 +0200 Subject: nfsd: fix NULL dereference in setattr() The original code would oops if this were called from nfsd4_setattr() because "filpp" is NULL. (Note this case is currently impossible, as long as we only give out read delegations.) Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ad2bfa68d534..2d191293e6aa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3081,9 +3081,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (status) goto out; renew_client(dp->dl_client); - if (filpp) + if (filpp) { *filpp = find_readable_file(dp->dl_file); - BUG_ON(!*filpp); + BUG_ON(!*filpp); + } } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); if (!stp) -- cgit v1.2.3 From 5afa040b307952bb804eba34b21646da2842e14d Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Tue, 9 Nov 2010 09:39:23 +0800 Subject: NFSv4.1: Make sure nfsd can decode SP4_SSV correctly at exchange_id According to RFC, the argument of ssv_sp_parms4 is: struct ssv_sp_parms4 { state_protect_ops4 ssp_ops; sec_oid4 ssp_hash_algs<>; sec_oid4 ssp_encr_algs<>; uint32_t ssp_window; uint32_t ssp_num_gss_handles; }; If client send a exchange_id with SP4_SSV, server cann't decode the SP4_SSV's ssp_hash_algs and ssp_encr_algs arguments correctly. Because the kernel treat the two arguments as a signal sec_oid4 struct, but should be a set of sec_oid4 struct. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f35a94a04026..71d7d339e44a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1005,7 +1005,7 @@ static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, struct nfsd4_exchange_id *exid) { - int dummy; + int dummy, tmp; DECODE_HEAD; READ_BUF(NFS4_VERIFIER_SIZE); @@ -1053,15 +1053,23 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, /* ssp_hash_algs<> */ READ_BUF(4); - READ32(dummy); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + READ32(tmp); + while (tmp--) { + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } /* ssp_encr_algs<> */ READ_BUF(4); - READ32(dummy); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + READ32(tmp); + while (tmp--) { + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } /* ssp_window and ssp_num_gss_handles */ READ_BUF(8); -- cgit v1.2.3 From 044bc1d4324bfb34761cb361e404cb8d39c68777 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 12 Nov 2010 14:36:06 -0500 Subject: nfsd4: return serverfault on request for ssv We're refusing to support a mandatory features of 4.1, so serverfault seems the better error; see e.g.: http://www.ietf.org/mail-archive/web/nfsv4/current/msg07638.html Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2d191293e6aa..9e7f8af12f8f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1344,7 +1344,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, case SP4_NONE: break; case SP4_SSV: - return nfserr_encr_alg_unsupp; + return nfserr_serverfault; default: BUG(); /* checked by xdr code */ case SP4_MACH_CRED: -- cgit v1.2.3 From ced6dfe9fc7128995c6d60627938944b430d82c8 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Thu, 11 Nov 2010 18:03:50 +0800 Subject: NFS4.1: server gets drc mem fail should reply error at create_session When server gets drc mem fail, it should reply error to client. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9e7f8af12f8f..5d0ee0f0cb0e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -749,6 +749,8 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n */ slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); + if (numslots < 1) + return NULL; new = alloc_session(slotsize, numslots); if (!new) { -- cgit v1.2.3 From 1205065764f2eda3216ebe213143f69891ee3460 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Thu, 11 Nov 2010 18:03:40 +0800 Subject: NFS4.1: Fix bug server don't reply the right fore_channel to client at create_session At the latest kernel(2.6.37-rc1), server just initialize the forechannel at init_forechannel_attrs, but don't reflect it to reply. After initialize the session success, we should copy the forechannel info to nfsd4_create_session struct. Reviewed-by: Benny Halevy Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5d0ee0f0cb0e..afa7525a86be 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1562,6 +1562,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfs_ok; memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); + memcpy(&cr_ses->fore_channel, &new->se_fchannel, + sizeof(struct nfsd4_channel_attrs)); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; -- cgit v1.2.3 From 7ddbead6e6d3c730570a215ab9a6b1d126c54d34 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 4 Nov 2010 20:08:02 -0700 Subject: jffs2: use vzalloc Signed-off-by: Joe Perches Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/build.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 85c6be2db02f..3005ec4520ad 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -336,14 +336,13 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) size = sizeof(struct jffs2_eraseblock) * c->nr_blocks; #ifndef __ECOS if (jffs2_blocks_use_vmalloc(c)) - c->blocks = vmalloc(size); + c->blocks = vzalloc(size); else #endif - c->blocks = kmalloc(size, GFP_KERNEL); + c->blocks = kzalloc(size, GFP_KERNEL); if (!c->blocks) return -ENOMEM; - memset(c->blocks, 0, size); for (i=0; inr_blocks; i++) { INIT_LIST_HEAD(&c->blocks[i].list); c->blocks[i].offset = i * c->sector_size; -- cgit v1.2.3 From f326966b3df47f4fa7e90425f60efdd30c31fe19 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sun, 14 Nov 2010 23:08:39 +0300 Subject: jffs2: fix error value sign do_verify_xattr_datum(), do_load_xattr_datum(), load_xattr_datum() and verify_xattr_ref() should return negative value on error. Sometimes they return EIO that is positive. Change this to -EIO. Signed-off-by: Vasiliy Kulikov Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/xattr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 9b572ca40a49..4f9cc0482949 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -151,7 +151,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", offset, je32_to_cpu(rx.hdr_crc), crc); xd->flags |= JFFS2_XFLAGS_INVALID; - return EIO; + return -EIO; } totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK @@ -167,7 +167,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat je32_to_cpu(rx.xid), xd->xid, je32_to_cpu(rx.version), xd->version); xd->flags |= JFFS2_XFLAGS_INVALID; - return EIO; + return -EIO; } xd->xprefix = rx.xprefix; xd->name_len = rx.name_len; @@ -230,7 +230,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum ref_offset(xd->node), xd->data_crc, crc); kfree(data); xd->flags |= JFFS2_XFLAGS_INVALID; - return EIO; + return -EIO; } xd->flags |= JFFS2_XFLAGS_HOT; @@ -268,7 +268,7 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x if (xd->xname) return 0; if (xd->flags & JFFS2_XFLAGS_INVALID) - return EIO; + return -EIO; if (unlikely(is_xattr_datum_unchecked(c, xd))) rc = do_verify_xattr_datum(c, xd); if (!rc) @@ -460,7 +460,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref if (crc != je32_to_cpu(rr.node_crc)) { JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", offset, je32_to_cpu(rr.node_crc), crc); - return EIO; + return -EIO; } if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF @@ -470,7 +470,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, je32_to_cpu(rr.totlen), PAD(sizeof(rr))); - return EIO; + return -EIO; } ref->ino = je32_to_cpu(rr.ino); ref->xid = je32_to_cpu(rr.xid); -- cgit v1.2.3 From 027d9ac2c8de9f70b7319e08dee121b8b85c8d88 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 15 Nov 2010 21:20:05 +0300 Subject: jffs2: typo in comment It says FB instead of FS (file system). Signed-off-by: Dan Carpenter Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/jffs2_fs_sb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index f864005de64c..0bc6a6c80a56 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -144,4 +144,4 @@ struct jffs2_sb_info { void *os_priv; }; -#endif /* _JFFS2_FB_SB */ +#endif /* _JFFS2_FS_SB */ -- cgit v1.2.3 From 6e5f15c93dc745d46c2bb9e4597b44463203844b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 24 Nov 2010 17:17:34 -0500 Subject: nfsd4: replace unintuitive match_clientid_establishment Reviewed-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4d542cfd6960..0c61cc1eadca 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1135,19 +1135,13 @@ find_unconfirmed_client(clientid_t *clid) } /* - * Return 1 iff clp's clientid establishment method matches the use_exchange_id - * parameter. Matching is based on the fact the at least one of the - * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1 - * * FIXME: we need to unify the clientid namespaces for nfsv4.x * and correctly deal with client upgrade/downgrade in EXCHANGE_ID * and SET_CLIENTID{,_CONFIRM} */ -static inline int -match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id) +static bool clp_used_exchangeid(struct nfs4_client *clp) { - bool has_exchange_flags = (clp->cl_exchange_flags != 0); - return use_exchange_id == has_exchange_flags; + return clp->cl_exchange_flags != 0; } static struct nfs4_client * @@ -1158,7 +1152,7 @@ find_confirmed_client_by_str(const char *dname, unsigned int hashval, list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { if (same_name(clp->cl_recdir, dname) && - match_clientid_establishment(clp, use_exchange_id)) + clp_used_exchangeid(clp) == use_exchange_id) return clp; } return NULL; @@ -1172,7 +1166,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { if (same_name(clp->cl_recdir, dname) && - match_clientid_establishment(clp, use_exchange_id)) + clp_used_exchangeid(clp) == use_exchange_id) return clp; } return NULL; -- cgit v1.2.3 From e203d506bd221bfa5b3acbb7336ae7b7646636a4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 24 Nov 2010 17:30:54 -0500 Subject: nfsd4: fix mixed 4.0/4.1 handling, 4.1 reboot Instead of failing to find client entries which don't match the minorversion, we should be finding them, then either erroring out or expiring them as appropriate. This also fixes a problem which would cause the 4.1 server to fail to recognize clients after a second reboot. Reported-by: Casey Bodley Reviewed-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 1 - fs/nfsd/nfs4state.c | 37 +++++++++++++++++-------------------- 2 files changed, 17 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 7e26caab2a26..ffb59ef6f82f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -302,7 +302,6 @@ purge_old(struct dentry *parent, struct dentry *child) { int status; - /* note: we currently use this path only for minorversion 0 */ if (nfs4_has_reclaimed_state(child->d_name.name, false)) return 0; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0c61cc1eadca..73adcfb2dc17 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1134,39 +1134,30 @@ find_unconfirmed_client(clientid_t *clid) return NULL; } -/* - * FIXME: we need to unify the clientid namespaces for nfsv4.x - * and correctly deal with client upgrade/downgrade in EXCHANGE_ID - * and SET_CLIENTID{,_CONFIRM} - */ static bool clp_used_exchangeid(struct nfs4_client *clp) { return clp->cl_exchange_flags != 0; -} +} static struct nfs4_client * -find_confirmed_client_by_str(const char *dname, unsigned int hashval, - bool use_exchange_id) +find_confirmed_client_by_str(const char *dname, unsigned int hashval) { struct nfs4_client *clp; list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname) && - clp_used_exchangeid(clp) == use_exchange_id) + if (same_name(clp->cl_recdir, dname)) return clp; } return NULL; } static struct nfs4_client * -find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, - bool use_exchange_id) +find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) { struct nfs4_client *clp; list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname) && - clp_used_exchangeid(clp) == use_exchange_id) + if (same_name(clp->cl_recdir, dname)) return clp; } return NULL; @@ -1357,8 +1348,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfs_ok; - conf = find_confirmed_client_by_str(dname, strhashval, true); + conf = find_confirmed_client_by_str(dname, strhashval); if (conf) { + if (!clp_used_exchangeid(conf)) { + status = nfserr_clid_inuse; /* XXX: ? */ + goto out; + } if (!same_verf(&verf, &conf->cl_verifier)) { /* 18.35.4 case 8 */ if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { @@ -1399,7 +1394,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, goto out; } - unconf = find_unconfirmed_client_by_str(dname, strhashval, true); + unconf = find_unconfirmed_client_by_str(dname, strhashval); if (unconf) { /* * Possible retry or client restart. Per 18.35.4 case 4, @@ -1799,10 +1794,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, strhashval = clientstr_hashval(dname); nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval, false); + conf = find_confirmed_client_by_str(dname, strhashval); if (conf) { /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; + if (clp_used_exchangeid(conf)) + goto out; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { char addr_str[INET6_ADDRSTRLEN]; rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, @@ -1817,7 +1814,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * has a description of SETCLIENTID request processing consisting * of 5 bullet points, labeled as CASE0 - CASE4 below. */ - unconf = find_unconfirmed_client_by_str(dname, strhashval, false); + unconf = find_unconfirmed_client_by_str(dname, strhashval); status = nfserr_resource; if (!conf) { /* @@ -1962,7 +1959,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unsigned int hash = clientstr_hashval(unconf->cl_recdir); conf = find_confirmed_client_by_str(unconf->cl_recdir, - hash, false); + hash); if (conf) { nfsd4_remove_clid_dir(conf); expire_client(conf); @@ -4106,7 +4103,7 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) unsigned int strhashval = clientstr_hashval(name); struct nfs4_client *clp; - clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id); + clp = find_confirmed_client_by_str(name, strhashval); return clp ? 1 : 0; } -- cgit v1.2.3 From 18b631f83810e95eeb2e1839889b27142bd8d6d8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 29 Nov 2010 15:28:10 -0500 Subject: nfsd: fix offset printk's in nfsd3 read/write Thanks to dysbr01@ca.com for noticing that the debugging printk in the v3 write procedure can print >2GB offsets as negative numbers: https://bugzilla.kernel.org/show_bug.cgi?id=23342 Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 5b7e3021e06b..2247fc91d5e9 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -151,10 +151,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, __be32 nfserr; u32 max_blocksize = svc_max_payload(rqstp); - dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", + dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), (unsigned long) argp->count, - (unsigned long) argp->offset); + (unsigned long long) argp->offset); /* Obtain buffer pointer for payload. * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) @@ -191,10 +191,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, __be32 nfserr; unsigned long cnt = argp->len; - dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", + dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), argp->len, - (unsigned long) argp->offset, + (unsigned long long) argp->offset, argp->stable? " stable" : ""); fh_copy(&resp->fh, &argp->fh); -- cgit v1.2.3 From 5b6a599f0da3722dea9ecc01d97f54061662ce49 Mon Sep 17 00:00:00 2001 From: "bookjovi@gmail.com" Date: Sat, 11 Dec 2010 00:21:17 -0500 Subject: nfs: add missed CONFIG_NFSD_DEPRECATED these pieces of code only make sense when CONFIG_NFSD_DEPRECATED enabled Signed-off-by: Jovi Zhang fs/nfsd/nfsctl.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4514ebbee4d6..6840ec3ceecf 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -127,6 +127,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { +#ifdef CONFIG_NFSD_DEPRECATED static int warned; if (file->f_dentry->d_name.name[0] == '.' && !warned) { printk(KERN_INFO @@ -135,6 +136,7 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size current->comm, file->f_dentry->d_name.name); warned = 1; } +#endif if (! file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return -- cgit v1.2.3 From 56560b9ae0c2d07bb5bbcec16f799d7bf756d3de Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 16 Dec 2010 09:57:15 -0500 Subject: nfsd4: 4.1 SECINFO should consume filehandle See the referenced spec language; an attempt by a 4.1 client to use the current filehandle after a secinfo call should result in a NOFILEHANDLE error. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0cdfd022bb7b..bad4bf8e4bbc 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -769,6 +769,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } else secinfo->si_exp = exp; dput(dentry); + if (cstate->minorversion) + /* See rfc 5661 section 2.6.3.1.1.8 */ + fh_put(&cstate->current_fh); return err; } -- cgit v1.2.3 From 0ff7ab46719a9c1e264b8d8e85416d59737ff13c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 16 Dec 2010 10:06:27 -0500 Subject: nfsd4: move guts of nfsd4_lookupp into helper We'll reuse this code in secinfo_no_name. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bad4bf8e4bbc..095431a35722 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -604,9 +604,7 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } -static __be32 -nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - void *arg) +static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh) { struct svc_fh tmp_fh; __be32 ret; @@ -615,13 +613,19 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = exp_pseudoroot(rqstp, &tmp_fh); if (ret) return ret; - if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { + if (tmp_fh.fh_dentry == fh->fh_dentry) { fh_put(&tmp_fh); return nfserr_noent; } fh_put(&tmp_fh); - return nfsd_lookup(rqstp, &cstate->current_fh, - "..", 2, &cstate->current_fh); + return nfsd_lookup(rqstp, fh, "..", 2, fh); +} + +static __be32 +nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + void *arg) +{ + return nfsd4_do_lookupp(rqstp, &cstate->current_fh); } static __be32 -- cgit v1.2.3 From 04f4ad16b231abbfde34c762697ad035a3af0b5f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 16 Dec 2010 09:51:13 -0500 Subject: nfsd4: implement secinfo_no_name Implementation of this operation is mandatory for NFSv4.1. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 27 +++++++++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 15 +++++++++++++-- fs/nfsd/xdr4.h | 5 +++++ include/linux/nfs4.h | 3 +++ 4 files changed, 48 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 095431a35722..f80c3997d24c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -779,6 +779,29 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return err; } +static __be32 +nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_secinfo_no_name *sin) +{ + __be32 err; + + switch (sin->sin_style) { + case NFS4_SECINFO_STYLE4_CURRENT_FH: + break; + case NFS4_SECINFO_STYLE4_PARENT: + err = nfsd4_do_lookupp(rqstp, &cstate->current_fh); + if (err) + return err; + break; + default: + return nfserr_inval; + } + exp_get(cstate->current_fh.fh_export); + sin->sin_exp = cstate->current_fh.fh_export; + fh_put(&cstate->current_fh); + return nfs_ok; +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) @@ -1327,6 +1350,10 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_RECLAIM_COMPLETE", }, + [OP_SECINFO_NO_NAME] = { + .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, + .op_name = "OP_SECINFO_NO_NAME", + }, }; static const char *nfsd4_op_name(unsigned opnum) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 71d7d339e44a..b543b2410b54 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -846,6 +846,17 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, DECODE_TAIL; } +static __be32 +nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo_no_name *sin) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(sin->sin_style); + DECODE_TAIL; +} + static __be32 nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) { @@ -1358,7 +1369,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -3162,7 +3173,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo, [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 60fce3dc5cb5..799c30c3b495 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -311,6 +311,11 @@ struct nfsd4_secinfo { struct svc_export *si_exp; /* response */ }; +struct nfsd4_secinfo_no_name { + u32 sin_style; /* request */ + struct svc_export *sin_exp; /* response */ +}; + struct nfsd4_setattr { stateid_t sa_stateid; /* request */ u32 sa_bmval[3]; /* request */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4925b22219d2..26afa3021ed6 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -136,6 +136,9 @@ #define SEQ4_STATUS_CB_PATH_DOWN_SESSION 0x00000200 #define SEQ4_STATUS_BACKCHANNEL_FAULT 0x00000400 +#define NFS4_SECINFO_STYLE4_CURRENT_FH 0 +#define NFS4_SECINFO_STYLE4_PARENT 1 + #define NFS4_MAX_UINT64 (~(u64)0) /* An NFS4 sessions server must support at least NFS4_MAX_OPS operations. -- cgit v1.2.3 From 8844355df7f4e091b03cc131e1549631238b397b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 25 Oct 2010 15:11:43 +0800 Subject: btrfs: Fix bugs in zlib workspace - Fix a race that can result in alloc_workspace > cpus. - Fix to check num_workspace after wakeup. Signed-off-by: Li Zefan --- fs/btrfs/zlib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..e5b8b22e07d6 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -75,16 +75,19 @@ again: return workspace; } - spin_unlock(&workspace_lock); if (atomic_read(&alloc_workspace) > cpus) { DEFINE_WAIT(wait); + + spin_unlock(&workspace_lock); prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); - if (atomic_read(&alloc_workspace) > cpus) + if (atomic_read(&alloc_workspace) > cpus && !num_workspace) schedule(); finish_wait(&workspace_wait, &wait); goto again; } atomic_inc(&alloc_workspace); + spin_unlock(&workspace_lock); + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); if (!workspace) { ret = -ENOMEM; -- cgit v1.2.3 From 4b72029dc3fd6ba7dc45ccd1cf0aa0ebfa209bd3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Nov 2010 08:27:27 +0800 Subject: btrfs: Fix error handling in zlib Return failure if alloc_page() fails to allocate memory, and the upper code will just give up compression. Signed-off-by: Li Zefan --- fs/btrfs/zlib.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index e5b8b22e07d6..b01558661e3b 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -225,6 +225,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, data_in = kmap(in_page); out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -1; + goto out; + } cpage_out = kmap(out_page); pages[0] = out_page; nr_pages = 1; @@ -263,6 +267,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, goto out; } out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -1; + goto out; + } cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; -- cgit v1.2.3 From 261507a02ccba9afda919852263b6bc1581ce1ef Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 17 Dec 2010 14:21:50 +0800 Subject: btrfs: Allow to add new compression algorithm Make the code aware of compression type, instead of always assuming zlib compression. Also make the zlib workspace function as common code for all compression types. Signed-off-by: Li Zefan --- fs/btrfs/btrfs_inode.h | 2 +- fs/btrfs/compression.c | 236 +++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/compression.h | 66 +++++++++---- fs/btrfs/ctree.h | 10 +- fs/btrfs/extent_io.c | 5 +- fs/btrfs/extent_io.h | 17 +++- fs/btrfs/extent_map.c | 2 + fs/btrfs/extent_map.h | 3 +- fs/btrfs/file.c | 2 + fs/btrfs/inode.c | 82 ++++++++++------ fs/btrfs/ioctl.c | 4 +- fs/btrfs/ordered-data.c | 18 +++- fs/btrfs/ordered-data.h | 8 +- fs/btrfs/super.c | 47 ++++++--- fs/btrfs/zlib.c | 253 ++++++++++-------------------------------------- 15 files changed, 473 insertions(+), 282 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6ad63f17eca0..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -157,7 +157,7 @@ struct btrfs_inode { /* * always compress this one file */ - unsigned force_compress:1; + unsigned force_compress:4; struct inode vfs_inode; }; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50bc4bd5c56..6638c9877720 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -62,6 +62,9 @@ struct compressed_bio { /* number of bytes on disk */ unsigned long compressed_len; + /* the compression algorithm for this bio */ + int compress_type; + /* number of compressed pages in the array */ unsigned long nr_pages; @@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) /* ok, we're the last bio for this extent, lets start * the decompression. */ - ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, - cb->start, - cb->orig_bio->bi_io_vec, - cb->orig_bio->bi_vcnt, - cb->compressed_len); + ret = btrfs_decompress_biovec(cb->compress_type, + cb->compressed_pages, + cb->start, + cb->orig_bio->bi_io_vec, + cb->orig_bio->bi_vcnt, + cb->compressed_len); csum_failed: if (ret) cb->errors = 1; @@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->len = uncompressed_len; cb->compressed_len = compressed_len; + cb->compress_type = extent_compress_type(bio_flags); cb->orig_bio = bio; nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / @@ -677,3 +682,224 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_put(comp_bio); return 0; } + +static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; +static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; +static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; +static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; +static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; + +struct btrfs_compress_op *btrfs_compress_op[] = { + &btrfs_zlib_compress, +}; + +int __init btrfs_init_compress(void) +{ + int i; + + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { + INIT_LIST_HEAD(&comp_idle_workspace[i]); + spin_lock_init(&comp_workspace_lock[i]); + atomic_set(&comp_alloc_workspace[i], 0); + init_waitqueue_head(&comp_workspace_wait[i]); + } + return 0; +} + +/* + * this finds an available workspace or allocates a new one + * ERR_PTR is returned if things go bad. + */ +static struct list_head *find_workspace(int type) +{ + struct list_head *workspace; + int cpus = num_online_cpus(); + int idx = type - 1; + + struct list_head *idle_workspace = &comp_idle_workspace[idx]; + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; + int *num_workspace = &comp_num_workspace[idx]; +again: + spin_lock(workspace_lock); + if (!list_empty(idle_workspace)) { + workspace = idle_workspace->next; + list_del(workspace); + (*num_workspace)--; + spin_unlock(workspace_lock); + return workspace; + + } + if (atomic_read(alloc_workspace) > cpus) { + DEFINE_WAIT(wait); + + spin_unlock(workspace_lock); + prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(alloc_workspace) > cpus && !*num_workspace) + schedule(); + finish_wait(workspace_wait, &wait); + goto again; + } + atomic_inc(alloc_workspace); + spin_unlock(workspace_lock); + + workspace = btrfs_compress_op[idx]->alloc_workspace(); + if (IS_ERR(workspace)) { + atomic_dec(alloc_workspace); + wake_up(workspace_wait); + } + return workspace; +} + +/* + * put a workspace struct back on the list or free it if we have enough + * idle ones sitting around + */ +static void free_workspace(int type, struct list_head *workspace) +{ + int idx = type - 1; + struct list_head *idle_workspace = &comp_idle_workspace[idx]; + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; + int *num_workspace = &comp_num_workspace[idx]; + + spin_lock(workspace_lock); + if (*num_workspace < num_online_cpus()) { + list_add_tail(workspace, idle_workspace); + (*num_workspace)++; + spin_unlock(workspace_lock); + goto wake; + } + spin_unlock(workspace_lock); + + btrfs_compress_op[idx]->free_workspace(workspace); + atomic_dec(alloc_workspace); +wake: + if (waitqueue_active(workspace_wait)) + wake_up(workspace_wait); +} + +/* + * cleanup function for module exit + */ +static void free_workspaces(void) +{ + struct list_head *workspace; + int i; + + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { + while (!list_empty(&comp_idle_workspace[i])) { + workspace = comp_idle_workspace[i].next; + list_del(workspace); + btrfs_compress_op[i]->free_workspace(workspace); + atomic_dec(&comp_alloc_workspace[i]); + } + } +} + +/* + * given an address space and start/len, compress the bytes. + * + * pages are allocated to hold the compressed result and stored + * in 'pages' + * + * out_pages is used to return the number of pages allocated. There + * may be pages allocated even if we return an error + * + * total_in is used to return the number of bytes actually read. It + * may be smaller then len if we had to exit early because we + * ran out of room in the pages array or because we cross the + * max_out threshold. + * + * total_out is used to return the total number of compressed bytes + * + * max_out tells us the max number of bytes that we're allowed to + * stuff into pages + */ +int btrfs_compress_pages(int type, struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) +{ + struct list_head *workspace; + int ret; + + workspace = find_workspace(type); + if (IS_ERR(workspace)) + return -1; + + ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, + start, len, pages, + nr_dest_pages, out_pages, + total_in, total_out, + max_out); + free_workspace(type, workspace); + return ret; +} + +/* + * pages_in is an array of pages with compressed data. + * + * disk_start is the starting logical offset of this array in the file + * + * bvec is a bio_vec of pages from the file that we want to decompress into + * + * vcnt is the count of pages in the biovec + * + * srclen is the number of bytes in pages_in + * + * The basic idea is that we have a bio that was created by readpages. + * The pages in the bio are for the uncompressed data, and they may not + * be contiguous. They all correspond to the range of bytes covered by + * the compressed extent. + */ +int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, + struct bio_vec *bvec, int vcnt, size_t srclen) +{ + struct list_head *workspace; + int ret; + + workspace = find_workspace(type); + if (IS_ERR(workspace)) + return -ENOMEM; + + ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, + disk_start, + bvec, vcnt, srclen); + free_workspace(type, workspace); + return ret; +} + +/* + * a less complex decompression routine. Our compressed data fits in a + * single page, and we want to read a single page out of it. + * start_byte tells us the offset into the compressed data we're interested in + */ +int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, + unsigned long start_byte, size_t srclen, size_t destlen) +{ + struct list_head *workspace; + int ret; + + workspace = find_workspace(type); + if (IS_ERR(workspace)) + return -ENOMEM; + + ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, + dest_page, start_byte, + srclen, destlen); + + free_workspace(type, workspace); + return ret; +} + +void __exit btrfs_exit_compress(void) +{ + free_workspaces(); +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..9b5f2f365b79 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -19,24 +19,22 @@ #ifndef __BTRFS_COMPRESSION_ #define __BTRFS_COMPRESSION_ -int btrfs_zlib_decompress(unsigned char *data_in, - struct page *dest_page, - unsigned long start_byte, - size_t srclen, size_t destlen); -int btrfs_zlib_compress_pages(struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, - unsigned long nr_dest_pages, - unsigned long *out_pages, - unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out); -int btrfs_zlib_decompress_biovec(struct page **pages_in, - u64 disk_start, - struct bio_vec *bvec, - int vcnt, - size_t srclen); -void btrfs_zlib_exit(void); +int btrfs_init_compress(void); +void btrfs_exit_compress(void); + +int btrfs_compress_pages(int type, struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out); +int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, + struct bio_vec *bvec, int vcnt, size_t srclen); +int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, + unsigned long start_byte, size_t srclen, size_t destlen); + int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, @@ -44,4 +42,36 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long nr_pages); int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); + +struct btrfs_compress_op { + struct list_head *(*alloc_workspace)(void); + + void (*free_workspace)(struct list_head *workspace); + + int (*compress_pages)(struct list_head *workspace, + struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out); + + int (*decompress_biovec)(struct list_head *workspace, + struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen); + + int (*decompress)(struct list_head *workspace, + unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen); +}; + +extern struct btrfs_compress_op btrfs_zlib_compress; + #endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index af52f6d7a4d8..e06534438592 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -551,9 +551,10 @@ struct btrfs_timespec { } __attribute__ ((__packed__)); enum btrfs_compression_type { - BTRFS_COMPRESS_NONE = 0, - BTRFS_COMPRESS_ZLIB = 1, - BTRFS_COMPRESS_LAST = 2, + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_TYPES = 1, + BTRFS_COMPRESS_LAST = 2, }; struct btrfs_inode_item { @@ -895,7 +896,8 @@ struct btrfs_fs_info { */ u64 last_trans_log_full_commit; u64 open_ioctl_trans; - unsigned long mount_opt; + unsigned long mount_opt:20; + unsigned long compress_type:4; u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5e7a94d7da89..f1d198128959 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, BUG_ON(extent_map_end(em) <= cur); BUG_ON(end < cur); - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { this_bio_flag = EXTENT_BIO_COMPRESSED; + extent_set_compress_type(&this_bio_flag, + em->compress_type); + } iosize = min(extent_map_end(em) - cur, end - cur + 1); cur_end = min(extent_map_end(em) - 1, end); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4183c8178f01..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -20,8 +20,12 @@ #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) -/* flags for bio submission */ +/* + * flags for bio submission. The high bits indicate the compression + * type for this bio + */ #define EXTENT_BIO_COMPRESSED 1 +#define EXTENT_BIO_FLAG_SHIFT 16 /* these are bit numbers for test/set bit */ #define EXTENT_BUFFER_UPTODATE 0 @@ -135,6 +139,17 @@ struct extent_buffer { wait_queue_head_t lock_wq; }; +static inline void extent_set_compress_type(unsigned long *bio_flags, + int compress_type) +{ + *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; +} + +static inline int extent_compress_type(unsigned long bio_flags) +{ + return bio_flags >> EXTENT_BIO_FLAG_SHIFT; +} + struct extent_map_tree; static inline struct extent_state *extent_state_next(struct extent_state *state) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -3,6 +3,7 @@ #include #include #include +#include "ctree.h" #include "extent_map.h" @@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) return em; em->in_tree = 0; em->flags = 0; + em->compress_type = BTRFS_COMPRESS_NONE; atomic_set(&em->refs, 1); return em; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -26,7 +26,8 @@ struct extent_map { unsigned long flags; struct block_device *bdev; atomic_t refs; - int in_tree; + unsigned int in_tree:1; + unsigned int compress_type:4; }; struct extent_map_tree { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 66836d85763b..05df688c96f4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -224,6 +224,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->bdev = em->bdev; split->flags = flags; + split->compress_type = em->compress_type; ret = add_extent_mapping(em_tree, split); BUG_ON(ret); free_extent_map(split); @@ -238,6 +239,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->len = em->start + em->len - (start + len); split->bdev = em->bdev; split->flags = flags; + split->compress_type = em->compress_type; if (compressed) { split->block_len = em->block_len; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f9194438f7c..ba563b2a5d6c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, size_t cur_size = size; size_t datasize; unsigned long offset; - int use_compress = 0; + int compress_type = BTRFS_COMPRESS_NONE; if (compressed_size && compressed_pages) { - use_compress = 1; + compress_type = root->fs_info->compress_type; cur_size = compressed_size; } @@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_ram_bytes(leaf, ei, size); ptr = btrfs_file_extent_inline_start(ei); - if (use_compress) { + if (compress_type != BTRFS_COMPRESS_NONE) { struct page *cpage; int i = 0; while (compressed_size > 0) { @@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, compressed_size -= cur_size; } btrfs_set_file_extent_compression(leaf, ei, - BTRFS_COMPRESS_ZLIB); + compress_type); } else { page = find_get_page(inode->i_mapping, start >> PAGE_CACHE_SHIFT); @@ -263,6 +263,7 @@ struct async_extent { u64 compressed_size; struct page **pages; unsigned long nr_pages; + int compress_type; struct list_head list; }; @@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, u64 start, u64 ram_size, u64 compressed_size, struct page **pages, - unsigned long nr_pages) + unsigned long nr_pages, + int compress_type) { struct async_extent *async_extent; @@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, async_extent->compressed_size = compressed_size; async_extent->pages = pages; async_extent->nr_pages = nr_pages; + async_extent->compress_type = compress_type; list_add_tail(&async_extent->list, &cow->extents); return 0; } @@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode, unsigned long max_uncompressed = 128 * 1024; int i; int will_compress; + int compress_type = root->fs_info->compress_type; actual_end = min_t(u64, isize, end + 1); again: @@ -381,12 +385,16 @@ again: WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); - ret = btrfs_zlib_compress_pages(inode->i_mapping, start, - total_compressed, pages, - nr_pages, &nr_pages_ret, - &total_in, - &total_compressed, - max_compressed); + if (BTRFS_I(inode)->force_compress) + compress_type = BTRFS_I(inode)->force_compress; + + ret = btrfs_compress_pages(compress_type, + inode->i_mapping, start, + total_compressed, pages, + nr_pages, &nr_pages_ret, + &total_in, + &total_compressed, + max_compressed); if (!ret) { unsigned long offset = total_compressed & @@ -493,7 +501,8 @@ again: * and will submit them to the elevator. */ add_async_extent(async_cow, start, num_bytes, - total_compressed, pages, nr_pages_ret); + total_compressed, pages, nr_pages_ret, + compress_type); if (start + num_bytes < end) { start += num_bytes; @@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); + add_async_extent(async_cow, start, end - start + 1, + 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; } @@ -640,6 +650,7 @@ retry: em->block_start = ins.objectid; em->block_len = ins.offset; em->bdev = root->fs_info->fs_devices->latest_bdev; + em->compress_type = async_extent->compress_type; set_bit(EXTENT_FLAG_PINNED, &em->flags); set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); @@ -656,11 +667,13 @@ retry: async_extent->ram_size - 1, 0); } - ret = btrfs_add_ordered_extent(inode, async_extent->start, - ins.objectid, - async_extent->ram_size, - ins.offset, - BTRFS_ORDERED_COMPRESSED); + ret = btrfs_add_ordered_extent_compress(inode, + async_extent->start, + ins.objectid, + async_extent->ram_size, + ins.offset, + BTRFS_ORDERED_COMPRESSED, + async_extent->compress_type); BUG_ON(ret); /* @@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; - int compressed = 0; + int compress_type = 0; int ret; bool nolock = false; @@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) - compressed = 1; + compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { - BUG_ON(compressed); + BUG_ON(compress_type); ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, - compressed, 0, 0, + compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); unpin_extent_cache(&BTRFS_I(inode)->extent_tree, ordered_extent->file_offset, @@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { logical = em->block_start; failrec->bio_flags = EXTENT_BIO_COMPRESSED; + extent_set_compress_type(&failrec->bio_flags, + em->compress_type); } failrec->logical = logical; free_extent_map(em); @@ -4930,8 +4945,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, size_t max_size; unsigned long inline_size; unsigned long ptr; + int compress_type; WARN_ON(pg_offset != 0); + compress_type = btrfs_file_extent_compression(leaf, item); max_size = btrfs_file_extent_ram_bytes(leaf, item); inline_size = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(leaf, path->slots[0])); @@ -4941,8 +4958,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, read_extent_buffer(leaf, tmp, ptr, inline_size); max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); - ret = btrfs_zlib_decompress(tmp, page, extent_offset, - inline_size, max_size); + ret = btrfs_decompress(compress_type, tmp, page, + extent_offset, inline_size, max_size); if (ret) { char *kaddr = kmap_atomic(page, KM_USER0); unsigned long copy_size = min_t(u64, @@ -4984,7 +5001,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; - int compressed; + int compress_type; again: read_lock(&em_tree->lock); @@ -5043,7 +5060,7 @@ again: found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; - compressed = btrfs_file_extent_compression(leaf, item); + compress_type = btrfs_file_extent_compression(leaf, item); if (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC) { extent_end = extent_start + @@ -5089,8 +5106,9 @@ again: em->block_start = EXTENT_MAP_HOLE; goto insert; } - if (compressed) { + if (compress_type != BTRFS_COMPRESS_NONE) { set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + em->compress_type = compress_type; em->block_start = bytenr; em->block_len = btrfs_file_extent_disk_num_bytes(leaf, item); @@ -5124,12 +5142,14 @@ again: em->len = (copy_size + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); em->orig_start = EXTENT_MAP_INLINE; - if (compressed) + if (compress_type) { set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + em->compress_type = compress_type; + } ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { - if (btrfs_file_extent_compression(leaf, item) == - BTRFS_COMPRESS_ZLIB) { + if (btrfs_file_extent_compression(leaf, item) != + BTRFS_COMPRESS_NONE) { ret = uncompress_inline(path, inode, page, pg_offset, extent_offset, item); @@ -6479,7 +6499,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; ei->dummy_inode = 0; - ei->force_compress = 0; + ei->force_compress = BTRFS_COMPRESS_NONE; inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree, GFP_NOFS); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..8cb86d4d763c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -683,7 +683,7 @@ static int btrfs_defrag_file(struct file *file, total_read++; mutex_lock(&inode->i_mutex); if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = 1; + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_ZLIB; ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (ret) @@ -781,7 +781,7 @@ loop_unlock: atomic_dec(&root->fs_info->async_submit_draining); mutex_lock(&inode->i_mutex); - BTRFS_I(inode)->force_compress = 0; + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; mutex_unlock(&inode->i_mutex); } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ae7737e352c9..2b61e1ddcd99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, */ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, - int type, int dio) + int type, int dio, int compress_type) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->disk_len = disk_len; entry->bytes_left = len; entry->inode = inode; + entry->compress_type = compress_type; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type) { return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 0); + disk_len, type, 0, + BTRFS_COMPRESS_NONE); } int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type) { return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 1); + disk_len, type, 1, + BTRFS_COMPRESS_NONE); +} + +int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, + u64 start, u64 len, u64 disk_len, + int type, int compress_type) +{ + return __btrfs_add_ordered_extent(inode, file_offset, start, len, + disk_len, type, 0, + compress_type); } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 61dca83119dd..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -68,7 +68,7 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ -#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ +#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ @@ -93,6 +93,9 @@ struct btrfs_ordered_extent { /* flags (described above) */ unsigned long flags; + /* compression algorithm */ + int compress_type; + /* reference count */ atomic_t refs; @@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); +int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, + u64 start, u64 len, u64 disk_len, + int type, int compress_type); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 61bd79abb805..f348f2b93164 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -69,9 +69,9 @@ enum { Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, - Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, - Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, - Opt_user_subvol_rm_allowed, + Opt_compress_type, Opt_compress_force, Opt_compress_force_type, + Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, + Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, }; static match_table_t tokens = { @@ -86,7 +86,9 @@ static match_table_t tokens = { {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, {Opt_compress, "compress"}, + {Opt_compress_type, "compress=%s"}, {Opt_compress_force, "compress-force"}, + {Opt_compress_force_type, "compress-force=%s"}, {Opt_ssd, "ssd"}, {Opt_ssd_spread, "ssd_spread"}, {Opt_nossd, "nossd"}, @@ -112,6 +114,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) char *p, *num, *orig; int intarg; int ret = 0; + char *compress_type; + bool compress_force = false; if (!options) return 0; @@ -154,14 +158,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, NODATACOW); btrfs_set_opt(info->mount_opt, NODATASUM); break; - case Opt_compress: - printk(KERN_INFO "btrfs: use compression\n"); - btrfs_set_opt(info->mount_opt, COMPRESS); - break; case Opt_compress_force: - printk(KERN_INFO "btrfs: forcing compression\n"); - btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); + case Opt_compress_force_type: + compress_force = true; + case Opt_compress: + case Opt_compress_type: + if (token == Opt_compress || + token == Opt_compress_force || + strcmp(args[0].from, "zlib") == 0) { + compress_type = "zlib"; + info->compress_type = BTRFS_COMPRESS_ZLIB; + } else { + ret = -EINVAL; + goto out; + } + btrfs_set_opt(info->mount_opt, COMPRESS); + if (compress_force) { + btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); + pr_info("btrfs: force %s compression\n", + compress_type); + } else + pr_info("btrfs: use %s compression\n", + compress_type); break; case Opt_ssd: printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); @@ -898,10 +917,14 @@ static int __init init_btrfs_fs(void) if (err) return err; - err = btrfs_init_cachep(); + err = btrfs_init_compress(); if (err) goto free_sysfs; + err = btrfs_init_cachep(); + if (err) + goto free_compress; + err = extent_io_init(); if (err) goto free_cachep; @@ -929,6 +952,8 @@ free_extent_io: extent_io_exit(); free_cachep: btrfs_destroy_cachep(); +free_compress: + btrfs_exit_compress(); free_sysfs: btrfs_exit_sysfs(); return err; @@ -943,7 +968,7 @@ static void __exit exit_btrfs_fs(void) unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); btrfs_cleanup_fs_uuids(); - btrfs_zlib_exit(); + btrfs_exit_compress(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b01558661e3b..9a3e693917f2 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -32,15 +32,6 @@ #include #include "compression.h" -/* Plan: call deflate() with avail_in == *sourcelen, - avail_out = *dstlen - 12 and flush == Z_FINISH. - If it doesn't manage to finish, call it again with - avail_in == 0 and avail_out set to the remaining 12 - bytes for it to clean up. - Q: Is 12 bytes sufficient? -*/ -#define STREAM_END_SPACE 12 - struct workspace { z_stream inf_strm; z_stream def_strm; @@ -48,155 +39,51 @@ struct workspace { struct list_head list; }; -static LIST_HEAD(idle_workspace); -static DEFINE_SPINLOCK(workspace_lock); -static unsigned long num_workspace; -static atomic_t alloc_workspace = ATOMIC_INIT(0); -static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); +static void zlib_free_workspace(struct list_head *ws) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); -/* - * this finds an available zlib workspace or allocates a new one - * NULL or an ERR_PTR is returned if things go bad. - */ -static struct workspace *find_zlib_workspace(void) + vfree(workspace->def_strm.workspace); + vfree(workspace->inf_strm.workspace); + kfree(workspace->buf); + kfree(workspace); +} + +static struct list_head *zlib_alloc_workspace(void) { struct workspace *workspace; - int ret; - int cpus = num_online_cpus(); - -again: - spin_lock(&workspace_lock); - if (!list_empty(&idle_workspace)) { - workspace = list_entry(idle_workspace.next, struct workspace, - list); - list_del(&workspace->list); - num_workspace--; - spin_unlock(&workspace_lock); - return workspace; - - } - if (atomic_read(&alloc_workspace) > cpus) { - DEFINE_WAIT(wait); - - spin_unlock(&workspace_lock); - prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); - if (atomic_read(&alloc_workspace) > cpus && !num_workspace) - schedule(); - finish_wait(&workspace_wait, &wait); - goto again; - } - atomic_inc(&alloc_workspace); - spin_unlock(&workspace_lock); workspace = kzalloc(sizeof(*workspace), GFP_NOFS); - if (!workspace) { - ret = -ENOMEM; - goto fail; - } + if (!workspace) + return ERR_PTR(-ENOMEM); workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); - if (!workspace->def_strm.workspace) { - ret = -ENOMEM; - goto fail; - } workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); - if (!workspace->inf_strm.workspace) { - ret = -ENOMEM; - goto fail_inflate; - } workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!workspace->buf) { - ret = -ENOMEM; - goto fail_kmalloc; - } - return workspace; - -fail_kmalloc: - vfree(workspace->inf_strm.workspace); -fail_inflate: - vfree(workspace->def_strm.workspace); -fail: - kfree(workspace); - atomic_dec(&alloc_workspace); - wake_up(&workspace_wait); - return ERR_PTR(ret); -} - -/* - * put a workspace struct back on the list or free it if we have enough - * idle ones sitting around - */ -static int free_workspace(struct workspace *workspace) -{ - spin_lock(&workspace_lock); - if (num_workspace < num_online_cpus()) { - list_add_tail(&workspace->list, &idle_workspace); - num_workspace++; - spin_unlock(&workspace_lock); - if (waitqueue_active(&workspace_wait)) - wake_up(&workspace_wait); - return 0; - } - spin_unlock(&workspace_lock); - vfree(workspace->def_strm.workspace); - vfree(workspace->inf_strm.workspace); - kfree(workspace->buf); - kfree(workspace); + if (!workspace->def_strm.workspace || + !workspace->inf_strm.workspace || !workspace->buf) + goto fail; - atomic_dec(&alloc_workspace); - if (waitqueue_active(&workspace_wait)) - wake_up(&workspace_wait); - return 0; -} + INIT_LIST_HEAD(&workspace->list); -/* - * cleanup function for module exit - */ -static void free_workspaces(void) -{ - struct workspace *workspace; - while (!list_empty(&idle_workspace)) { - workspace = list_entry(idle_workspace.next, struct workspace, - list); - list_del(&workspace->list); - vfree(workspace->def_strm.workspace); - vfree(workspace->inf_strm.workspace); - kfree(workspace->buf); - kfree(workspace); - atomic_dec(&alloc_workspace); - } + return &workspace->list; +fail: + zlib_free_workspace(&workspace->list); + return ERR_PTR(-ENOMEM); } -/* - * given an address space and start/len, compress the bytes. - * - * pages are allocated to hold the compressed result and stored - * in 'pages' - * - * out_pages is used to return the number of pages allocated. There - * may be pages allocated even if we return an error - * - * total_in is used to return the number of bytes actually read. It - * may be smaller then len if we had to exit early because we - * ran out of room in the pages array or because we cross the - * max_out threshold. - * - * total_out is used to return the total number of compressed bytes - * - * max_out tells us the max number of bytes that we're allowed to - * stuff into pages - */ -int btrfs_zlib_compress_pages(struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, - unsigned long nr_dest_pages, - unsigned long *out_pages, - unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) +static int zlib_compress_pages(struct list_head *ws, + struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) { + struct workspace *workspace = list_entry(ws, struct workspace, list); int ret; - struct workspace *workspace; char *data_in; char *cpage_out; int nr_pages = 0; @@ -208,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, *total_out = 0; *total_in = 0; - workspace = find_zlib_workspace(); - if (IS_ERR(workspace)) - return -1; - if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { printk(KERN_WARNING "deflateInit failed\n"); ret = -1; @@ -325,35 +208,18 @@ out: kunmap(in_page); page_cache_release(in_page); } - free_workspace(workspace); return ret; } -/* - * pages_in is an array of pages with compressed data. - * - * disk_start is the starting logical offset of this array in the file - * - * bvec is a bio_vec of pages from the file that we want to decompress into - * - * vcnt is the count of pages in the biovec - * - * srclen is the number of bytes in pages_in - * - * The basic idea is that we have a bio that was created by readpages. - * The pages in the bio are for the uncompressed data, and they may not - * be contiguous. They all correspond to the range of bytes covered by - * the compressed extent. - */ -int btrfs_zlib_decompress_biovec(struct page **pages_in, - u64 disk_start, - struct bio_vec *bvec, - int vcnt, - size_t srclen) +static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen) { + struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - struct workspace *workspace; char *data_in; size_t total_out = 0; unsigned long page_bytes_left; @@ -371,10 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, unsigned long current_buf_start; char *kaddr; - workspace = find_zlib_workspace(); - if (IS_ERR(workspace)) - return -ENOMEM; - data_in = kmap(pages_in[page_in_index]); workspace->inf_strm.next_in = data_in; workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); @@ -400,8 +262,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); - ret = -1; - goto out; + return -1; } while (workspace->inf_strm.total_in < srclen) { ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); @@ -527,35 +388,21 @@ done: zlib_inflateEnd(&workspace->inf_strm); if (data_in) kunmap(pages_in[page_in_index]); -out: - free_workspace(workspace); return ret; } -/* - * a less complex decompression routine. Our compressed data fits in a - * single page, and we want to read a single page out of it. - * start_byte tells us the offset into the compressed data we're interested in - */ -int btrfs_zlib_decompress(unsigned char *data_in, - struct page *dest_page, - unsigned long start_byte, - size_t srclen, size_t destlen) +static int zlib_decompress(struct list_head *ws, unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen) { + struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - struct workspace *workspace; unsigned long bytes_left = destlen; unsigned long total_out = 0; char *kaddr; - if (destlen > PAGE_CACHE_SIZE) - return -ENOMEM; - - workspace = find_zlib_workspace(); - if (IS_ERR(workspace)) - return -ENOMEM; - workspace->inf_strm.next_in = data_in; workspace->inf_strm.avail_in = srclen; workspace->inf_strm.total_in = 0; @@ -576,8 +423,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); - ret = -1; - goto out; + return -1; } while (bytes_left > 0) { @@ -627,12 +473,13 @@ next: ret = 0; zlib_inflateEnd(&workspace->inf_strm); -out: - free_workspace(workspace); return ret; } -void btrfs_zlib_exit(void) -{ - free_workspaces(); -} +struct btrfs_compress_op btrfs_zlib_compress = { + .alloc_workspace = zlib_alloc_workspace, + .free_workspace = zlib_free_workspace, + .compress_pages = zlib_compress_pages, + .decompress_biovec = zlib_decompress_biovec, + .decompress = zlib_decompress, +}; -- cgit v1.2.3 From a6fa6fae40ec336c7df6155255ae64ebef43a8bc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 25 Oct 2010 15:12:26 +0800 Subject: btrfs: Add lzo compression support Lzo is a much faster compression algorithm than gzib, so would allow more users to enable transparent compression, and some users can choose from compression ratio and speed for different applications Usage: # mount -t btrfs -o compress[=] dev /mnt or # mount -t btrfs -o compress-force[=] dev /mnt "-o compress" without argument is still allowed for compatability. Compatibility: If we mount a filesystem with lzo compression, it will not be able be mounted in old kernels. One reason is, otherwise btrfs will directly dump compressed data, which sits in inline extent, to user. Performance: The test copied a linux source tarball (~400M) from an ext4 partition to the btrfs partition, and then extracted it. (time in second) lzo zlib nocompress copy: 10.6 21.7 14.9 extract: 70.1 94.4 66.6 (data size in MB) lzo zlib nocompress copy: 185.87 108.69 394.49 extract: 193.80 132.36 381.21 Changelog: v1 -> v2: - Select LZO_COMPRESS and LZO_DECOMPRESS in btrfs Kconfig. - Add incompability flag. - Fix error handling in compress code. Signed-off-by: Li Zefan --- fs/btrfs/Kconfig | 2 + fs/btrfs/Makefile | 2 +- fs/btrfs/compression.c | 1 + fs/btrfs/compression.h | 1 + fs/btrfs/ctree.h | 9 +- fs/btrfs/disk-io.c | 8 +- fs/btrfs/lzo.c | 509 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/super.c | 3 + 8 files changed, 527 insertions(+), 8 deletions(-) create mode 100644 fs/btrfs/lzo.c (limited to 'fs') diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -4,6 +4,8 @@ config BTRFS_FS select LIBCRC32C select ZLIB_INFLATE select ZLIB_DEFLATE + select LZO_COMPRESS + select LZO_DECOMPRESS help Btrfs is a new filesystem with extents, writable snapshotting, support for multiple devices and many more features. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o acl.o free-space-cache.o zlib.o \ + export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6638c9877720..8faa2df9e719 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -691,6 +691,7 @@ static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; struct btrfs_compress_op *btrfs_compress_op[] = { &btrfs_zlib_compress, + &btrfs_lzo_compress, }; int __init btrfs_init_compress(void) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 9b5f2f365b79..f7ce217113fa 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -73,5 +73,6 @@ struct btrfs_compress_op { }; extern struct btrfs_compress_op btrfs_zlib_compress; +extern struct btrfs_compress_op btrfs_lzo_compress; #endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e06534438592..53b984623983 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -398,13 +398,15 @@ struct btrfs_super_block { #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL #define BTRFS_FEATURE_INCOMPAT_SUPP \ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ - BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) /* * A leaf is full of items. offset and size tell us where to find @@ -553,8 +555,9 @@ struct btrfs_timespec { enum btrfs_compression_type { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, - BTRFS_COMPRESS_TYPES = 1, - BTRFS_COMPRESS_LAST = 2, + BTRFS_COMPRESS_LZO = 2, + BTRFS_COMPRESS_TYPES = 2, + BTRFS_COMPRESS_LAST = 3, }; struct btrfs_inode_item { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a5d2249e6da5..f88eb2ce7919 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1744,10 +1744,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, } features = btrfs_super_incompat_flags(disk_super); - if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { - features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - btrfs_set_super_incompat_flags(disk_super, features); - } + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + btrfs_set_super_incompat_flags(disk_super, features); features = btrfs_super_compat_ro_flags(disk_super) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..523b144e2aec --- /dev/null +++ b/fs/btrfs/lzo.c @@ -0,0 +1,509 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "compression.h" + +#define LZO_LEN 4 + +struct workspace { + void *mem; + void *buf; /* where compressed data goes */ + void *cbuf; /* where decompressed data goes */ + struct list_head list; +}; + +static void lzo_free_workspace(struct list_head *ws) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + + vfree(workspace->buf); + vfree(workspace->cbuf); + vfree(workspace->mem); + kfree(workspace); +} + +static struct list_head *lzo_alloc_workspace(void) +{ + struct workspace *workspace; + + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); + if (!workspace) + return ERR_PTR(-ENOMEM); + + workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); + workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); + workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); + if (!workspace->mem || !workspace->buf || !workspace->cbuf) + goto fail; + + INIT_LIST_HEAD(&workspace->list); + + return &workspace->list; +fail: + lzo_free_workspace(&workspace->list); + return ERR_PTR(-ENOMEM); +} + +static inline void write_compress_length(char *buf, size_t len) +{ + __le32 dlen; + + dlen = cpu_to_le32(len); + memcpy(buf, &dlen, LZO_LEN); +} + +static inline size_t read_compress_length(char *buf) +{ + __le32 dlen; + + memcpy(&dlen, buf, LZO_LEN); + return le32_to_cpu(dlen); +} + +static int lzo_compress_pages(struct list_head *ws, + struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + int ret = 0; + char *data_in; + char *cpage_out; + int nr_pages = 0; + struct page *in_page = NULL; + struct page *out_page = NULL; + unsigned long bytes_left; + + size_t in_len; + size_t out_len; + char *buf; + unsigned long tot_in = 0; + unsigned long tot_out = 0; + unsigned long pg_bytes_left; + unsigned long out_offset; + unsigned long bytes; + + *out_pages = 0; + *total_out = 0; + *total_in = 0; + + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + data_in = kmap(in_page); + + /* + * store the size of all chunks of compressed data in + * the first 4 bytes + */ + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + cpage_out = kmap(out_page); + out_offset = LZO_LEN; + tot_out = LZO_LEN; + pages[0] = out_page; + nr_pages = 1; + pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + + /* compress at most one page of data each time */ + in_len = min(len, PAGE_CACHE_SIZE); + while (tot_in < len) { + ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, + &out_len, workspace->mem); + if (ret != LZO_E_OK) { + printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", + ret); + ret = -1; + goto out; + } + + /* store the size of this chunk of compressed data */ + write_compress_length(cpage_out + out_offset, out_len); + tot_out += LZO_LEN; + out_offset += LZO_LEN; + pg_bytes_left -= LZO_LEN; + + tot_in += in_len; + tot_out += out_len; + + /* copy bytes from the working buffer into the pages */ + buf = workspace->cbuf; + while (out_len) { + bytes = min_t(unsigned long, pg_bytes_left, out_len); + + memcpy(cpage_out + out_offset, buf, bytes); + + out_len -= bytes; + pg_bytes_left -= bytes; + buf += bytes; + out_offset += bytes; + + /* + * we need another page for writing out. + * + * Note if there's less than 4 bytes left, we just + * skip to a new page. + */ + if ((out_len == 0 && pg_bytes_left < LZO_LEN) || + pg_bytes_left == 0) { + if (pg_bytes_left) { + memset(cpage_out + out_offset, 0, + pg_bytes_left); + tot_out += pg_bytes_left; + } + + /* we're done, don't allocate new page */ + if (out_len == 0 && tot_in >= len) + break; + + kunmap(out_page); + if (nr_pages == nr_dest_pages) { + out_page = NULL; + ret = -1; + goto out; + } + + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + cpage_out = kmap(out_page); + pages[nr_pages++] = out_page; + + pg_bytes_left = PAGE_CACHE_SIZE; + out_offset = 0; + } + } + + /* we're making it bigger, give up */ + if (tot_in > 8192 && tot_in < tot_out) + goto out; + + /* we're all done */ + if (tot_in >= len) + break; + + if (tot_out > max_out) + break; + + bytes_left = len - tot_in; + kunmap(in_page); + page_cache_release(in_page); + + start += PAGE_CACHE_SIZE; + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + data_in = kmap(in_page); + in_len = min(bytes_left, PAGE_CACHE_SIZE); + } + + if (tot_out > tot_in) + goto out; + + /* store the size of all chunks of compressed data */ + cpage_out = kmap(pages[0]); + write_compress_length(cpage_out, tot_out); + + kunmap(pages[0]); + + ret = 0; + *total_out = tot_out; + *total_in = tot_in; +out: + *out_pages = nr_pages; + if (out_page) + kunmap(out_page); + + if (in_page) { + kunmap(in_page); + page_cache_release(in_page); + } + + return ret; +} + +static int lzo_decompress_biovec(struct list_head *ws, + struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + int ret = 0; + char *data_in; + unsigned long page_bytes_left; + unsigned long page_in_index = 0; + unsigned long page_out_index = 0; + struct page *page_out; + unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / + PAGE_CACHE_SIZE; + unsigned long buf_start; + unsigned long buf_offset = 0; + unsigned long bytes; + unsigned long working_bytes; + unsigned long pg_offset; + unsigned long start_byte; + unsigned long current_buf_start; + char *kaddr; + + size_t in_len; + size_t out_len; + unsigned long in_offset; + unsigned long in_page_bytes_left; + unsigned long tot_in; + unsigned long tot_out; + unsigned long tot_len; + char *buf; + + data_in = kmap(pages_in[0]); + tot_len = read_compress_length(data_in); + + tot_in = LZO_LEN; + in_offset = LZO_LEN; + tot_len = min_t(size_t, srclen, tot_len); + in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + + tot_out = 0; + page_out = bvec[0].bv_page; + page_bytes_left = PAGE_CACHE_SIZE; + pg_offset = 0; + + while (tot_in < tot_len) { + in_len = read_compress_length(data_in + in_offset); + in_page_bytes_left -= LZO_LEN; + in_offset += LZO_LEN; + tot_in += LZO_LEN; + + tot_in += in_len; + working_bytes = in_len; + + /* fast path: avoid using the working buffer */ + if (in_page_bytes_left >= in_len) { + buf = data_in + in_offset; + bytes = in_len; + goto cont; + } + + /* copy bytes from the pages into the working buffer */ + buf = workspace->cbuf; + buf_offset = 0; + while (working_bytes) { + bytes = min(working_bytes, in_page_bytes_left); + + memcpy(buf + buf_offset, data_in + in_offset, bytes); + buf_offset += bytes; +cont: + working_bytes -= bytes; + in_page_bytes_left -= bytes; + in_offset += bytes; + + /* check if we need to pick another page */ + if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) + || in_page_bytes_left == 0) { + tot_in += in_page_bytes_left; + + if (working_bytes == 0 && tot_in >= tot_len) + break; + + kunmap(pages_in[page_in_index]); + page_in_index++; + if (page_in_index >= total_pages_in) { + ret = -1; + data_in = NULL; + goto done; + } + data_in = kmap(pages_in[page_in_index]); + + in_page_bytes_left = PAGE_CACHE_SIZE; + in_offset = 0; + } + } + + out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); + ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, + &out_len); + if (ret != LZO_E_OK) { + printk(KERN_WARNING "btrfs decompress failed\n"); + ret = -1; + break; + } + + /* + * buf start is the byte offset we're of the start of + * our workspace buffer + */ + buf_start = tot_out; + + /* tot_out is the last byte of the workspace buffer */ + tot_out += out_len; + + working_bytes = tot_out - buf_start; + + /* + * start_byte is the first byte of the page we're currently + * copying into relative to the start of the compressed data. + */ + start_byte = page_offset(page_out) - disk_start; + + if (working_bytes == 0) { + /* we didn't make progress in this inflate + * call, we're done + */ + break; + } + + /* we haven't yet hit data corresponding to this page */ + if (tot_out <= start_byte) + continue; + + /* + * the start of the data we care about is offset into + * the middle of our working buffer + */ + if (tot_out > start_byte && buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes -= buf_offset; + } else { + buf_offset = 0; + } + current_buf_start = buf_start; + + /* copy bytes from the working buffer into the pages */ + while (working_bytes > 0) { + bytes = min(PAGE_CACHE_SIZE - pg_offset, + PAGE_CACHE_SIZE - buf_offset); + bytes = min(bytes, working_bytes); + kaddr = kmap_atomic(page_out, KM_USER0); + memcpy(kaddr + pg_offset, workspace->buf + buf_offset, + bytes); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page_out); + + pg_offset += bytes; + page_bytes_left -= bytes; + buf_offset += bytes; + working_bytes -= bytes; + current_buf_start += bytes; + + /* check if we need to pick another page */ + if (page_bytes_left == 0) { + page_out_index++; + if (page_out_index >= vcnt) { + ret = 0; + goto done; + } + + page_out = bvec[page_out_index].bv_page; + pg_offset = 0; + page_bytes_left = PAGE_CACHE_SIZE; + start_byte = page_offset(page_out) - disk_start; + + /* + * make sure our new page is covered by this + * working buffer + */ + if (tot_out <= start_byte) + break; + + /* the next page in the biovec might not + * be adjacent to the last page, but it + * might still be found inside this working + * buffer. bump our offset pointer + */ + if (tot_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = tot_out - start_byte; + current_buf_start = buf_start + + buf_offset; + } + } + } + } +done: + if (data_in) + kunmap(pages_in[page_in_index]); + return ret; +} + +static int lzo_decompress(struct list_head *ws, unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + size_t in_len; + size_t out_len; + size_t tot_len; + int ret = 0; + char *kaddr; + unsigned long bytes; + + BUG_ON(srclen < LZO_LEN); + + tot_len = read_compress_length(data_in); + data_in += LZO_LEN; + + in_len = read_compress_length(data_in); + data_in += LZO_LEN; + + out_len = PAGE_CACHE_SIZE; + ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); + if (ret != LZO_E_OK) { + printk(KERN_WARNING "btrfs decompress failed!\n"); + ret = -1; + goto out; + } + + if (out_len < start_byte) { + ret = -1; + goto out; + } + + bytes = min_t(unsigned long, destlen, out_len - start_byte); + + kaddr = kmap_atomic(dest_page, KM_USER0); + memcpy(kaddr, workspace->buf + start_byte, bytes); + kunmap_atomic(kaddr, KM_USER0); +out: + return ret; +} + +struct btrfs_compress_op btrfs_lzo_compress = { + .alloc_workspace = lzo_alloc_workspace, + .free_workspace = lzo_free_workspace, + .compress_pages = lzo_compress_pages, + .decompress_biovec = lzo_decompress_biovec, + .decompress = lzo_decompress, +}; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f348f2b93164..a1a76b2a61f9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -168,6 +168,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) strcmp(args[0].from, "zlib") == 0) { compress_type = "zlib"; info->compress_type = BTRFS_COMPRESS_ZLIB; + } else if (strcmp(args[0].from, "lzo") == 0) { + compress_type = "lzo"; + info->compress_type = BTRFS_COMPRESS_LZO; } else { ret = -EINVAL; goto out; -- cgit v1.2.3 From 1a419d85a76853d7d04e9b6280a80e96770bf3e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 25 Oct 2010 15:12:50 +0800 Subject: btrfs: Allow to specify compress method when defrag Update defrag ioctl, so one can choose lzo or zlib when turning on compression in defrag operation. Changelog: v1 -> v2 - Add incompability flag. - Fix to check invalid compress type. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 19 ++++++++++++++++++- fs/btrfs/ioctl.h | 9 ++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8cb86d4d763c..b6985d33eede 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -638,9 +638,11 @@ static int btrfs_defrag_file(struct file *file, struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; struct page *page; + struct btrfs_super_block *disk_super; unsigned long last_index; unsigned long ra_pages = root->fs_info->bdi.ra_pages; unsigned long total_read = 0; + u64 features; u64 page_start; u64 page_end; u64 last_len = 0; @@ -648,6 +650,14 @@ static int btrfs_defrag_file(struct file *file, u64 defrag_end = 0; unsigned long i; int ret; + int compress_type = BTRFS_COMPRESS_ZLIB; + + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { + if (range->compress_type > BTRFS_COMPRESS_TYPES) + return -EINVAL; + if (range->compress_type) + compress_type = range->compress_type; + } if (inode->i_size == 0) return 0; @@ -683,7 +693,7 @@ static int btrfs_defrag_file(struct file *file, total_read++; mutex_lock(&inode->i_mutex); if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_ZLIB; + BTRFS_I(inode)->force_compress = compress_type; ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (ret) @@ -785,6 +795,13 @@ loop_unlock: mutex_unlock(&inode->i_mutex); } + disk_super = &root->fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (range->compress_type == BTRFS_COMPRESS_LZO) { + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + btrfs_set_super_incompat_flags(disk_super, features); + } + return 0; err_reservations: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..24d0f4628240 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -133,8 +133,15 @@ struct btrfs_ioctl_defrag_range_args { */ __u32 extent_thresh; + /* + * which compression method to use if turning on compression + * for this defrag operation. If unspecified, zlib will + * be used + */ + __u32 compress_type; + /* spare for later */ - __u32 unused[5]; + __u32 unused[4]; }; struct btrfs_ioctl_space_info { -- cgit v1.2.3 From 3a39c18d63fec35f49df577d4b2a4e29c2212f22 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Nov 2010 15:22:19 +0800 Subject: btrfs: Extract duplicate decompress code Add a common function to copy decompressed data from working buffer to bio pages. Signed-off-by: Li Zefan --- fs/btrfs/compression.c | 92 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/compression.h | 5 +++ fs/btrfs/lzo.c | 101 +++----------------------------------------- fs/btrfs/zlib.c | 111 ++++++------------------------------------------- 4 files changed, 115 insertions(+), 194 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 8faa2df9e719..f745287fbf2e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -904,3 +904,95 @@ void __exit btrfs_exit_compress(void) { free_workspaces(); } + +/* + * Copy uncompressed data from working buffer to pages. + * + * buf_start is the byte offset we're of the start of our workspace buffer. + * + * total_out is the last byte of the buffer + */ +int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, + unsigned long total_out, u64 disk_start, + struct bio_vec *bvec, int vcnt, + unsigned long *page_index, + unsigned long *pg_offset) +{ + unsigned long buf_offset; + unsigned long current_buf_start; + unsigned long start_byte; + unsigned long working_bytes = total_out - buf_start; + unsigned long bytes; + char *kaddr; + struct page *page_out = bvec[*page_index].bv_page; + + /* + * start byte is the first byte of the page we're currently + * copying into relative to the start of the compressed data. + */ + start_byte = page_offset(page_out) - disk_start; + + /* we haven't yet hit data corresponding to this page */ + if (total_out <= start_byte) + return 1; + + /* + * the start of the data we care about is offset into + * the middle of our working buffer + */ + if (total_out > start_byte && buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes -= buf_offset; + } else { + buf_offset = 0; + } + current_buf_start = buf_start; + + /* copy bytes from the working buffer into the pages */ + while (working_bytes > 0) { + bytes = min(PAGE_CACHE_SIZE - *pg_offset, + PAGE_CACHE_SIZE - buf_offset); + bytes = min(bytes, working_bytes); + kaddr = kmap_atomic(page_out, KM_USER0); + memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page_out); + + *pg_offset += bytes; + buf_offset += bytes; + working_bytes -= bytes; + current_buf_start += bytes; + + /* check if we need to pick another page */ + if (*pg_offset == PAGE_CACHE_SIZE) { + (*page_index)++; + if (*page_index >= vcnt) + return 0; + + page_out = bvec[*page_index].bv_page; + *pg_offset = 0; + start_byte = page_offset(page_out) - disk_start; + + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) + return 1; + + /* + * the next page in the biovec might not be adjacent + * to the last page, but it might still be found + * inside this working buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + buf_offset; + } + } + } + + return 1; +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index f7ce217113fa..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -34,6 +34,11 @@ int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, struct bio_vec *bvec, int vcnt, size_t srclen); int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); +int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, + unsigned long total_out, u64 disk_start, + struct bio_vec *bvec, int vcnt, + unsigned long *page_index, + unsigned long *pg_offset); int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 523b144e2aec..cc9b450399df 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -260,12 +260,10 @@ static int lzo_decompress_biovec(struct list_head *ws, size_t srclen) { struct workspace *workspace = list_entry(ws, struct workspace, list); - int ret = 0; + int ret = 0, ret2; char *data_in; - unsigned long page_bytes_left; unsigned long page_in_index = 0; unsigned long page_out_index = 0; - struct page *page_out; unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE; unsigned long buf_start; @@ -273,9 +271,6 @@ static int lzo_decompress_biovec(struct list_head *ws, unsigned long bytes; unsigned long working_bytes; unsigned long pg_offset; - unsigned long start_byte; - unsigned long current_buf_start; - char *kaddr; size_t in_len; size_t out_len; @@ -295,8 +290,6 @@ static int lzo_decompress_biovec(struct list_head *ws, in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; tot_out = 0; - page_out = bvec[0].bv_page; - page_bytes_left = PAGE_CACHE_SIZE; pg_offset = 0; while (tot_in < tot_len) { @@ -359,97 +352,15 @@ cont: break; } - /* - * buf start is the byte offset we're of the start of - * our workspace buffer - */ buf_start = tot_out; - - /* tot_out is the last byte of the workspace buffer */ tot_out += out_len; - working_bytes = tot_out - buf_start; - - /* - * start_byte is the first byte of the page we're currently - * copying into relative to the start of the compressed data. - */ - start_byte = page_offset(page_out) - disk_start; - - if (working_bytes == 0) { - /* we didn't make progress in this inflate - * call, we're done - */ + ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, + tot_out, disk_start, + bvec, vcnt, + &page_out_index, &pg_offset); + if (ret2 == 0) break; - } - - /* we haven't yet hit data corresponding to this page */ - if (tot_out <= start_byte) - continue; - - /* - * the start of the data we care about is offset into - * the middle of our working buffer - */ - if (tot_out > start_byte && buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes -= buf_offset; - } else { - buf_offset = 0; - } - current_buf_start = buf_start; - - /* copy bytes from the working buffer into the pages */ - while (working_bytes > 0) { - bytes = min(PAGE_CACHE_SIZE - pg_offset, - PAGE_CACHE_SIZE - buf_offset); - bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(page_out, KM_USER0); - memcpy(kaddr + pg_offset, workspace->buf + buf_offset, - bytes); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page_out); - - pg_offset += bytes; - page_bytes_left -= bytes; - buf_offset += bytes; - working_bytes -= bytes; - current_buf_start += bytes; - - /* check if we need to pick another page */ - if (page_bytes_left == 0) { - page_out_index++; - if (page_out_index >= vcnt) { - ret = 0; - goto done; - } - - page_out = bvec[page_out_index].bv_page; - pg_offset = 0; - page_bytes_left = PAGE_CACHE_SIZE; - start_byte = page_offset(page_out) - disk_start; - - /* - * make sure our new page is covered by this - * working buffer - */ - if (tot_out <= start_byte) - break; - - /* the next page in the biovec might not - * be adjacent to the last page, but it - * might still be found inside this working - * buffer. bump our offset pointer - */ - if (tot_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = tot_out - start_byte; - current_buf_start = buf_start + - buf_offset; - } - } - } } done: if (data_in) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 9a3e693917f2..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -218,24 +218,16 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, size_t srclen) { struct workspace *workspace = list_entry(ws, struct workspace, list); - int ret = 0; + int ret = 0, ret2; int wbits = MAX_WBITS; char *data_in; size_t total_out = 0; - unsigned long page_bytes_left; unsigned long page_in_index = 0; unsigned long page_out_index = 0; - struct page *page_out; unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE; unsigned long buf_start; - unsigned long buf_offset; - unsigned long bytes; - unsigned long working_bytes; unsigned long pg_offset; - unsigned long start_byte; - unsigned long current_buf_start; - char *kaddr; data_in = kmap(pages_in[page_in_index]); workspace->inf_strm.next_in = data_in; @@ -245,8 +237,6 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, workspace->inf_strm.total_out = 0; workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; - page_out = bvec[page_out_index].bv_page; - page_bytes_left = PAGE_CACHE_SIZE; pg_offset = 0; /* If it's deflate, and it's got no preset dictionary, then @@ -268,100 +258,23 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) break; - /* - * buf start is the byte offset we're of the start of - * our workspace buffer - */ - buf_start = total_out; - /* total_out is the last byte of the workspace buffer */ + buf_start = total_out; total_out = workspace->inf_strm.total_out; - working_bytes = total_out - buf_start; - - /* - * start byte is the first byte of the page we're currently - * copying into relative to the start of the compressed data. - */ - start_byte = page_offset(page_out) - disk_start; - - if (working_bytes == 0) { - /* we didn't make progress in this inflate - * call, we're done - */ - if (ret != Z_STREAM_END) - ret = -1; + /* we didn't make progress in this inflate call, we're done */ + if (buf_start == total_out) break; - } - /* we haven't yet hit data corresponding to this page */ - if (total_out <= start_byte) - goto next; - - /* - * the start of the data we care about is offset into - * the middle of our working buffer - */ - if (total_out > start_byte && buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes -= buf_offset; - } else { - buf_offset = 0; - } - current_buf_start = buf_start; - - /* copy bytes from the working buffer into the pages */ - while (working_bytes > 0) { - bytes = min(PAGE_CACHE_SIZE - pg_offset, - PAGE_CACHE_SIZE - buf_offset); - bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(page_out, KM_USER0); - memcpy(kaddr + pg_offset, workspace->buf + buf_offset, - bytes); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page_out); - - pg_offset += bytes; - page_bytes_left -= bytes; - buf_offset += bytes; - working_bytes -= bytes; - current_buf_start += bytes; - - /* check if we need to pick another page */ - if (page_bytes_left == 0) { - page_out_index++; - if (page_out_index >= vcnt) { - ret = 0; - goto done; - } - - page_out = bvec[page_out_index].bv_page; - pg_offset = 0; - page_bytes_left = PAGE_CACHE_SIZE; - start_byte = page_offset(page_out) - disk_start; - - /* - * make sure our new page is covered by this - * working buffer - */ - if (total_out <= start_byte) - goto next; - - /* the next page in the biovec might not - * be adjacent to the last page, but it - * might still be found inside this working - * buffer. bump our offset pointer - */ - if (total_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = total_out - start_byte; - current_buf_start = buf_start + - buf_offset; - } - } + ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, + total_out, disk_start, + bvec, vcnt, + &page_out_index, &pg_offset); + if (ret2 == 0) { + ret = 0; + goto done; } -next: + workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; -- cgit v1.2.3 From fa0d2b9bd717340e0bc4850a80ac0eb344e9a7fb Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 20 Dec 2010 15:53:28 +0800 Subject: Btrfs: Refactor btrfs_ioctl_snap_create() Split it into two functions for two different ioctls, since they share no common code. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 84 +++++++++++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..02554e19d974 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -946,58 +946,54 @@ out: } static noinline int btrfs_ioctl_snap_create(struct file *file, - void __user *arg, int subvol, - int v2) + void __user *arg, int subvol) { - struct btrfs_ioctl_vol_args *vol_args = NULL; - struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; - char *name; - u64 fd; + struct btrfs_ioctl_vol_args *vol_args; int ret; - if (v2) { - u64 transid = 0; - u64 *ptr = NULL; + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); - if (IS_ERR(vol_args_v2)) - return PTR_ERR(vol_args_v2); + ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, + vol_args->fd, subvol, NULL); - if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { - ret = -EINVAL; - goto out; - } - - name = vol_args_v2->name; - fd = vol_args_v2->fd; - vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; + kfree(vol_args); + return ret; +} - if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) - ptr = &transid; +static noinline int btrfs_ioctl_snap_create_v2(struct file *file, + void __user *arg, int subvol) +{ + struct btrfs_ioctl_vol_args_v2 *vol_args; + int ret; + u64 transid = 0; + u64 *ptr = NULL; - ret = btrfs_ioctl_snap_create_transid(file, name, fd, - subvol, ptr); + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; - if (ret == 0 && ptr && - copy_to_user(arg + - offsetof(struct btrfs_ioctl_vol_args_v2, - transid), ptr, sizeof(*ptr))) - ret = -EFAULT; - } else { - vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) - return PTR_ERR(vol_args); - name = vol_args->name; - fd = vol_args->fd; - vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - - ret = btrfs_ioctl_snap_create_transid(file, name, fd, - subvol, NULL); + if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { + ret = -EINVAL; + goto out; } + + if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) + ptr = &transid; + + ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, + vol_args->fd, subvol, ptr); + + if (ret == 0 && ptr && + copy_to_user(arg + + offsetof(struct btrfs_ioctl_vol_args_v2, + transid), ptr, sizeof(*ptr))) + ret = -EFAULT; out: kfree(vol_args); - kfree(vol_args_v2); - return ret; } @@ -2257,11 +2253,11 @@ long btrfs_ioctl(struct file *file, unsigned int case FS_IOC_GETVERSION: return btrfs_ioctl_getversion(file, argp); case BTRFS_IOC_SNAP_CREATE: - return btrfs_ioctl_snap_create(file, argp, 0, 0); + return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SNAP_CREATE_V2: - return btrfs_ioctl_snap_create(file, argp, 0, 1); + return btrfs_ioctl_snap_create_v2(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: - return btrfs_ioctl_snap_create(file, argp, 1, 0); + return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); case BTRFS_IOC_DEFAULT_SUBVOL: -- cgit v1.2.3 From b83cc9693f39689490970c19f6c5b866f6719a70 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 20 Dec 2010 16:04:08 +0800 Subject: Btrfs: Add readonly snapshots support Usage: Set BTRFS_SUBVOL_RDONLY of btrfs_ioctl_vol_arg_v2->flags, and call ioctl(BTRFS_I0CTL_SNAP_CREATE_V2). Implementation: - Set readonly bit of btrfs_root_item->flags. - Add readonly checks in btrfs_permission (inode_permission), btrfs_setattr, btrfs_set/remove_xattr and some ioctls. Changelog for v3: - Eliminate btrfs_root->readonly, but check btrfs_root->root_item.flags. - Rename BTRFS_ROOT_SNAP_RDONLY to BTRFS_ROOT_SUBVOL_RDONLY. Signed-off-by: Li Zefan --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/inode.c | 8 ++++++++ fs/btrfs/ioctl.c | 42 ++++++++++++++++++++++++++++++++---------- fs/btrfs/ioctl.h | 1 + fs/btrfs/transaction.c | 8 ++++++++ fs/btrfs/transaction.h | 1 + fs/btrfs/xattr.c | 18 ++++++++++++++++++ 7 files changed, 75 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index af52f6d7a4d8..4403e5643d43 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -597,6 +597,8 @@ struct btrfs_dir_item { u8 type; } __attribute__ ((__packed__)); +#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) + struct btrfs_root_item { struct btrfs_inode_item inode; __le64 generation; @@ -1893,6 +1895,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, last_snapshot, 64); +static inline bool btrfs_root_readonly(struct btrfs_root *root) +{ + return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; +} + /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f9194438f7c..956f1eb913b1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3671,8 +3671,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; int err; + if (btrfs_root_readonly(root)) + return -EROFS; + err = inode_change_ok(inode, attr); if (err) return err; @@ -7206,6 +7210,10 @@ static int btrfs_set_page_dirty(struct page *page) static int btrfs_permission(struct inode *inode, int mask) { + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) + return -EROFS; if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) return -EACCES; return generic_permission(inode, mask, btrfs_check_acl); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02554e19d974..f066ccb5dddf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) unsigned int flags, oldflags; int ret; + if (btrfs_root_readonly(root)) + return -EROFS; + if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -360,7 +363,8 @@ fail: } static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, - char *name, int namelen, u64 *async_transid) + char *name, int namelen, u64 *async_transid, + bool readonly) { struct inode *inode; struct dentry *parent; @@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, btrfs_init_block_rsv(&pending_snapshot->block_rsv); pending_snapshot->dentry = dentry; pending_snapshot->root = root; + pending_snapshot->readonly = readonly; trans = btrfs_start_transaction(root->fs_info->extent_root, 5); if (IS_ERR(trans)) { @@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, - u64 *async_transid) + u64 *async_transid, bool readonly) { struct inode *dir = parent->dentry->d_inode; struct dentry *dentry; @@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent, if (snap_src) { error = create_snapshot(snap_src, dentry, - name, namelen, async_transid); + name, namelen, async_transid, readonly); } else { error = create_subvol(BTRFS_I(dir)->root, dentry, name, namelen, async_transid); @@ -901,7 +906,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, char *name, unsigned long fd, int subvol, - u64 *transid) + u64 *transid, + bool readonly) { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct file *src_file; @@ -919,7 +925,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, - NULL, transid); + NULL, transid, readonly); } else { struct inode *src_inode; src_file = fget(fd); @@ -938,7 +944,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, } ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, - transid); + transid, readonly); fput(src_file); } out: @@ -957,7 +963,8 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, - vol_args->fd, subvol, NULL); + vol_args->fd, subvol, + NULL, false); kfree(vol_args); return ret; @@ -970,22 +977,27 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, int ret; u64 transid = 0; u64 *ptr = NULL; + bool readonly = false; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; - if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { - ret = -EINVAL; + if (vol_args->flags & + ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { + ret = -EOPNOTSUPP; goto out; } if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) ptr = &transid; + if (vol_args->flags & BTRFS_SUBVOL_RDONLY) + readonly = true; ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, - vol_args->fd, subvol, ptr); + vol_args->fd, subvol, + ptr, readonly); if (ret == 0 && ptr && copy_to_user(arg + @@ -1505,6 +1517,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) struct btrfs_ioctl_defrag_range_args *range; int ret; + if (btrfs_root_readonly(root)) + return -EROFS; + ret = mnt_want_write(file->f_path.mnt); if (ret) return ret; @@ -1633,6 +1648,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) return -EINVAL; + if (btrfs_root_readonly(root)) + return -EROFS; + ret = mnt_want_write(file->f_path.mnt); if (ret) return ret; @@ -1954,6 +1972,10 @@ static long btrfs_ioctl_trans_start(struct file *file) if (file->private_data) goto out; + ret = -EROFS; + if (btrfs_root_readonly(root)) + goto out; + ret = mnt_want_write(file->f_path.mnt); if (ret) goto out; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..52ae489974be 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args { }; #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) #define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f50e931fc217..29e30d832ec9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -910,6 +910,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 to_reserve = 0; u64 index = 0; u64 objectid; + u64 root_flags; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { @@ -967,6 +968,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); + root_flags = btrfs_root_flags(new_root_item); + if (pending->readonly) + root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; + else + root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; + btrfs_set_root_flags(new_root_item, root_flags); + old = btrfs_lock_root_node(root); btrfs_cow_block(trans, root, old, NULL, 0, &old); btrfs_set_lock_blocking(old); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { struct btrfs_block_rsv block_rsv; /* extra metadata reseration for relocation */ int error; + bool readonly; struct list_head list; }; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; + + /* + * The permission on security.* and system.* is not checked + * in permission(). + */ + if (btrfs_root_readonly(root)) + return -EROFS; + /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, int btrfs_removexattr(struct dentry *dentry, const char *name) { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; + + /* + * The permission on security.* and system.* is not checked + * in permission(). + */ + if (btrfs_root_readonly(root)) + return -EROFS; + /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler -- cgit v1.2.3 From 0caa102da82799efaba88e234484786a9591c797 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 20 Dec 2010 16:30:25 +0800 Subject: Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls This allows us to set a snapshot or a subvolume readonly or writable on the fly. Usage: Set BTRFS_SUBVOL_RDONLY of btrfs_ioctl_vol_arg_v2->flags, and then call ioctl(BTRFS_IOCTL_SUBVOL_SETFLAGS); Changelog for v3: - Change to pass __u64 as ioctl parameter. Changelog for v2: - Add _GETFLAGS ioctl. - Check if the passed fd is the root of a subvolume. - Change the name from _SNAP_SETFLAGS to _SUBVOL_SETFLAGS. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 2 ++ 2 files changed, 85 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f066ccb5dddf..ad1983524f97 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1009,6 +1009,85 @@ out: return ret; } +static noinline int btrfs_ioctl_subvol_getflags(struct file *file, + void __user *arg) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + u64 flags = 0; + + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) + return -EINVAL; + + down_read(&root->fs_info->subvol_sem); + if (btrfs_root_readonly(root)) + flags |= BTRFS_SUBVOL_RDONLY; + up_read(&root->fs_info->subvol_sem); + + if (copy_to_user(arg, &flags, sizeof(flags))) + ret = -EFAULT; + + return ret; +} + +static noinline int btrfs_ioctl_subvol_setflags(struct file *file, + void __user *arg) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + u64 root_flags; + u64 flags; + int ret = 0; + + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) + return -EINVAL; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) + return -EINVAL; + + if (flags & ~BTRFS_SUBVOL_RDONLY) + return -EOPNOTSUPP; + + down_write(&root->fs_info->subvol_sem); + + /* nothing to do */ + if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) + goto out; + + root_flags = btrfs_root_flags(&root->root_item); + if (flags & BTRFS_SUBVOL_RDONLY) + btrfs_set_root_flags(&root->root_item, + root_flags | BTRFS_ROOT_SUBVOL_RDONLY); + else + btrfs_set_root_flags(&root->root_item, + root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_reset; + } + + ret = btrfs_update_root(trans, root, + &root->root_key, &root->root_item); + + btrfs_commit_transaction(trans, root); +out_reset: + if (ret) + btrfs_set_root_flags(&root->root_item, root_flags); +out: + up_write(&root->fs_info->subvol_sem); + return ret; +} + /* * helper to check if the subvolume references other subvolumes */ @@ -2282,6 +2361,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); + case BTRFS_IOC_SUBVOL_GETFLAGS: + return btrfs_ioctl_subvol_getflags(file, argp); + case BTRFS_IOC_SUBVOL_SETFLAGS: + return btrfs_ioctl_subvol_setflags(file, argp); case BTRFS_IOC_DEFAULT_SUBVOL: return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 52ae489974be..1223223351fa 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -194,4 +194,6 @@ struct btrfs_ioctl_space_args { #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #endif -- cgit v1.2.3 From 22b6dee842c6341b49bc09cc5728eb2f8f2b3766 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Mon, 27 Dec 2010 14:29:57 +0800 Subject: nfsd4: fix oops on secinfo_no_name result encoding The secinfo_no_name code oopses on encoding with BUG: unable to handle kernel NULL pointer dereference at 00000044 IP: [] nfsd4_encode_secinfo+0x1c/0x1c1 [nfsd] We should implement a nfsd4_encode_secinfo_no_name() instead using nfsd4_encode_secinfo(). Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b543b2410b54..437b4623cb02 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2845,11 +2845,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) +nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, + __be32 nfserr,struct svc_export *exp) { int i = 0; - struct svc_export *exp = secinfo->si_exp; u32 nflavs; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; @@ -2911,6 +2910,20 @@ out: return nfserr; } +static __be32 +nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_secinfo *secinfo) +{ + return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); +} + +static __be32 +nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_secinfo_no_name *secinfo) +{ + return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); +} + /* * The SETATTR encode routine is special -- it always encodes a bitmap, * regardless of the error status. @@ -3173,7 +3186,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo, + [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, -- cgit v1.2.3 From 65e5341b9a0c39767ae1fecc727d70eda0dd6d83 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 24 Dec 2010 06:41:52 -0500 Subject: Btrfs: fix off by one while setting block groups readonly When we read in block groups, we'll set non-redundant groups readonly if we find a raid1, DUP or raid10 group. But the ro code has an off by one bug in the math around testing to make sure out accounting doesn't go wrong. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e5162e5c411..b180efdc8b68 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7971,13 +7971,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + sinfo->bytes_may_use + sinfo->bytes_readonly + - cache->reserved_pinned + num_bytes < sinfo->total_bytes) { + cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; sinfo->bytes_reserved += cache->reserved_pinned; cache->reserved_pinned = 0; cache->ro = 1; ret = 0; } + spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); return ret; -- cgit v1.2.3 From 65e4c8945575abca4e368e05ca3e9f77df030290 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 16 Dec 2010 15:25:54 +0200 Subject: nfsd: declare several functions of nfs4callback as static setup_callback_client(), nfsd4_release_cb() and nfsd4_process_cb_update() do not have users outside the translation unit. Let's declare it as static. Signed-off-by: Kirill A. Shutemov Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 143da2eecd7b..a08580553fda 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -473,7 +473,8 @@ static int max_cb_time(void) /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cl_cb_set an atomic? */ -int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) +static int setup_callback_client(struct nfs4_client *clp, + struct nfs4_cb_conn *conn) { struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -748,13 +749,13 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) flush_workqueue(callback_wq); } -void nfsd4_release_cb(struct nfsd4_callback *cb) +static void nfsd4_release_cb(struct nfsd4_callback *cb) { if (cb->cb_ops->rpc_release) cb->cb_ops->rpc_release(cb); } -void nfsd4_process_cb_update(struct nfsd4_callback *cb) +static void nfsd4_process_cb_update(struct nfsd4_callback *cb) { struct nfs4_cb_conn conn; struct nfs4_client *clp = cb->cb_clp; -- cgit v1.2.3 From 3beb6cd1d448e7ded938bbd676493e6a08e9a6cd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 1 Jan 2011 15:43:50 -0500 Subject: nfsd: don't drop requests on -ENOMEM We never want to drop a request if we could return a JUKEBOX/DELAY error instead; so, convert to nfserr_jukebox and let nfsd_dispatch() convert that to a dropit error as a last resort if JUKEBOX/DELAY is unavailable (as in the NFSv2 case). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 08e17264784b..dc9c2e3fd1b8 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -736,7 +736,7 @@ nfserrno (int errno) { nfserr_jukebox, -ETIMEDOUT }, { nfserr_jukebox, -ERESTARTSYS }, { nfserr_dropit, -EAGAIN }, - { nfserr_dropit, -ENOMEM }, + { nfserr_jukebox, -ENOMEM }, { nfserr_badname, -ESRCH }, { nfserr_io, -ETXTBSY }, { nfserr_notsupp, -EOPNOTSUPP }, -- cgit v1.2.3 From 9e701c610923aaeac8b38b9202a686d1cc9ee35d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:56:36 -0500 Subject: svcrpc: simpler request dropping Currently we use -EAGAIN returns to determine when to drop a deferred request. On its own, that is error-prone, as it makes us treat -EAGAIN returns from other functions specially to prevent inadvertent dropping. So, use a flag on the request instead. Returning an error on request deferral is still required, to prevent further processing, but we no longer need worry that an error return on its own could result in a drop. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 2 +- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 3 ++- net/sunrpc/svc_xprt.c | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2bae1d86f5f2..18743c4d8bca 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -608,7 +608,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); - if (nfserr == nfserr_dropit) { + if (nfserr == nfserr_dropit || rqstp->rq_dropme) { dprintk("nfsd: Dropping request; may be revisited later\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); return 0; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5a3085b9b394..d45c482b6444 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -269,6 +269,7 @@ struct svc_rqst { struct cache_req rq_chandle; /* handle passed to caches for * request delaying */ + bool rq_dropme; /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6359c42c4941..df1931f8ae98 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1005,6 +1005,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ rqstp->rq_usedeferral = 1; + rqstp->rq_dropme = false; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); @@ -1106,7 +1107,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); /* Encode reply */ - if (*statp == rpc_drop_reply) { + if (rqstp->rq_dropme) { if (procp->pc_release) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5eae53b1e306..173f3b975d49 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1019,6 +1019,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) } svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; + rqstp->rq_dropme = true; dr->handle.revisit = svc_revisit; return &dr->handle; -- cgit v1.2.3 From 062304a815fe10068c478a4a3f28cf091c55cb82 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 22:05:33 -0500 Subject: nfsd: stop translating EAGAIN to nfserr_dropit We no longer need this. Also, EWOULDBLOCK is generally a synonym for EAGAIN, but that may not be true on all architectures, so map it as well. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index dc9c2e3fd1b8..fd608a27a8d5 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -735,7 +735,8 @@ nfserrno (int errno) { nfserr_stale, -ESTALE }, { nfserr_jukebox, -ETIMEDOUT }, { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_dropit, -EAGAIN }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, { nfserr_jukebox, -ENOMEM }, { nfserr_badname, -ESRCH }, { nfserr_io, -ETXTBSY }, -- cgit v1.2.3 From da165dd60e136d0609e0a2c0c2a9b9a5372200d6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 22:13:18 -0500 Subject: nfsd: remove some unnecessary dropit handling We no longer need a few of these special cases. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ---- fs/nfsd/nfs4state.c | 2 -- fs/nfsd/nfs4xdr.c | 2 -- fs/nfsd/vfs.c | 4 ---- 4 files changed, 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f80c3997d24c..fd6694b49e1c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1156,10 +1156,6 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } - if (!rqstp->rq_usedeferral && status == nfserr_dropit) { - dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__); - status = nfserr_jukebox; - } resp->cstate.status = status; fh_put(&resp->cstate.current_fh); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 73adcfb2dc17..b82e36862044 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2509,8 +2509,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file if (!fp->fi_fds[oflag]) { status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &fp->fi_fds[oflag]); - if (status == nfserr_dropit) - status = nfserr_jukebox; if (status) return status; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 437b4623cb02..364aae7d5998 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2328,8 +2328,6 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, case nfserr_resource: nfserr = nfserr_toosmall; goto fail; - case nfserr_dropit: - goto fail; case nfserr_noent: goto skip_entry; default: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 184938fcff04..6a3af2ff3afe 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -380,8 +380,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * we need to break all leases. */ host_err = break_lease(inode, O_WRONLY | O_NONBLOCK); - if (host_err == -EWOULDBLOCK) - host_err = -ETIMEDOUT; if (host_err) /* ENOMEM or EWOULDBLOCK */ goto out_nfserr; @@ -752,8 +750,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, */ if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); - if (host_err == -EWOULDBLOCK) - host_err = -ETIMEDOUT; if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; -- cgit v1.2.3 From e63eb9375089f9d2041305d04c3f33a194e0e014 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 30 Oct 2010 17:41:26 -0400 Subject: nfsd4: eliminate lease delete callback nfsd controls the lifetime of the lease, not the lock code, so there's no need for this callback on lease destruction. Signed-off-by: J. Bruce Fields --- Documentation/filesystems/Locking | 2 -- fs/nfsd/nfs4state.c | 18 ------------------ 2 files changed, 20 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index b6426f15b4ae..075be12bc531 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -327,14 +327,12 @@ fl_release_private: yes yes prototypes: int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ - void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); /* break_lease callback */ locking rules: BKL may block fl_compare_owner: yes no fl_notify: yes no -fl_release_private: yes yes fl_break: yes no Currently only NFSD and NLM provide instances of this class. None of the diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b82e36862044..2e44ad2539ab 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2295,23 +2295,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl) nfsd4_cb_recall(dp); } -/* - * The file_lock is being reapd. - * - * Called by locks_free_lock() with lock_flocks() held. - */ -static -void nfsd_release_deleg_cb(struct file_lock *fl) -{ - struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; - - dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count)); - - if (!(fl->fl_flags & FL_LEASE) || !dp) - return; - dp->dl_flock = NULL; -} - /* * Called from setlease() with lock_flocks() held */ @@ -2341,7 +2324,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) static const struct lock_manager_operations nfsd_lease_mng_ops = { .fl_break = nfsd_break_deleg_cb, - .fl_release_private = nfsd_release_deleg_cb, .fl_mylease = nfsd_same_client_deleg_cb, .fl_change = nfsd_change_deleg_cb, }; -- cgit v1.2.3 From c84d500bc41658165ceb0dd04dc6a75249940fba Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 30 Oct 2010 23:35:04 -0400 Subject: nfsd4: use a single struct file for delegations When we converted to sharing struct filess between nfs4 opens I went too far and also used the same mechanism for delegations. But keeping a reference to the struct file ensures it will outlast the lease, and allows us to remove the lease with the same file as we added it. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++++----- fs/nfsd/state.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e44ad2539ab..cbe1b81c147d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -230,7 +230,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f dp->dl_client = clp; get_nfs4_file(fp); dp->dl_file = fp; - nfs4_file_get_access(fp, O_RDONLY); + dp->dl_vfs_file = find_readable_file(fp); + get_file(dp->dl_vfs_file); dp->dl_flock = NULL; dp->dl_type = type; dp->dl_stateid.si_boot = boot_time; @@ -252,6 +253,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp) if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); put_nfs4_file(dp->dl_file); + fput(dp->dl_vfs_file); kmem_cache_free(deleg_slab, dp); num_delegations--; } @@ -265,12 +267,10 @@ nfs4_put_delegation(struct nfs4_delegation *dp) static void nfs4_close_delegation(struct nfs4_delegation *dp) { - struct file *filp = find_readable_file(dp->dl_file); - dprintk("NFSD: close_delegation dp %p\n",dp); + /* XXX: do we even need this check?: */ if (dp->dl_flock) - vfs_setlease(filp, F_UNLCK, &dp->dl_flock); - nfs4_file_put_access(dp->dl_file, O_RDONLY); + vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); } /* Called under the state lock. */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 39adc27b0685..84b230217b1b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -81,6 +81,7 @@ struct nfs4_delegation { atomic_t dl_count; /* ref count */ struct nfs4_client *dl_client; struct nfs4_file *dl_file; + struct file *dl_vfs_file; struct file_lock *dl_flock; u32 dl_type; time_t dl_time; -- cgit v1.2.3 From c45821d263a8a5109d69a9e8942b8d65bcd5f31a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 31 Oct 2010 00:04:44 -0400 Subject: locks: eliminate fl_mylease callback The nfs server only supports read delegations for now, so we don't care how conflicts are determined. All we care is that unlocks are recognized as matching the leases they are meant to remove. After the last patch, a comparison of struct files will work for that purpose. So we no longer need this callback. Signed-off-by: J. Bruce Fields --- fs/locks.c | 8 +------- fs/nfsd/nfs4state.c | 21 +-------------------- include/linux/fs.h | 1 - 3 files changed, 2 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 8729347bcd1a..5cb65062281a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -444,15 +444,9 @@ static void lease_release_private_callback(struct file_lock *fl) fl->fl_file->f_owner.signum = 0; } -static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try) -{ - return fl->fl_file == try->fl_file; -} - static const struct lock_manager_operations lease_manager_ops = { .fl_break = lease_break_callback, .fl_release_private = lease_release_private_callback, - .fl_mylease = lease_mylease_callback, .fl_change = lease_modify, }; @@ -1405,7 +1399,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) for (before = &inode->i_flock; ((fl = *before) != NULL) && IS_LEASE(fl); before = &fl->fl_next) { - if (lease->fl_lmops->fl_mylease(fl, lease)) + if (fl->fl_file == lease->fl_file) my_before = before; else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cbe1b81c147d..87d4c48b6069 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2295,24 +2295,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl) nfsd4_cb_recall(dp); } -/* - * Called from setlease() with lock_flocks() held - */ -static -int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) -{ - struct nfs4_delegation *onlistd = - (struct nfs4_delegation *)onlist->fl_owner; - struct nfs4_delegation *tryd = - (struct nfs4_delegation *)try->fl_owner; - - if (onlist->fl_lmops != try->fl_lmops) - return 0; - - return onlistd->dl_client == tryd->dl_client; -} - - static int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) { @@ -2324,7 +2306,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) static const struct lock_manager_operations nfsd_lease_mng_ops = { .fl_break = nfsd_break_deleg_cb, - .fl_mylease = nfsd_same_client_deleg_cb, .fl_change = nfsd_change_deleg_cb, }; @@ -2630,7 +2611,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta dp->dl_flock = fl; /* vfs_setlease checks to see if delegation should be handed out. - * the lock_manager callbacks fl_mylease and fl_change are used + * the lock_manager callback fl_change is used */ if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); diff --git a/include/linux/fs.h b/include/linux/fs.h index 090f0eacde29..73ce69f728e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1059,7 +1059,6 @@ struct lock_manager_operations { int (*fl_grant)(struct file_lock *, struct file_lock *, int); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); - int (*fl_mylease)(struct file_lock *, struct file_lock *); int (*fl_change)(struct file_lock **, int); }; -- cgit v1.2.3 From 255c7cf810e4776ae8f1023332060459f30d8a2a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 31 Oct 2010 12:35:48 -0400 Subject: locks: minor setlease cleanup Signed-off-by: J. Bruce Fields --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 5cb65062281a..feaac634d0da 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1399,7 +1399,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) for (before = &inode->i_flock; ((fl = *before) != NULL) && IS_LEASE(fl); before = &fl->fl_next) { - if (fl->fl_file == lease->fl_file) + if (fl->fl_file == filp) my_before = before; else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) /* -- cgit v1.2.3 From f6af99ec1b261e21219d5eba99e3af48fc6c32d4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 18:02:15 -0500 Subject: nfsd4: name->id mapping should fail with BADOWNER not BADNAME According to rfc 3530 BADNAME is for strings that represent paths; BADOWNER is for user/group names that don't map. And the too-long name should probably be BADOWNER as well; it's effectively the same as if we couldn't map it. Cc: stable@kernel.org Reported-by: Trond Myklebust Reported-by: Simon Kirby Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4idmap.c | 4 ++-- fs/nfsd/nfsd.h | 1 + fs/nfsd/nfsproc.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index f0695e815f0e..844960fd0395 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -524,13 +524,13 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen int ret; if (namelen + 1 > sizeof(key.name)) - return -EINVAL; + return -ESRCH; /* nfserr_badowner */ memcpy(key.name, name, namelen); key.name[namelen] = '\0'; strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); if (ret == -ENOENT) - ret = -ESRCH; /* nfserr_badname */ + ret = -ESRCH; /* nfserr_badowner */ if (ret) return ret; *id = item->id; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 6b641cf2c19a..7ecfa2420307 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -158,6 +158,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) #define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) #define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) +#define nfserr_badowner cpu_to_be32(NFSERR_BADOWNER) #define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) #define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) #define nfserr_grace cpu_to_be32(NFSERR_GRACE) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fd608a27a8d5..8f05dcd0bf85 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -738,7 +738,7 @@ nfserrno (int errno) { nfserr_jukebox, -EAGAIN }, { nfserr_jukebox, -EWOULDBLOCK }, { nfserr_jukebox, -ENOMEM }, - { nfserr_badname, -ESRCH }, + { nfserr_badowner, -ESRCH }, { nfserr_io, -ETXTBSY }, { nfserr_notsupp, -EOPNOTSUPP }, { nfserr_toosmall, -ETOOSMALL }, -- cgit v1.2.3 From 2ca72e17e5acb1052c35c9faba609c2289ce7a92 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 17:37:15 -0500 Subject: nfsd4: move idmap and acl header files into fs/nfsd These are internal nfsd interfaces. Signed-off-by: J. Bruce Fields --- fs/nfsd/acl.h | 61 +++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/idmap.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfs4acl.c | 2 +- fs/nfsd/nfs4idmap.c | 2 +- fs/nfsd/nfs4xdr.c | 5 ++-- fs/nfsd/nfsctl.c | 2 +- fs/nfsd/vfs.c | 4 +-- include/linux/nfs4_acl.h | 61 ------------------------------------------- include/linux/nfsd_idmap.h | 64 ---------------------------------------------- 9 files changed, 133 insertions(+), 132 deletions(-) create mode 100644 fs/nfsd/acl.h create mode 100644 fs/nfsd/idmap.h delete mode 100644 include/linux/nfs4_acl.h delete mode 100644 include/linux/nfsd_idmap.h (limited to 'fs') diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h new file mode 100644 index 000000000000..c9c05a78e9bb --- /dev/null +++ b/fs/nfsd/acl.h @@ -0,0 +1,61 @@ +/* + * include/linux/nfs4_acl.c + * + * Common NFSv4 ACL handling definitions. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFS4_ACL_H +#define LINUX_NFS4_ACL_H + +#include + +/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to + * fit in a page: */ +#define NFS4_ACL_MAX 170 + +struct nfs4_acl *nfs4_acl_new(int); +int nfs4_acl_get_whotype(char *, u32); +int nfs4_acl_write_who(int who, char *p); +int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, + uid_t who, u32 mask); + +#define NFS4_ACL_TYPE_DEFAULT 0x01 +#define NFS4_ACL_DIR 0x02 +#define NFS4_ACL_OWNER 0x04 + +struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, + struct posix_acl *, unsigned int flags); +int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, + struct posix_acl **, unsigned int flags); + +#endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h new file mode 100644 index 000000000000..d4a2ac18bd4c --- /dev/null +++ b/fs/nfsd/idmap.h @@ -0,0 +1,64 @@ +/* + * include/linux/nfsd_idmap.h + * + * Mapping of UID to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. +> * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFSD_IDMAP_H +#define LINUX_NFSD_IDMAP_H + +#include +#include + +/* XXX from linux/nfs_idmap.h */ +#define IDMAP_NAMESZ 128 + +#ifdef CONFIG_NFSD_V4 +int nfsd_idmap_init(void); +void nfsd_idmap_shutdown(void); +#else +static inline int nfsd_idmap_init(void) +{ + return 0; +} +static inline void nfsd_idmap_shutdown(void) +{ +} +#endif + +int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); +int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); +int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); +int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); + +#endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index e48052615159..ad88f1c0a4c3 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,7 +36,7 @@ #include #include -#include +#include "acl.h" /* mode bit translations: */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 844960fd0395..cbd599732765 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -33,10 +33,10 @@ */ #include -#include #include #include #include +#include "idmap.h" /* * Cache entry diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 364aae7d5998..2a0814d0ab1a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -44,13 +44,14 @@ #include #include #include -#include -#include #include +#include "idmap.h" +#include "acl.h" #include "xdr4.h" #include "vfs.h" + #define NFSDDBG_FACILITY NFSDDBG_XDR /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6840ec3ceecf..33b3e2b06779 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -8,12 +8,12 @@ #include #include -#include #include #include #include #include +#include "idmap.h" #include "nfsd.h" #include "cache.h" diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6a3af2ff3afe..b991125ce4a5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -35,8 +35,8 @@ #endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 -#include -#include +#include "acl.h" +#include "idmap.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" diff --git a/include/linux/nfs4_acl.h b/include/linux/nfs4_acl.h deleted file mode 100644 index c9c05a78e9bb..000000000000 --- a/include/linux/nfs4_acl.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * include/linux/nfs4_acl.c - * - * Common NFSv4 ACL handling definitions. - * - * Copyright (c) 2002 The Regents of the University of Michigan. - * All rights reserved. - * - * Marius Aamodt Eriksen - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LINUX_NFS4_ACL_H -#define LINUX_NFS4_ACL_H - -#include - -/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to - * fit in a page: */ -#define NFS4_ACL_MAX 170 - -struct nfs4_acl *nfs4_acl_new(int); -int nfs4_acl_get_whotype(char *, u32); -int nfs4_acl_write_who(int who, char *p); -int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, - uid_t who, u32 mask); - -#define NFS4_ACL_TYPE_DEFAULT 0x01 -#define NFS4_ACL_DIR 0x02 -#define NFS4_ACL_OWNER 0x04 - -struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, - struct posix_acl *, unsigned int flags); -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, - struct posix_acl **, unsigned int flags); - -#endif /* LINUX_NFS4_ACL_H */ diff --git a/include/linux/nfsd_idmap.h b/include/linux/nfsd_idmap.h deleted file mode 100644 index d4a2ac18bd4c..000000000000 --- a/include/linux/nfsd_idmap.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * include/linux/nfsd_idmap.h - * - * Mapping of UID to name and vice versa. - * - * Copyright (c) 2002, 2003 The Regents of the University of - * Michigan. All rights reserved. -> * - * Marius Aamodt Eriksen - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LINUX_NFSD_IDMAP_H -#define LINUX_NFSD_IDMAP_H - -#include -#include - -/* XXX from linux/nfs_idmap.h */ -#define IDMAP_NAMESZ 128 - -#ifdef CONFIG_NFSD_V4 -int nfsd_idmap_init(void); -void nfsd_idmap_shutdown(void); -#else -static inline int nfsd_idmap_init(void) -{ - return 0; -} -static inline void nfsd_idmap_shutdown(void) -{ -} -#endif - -int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); -int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); -int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); -int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); - -#endif /* LINUX_NFSD_IDMAP_H */ -- cgit v1.2.3 From 775a1905e1e042e830eae31e70efec9387eb3e1d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 17:38:41 -0500 Subject: nfsd4: remove outdated pathname-comments Signed-off-by: J. Bruce Fields --- fs/nfsd/acl.h | 2 -- fs/nfsd/idmap.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index c9c05a78e9bb..34e5c40af5ef 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -1,6 +1,4 @@ /* - * include/linux/nfs4_acl.c - * * Common NFSv4 ACL handling definitions. * * Copyright (c) 2002 The Regents of the University of Michigan. diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index d4a2ac18bd4c..514758994763 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -1,6 +1,4 @@ /* - * include/linux/nfsd_idmap.h - * * Mapping of UID to name and vice versa. * * Copyright (c) 2002, 2003 The Regents of the University of -- cgit v1.2.3 From 3c726023402a2f3b28f49b9d90ebf9e71151157d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 17:53:52 -0500 Subject: nfsd4: return nfs errno from name_to_id functions This avoids the need for the confusing ESRCH mapping. Signed-off-by: J. Bruce Fields --- fs/nfsd/idmap.h | 4 ++-- fs/nfsd/nfs4idmap.c | 13 +++++++------ fs/nfsd/nfs4xdr.c | 10 +++++----- fs/nfsd/nfsproc.c | 1 - 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index 514758994763..2f3be1321534 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -54,8 +54,8 @@ static inline void nfsd_idmap_shutdown(void) } #endif -int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); -int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); +__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); +__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index cbd599732765..6d2c397d458b 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -37,6 +37,7 @@ #include #include #include "idmap.h" +#include "nfsd.h" /* * Cache entry @@ -514,7 +515,7 @@ rqst_authname(struct svc_rqst *rqstp) return clp->name; } -static int +static __be32 idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) { @@ -524,15 +525,15 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen int ret; if (namelen + 1 > sizeof(key.name)) - return -ESRCH; /* nfserr_badowner */ + return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); if (ret == -ENOENT) - ret = -ESRCH; /* nfserr_badowner */ + return nfserr_badowner; if (ret) - return ret; + return nfserrno(ret); *id = item->id; cache_put(&item->h, &nametoid_cache); return 0; @@ -560,14 +561,14 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) return ret; } -int +__be32 nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, __u32 *id) { return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); } -int +__be32 nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, __u32 *id) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2a0814d0ab1a..ca3786905dec 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -289,17 +289,17 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); ace->whotype = nfs4_acl_get_whotype(buf, dummy32); - host_err = 0; + status = nfs_ok; if (ace->whotype != NFS4_ACL_WHO_NAMED) ace->who = 0; else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - host_err = nfsd_map_name_to_gid(argp->rqstp, + status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &ace->who); else - host_err = nfsd_map_name_to_uid(argp->rqstp, + status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &ace->who); - if (host_err) - goto out_nfserr; + if (status) + return status; } } else *acl = NULL; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 8f05dcd0bf85..e15dc45fc5ec 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -738,7 +738,6 @@ nfserrno (int errno) { nfserr_jukebox, -EAGAIN }, { nfserr_jukebox, -EWOULDBLOCK }, { nfserr_jukebox, -ENOMEM }, - { nfserr_badowner, -ESRCH }, { nfserr_io, -ETXTBSY }, { nfserr_notsupp, -EOPNOTSUPP }, { nfserr_toosmall, -ETOOSMALL }, -- cgit v1.2.3 From 6f3d772fb8a039de8f21d725f5e38c252b4c0efd Mon Sep 17 00:00:00 2001 From: Takuma Umeya Date: Wed, 15 Dec 2010 14:09:01 +0900 Subject: nfs4: set source address when callback is generated when callback is generated in NFSv4 server, it doesn't set the source address. When an alias IP is utilized on NFSv4 server and suppose the client is accessing via that alias IP (e.g. eth0:0), the client invokes the callback to the IP address that is set on the original device (e.g. eth0). This behavior results in timeout of xprt. The patch sets the IP address that the client should invoke callback to. Signed-off-by: Takuma Umeya [bfields@redhat.com: Simplify gen_callback arguments, use helper function] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + fs/nfsd/nfs4state.c | 22 +++++++++++++++++++--- fs/nfsd/state.h | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a08580553fda..dd183af24fe6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -484,6 +484,7 @@ static int setup_callback_client(struct nfs4_client *clp, .net = &init_net, .address = (struct sockaddr *) &conn->cb_addr, .addrsize = conn->cb_addrlen, + .saddress = (struct sockaddr *) &conn->cb_saddr, .timeout = &timeparms, .program = &cb_program, .version = 0, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 87d4c48b6069..b583e4e800ab 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1163,10 +1163,26 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) return NULL; } +static void rpc_svcaddr2sockaddr(struct sockaddr *sa, unsigned short family, union svc_addr_u *svcaddr) +{ + switch (family) { + case AF_INET: + ((struct sockaddr_in *)sa)->sin_family = AF_INET; + ((struct sockaddr_in *)sa)->sin_addr = svcaddr->addr; + return; + case AF_INET6: + ((struct sockaddr_in6 *)sa)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *)sa)->sin6_addr = svcaddr->addr6; + return; + } +} + static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) { struct nfs4_cb_conn *conn = &clp->cl_cb_conn; + struct sockaddr *sa = svc_addr(rqstp); + u32 scopeid = rpc_get_scope_id(sa); unsigned short expected_family; /* Currently, we only support tcp and tcp6 for the callback channel */ @@ -1192,6 +1208,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; + rpc_svcaddr2sockaddr((struct sockaddr *)&conn->cb_saddr, expected_family, &rqstp->rq_daddr); return; out_err: conn->cb_addr.ss_family = AF_UNSPEC; @@ -1768,7 +1785,6 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1871,7 +1887,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * for consistent minorversion use throughout: */ new->cl_minorversion = 0; - gen_callback(new, setclid, rpc_get_scope_id(sa)); + gen_callback(new, setclid, rqstp); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 84b230217b1b..cf6dc83fd545 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -96,6 +96,7 @@ struct nfs4_delegation { struct nfs4_cb_conn { /* SETCLIENTID info */ struct sockaddr_storage cb_addr; + struct sockaddr_storage cb_saddr; size_t cb_addrlen; u32 cb_prog; /* used only in 4.0 case; per-session otherwise */ -- cgit v1.2.3 From a363f0c2030cb9781e7e458f4a9e354b6c43d7ce Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:13:53 +1100 Subject: xfs: ensure sync write errors are returned xfs_file_aio_write() only returns the error from synchronous flushing of the data and inode if error == 0. At the point where error is being checked, it is guaranteed to be > 0. Therefore any errors returned by the data or fsync flush will never be returned. Fix the checks so we overwrite the current error once and only if an error really occurred. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 49 +++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..10b7fb4807a6 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -574,7 +574,7 @@ xfs_file_aio_write( struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - ssize_t ret = 0, error = 0; + ssize_t ret = 0; int ioflags = 0; xfs_fsize_t isize, new_size; int iolock; @@ -590,9 +590,9 @@ xfs_file_aio_write( if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (error) - return error; + ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); + if (ret) + return ret; count = ocount; if (count == 0) @@ -616,9 +616,9 @@ relock: xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); start: - error = -generic_write_checks(file, &pos, &count, + ret = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (error) { + if (ret) { xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); goto out_unlock_mutex; } @@ -660,8 +660,8 @@ start: */ if (pos > ip->i_size) { - error = xfs_zero_eof(ip, pos, ip->i_size); - if (error) { + ret = -xfs_zero_eof(ip, pos, ip->i_size); + if (ret) { xfs_iunlock(ip, XFS_ILOCK_EXCL); goto out_unlock_internal; } @@ -674,8 +674,8 @@ start: * by root. This keeps people from modifying setuid and * setgid binaries. */ - error = -file_remove_suid(file); - if (unlikely(error)) + ret = file_remove_suid(file); + if (unlikely(ret)) goto out_unlock_internal; /* We can write back this queue in page reclaim */ @@ -684,10 +684,10 @@ start: if ((ioflags & IO_ISDIRECT)) { if (mapping->nrpages) { WARN_ON(need_i_mutex == 0); - error = xfs_flushinval_pages(ip, + ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); - if (error) + if (ret) goto out_unlock_internal; } @@ -720,24 +720,22 @@ start: } } else { int enospc = 0; - ssize_t ret2 = 0; write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); - ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, + ret = generic_file_buffered_write(iocb, iovp, nr_segs, pos, &iocb->ki_pos, count, ret); /* * if we just got an ENOSPC, flush the inode now we * aren't holding any page locks and retry *once* */ - if (ret2 == -ENOSPC && !enospc) { - error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); - if (error) + if (ret == -ENOSPC && !enospc) { + ret = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); + if (ret) goto out_unlock_internal; enospc = 1; goto write_retry; } - ret = ret2; } current->backing_dev_info = NULL; @@ -753,7 +751,6 @@ write_retry: xfs_iunlock(ip, XFS_ILOCK_EXCL); } - error = -ret; if (ret <= 0) goto out_unlock_internal; @@ -762,23 +759,23 @@ write_retry: /* Handle various SYNC-type writes */ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; - int error2; + int error, error2; xfs_iunlock(ip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); - error2 = filemap_write_and_wait_range(mapping, pos, end); - if (!error) - error = error2; + error = filemap_write_and_wait_range(mapping, pos, end); if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(ip, iolock); error2 = -xfs_file_fsync(file, (file->f_flags & __O_SYNC) ? 0 : 1); - if (!error) - error = error2; + if (error) + ret = error; + else if (error2) + ret = error2; } out_unlock_internal: @@ -800,7 +797,7 @@ write_retry: out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); - return -error; + return ret; } STATIC int -- cgit v1.2.3 From edafb6da9aa725e4de5fe758fe81644b6167f9a2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:14:06 +1100 Subject: xfs: factor common post-write isize handling code Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 54 +++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 10b7fb4807a6..b3915bf25770 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -321,6 +321,30 @@ xfs_file_splice_read( return ret; } +STATIC void +xfs_aio_write_isize_update( + struct inode *inode, + loff_t *ppos, + ssize_t bytes_written) +{ + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t isize = i_size_read(inode); + + if (bytes_written > 0) + XFS_STATS_ADD(xs_write_bytes, bytes_written); + + if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && + *ppos > isize)) + *ppos = isize; + + if (*ppos > ip->i_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (*ppos > ip->i_size) + ip->i_size = *ppos; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } +} + STATIC ssize_t xfs_file_splice_write( struct pipe_inode_info *pipe, @@ -331,7 +355,7 @@ xfs_file_splice_write( { struct inode *inode = outfilp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t isize, new_size; + xfs_fsize_t new_size; int ioflags = 0; ssize_t ret; @@ -355,19 +379,8 @@ xfs_file_splice_write( trace_xfs_file_splice_write(ip, count, *ppos, ioflags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_write_bytes, ret); - - isize = i_size_read(inode); - if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) - *ppos = isize; - if (*ppos > ip->i_size) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (*ppos > ip->i_size) - ip->i_size = *ppos; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } + xfs_aio_write_isize_update(inode, ppos, ret); if (ip->i_new_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -576,7 +589,7 @@ xfs_file_aio_write( struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; int ioflags = 0; - xfs_fsize_t isize, new_size; + xfs_fsize_t new_size; int iolock; size_t ocount = 0, count; int need_i_mutex; @@ -740,22 +753,11 @@ write_retry: current->backing_dev_info = NULL; - isize = i_size_read(inode); - if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) - iocb->ki_pos = isize; - - if (iocb->ki_pos > ip->i_size) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (iocb->ki_pos > ip->i_size) - ip->i_size = iocb->ki_pos; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } + xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); if (ret <= 0) goto out_unlock_internal; - XFS_STATS_ADD(xs_write_bytes, ret); - /* Handle various SYNC-type writes */ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; -- cgit v1.2.3 From 4c5cfd1b4157fb75d43b44a147c2feba6422fc4f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:14:16 +1100 Subject: xfs: factor post-write newsize updates Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index b3915bf25770..c47d7dc0a307 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -345,6 +345,25 @@ xfs_aio_write_isize_update( } } +/* + * If this was a direct or synchronous I/O that failed (such as ENOSPC) then + * part of the I/O may have been written to disk before the error occured. In + * this case the on-disk file size may have been adjusted beyond the in-memory + * file size and now needs to be truncated back. + */ +STATIC void +xfs_aio_write_newsize_update( + struct xfs_inode *ip) +{ + if (ip->i_new_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + ip->i_new_size = 0; + if (ip->i_d.di_size > ip->i_size) + ip->i_d.di_size = ip->i_size; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } +} + STATIC ssize_t xfs_file_splice_write( struct pipe_inode_info *pipe, @@ -381,14 +400,7 @@ xfs_file_splice_write( ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); xfs_aio_write_isize_update(inode, ppos, ret); - - if (ip->i_new_size) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - ip->i_new_size = 0; - if (ip->i_d.di_size > ip->i_size) - ip->i_d.di_size = ip->i_size; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } + xfs_aio_write_newsize_update(ip); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } @@ -781,20 +793,7 @@ write_retry: } out_unlock_internal: - if (ip->i_new_size) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - ip->i_new_size = 0; - /* - * If this was a direct or synchronous I/O that failed (such - * as ENOSPC) then part of the I/O may have been written to - * disk before the error occured. In this case the on-disk - * file size may have been adjusted beyond the in-memory file - * size and now needs to be truncated back. - */ - if (ip->i_d.di_size > ip->i_size) - ip->i_d.di_size = ip->i_size; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } + xfs_aio_write_newsize_update(ip); xfs_iunlock(ip, iolock); out_unlock_mutex: if (need_i_mutex) -- cgit v1.2.3 From 487f84f3f80bc6f00c59725e822653d3ec174b85 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 12 Jan 2011 11:37:10 +1100 Subject: xfs: introduce xfs_rw_lock() helpers for locking the inode We need to obtain the i_mutex, i_iolock and i_ilock during the read and write paths. Add a set of wrapper functions to neatly encapsulate the lock ordering and shared/exclusive semantics to make the locking easier to follow and get right. Note that this changes some of the exclusive locking serialisation in that serialisation will occur against the i_mutex instead of the XFS_IOLOCK_EXCL. This does not change any behaviour, and it is arguably more efficient to use the mutex for such serialisation than the rw_sem. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 131 +++++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index c47d7dc0a307..b5e13fbb7386 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -40,6 +40,40 @@ static const struct vm_operations_struct xfs_file_vm_ops; +/* + * Locking primitives for read and write IO paths to ensure we consistently use + * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. + */ +static inline void +xfs_rw_ilock( + struct xfs_inode *ip, + int type) +{ + if (type & XFS_IOLOCK_EXCL) + mutex_lock(&VFS_I(ip)->i_mutex); + xfs_ilock(ip, type); +} + +static inline void +xfs_rw_iunlock( + struct xfs_inode *ip, + int type) +{ + xfs_iunlock(ip, type); + if (type & XFS_IOLOCK_EXCL) + mutex_unlock(&VFS_I(ip)->i_mutex); +} + +static inline void +xfs_rw_ilock_demote( + struct xfs_inode *ip, + int type) +{ + xfs_ilock_demote(ip, type); + if (type & XFS_IOLOCK_EXCL) + mutex_unlock(&VFS_I(ip)->i_mutex); +} + /* * xfs_iozero * @@ -262,22 +296,21 @@ xfs_file_aio_read( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (unlikely(ioflags & IO_ISDIRECT)) - mutex_lock(&inode->i_mutex); - xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + if (inode->i_mapping->nrpages) { ret = -xfs_flushinval_pages(ip, (iocb->ki_pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); + if (ret) { + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); + return ret; + } } - mutex_unlock(&inode->i_mutex); - if (ret) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; - } - } + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + } else + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); @@ -285,7 +318,7 @@ xfs_file_aio_read( if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; } @@ -309,7 +342,7 @@ xfs_file_splice_read( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - xfs_ilock(ip, XFS_IOLOCK_SHARED); + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); trace_xfs_file_splice_read(ip, count, *ppos, ioflags); @@ -317,7 +350,7 @@ xfs_file_splice_read( if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; } @@ -338,10 +371,10 @@ xfs_aio_write_isize_update( *ppos = isize; if (*ppos > ip->i_size) { - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); if (*ppos > ip->i_size) ip->i_size = *ppos; - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); } } @@ -356,14 +389,22 @@ xfs_aio_write_newsize_update( struct xfs_inode *ip) { if (ip->i_new_size) { - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); ip->i_new_size = 0; if (ip->i_d.di_size > ip->i_size) ip->i_d.di_size = ip->i_size; - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); } } +/* + * xfs_file_splice_write() does not use xfs_rw_ilock() because + * generic_file_splice_write() takes the i_mutex itself. This, in theory, + * couuld cause lock inversions between the aio_write path and the splice path + * if someone is doing concurrent splice(2) based writes and write(2) based + * writes to the same inode. The only real way to fix this is to re-implement + * the generic code here with correct locking orders. + */ STATIC ssize_t xfs_file_splice_write( struct pipe_inode_info *pipe, @@ -604,7 +645,6 @@ xfs_file_aio_write( xfs_fsize_t new_size; int iolock; size_t ocount = 0, count; - int need_i_mutex; XFS_STATS_INC(xs_write_calls); @@ -631,21 +671,17 @@ xfs_file_aio_write( relock: if (ioflags & IO_ISDIRECT) { iolock = XFS_IOLOCK_SHARED; - need_i_mutex = 0; } else { iolock = XFS_IOLOCK_EXCL; - need_i_mutex = 1; - mutex_lock(&inode->i_mutex); } - xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); - start: + xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock); ret = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (ret) { - xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); - goto out_unlock_mutex; + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); + return ret; } if (ioflags & IO_ISDIRECT) { @@ -654,16 +690,20 @@ start: mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); return XFS_ERROR(-EINVAL); } - if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); + /* + * For direct I/O, if there are cached pages or we're extending + * the file, we need IOLOCK_EXCL until we're sure the bytes at + * the new EOF have been zeroed and/or the cached pages are + * flushed out. Upgrade the I/O lock and start again. + */ + if (iolock != XFS_IOLOCK_EXCL && + (mapping->nrpages || pos > ip->i_size)) { + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); iolock = XFS_IOLOCK_EXCL; - need_i_mutex = 1; - mutex_lock(&inode->i_mutex); - xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); goto start; } } @@ -687,11 +727,11 @@ start: if (pos > ip->i_size) { ret = -xfs_zero_eof(ip, pos, ip->i_size); if (ret) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); goto out_unlock_internal; } } - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the @@ -708,7 +748,7 @@ start: if ((ioflags & IO_ISDIRECT)) { if (mapping->nrpages) { - WARN_ON(need_i_mutex == 0); + WARN_ON(iolock != XFS_IOLOCK_EXCL); ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); @@ -716,13 +756,10 @@ start: goto out_unlock_internal; } - if (need_i_mutex) { + if (iolock == XFS_IOLOCK_EXCL) { /* demote the lock now the cached pages are gone */ - xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); - mutex_unlock(&inode->i_mutex); - + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; - need_i_mutex = 0; } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); @@ -740,7 +777,7 @@ start: count -= ret; ioflags &= ~IO_ISDIRECT; - xfs_iunlock(ip, iolock); + xfs_rw_iunlock(ip, iolock); goto relock; } } else { @@ -775,14 +812,9 @@ write_retry: loff_t end = pos + ret - 1; int error, error2; - xfs_iunlock(ip, iolock); - if (need_i_mutex) - mutex_unlock(&inode->i_mutex); - + xfs_rw_iunlock(ip, iolock); error = filemap_write_and_wait_range(mapping, pos, end); - if (need_i_mutex) - mutex_lock(&inode->i_mutex); - xfs_ilock(ip, iolock); + xfs_rw_ilock(ip, iolock); error2 = -xfs_file_fsync(file, (file->f_flags & __O_SYNC) ? 0 : 1); @@ -794,10 +826,7 @@ write_retry: out_unlock_internal: xfs_aio_write_newsize_update(ip); - xfs_iunlock(ip, iolock); - out_unlock_mutex: - if (need_i_mutex) - mutex_unlock(&inode->i_mutex); + xfs_rw_iunlock(ip, iolock); return ret; } -- cgit v1.2.3 From f0d26e860b6c496464c5c8165d7df08dabde01fa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:15:36 +1100 Subject: xfs: split direct IO write path from xfs_file_aio_write The current xfs_file_aio_write code is a mess of locking shenanigans to handle the different locking requirements of buffered and direct IO. Start to clean this up by disentangling the direct IO path from the mess. This also removes the failed direct IO fallback path to buffered IO. XFS handles all direct IO cases without needing to fall back to buffered IO, so we can safely remove this unused path. This greatly simplifies the logic and locking needed in the write path. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 179 ++++++++++++++++++++++++++++---------------- 1 file changed, 116 insertions(+), 63 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index b5e13fbb7386..00661fd21fc0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -628,6 +628,116 @@ out_lock: return error; } +/* + * xfs_file_dio_aio_write - handle direct IO writes + * + * Lock the inode appropriately to prepare for and issue a direct IO write. + * By spearating it from the buffered write path we remove all the tricky to + * follow locking changes and looping. + * + * Returns with locks held indicated by @iolock and errors indicated by + * negative return values. + */ +STATIC ssize_t +xfs_file_dio_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos, + size_t ocount, + int *iolock) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t ret = 0; + xfs_fsize_t new_size; + size_t count = ocount; + struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + *iolock = 0; + if ((pos & target->bt_smask) || (count & target->bt_smask)) + return -XFS_ERROR(EINVAL); + + /* + * For direct I/O, if there are cached pages or we're extending + * the file, we need IOLOCK_EXCL until we're sure the bytes at + * the new EOF have been zeroed and/or the cached pages are + * flushed out. + */ + if (mapping->nrpages || pos > ip->i_size) + *iolock = XFS_IOLOCK_EXCL; + else + *iolock = XFS_IOLOCK_SHARED; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + + ret = generic_write_checks(file, &pos, &count, + S_ISBLK(inode->i_mode)); + if (ret) { + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); + *iolock = 0; + return ret; + } + + new_size = pos + count; + if (new_size > ip->i_size) + ip->i_new_size = new_size; + + if (likely(!(file->f_mode & FMODE_NOCMTIME))) + file_update_time(file); + + /* + * If the offset is beyond the size of the file, we have a couple of + * things to do. First, if there is already space allocated we need to + * either create holes or zero the disk or ... + * + * If there is a page where the previous size lands, we need to zero it + * out up to the new size. + */ + if (pos > ip->i_size) { + ret = -xfs_zero_eof(ip, pos, ip->i_size); + if (ret) { + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + return ret; + } + } + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + + /* + * If we're writing the file then make sure to clear the setuid and + * setgid bits if the process is not being run by root. This keeps + * people from modifying setuid and setgid binaries. + */ + ret = file_remove_suid(file); + if (unlikely(ret)) + return ret; + + if (mapping->nrpages) { + WARN_ON(*iolock != XFS_IOLOCK_EXCL); + ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, + FI_REMAPF_LOCKED); + if (ret) + return ret; + } + + if (*iolock == XFS_IOLOCK_EXCL) { + /* demote the lock now the cached pages are gone */ + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + *iolock = XFS_IOLOCK_SHARED; + } + + trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); + ret = generic_file_direct_write(iocb, iovp, + &nr_segs, pos, &iocb->ki_pos, count, ocount); + + /* No fallback to buffered IO on errors for XFS. */ + ASSERT(ret < 0 || ret == count); + return ret; +} + STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, @@ -670,12 +780,12 @@ xfs_file_aio_write( relock: if (ioflags & IO_ISDIRECT) { - iolock = XFS_IOLOCK_SHARED; - } else { - iolock = XFS_IOLOCK_EXCL; + ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); + goto done_io; } + iolock = XFS_IOLOCK_EXCL; -start: xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock); ret = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -684,30 +794,6 @@ start: return ret; } - if (ioflags & IO_ISDIRECT) { - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - - if ((pos & target->bt_smask) || (count & target->bt_smask)) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); - return XFS_ERROR(-EINVAL); - } - - /* - * For direct I/O, if there are cached pages or we're extending - * the file, we need IOLOCK_EXCL until we're sure the bytes at - * the new EOF have been zeroed and/or the cached pages are - * flushed out. Upgrade the I/O lock and start again. - */ - if (iolock != XFS_IOLOCK_EXCL && - (mapping->nrpages || pos > ip->i_size)) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); - iolock = XFS_IOLOCK_EXCL; - goto start; - } - } - new_size = pos + count; if (new_size > ip->i_size) ip->i_new_size = new_size; @@ -746,41 +832,7 @@ start: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; - if ((ioflags & IO_ISDIRECT)) { - if (mapping->nrpages) { - WARN_ON(iolock != XFS_IOLOCK_EXCL); - ret = -xfs_flushinval_pages(ip, - (pos & PAGE_CACHE_MASK), - -1, FI_REMAPF_LOCKED); - if (ret) - goto out_unlock_internal; - } - - if (iolock == XFS_IOLOCK_EXCL) { - /* demote the lock now the cached pages are gone */ - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - iolock = XFS_IOLOCK_SHARED; - } - - trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); - - /* - * direct-io write to a hole: fall through to buffered I/O - * for completing the rest of the request. - */ - if (ret >= 0 && ret != count) { - XFS_STATS_ADD(xs_write_bytes, ret); - - pos += ret; - count -= ret; - - ioflags &= ~IO_ISDIRECT; - xfs_rw_iunlock(ip, iolock); - goto relock; - } - } else { + if (!(ioflags & IO_ISDIRECT)) { int enospc = 0; write_retry: @@ -802,6 +854,7 @@ write_retry: current->backing_dev_info = NULL; +done_io: xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); if (ret <= 0) -- cgit v1.2.3 From 637bbc75d9fda57c7bc77ce5ee37e29a77a0520d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:17:30 +1100 Subject: xfs: split buffered IO write path from xfs_file_aio_write Complete the split of the different write IO paths by splitting the buffered IO write path out of xfs_file_aio_write(). This makes the different mechanisms of the write patchs easier to follow. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 146 +++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 77 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 00661fd21fc0..e2bcf51d292e 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -739,58 +739,31 @@ xfs_file_dio_aio_write( } STATIC ssize_t -xfs_file_aio_write( +xfs_file_buffered_aio_write( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, - loff_t pos) + loff_t pos, + size_t ocount, + int *iolock) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - ssize_t ret = 0; - int ioflags = 0; + ssize_t ret; + int enospc = 0; xfs_fsize_t new_size; - int iolock; - size_t ocount = 0, count; - - XFS_STATS_INC(xs_write_calls); - - BUG_ON(iocb->ki_pos != pos); - - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - if (file->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - - count = ocount; - if (count == 0) - return 0; - - xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; + size_t count = ocount; -relock: - if (ioflags & IO_ISDIRECT) { - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, - ocount, &iolock); - goto done_io; - } - iolock = XFS_IOLOCK_EXCL; + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock); ret = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (ret) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); + *iolock = 0; return ret; } @@ -798,67 +771,86 @@ relock: if (new_size > ip->i_size) ip->i_new_size = new_size; - if (likely(!(ioflags & IO_INVIS))) + if (likely(!(file->f_mode & FMODE_NOCMTIME))) file_update_time(file); - /* - * If the offset is beyond the size of the file, we have a couple - * of things to do. First, if there is already space allocated - * we need to either create holes or zero the disk or ... - * - * If there is a page where the previous size lands, we need - * to zero it out up to the new size. - */ - if (pos > ip->i_size) { ret = -xfs_zero_eof(ip, pos, ip->i_size); if (ret) { xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - goto out_unlock_internal; + return ret; } } xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - /* - * If we're writing the file then make sure to clear the - * setuid and setgid bits if the process is not being run - * by root. This keeps people from modifying setuid and - * setgid binaries. - */ ret = file_remove_suid(file); if (unlikely(ret)) - goto out_unlock_internal; + return ret; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; - if (!(ioflags & IO_ISDIRECT)) { - int enospc = 0; - write_retry: - trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); - ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, ret); - /* - * if we just got an ENOSPC, flush the inode now we - * aren't holding any page locks and retry *once* - */ - if (ret == -ENOSPC && !enospc) { - ret = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); - if (ret) - goto out_unlock_internal; - enospc = 1; - goto write_retry; - } + trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); + ret = generic_file_buffered_write(iocb, iovp, nr_segs, + pos, &iocb->ki_pos, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we aren't holding any + * page locks and retry *once* + */ + if (ret == -ENOSPC && !enospc) { + ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); + if (ret) + return ret; + enospc = 1; + goto write_retry; } - current->backing_dev_info = NULL; + return ret; +} + +STATIC ssize_t +xfs_file_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + ssize_t ret; + int iolock; + size_t ocount = 0; + + XFS_STATS_INC(xs_write_calls); + + BUG_ON(iocb->ki_pos != pos); + + ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); + if (ret) + return ret; + + if (ocount == 0) + return 0; + + xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + if (unlikely(file->f_flags & O_DIRECT)) + ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); + else + ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); -done_io: xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); if (ret <= 0) - goto out_unlock_internal; + goto out_unlock; /* Handle various SYNC-type writes */ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { @@ -877,7 +869,7 @@ done_io: ret = error2; } - out_unlock_internal: +out_unlock: xfs_aio_write_newsize_update(ip); xfs_rw_iunlock(ip, iolock); return ret; -- cgit v1.2.3 From 4d8d15812fd9bc96d0da11467d23e0373feae933 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:23:42 +1100 Subject: xfs: factor common write setup code The buffered IO and direct IO write paths share a common set of checks and limiting code prior to issuing the write. Factor that into a common helper function. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 123 ++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 67 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index e2bcf51d292e..5863dd8f448c 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -628,6 +628,58 @@ out_lock: return error; } +/* + * Common pre-write limit and setup checks. + * + * Returns with iolock held according to @iolock. + */ +STATIC ssize_t +xfs_file_aio_write_checks( + struct file *file, + loff_t *pos, + size_t *count, + int *iolock) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t new_size; + int error = 0; + + error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); + if (error) { + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); + *iolock = 0; + return error; + } + + new_size = *pos + *count; + if (new_size > ip->i_size) + ip->i_new_size = new_size; + + if (likely(!(file->f_mode & FMODE_NOCMTIME))) + file_update_time(file); + + /* + * If the offset is beyond the size of the file, we need to zero any + * blocks that fall between the existing EOF and the start of this + * write. + */ + if (*pos > ip->i_size) + error = -xfs_zero_eof(ip, *pos, ip->i_size); + + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + return error; + + /* + * If we're writing the file then make sure to clear the setuid and + * setgid bits if the process is not being run by root. This keeps + * people from modifying setuid and setgid binaries. + */ + return file_remove_suid(file); + +} + /* * xfs_file_dio_aio_write - handle direct IO writes * @@ -653,7 +705,6 @@ xfs_file_dio_aio_write( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - xfs_fsize_t new_size; size_t count = ocount; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -674,45 +725,8 @@ xfs_file_dio_aio_write( *iolock = XFS_IOLOCK_SHARED; xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - ret = generic_write_checks(file, &pos, &count, - S_ISBLK(inode->i_mode)); - if (ret) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); - *iolock = 0; - return ret; - } - - new_size = pos + count; - if (new_size > ip->i_size) - ip->i_new_size = new_size; - - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); - - /* - * If the offset is beyond the size of the file, we have a couple of - * things to do. First, if there is already space allocated we need to - * either create holes or zero the disk or ... - * - * If there is a page where the previous size lands, we need to zero it - * out up to the new size. - */ - if (pos > ip->i_size) { - ret = -xfs_zero_eof(ip, pos, ip->i_size); - if (ret) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - return ret; - } - } - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - - /* - * If we're writing the file then make sure to clear the setuid and - * setgid bits if the process is not being run by root. This keeps - * people from modifying setuid and setgid binaries. - */ - ret = file_remove_suid(file); - if (unlikely(ret)) + ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); + if (ret) return ret; if (mapping->nrpages) { @@ -753,38 +767,13 @@ xfs_file_buffered_aio_write( struct xfs_inode *ip = XFS_I(inode); ssize_t ret; int enospc = 0; - xfs_fsize_t new_size; size_t count = ocount; *iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - ret = generic_write_checks(file, &pos, &count, - S_ISBLK(inode->i_mode)); - if (ret) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); - *iolock = 0; - return ret; - } - - new_size = pos + count; - if (new_size > ip->i_size) - ip->i_new_size = new_size; - - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); - - if (pos > ip->i_size) { - ret = -xfs_zero_eof(ip, pos, ip->i_size); - if (ret) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - return ret; - } - } - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - - ret = file_remove_suid(file); - if (unlikely(ret)) + ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); + if (ret) return ret; /* We can write back this queue in page reclaim */ -- cgit v1.2.3 From eda77982729b7170bdc9e8855f0682edf322d277 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:22:40 +1100 Subject: xfs: serialise unaligned direct IOs When two concurrent unaligned, non-overlapping direct IOs are issued to the same block, the direct Io layer will race to zero the block. The result is that one of the concurrent IOs will overwrite data written by the other IO with zeros. This is demonstrated by the xfsqa test 240. To avoid this problem, serialise all unaligned direct IOs to an inode with a big hammer. We need a big hammer approach as we need to serialise AIO as well, so we can't just block writes on locks. Hence, the big hammer is calling xfs_ioend_wait() while holding out other unaligned direct IOs from starting. We don't bother trying to serialised aligned vs unaligned IOs as they are overlapping IO and the result of concurrent overlapping IOs is undefined - the result of either IO is a valid result so we let them race. Hence we only penalise unaligned IO, which already has a major overhead compared to aligned IO so this isn't a major problem. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_file.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5863dd8f448c..ef51eb43e137 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -684,9 +684,24 @@ xfs_file_aio_write_checks( * xfs_file_dio_aio_write - handle direct IO writes * * Lock the inode appropriately to prepare for and issue a direct IO write. - * By spearating it from the buffered write path we remove all the tricky to + * By separating it from the buffered write path we remove all the tricky to * follow locking changes and looping. * + * If there are cached pages or we're extending the file, we need IOLOCK_EXCL + * until we're sure the bytes at the new EOF have been zeroed and/or the cached + * pages are flushed out. + * + * In most cases the direct IO writes will be done holding IOLOCK_SHARED + * allowing them to be done in parallel with reads and other direct IO writes. + * However, if the IO is not aligned to filesystem blocks, the direct IO layer + * needs to do sub-block zeroing and that requires serialisation against other + * direct IOs to the same block. In this case we need to serialise the + * submission of the unaligned IOs so that we don't get racing block zeroing in + * the dio layer. To avoid the problem with aio, we also need to wait for + * outstanding IOs to complete so that unwritten extent conversion is completed + * before we try to map the overlapping block. This is currently implemented by + * hitting it with a big hammer (i.e. xfs_ioend_wait()). + * * Returns with locks held indicated by @iolock and errors indicated by * negative return values. */ @@ -706,6 +721,7 @@ xfs_file_dio_aio_write( struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; size_t count = ocount; + int unaligned_io = 0; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -713,13 +729,10 @@ xfs_file_dio_aio_write( if ((pos & target->bt_smask) || (count & target->bt_smask)) return -XFS_ERROR(EINVAL); - /* - * For direct I/O, if there are cached pages or we're extending - * the file, we need IOLOCK_EXCL until we're sure the bytes at - * the new EOF have been zeroed and/or the cached pages are - * flushed out. - */ - if (mapping->nrpages || pos > ip->i_size) + if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) + unaligned_io = 1; + + if (unaligned_io || mapping->nrpages || pos > ip->i_size) *iolock = XFS_IOLOCK_EXCL; else *iolock = XFS_IOLOCK_SHARED; @@ -737,8 +750,13 @@ xfs_file_dio_aio_write( return ret; } - if (*iolock == XFS_IOLOCK_EXCL) { - /* demote the lock now the cached pages are gone */ + /* + * If we are doing unaligned IO, wait for all other IO to drain, + * otherwise demote the lock if we had to flush cached pages + */ + if (unaligned_io) + xfs_ioend_wait(ip); + else if (*iolock == XFS_IOLOCK_EXCL) { xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); *iolock = XFS_IOLOCK_SHARED; } -- cgit v1.2.3 From 4c6493785a1ea9c3b3522f199760a90a30e1626c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Jun 2010 14:22:37 -0400 Subject: nfsd4: modify session list under cl_lock We want to traverse this from the callback code. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b583e4e800ab..3cf9900d5f32 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -771,7 +771,9 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n idx = hash_sessionid(&new->se_sessionid); spin_lock(&client_lock); list_add(&new->se_hash, &sessionid_hashtbl[idx]); + spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); + spin_unlock(&clp->cl_lock); spin_unlock(&client_lock); status = nfsd4_new_conn(rqstp, new); @@ -819,7 +821,9 @@ static void unhash_session(struct nfsd4_session *ses) { list_del(&ses->se_hash); + spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); + spin_unlock(&ses->se_client->cl_lock); } /* must be called under the client_lock */ @@ -925,8 +929,10 @@ unhash_client_locked(struct nfs4_client *clp) mark_client_expired(clp); list_del(&clp->cl_lru); + spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) list_del_init(&ses->se_hash); + spin_unlock(&clp->cl_lock); } static void -- cgit v1.2.3 From 1d1bc8f2074f0b728dfca2a3c16f2f5a3f298ffc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 4 Oct 2010 23:12:59 -0400 Subject: nfsd4: support BIND_CONN_TO_SESSION Basic xdr and processing for BIND_CONN_TO_SESSION. This adds a connection to the list of connections associated with a session. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 9 ++++++-- fs/nfsd/nfs4state.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++------ fs/nfsd/nfs4xdr.c | 34 ++++++++++++++++++++++++++++-- fs/nfsd/state.h | 5 +++++ fs/nfsd/xdr4.h | 2 ++ include/linux/nfs4.h | 3 +++ 6 files changed, 102 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index fd6694b49e1c..db52546143d1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1004,8 +1004,8 @@ static const char *nfsd4_op_name(unsigned opnum); * Also note, enforced elsewhere: * - SEQUENCE other than as first op results in * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().) - * - BIND_CONN_TO_SESSION must be the only op in its compound - * (Will be enforced in nfsd4_bind_conn_to_session().) + * - BIND_CONN_TO_SESSION must be the only op in its compound. + * (Enforced in nfsd4_bind_conn_to_session().) * - DESTROY_SESSION must be the final operation in a compound, if * sessionid's in SEQUENCE and DESTROY_SESSION are the same. * (Enforced in nfsd4_destroy_session().) @@ -1326,6 +1326,11 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_EXCHANGE_ID", }, + [OP_BIND_CONN_TO_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_name = "OP_BIND_CONN_TO_SESSION", + }, [OP_CREATE_SESSION] = { .op_func = (nfsd4op_func)nfsd4_create_session, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cf9900d5f32..956174f488a7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -679,15 +679,12 @@ static int nfsd4_register_conn(struct nfsd4_conn *conn) return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); } -static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) +static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir) { struct nfsd4_conn *conn; - u32 flags = NFS4_CDFC4_FORE; int ret; - if (ses->se_flags & SESSION4_BACK_CHAN) - flags |= NFS4_CDFC4_BACK; - conn = alloc_conn(rqstp, flags); + conn = alloc_conn(rqstp, dir); if (!conn) return nfserr_jukebox; nfsd4_hash_conn(conn, ses); @@ -698,6 +695,17 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) return nfs_ok; } +static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses) +{ + u32 dir = NFS4_CDFC4_FORE; + + if (ses->se_flags & SESSION4_BACK_CHAN) + dir |= NFS4_CDFC4_BACK; + + return nfsd4_new_conn(rqstp, ses, dir); +} + +/* must be called under client_lock */ static void nfsd4_del_conns(struct nfsd4_session *s) { struct nfs4_client *clp = s->se_client; @@ -776,7 +784,7 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n spin_unlock(&clp->cl_lock); spin_unlock(&client_lock); - status = nfsd4_new_conn(rqstp, new); + status = nfsd4_new_conn_from_crses(rqstp, new); /* whoops: benny points out, status is ignored! (err, or bogus) */ if (status) { free_session(&new->se_ref); @@ -1597,6 +1605,45 @@ static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +static __be32 nfsd4_map_bcts_dir(u32 *dir) +{ + switch (*dir) { + case NFS4_CDFC4_FORE: + case NFS4_CDFC4_BACK: + return nfs_ok; + case NFS4_CDFC4_FORE_OR_BOTH: + case NFS4_CDFC4_BACK_OR_BOTH: + *dir = NFS4_CDFC4_BOTH; + return nfs_ok; + }; + return nfserr_inval; +} + +__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_bind_conn_to_session *bcts) +{ + __be32 status; + + if (!nfsd4_last_compound_op(rqstp)) + return nfserr_not_only_op; + spin_lock(&client_lock); + cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid); + /* Sorta weird: we only need the refcnt'ing because new_conn acquires + * client_lock iself: */ + if (cstate->session) { + nfsd4_get_session(cstate->session); + atomic_inc(&cstate->session->se_client->cl_refcount); + } + spin_unlock(&client_lock); + if (!cstate->session) + return nfserr_badsession; + + status = nfsd4_map_bcts_dir(&bcts->dir); + nfsd4_new_conn(rqstp, cstate->session, bcts->dir); + return nfs_ok; +} + static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) { if (!session) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ca3786905dec..4ff2c9e0b276 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -421,6 +421,21 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_TAIL; } +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +{ + DECODE_HEAD; + u32 dummy; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); + COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + READ32(bcts->dir); + /* XXX: Perhaps Tom Tucker could help us figure out how we + * should be using ctsa_use_conn_in_rdma_mode: */ + READ32(dummy); + + DECODE_TAIL; +} + static __be32 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { @@ -1359,7 +1374,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { /* new operations for NFSv4.1 */ [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, @@ -2383,6 +2398,21 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ return nfserr; } +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) +{ + __be32 *p; + + if (!nfserr) { + RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); + WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(bcts->dir); + /* XXX: ? */ + WRITE32(0); + ADJUST_ARGS(); + } + return nfserr; +} + static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { @@ -3174,7 +3204,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { /* NFSv4.1 operations */ [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cf6dc83fd545..442f6d8e024c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -148,6 +148,11 @@ struct nfsd4_create_session { u32 gid; }; +struct nfsd4_bind_conn_to_session { + struct nfs4_sessionid sessionid; + u32 dir; +}; + /* The single slot clientid cache structure */ struct nfsd4_clid_slot { u32 sl_seqid; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 799c30c3b495..3a7aa4d98c1f 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -427,6 +427,7 @@ struct nfsd4_op { /* NFSv4.1 */ struct nfsd4_exchange_id exchange_id; + struct nfsd4_bind_conn_to_session bind_conn_to_session; struct nfsd4_create_session create_session; struct nfsd4_destroy_session destroy_session; struct nfsd4_sequence sequence; @@ -523,6 +524,7 @@ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); +extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *); extern __be32 nfsd4_create_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_create_session *); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 26afa3021ed6..a5918938e41e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -65,6 +65,9 @@ #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 +#define NFS4_CDFC4_BOTH 0x3 +#define NFS4_CDFC4_FORE_OR_BOTH 0x3 +#define NFS4_CDFC4_BACK_OR_BOTH 0x7 #define NFS4_SET_TO_SERVER_TIME 0 #define NFS4_SET_TO_CLIENT_TIME 1 -- cgit v1.2.3 From dcbeaa68dbbdacbbb330a86c7fc95a28473fc209 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Jun 2010 17:25:45 -0400 Subject: nfsd4: allow backchannel recovery Now that we have a list of connections to choose from, we can teach the callback code to just pick a suitable connection and use that, instead of insisting on forever using the connection that the first create_session was sent with. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 36 +++++++++++++++++++++++++++++++++--- fs/nfsd/nfs4state.c | 16 ++++++++++------ 2 files changed, 43 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index dd183af24fe6..18b740bd29ac 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -473,8 +473,7 @@ static int max_cb_time(void) /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cl_cb_set an atomic? */ -static int setup_callback_client(struct nfs4_client *clp, - struct nfs4_cb_conn *conn) +static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -501,6 +500,10 @@ static int setup_callback_client(struct nfs4_client *clp, args.protocol = XPRT_TRANSPORT_TCP; clp->cl_cb_ident = conn->cb_ident; } else { + if (!conn->cb_xprt) + return -EINVAL; + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; + clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; @@ -756,10 +759,27 @@ static void nfsd4_release_cb(struct nfsd4_callback *cb) cb->cb_ops->rpc_release(cb); } +/* requires cl_lock: */ +static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) +{ + struct nfsd4_session *s; + struct nfsd4_conn *c; + + list_for_each_entry(s, &clp->cl_sessions, se_perclnt) { + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_flags & NFS4_CDFC4_BACK) + return c; + } + } + return NULL; +} + static void nfsd4_process_cb_update(struct nfsd4_callback *cb) { struct nfs4_cb_conn conn; struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = NULL; + struct nfsd4_conn *c; int err; /* @@ -770,6 +790,10 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; } + if (clp->cl_cb_conn.cb_xprt) { + svc_xprt_put(clp->cl_cb_conn.cb_xprt); + clp->cl_cb_conn.cb_xprt = NULL; + } if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) return; spin_lock(&clp->cl_lock); @@ -780,9 +804,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) BUG_ON(!clp->cl_cb_flags); clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); + c = __nfsd4_find_backchannel(clp); + if (c) { + svc_xprt_get(c->cn_xprt); + conn.cb_xprt = c->cn_xprt; + ses = c->cn_session; + } spin_unlock(&clp->cl_lock); - err = setup_callback_client(clp, &conn); + err = setup_callback_client(clp, &conn, ses); if (err) warn_no_callback_path(clp, err); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 956174f488a7..290370bc9ae7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -642,6 +642,7 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) free_conn(c); } spin_unlock(&clp->cl_lock); + /* XXX: mark callback for update, probe callback */ } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) @@ -790,16 +791,19 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n free_session(&new->se_ref); return NULL; } - if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) { + if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); - - clp->cl_cb_session = new; - clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt; - svc_xprt_get(rqstp->rq_xprt); + /* + * This is a little silly; with sessions there's no real + * use for the callback address. Use the peer address + * as a reasonable default for now, but consider fixing + * the rpc client not to require an address in the + * future: + */ rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); - nfsd4_probe_callback(clp); } + nfsd4_probe_callback(clp); return new; } -- cgit v1.2.3 From 77a3569d6c4e14e89fa628df383b6dccc0cce6be Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 30 Apr 2010 18:51:44 -0400 Subject: nfsd4: keep finer-grained callback status Distinguish between when the callback channel is known to be down, and when it is not yet confirmed. This will be useful in the 4.1 case. Also, we don't seem to be using the fact that this field is atomic. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 26 ++++++++++++++------------ fs/nfsd/nfs4state.c | 8 ++++---- fs/nfsd/state.h | 5 ++++- 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 18b740bd29ac..d32f49d6ca2c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -470,8 +470,6 @@ static int max_cb_time(void) return max(nfsd4_lease/10, (time_t)1) * HZ; } -/* Reference counting, callback cleanup, etc., all look racy as heck. - * And why is cl_cb_set an atomic? */ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { @@ -526,14 +524,20 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) (int)clp->cl_name.len, clp->cl_name.data, reason); } +static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) +{ + clp->cl_cb_state = NFSD4_CB_DOWN; + warn_no_callback_path(clp, reason); +} + static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); if (task->tk_status) - warn_no_callback_path(clp, task->tk_status); + nfsd4_mark_cb_down(clp, task->tk_status); else - atomic_set(&clp->cl_cb_set, 1); + clp->cl_cb_state = NFSD4_CB_UP; } static const struct rpc_call_ops nfsd4_cb_probe_ops = { @@ -579,14 +583,15 @@ static void do_probe_callback(struct nfs4_client *clp) */ void nfsd4_probe_callback(struct nfs4_client *clp) { + /* XXX: atomicity? Also, should we be using cl_cb_flags? */ + clp->cl_cb_state = NFSD4_CB_UNKNOWN; set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); do_probe_callback(clp); } void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { - BUG_ON(atomic_read(&clp->cl_cb_set)); - + clp->cl_cb_state = NFSD4_CB_UNKNOWN; spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); spin_unlock(&clp->cl_lock); @@ -693,8 +698,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) break; default: /* Network partition? */ - atomic_set(&clp->cl_cb_set, 0); - warn_no_callback_path(clp, task->tk_status); + nfsd4_mark_cb_down(clp, task->tk_status); if (current_rpc_client != task->tk_client) { /* queue a callback on the new connection: */ atomic_inc(&dp->dl_count); @@ -707,10 +711,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) task->tk_status = 0; rpc_restart_call_prepare(task); return; - } else { - atomic_set(&clp->cl_cb_set, 0); - warn_no_callback_path(clp, task->tk_status); - } + } else + nfsd4_mark_cb_down(clp, task->tk_status); } static void nfsd4_cb_recall_release(void *calldata) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 290370bc9ae7..919ad25660d6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1071,7 +1071,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_refcount, 0); - atomic_set(&clp->cl_cb_set, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -2003,7 +2003,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) status = nfserr_clid_inuse; else { - atomic_set(&conf->cl_cb_set, 0); nfsd4_change_callback(conf, &unconf->cl_cb_conn); nfsd4_probe_callback(conf); expire_client(unconf); @@ -2633,7 +2632,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta { struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; - int cb_up = atomic_read(&sop->so_client->cl_cb_set); + /* XXX: or unknown and nfsv4.1: */ + int cb_up = (sop->so_client->cl_cb_state == NFSD4_CB_UP); struct file_lock *fl; int status, flag = 0; @@ -2823,7 +2823,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) - && !atomic_read(&clp->cl_cb_set)) + && clp->cl_cb_state != NFSD4_CB_UP) goto out; status = nfs_ok; out: diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 442f6d8e024c..32ff615c36f4 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -242,7 +242,10 @@ struct nfs4_client { unsigned long cl_cb_flags; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; - atomic_t cl_cb_set; +#define NFSD4_CB_UP 0 +#define NFSD4_CB_UNKNOWN 1 +#define NFSD4_CB_DOWN 2 + int cl_cb_state; struct nfsd4_callback cl_cb_null; struct nfsd4_session *cl_cb_session; -- cgit v1.2.3 From 0d7bb71907546b2baf15d78edd3e508e12963dbf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 18 Nov 2010 08:30:33 -0500 Subject: nfsd4: set sequence flag when backchannel is down Implement the SEQ4_STATUS_CB_PATH_DOWN flag. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++++- fs/nfsd/nfs4xdr.c | 8 ++------ fs/nfsd/xdr4.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 919ad25660d6..15bd1cc77de7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1800,8 +1800,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, out: /* Hold a session reference until done processing the compound. */ if (cstate->session) { + struct nfs4_client *clp = session->se_client; + nfsd4_get_session(cstate->session); - atomic_inc(&session->se_client->cl_refcount); + atomic_inc(&clp->cl_refcount); + if (clp->cl_cb_state == NFSD4_CB_DOWN) + seq->status_flags |= SEQ4_STATUS_CB_PATH_DOWN; } kfree(conn); spin_unlock(&client_lock); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4ff2c9e0b276..956629b9cdc9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3137,13 +3137,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, WRITE32(seq->seqid); WRITE32(seq->slotid); WRITE32(seq->maxslots); - /* - * FIXME: for now: - * target_maxslots = maxslots - * status_flags = 0 - */ + /* For now: target_maxslots = maxslots */ WRITE32(seq->maxslots); - WRITE32(0); + WRITE32(seq->status_flags); ADJUST_ARGS(); resp->cstate.datap = p; /* DRC cache data pointer */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3a7aa4d98c1f..366401e1a536 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -378,8 +378,8 @@ struct nfsd4_sequence { u32 cachethis; /* request */ #if 0 u32 target_maxslots; /* response */ - u32 status_flags; /* response */ #endif /* not yet */ + u32 status_flags; /* response */ }; struct nfsd4_destroy_session { -- cgit v1.2.3 From eea4980660bc204bb9d11bb3bf2b1bde5fd5175f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 18 Nov 2010 08:34:12 -0500 Subject: nfsd4: re-probe callback on connection loss This makes sure we set the sequence flag when necessary. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 15bd1cc77de7..b24f19d4187a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -642,7 +642,7 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) free_conn(c); } spin_unlock(&clp->cl_lock); - /* XXX: mark callback for update, probe callback */ + nfsd4_probe_callback(clp); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) -- cgit v1.2.3 From 84f5f7ccc59e628fc8754c0a837fd7e9559711ac Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 9 Dec 2010 15:52:19 -0500 Subject: nfsd4: make sure sequence flags are set after destroy_session If this loses any backchannel, make sure we have a chance to notice that and set the sequence flags. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 6 ++++++ fs/nfsd/nfs4state.c | 3 +-- fs/nfsd/state.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d32f49d6ca2c..cb002dce5630 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -589,6 +589,12 @@ void nfsd4_probe_callback(struct nfs4_client *clp) do_probe_callback(clp); } +void nfsd4_probe_callback_sync(struct nfs4_client *clp) +{ + nfsd4_probe_callback(clp); + flush_workqueue(callback_wq); +} + void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { clp->cl_cb_state = NFSD4_CB_UNKNOWN; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b24f19d4187a..00a50b8ac878 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1686,8 +1686,7 @@ nfsd4_destroy_session(struct svc_rqst *r, spin_unlock(&client_lock); nfs4_lock_state(); - /* wait for callbacks */ - nfsd4_shutdown_callback(ses->se_client); + nfsd4_probe_callback_sync(ses->se_client); nfs4_unlock_state(); nfsd4_del_conns(ses); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 32ff615c36f4..4e5bdfd9169c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -464,6 +464,7 @@ extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void nfs4_free_stateowner(struct kref *kref); extern int set_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); -- cgit v1.2.3 From 229b2a0839870d0d4f91ad3b24ec13c57bbd50a0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 10 Dec 2010 17:37:44 -0500 Subject: nfsd4: add helper function to run callbacks Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cb002dce5630..fff96dc7704e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -560,6 +560,11 @@ int set_callback_cred(void) static struct workqueue_struct *callback_wq; +static void run_nfsd4_cb(struct nfsd4_callback *cb) +{ + queue_work(callback_wq, &cb->cb_work); +} + static void do_probe_callback(struct nfs4_client *clp) { struct nfsd4_callback *cb = &clp->cl_cb_null; @@ -574,7 +579,7 @@ static void do_probe_callback(struct nfs4_client *clp) cb->cb_ops = &nfsd4_cb_probe_ops; - queue_work(callback_wq, &cb->cb_work); + run_nfsd4_cb(cb); } /* @@ -859,5 +864,5 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) cb->cb_ops = &nfsd4_cb_recall_ops; dp->dl_retries = 1; - queue_work(callback_wq, &dp->dl_recall.cb_work); + run_nfsd4_cb(&dp->dl_recall); } -- cgit v1.2.3 From 14a24e99f4f506265b634c1cd04eca6394f49dbc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 10 Dec 2010 19:02:49 -0500 Subject: nfsd4: give out delegations more quickly in 4.1 case Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 00a50b8ac878..408957cf6016 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2627,6 +2627,19 @@ nfs4_set_claim_prev(struct nfsd4_open *open) open->op_stateowner->so_client->cl_firststate = 1; } +/* Should we give out recallable state?: */ +static bool nfsd4_cb_channel_good(struct nfs4_client *clp) +{ + if (clp->cl_cb_state == NFSD4_CB_UP) + return true; + /* + * In the sessions case, since we don't have to establish a + * separate connection for callbacks, we assume it's OK + * until we hear otherwise: + */ + return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; +} + /* * Attempt to hand out a delegation. */ @@ -2635,11 +2648,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta { struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; - /* XXX: or unknown and nfsv4.1: */ - int cb_up = (sop->so_client->cl_cb_state == NFSD4_CB_UP); + int cb_up; struct file_lock *fl; int status, flag = 0; + cb_up = nfsd4_cb_channel_good(sop->so_client); flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { -- cgit v1.2.3 From 3ff3600e7eab16301e824293e8f49b9990bd4641 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Jan 2011 16:37:51 -0500 Subject: nfsd4: simplify nfsd4_cb_prepare Remove handling for a nonexistant case (status && !-EAGAIN). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index fff96dc7704e..69955e98e086 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -613,24 +613,14 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) * If the slot is available, then mark it busy. Otherwise, set the * thread for sleeping on the callback RPC wait queue. */ -static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, - struct rpc_task *task) +static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) { - u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data; - int status = 0; - - dprintk("%s: %u:%u:%u:%u\n", __func__, - ptr[0], ptr[1], ptr[2], ptr[3]); - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); dprintk("%s slot is busy\n", __func__); - status = -EAGAIN; - goto out; + return false; } -out: - dprintk("%s status=%d\n", __func__, status); - return status; + return true; } /* @@ -643,19 +633,11 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; u32 minorversion = clp->cl_minorversion; - int status = 0; cb->cb_minorversion = minorversion; if (minorversion) { - status = nfsd41_cb_setup_sequence(clp, task); - if (status) { - if (status != -EAGAIN) { - /* terminate rpc task */ - task->tk_status = status; - task->tk_action = NULL; - } + if (!nfsd41_cb_get_slot(clp, task)) return; - } } rpc_call_start(task); } -- cgit v1.2.3 From 5ce8ba25d657a71d6d8cdb05a2b90c5ae7debfda Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Jan 2011 16:44:41 -0500 Subject: nfsd4: allow restarting callbacks If we lose the backchannel and then the client repairs the problem, resend any callbacks. We use a new cb_done flag to track whether there is still work to be done for the callback or whether it can be destroyed with the rpc. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 34 ++++++++++++++++++++++++++++------ fs/nfsd/nfs4state.c | 1 + fs/nfsd/state.h | 3 +++ 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 69955e98e086..f1d9dd45553a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -639,6 +639,10 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) if (!nfsd41_cb_get_slot(clp, task)) return; } + cb->cb_done = false; + spin_lock(&clp->cl_lock); + list_add(&cb->cb_per_client, &clp->cl_callbacks); + spin_unlock(&clp->cl_lock); rpc_call_start(task); } @@ -681,8 +685,11 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) return; } + if (cb->cb_done) + return; switch (task->tk_status) { case 0: + cb->cb_done = true; return; case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: @@ -695,7 +702,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) if (current_rpc_client != task->tk_client) { /* queue a callback on the new connection: */ atomic_inc(&dp->dl_count); - nfsd4_cb_recall(dp); + run_nfsd4_cb(&dp->dl_recall); return; } } @@ -704,16 +711,23 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) task->tk_status = 0; rpc_restart_call_prepare(task); return; - } else - nfsd4_mark_cb_down(clp, task->tk_status); + } + nfsd4_mark_cb_down(clp, task->tk_status); + cb->cb_done = true; } static void nfsd4_cb_recall_release(void *calldata) { struct nfsd4_callback *cb = calldata; + struct nfs4_client *clp = cb->cb_clp; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - nfs4_put_delegation(dp); + if (cb->cb_done) { + spin_lock(&clp->cl_lock); + list_del(&cb->cb_per_client); + spin_unlock(&clp->cl_lock); + nfs4_put_delegation(dp); + } } static const struct rpc_call_ops nfsd4_cb_recall_ops = { @@ -808,8 +822,13 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) spin_unlock(&clp->cl_lock); err = setup_callback_client(clp, &conn, ses); - if (err) + if (err) { warn_no_callback_path(clp, err); + return; + } + /* Yay, the callback channel's back! Restart any callbacks: */ + list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) + run_nfsd4_cb(cb); } void nfsd4_do_callback_rpc(struct work_struct *w) @@ -834,10 +853,11 @@ void nfsd4_do_callback_rpc(struct work_struct *w) void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; + struct nfs4_client *clp = dp->dl_client; dp->dl_retries = 1; cb->cb_op = dp; - cb->cb_clp = dp->dl_client; + cb->cb_clp = clp; cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; @@ -846,5 +866,7 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) cb->cb_ops = &nfsd4_cb_recall_ops; dp->dl_retries = 1; + cb->cb_done = true; + run_nfsd4_cb(&dp->dl_recall); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 408957cf6016..6e1f9aadd439 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1077,6 +1077,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); spin_lock_init(&clp->cl_lock); INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); clp->cl_time = get_seconds(); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4e5bdfd9169c..3074656ba7bf 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -68,10 +68,12 @@ typedef struct { struct nfsd4_callback { void *cb_op; struct nfs4_client *cb_clp; + struct list_head cb_per_client; u32 cb_minorversion; struct rpc_message cb_msg; const struct rpc_call_ops *cb_ops; struct work_struct cb_work; + bool cb_done; }; struct nfs4_delegation { @@ -248,6 +250,7 @@ struct nfs4_client { int cl_cb_state; struct nfsd4_callback cl_cb_null; struct nfsd4_session *cl_cb_session; + struct list_head cl_callbacks; /* list of in-progress callbacks */ /* for all client information that callback code might need: */ spinlock_t cl_lock; -- cgit v1.2.3 From c58efdb442bb49dea1d148f207560c41918c1bf4 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 4 Jan 2011 04:49:29 +0000 Subject: xfs: ensure log covering transactions are synchronous To ensure the log is covered and the filesystem idles correctly, we need to ensure that dummy transactions hit the disk and do not stay pinned in memory. If the superblock is pinned in memory, it can't be flushed so the log covering cannot make progress. The result is dependent on timing - more oftent han not we continue to issues a log covering transaction every 36s rather than idling after ~90s. Fix this by making the log covering transaction synchronous. To avoid additional log force from xfssyncd, make the log covering transaction take the place of the existing log force in the xfssyncd background sync process. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 11 ++++++----- fs/xfs/xfs_fsops.c | 10 +++++----- fs/xfs/xfs_fsops.h | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c51faaa5e291..af32f375ca96 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1413,7 +1413,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp, SYNC_WAIT); + return -xfs_fs_log_dummy(mp); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a02480de9759..e22f0057d21f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -362,7 +362,7 @@ xfs_quiesce_data( /* mark the log as covered if needed */ if (xfs_log_need_covered(mp)) - error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); + error2 = xfs_fs_log_dummy(mp); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -503,13 +503,14 @@ xfs_sync_worker( int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ - error = xfs_qm_sync(mp, SYNC_TRYLOCK); if (mp->m_super->s_frozen == SB_UNFROZEN && xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp, 0); + error = xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + xfs_reclaim_inodes(mp, 0); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f56d30e8040c..cec89dd5d7d2 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -612,12 +612,13 @@ out: * * We cannot use an inode here for this - that will push dirty state back up * into the VFS and then periodic inode flushing will prevent log covering from - * making progress. Hence we log a field in the superblock instead. + * making progress. Hence we log a field in the superblock instead and use a + * synchronous transaction to ensure the superblock is immediately unpinned + * and can be written back. */ int xfs_fs_log_dummy( - xfs_mount_t *mp, - int flags) + xfs_mount_t *mp) { xfs_trans_t *tp; int error; @@ -632,8 +633,7 @@ xfs_fs_log_dummy( /* log the UUID because it is an unchanging field */ xfs_mod_sb(tp, XFS_SB_UUID); - if (flags & SYNC_WAIT) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); return xfs_trans_commit(tp, 0); } diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index a786c5212c1e..1b6a98b66886 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); +extern int xfs_fs_log_dummy(struct xfs_mount *mp); #endif /* __XFS_FSOPS_H__ */ -- cgit v1.2.3 From a46db60834883c1c8c665d7fcc7b4ab66f5966fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Jan 2011 13:02:04 +0000 Subject: xfs: add FITRIM support Allow manual discards from userspace using the FITRIM ioctl. This is not intended to be run during normal workloads, as the freepsace btree walks can cause large performance degradation. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/Makefile | 1 + fs/xfs/linux-2.6/xfs_discard.c | 191 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_discard.h | 8 ++ fs/xfs/linux-2.6/xfs_ioctl.c | 3 + fs/xfs/linux-2.6/xfs_trace.h | 33 +++++++ fs/xfs/xfs_alloc.c | 10 +-- fs/xfs/xfs_alloc.h | 25 ++++-- 7 files changed, 259 insertions(+), 12 deletions(-) create mode 100644 fs/xfs/linux-2.6/xfs_discard.c create mode 100644 fs/xfs/linux-2.6/xfs_discard.h (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0dce969d6cad..faca44997099 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -98,6 +98,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ kmem.o \ xfs_aops.o \ xfs_buf.o \ + xfs_discard.o \ xfs_export.o \ xfs_file.o \ xfs_fs_subr.o \ diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c new file mode 100644 index 000000000000..05201ae719e5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.c @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2010 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_discard.h" +#include "xfs_trace.h" + +STATIC int +xfs_trim_extents( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_fsblock_t start, + xfs_fsblock_t len, + xfs_fsblock_t minlen, + __uint64_t *blocks_trimmed) +{ + struct block_device *bdev = mp->m_ddev_targp->bt_bdev; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + struct xfs_perag *pag; + int error; + int i; + + pag = xfs_perag_get(mp, agno); + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error || !agbp) + goto out_put_perag; + + cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); + + /* + * Force out the log. This means any transactions that might have freed + * space before we took the AGF buffer lock are now on disk, and the + * volatile disk cache is flushed. + */ + xfs_log_force(mp, XFS_LOG_SYNC); + + /* + * Look up the longest btree in the AGF and start with it. + */ + error = xfs_alloc_lookup_le(cur, 0, + XFS_BUF_TO_AGF(agbp)->agf_longest, &i); + if (error) + goto out_del_cursor; + + /* + * Loop until we are done with all extents that are large + * enough to be worth discarding. + */ + while (i) { + xfs_agblock_t fbno; + xfs_extlen_t flen; + + error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); + if (error) + goto out_del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); + ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); + + /* + * Too small? Give up. + */ + if (flen < minlen) { + trace_xfs_discard_toosmall(mp, agno, fbno, flen); + goto out_del_cursor; + } + + /* + * If the extent is entirely outside of the range we are + * supposed to discard skip it. Do not bother to trim + * down partially overlapping ranges for now. + */ + if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || + XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { + trace_xfs_discard_exclude(mp, agno, fbno, flen); + goto next_extent; + } + + /* + * If any blocks in the range are still busy, skip the + * discard and try again the next time. + */ + if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { + trace_xfs_discard_busy(mp, agno, fbno, flen); + goto next_extent; + } + + trace_xfs_discard_extent(mp, agno, fbno, flen); + error = -blkdev_issue_discard(bdev, + XFS_AGB_TO_DADDR(mp, agno, fbno), + XFS_FSB_TO_BB(mp, flen), + GFP_NOFS, 0); + if (error) + goto out_del_cursor; + *blocks_trimmed += flen; + +next_extent: + error = xfs_btree_decrement(cur, 0, &i); + if (error) + goto out_del_cursor; + } + +out_del_cursor: + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); +out_put_perag: + xfs_perag_put(pag); + return error; +} + +int +xfs_ioc_trim( + struct xfs_mount *mp, + struct fstrim_range __user *urange) +{ + struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; + unsigned int granularity = q->limits.discard_granularity; + struct fstrim_range range; + xfs_fsblock_t start, len, minlen; + xfs_agnumber_t start_agno, end_agno, agno; + __uint64_t blocks_trimmed = 0; + int error, last_error = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&range, urange, sizeof(range))) + return -XFS_ERROR(EFAULT); + + /* + * Truncating down the len isn't actually quite correct, but using + * XFS_B_TO_FSB would mean we trivially get overflows for values + * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default + * used by the fstrim application. In the end it really doesn't + * matter as trimming blocks is an advisory interface. + */ + start = XFS_B_TO_FSBT(mp, range.start); + len = XFS_B_TO_FSBT(mp, range.len); + minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); + + start_agno = XFS_FSB_TO_AGNO(mp, start); + if (start_agno >= mp->m_sb.sb_agcount) + return -XFS_ERROR(EINVAL); + + end_agno = XFS_FSB_TO_AGNO(mp, start + len); + if (end_agno >= mp->m_sb.sb_agcount) + end_agno = mp->m_sb.sb_agcount - 1; + + for (agno = start_agno; agno <= end_agno; agno++) { + error = -xfs_trim_extents(mp, agno, start, len, minlen, + &blocks_trimmed); + if (error) + last_error = error; + } + + if (last_error) + return last_error; + + range.len = XFS_FSB_TO_B(mp, blocks_trimmed); + if (copy_to_user(urange, &range, sizeof(range))) + return -XFS_ERROR(EFAULT); + return 0; +} diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h new file mode 100644 index 000000000000..e82b6dd3e127 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.h @@ -0,0 +1,8 @@ +#ifndef XFS_DISCARD_H +#define XFS_DISCARD_H 1 + +struct fstrim_range; + +extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); + +#endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index ad442d9e392e..b06ede1d0bed 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -39,6 +39,7 @@ #include "xfs_dfrag.h" #include "xfs_fsops.h" #include "xfs_vnodeops.h" +#include "xfs_discard.h" #include "xfs_quota.h" #include "xfs_inode_item.h" #include "xfs_export.h" @@ -1294,6 +1295,8 @@ xfs_file_ioctl( trace_xfs_file_ioctl(ip); switch (cmd) { + case FITRIM: + return xfs_ioc_trim(mp, arg); case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 647af2a2e7aa..2d0bcb479075 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -1759,6 +1759,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); +DECLARE_EVENT_CLASS(xfs_discard_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, agbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len) +) + +#define DEFINE_DISCARD_EVENT(name) \ +DEFINE_EVENT(xfs_discard_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(mp, agno, agbno, len)) +DEFINE_DISCARD_EVENT(xfs_discard_extent); +DEFINE_DISCARD_EVENT(xfs_discard_toosmall); +DEFINE_DISCARD_EVENT(xfs_discard_exclude); +DEFINE_DISCARD_EVENT(xfs_discard_busy); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index fa8723f5870a..f3227984a9bf 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -41,10 +41,6 @@ #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -static int -xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t bno, xfs_extlen_t len); - /* * Prototypes for per-ag allocation routines */ @@ -94,7 +90,7 @@ xfs_alloc_lookup_ge( * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. */ -STATIC int /* error */ +int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -127,7 +123,7 @@ xfs_alloc_update( /* * Get the data from the pointed-to record. */ -STATIC int /* error */ +int /* error */ xfs_alloc_get_rec( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t *bno, /* output: starting block of extent */ @@ -2615,7 +2611,7 @@ restart: * will require a synchronous transaction, but it can still be * used to distinguish between a partial or exact match. */ -static int +int xfs_alloc_busy_search( struct xfs_mount *mp, xfs_agnumber_t agno, diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 895009a97271..0ab56b32c7eb 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -19,6 +19,7 @@ #define __XFS_ALLOC_H__ struct xfs_buf; +struct xfs_btree_cur; struct xfs_mount; struct xfs_perag; struct xfs_trans; @@ -118,16 +119,16 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, struct xfs_perag *pag); #ifdef __KERNEL__ - void -xfs_alloc_busy_insert(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len); +xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, + xfs_agblock_t bno, xfs_extlen_t len); void xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); +int +xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t bno, xfs_extlen_t len); #endif /* __KERNEL__ */ /* @@ -205,4 +206,18 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len); /* length of extent */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat); /* output: success/failure */ + #endif /* __XFS_ALLOC_H__ */ -- cgit v1.2.3 From bfc60177f8ab509bc225becbb58f7e53a0e33e81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Jan 2011 13:02:23 +0000 Subject: xfs: fix error handling for synchronous writes If we get an IO error on a synchronous superblock write, we attach an error release function to it so that when the last reference goes away the release function is called and the buffer is invalidated and unlocked. The buffer is left locked until the release function is called so that other concurrent users of the buffer will be locked out until the buffer error is fully processed. Unfortunately, for the superblock buffer the filesyetm itself holds a reference to the buffer which prevents the reference count from dropping to zero and the release function being called. As a result, once an IO error occurs on a sync write, the buffer will never be unlocked and all future attempts to lock the buffer will hang. To make matters worse, this problems is not unique to such buffers; if there is a concurrent _xfs_buf_find() running, the lookup will grab a reference to the buffer and then wait on the buffer lock, preventing the reference count from ever falling to zero and hence unlocking the buffer. As such, the whole b_relse function implementation is broken because it cannot rely on the buffer reference count falling to zero to unlock the errored buffer. The synchronous write error path is the only path that uses this callback - it is used to ensure that the synchronous waiter gets the buffer error before the error state is cleared from the buffer by the release function. Given that the only sychronous buffer writes now go through xfs_bwrite and the error path in question can only occur for a write of a dirty, logged buffer, we can move most of the b_relse processing to happen inline in xfs_buf_iodone_callbacks, just like a normal I/O completion. In addition to that we make sure the error is not cleared in xfs_buf_iodone_callbacks, so that xfs_bwrite can reliably check it. Given that xfs_bwrite keeps the buffer locked until it has waited for it and checked the error this allows to reliably propagate the error to the caller, and make sure that the buffer is reliably unlocked. Given that xfs_buf_iodone_callbacks was the only instance of the b_relse callback we can remove it entirely. Based on earlier patches by Dave Chinner and Ajeet Yadav. Signed-off-by: Christoph Hellwig Reported-by: Ajeet Yadav Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 7 +-- fs/xfs/linux-2.6/xfs_buf.h | 7 +-- fs/xfs/xfs_buf_item.c | 151 +++++++++++++++------------------------------ 3 files changed, 51 insertions(+), 114 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 92f1f2acc6ab..ac1c7e8378dd 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -896,7 +896,6 @@ xfs_buf_rele( trace_xfs_buf_rele(bp, _RET_IP_); if (!pag) { - ASSERT(!bp->b_relse); ASSERT(list_empty(&bp->b_lru)); ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); if (atomic_dec_and_test(&bp->b_hold)) @@ -908,11 +907,7 @@ xfs_buf_rele( ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { - if (bp->b_relse) { - atomic_inc(&bp->b_hold); - spin_unlock(&pag->pag_buf_lock); - bp->b_relse(bp); - } else if (!(bp->b_flags & XBF_STALE) && + if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { xfs_buf_lru_add(bp); spin_unlock(&pag->pag_buf_lock); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a76c2428faff..cbe65950e524 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -152,8 +152,6 @@ typedef struct xfs_buftarg { struct xfs_buf; typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); -typedef void (*xfs_buf_relse_t)(struct xfs_buf *); -typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); #define XB_PAGES 2 @@ -183,7 +181,6 @@ typedef struct xfs_buf { void *b_addr; /* virtual address of buffer */ struct work_struct b_iodone_work; xfs_buf_iodone_t b_iodone; /* I/O completion function */ - xfs_buf_relse_t b_relse; /* releasing function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; @@ -323,7 +320,6 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) -#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) @@ -360,8 +356,7 @@ xfs_buf_set_ref( static inline void xfs_buf_relse(xfs_buf_t *bp) { - if (!bp->b_relse) - xfs_buf_unlock(bp); + xfs_buf_unlock(bp); xfs_buf_rele(bp); } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index ed2b65f3f8b9..98c6f73b6752 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -141,7 +141,6 @@ xfs_buf_item_log_check( #define xfs_buf_item_log_check(x) #endif -STATIC void xfs_buf_error_relse(xfs_buf_t *bp); STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); /* @@ -959,128 +958,76 @@ xfs_buf_do_callbacks( */ void xfs_buf_iodone_callbacks( - xfs_buf_t *bp) + struct xfs_buf *bp) { - xfs_log_item_t *lip; - static ulong lasttime; - static xfs_buftarg_t *lasttarg; - xfs_mount_t *mp; + struct xfs_log_item *lip = bp->b_fspriv; + struct xfs_mount *mp = lip->li_mountp; + static ulong lasttime; + static xfs_buftarg_t *lasttarg; - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + if (likely(!XFS_BUF_GETERROR(bp))) + goto do_callbacks; - if (XFS_BUF_GETERROR(bp) != 0) { - /* - * If we've already decided to shutdown the filesystem - * because of IO errors, there's no point in giving this - * a retry. - */ - mp = lip->li_mountp; - if (XFS_FORCED_SHUTDOWN(mp)) { - ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); - XFS_BUF_SUPER_STALE(bp); - trace_xfs_buf_item_iodone(bp, _RET_IP_); - xfs_buf_do_callbacks(bp); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_buf_ioend(bp, 0); - return; - } + /* + * If we've already decided to shutdown the filesystem because of + * I/O errors, there's no point in giving this a retry. + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + XFS_BUF_SUPER_STALE(bp); + trace_xfs_buf_item_iodone(bp, _RET_IP_); + goto do_callbacks; + } - if ((XFS_BUF_TARGET(bp) != lasttarg) || - (time_after(jiffies, (lasttime + 5*HZ)))) { - lasttime = jiffies; - cmn_err(CE_ALERT, "Device %s, XFS metadata write error" - " block 0x%llx in %s", - XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), - (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); - } - lasttarg = XFS_BUF_TARGET(bp); + if (XFS_BUF_TARGET(bp) != lasttarg || + time_after(jiffies, (lasttime + 5*HZ))) { + lasttime = jiffies; + cmn_err(CE_ALERT, "Device %s, XFS metadata write error" + " block 0x%llx in %s", + XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), + (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); + } + lasttarg = XFS_BUF_TARGET(bp); - if (XFS_BUF_ISASYNC(bp)) { - /* - * If the write was asynchronous then noone will be - * looking for the error. Clear the error state - * and write the buffer out again delayed write. - * - * XXXsup This is OK, so long as we catch these - * before we start the umount; we don't want these - * DELWRI metadata bufs to be hanging around. - */ - XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ - - if (!(XFS_BUF_ISSTALE(bp))) { - XFS_BUF_DELAYWRITE(bp); - XFS_BUF_DONE(bp); - XFS_BUF_SET_START(bp); - } - ASSERT(XFS_BUF_IODONE_FUNC(bp)); - trace_xfs_buf_item_iodone_async(bp, _RET_IP_); - xfs_buf_relse(bp); - } else { - /* - * If the write of the buffer was not asynchronous, - * then we want to make sure to return the error - * to the caller of bwrite(). Because of this we - * cannot clear the B_ERROR state at this point. - * Instead we install a callback function that - * will be called when the buffer is released, and - * that routine will clear the error state and - * set the buffer to be written out again after - * some delay. - */ - /* We actually overwrite the existing b-relse - function at times, but we're gonna be shutting down - anyway. */ - XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); + /* + * If the write was asynchronous then noone will be looking for the + * error. Clear the error state and write the buffer out again. + * + * During sync or umount we'll write all pending buffers again + * synchronous, which will catch these errors if they keep hanging + * around. + */ + if (XFS_BUF_ISASYNC(bp)) { + XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ + + if (!XFS_BUF_ISSTALE(bp)) { + XFS_BUF_DELAYWRITE(bp); XFS_BUF_DONE(bp); - XFS_BUF_FINISH_IOWAIT(bp); + XFS_BUF_SET_START(bp); } + ASSERT(XFS_BUF_IODONE_FUNC(bp)); + trace_xfs_buf_item_iodone_async(bp, _RET_IP_); + xfs_buf_relse(bp); return; } - xfs_buf_do_callbacks(bp); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_buf_ioend(bp, 0); -} - -/* - * This is a callback routine attached to a buffer which gets an error - * when being written out synchronously. - */ -STATIC void -xfs_buf_error_relse( - xfs_buf_t *bp) -{ - xfs_log_item_t *lip; - xfs_mount_t *mp; - - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - mp = (xfs_mount_t *)lip->li_mountp; - ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); - + /* + * If the write of the buffer was synchronous, we want to make + * sure to return the error to the caller of xfs_bwrite(). + */ XFS_BUF_STALE(bp); XFS_BUF_DONE(bp); XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_ERROR(bp,0); trace_xfs_buf_error_relse(bp, _RET_IP_); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - if (! XFS_FORCED_SHUTDOWN(mp)) - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - /* - * We have to unpin the pinned buffers so do the - * callbacks. - */ +do_callbacks: xfs_buf_do_callbacks(bp); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_SET_BRELSE_FUNC(bp,NULL); - xfs_buf_relse(bp); + xfs_buf_ioend(bp, 0); } - /* * This is the iodone() function for buffers which have been * logged. It is called when they are eventually flushed out. -- cgit v1.2.3 From 1884bd8354c9aec4ca501dc4773c13ad2a09af7b Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 25 Dec 2010 20:14:53 +0000 Subject: xfs: fix an assignment within an ASSERT() In fs/xfs/xfs_trans.c::xfs_trans_unreserve_and_mod_sb() at the out: label we have this: ASSERT(error = 0); I believe a comparison was intended, not an assignment. If I'm right, the patch below fixes that up. Signed-off-by: Jesper Juhl Signed-off-by: Alex Elder --- fs/xfs/xfs_trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f80a067a4658..33dbc4e0ad62 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1137,7 +1137,7 @@ out_undo_fdblocks: if (blkdelta) xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); out: - ASSERT(error = 0); + ASSERT(error == 0); return; } -- cgit v1.2.3 From 65a84a0f7567ea244e5246e642920260cfc2744a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 7 Jan 2011 03:30:41 +0000 Subject: xfs: Add log level to assertion printk I received a ppc64 bug report involving xfs but the assertion was filtered out by the console log level. Use KERN_CRIT to ensure it makes it out. Signed-off-by: Anton Blanchard Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/support/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 975aa10e1a47..86162e5f9a21 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -104,7 +104,8 @@ xfs_fs_vcmn_err( void assfail(char *expr, char *file, int line) { - printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); + printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr, + file, line); BUG(); } -- cgit v1.2.3 From 73efe4a4ddf8eb2b1cc7039e8a66a23a424961af Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 12 Jan 2011 00:35:42 +0000 Subject: xfs: prevent NMI timeouts in cmn_err We currently have a global error message buffer in cmn_err that is protected by a spin lock that disables interrupts. Recently there have been reports of NMI timeouts occurring when the console is being flooded by SCSI error reports due to cmn_err() getting stuck trying to print to the console while holding this lock (i.e. with interrupts disabled). The NMI watchdog is seeing this CPU as non-responding and so is triggering a panic. While the trigger for the reported case is SCSI errors, pretty much anything that spams the kernel log could cause this to occur. Realistically the only reason that we have the intemediate message buffer is to prepend the correct kernel log level prefix to the log message. The only reason we have the lock is to protect the global message buffer and the only reason the message buffer is global is to keep it off the stack. Hence if we can avoid needing a global message buffer we avoid needing the lock, and we can do this with a small amount of cleanup and some preprocessor tricks: 1. clean up xfs_cmn_err() panic mask functionality to avoid needing debug code in xfs_cmn_err() 2. remove the couple of "!" message prefixes that still exist that the existing cmn_err() code steps over. 3. redefine CE_* levels directly to KERN_* 4. redefine cmn_err() and friends to use printk() directly via variable argument length macros. By doing this, we can completely remove the cmn_err() code and the lock that is causing the problems, and rely solely on printk() serialisation to ensure that we don't get garbled messages. A series of followup patches is really needed to clean up all the cmn_err() calls and related messages properly, but that results in a series that is not easily back portable to enterprise kernels. Hence this initial fix is only to address the direct problem in the lowest impact way possible. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sysctl.c | 23 ++++++++- fs/xfs/support/debug.c | 109 +++++++++++++++++++----------------------- fs/xfs/support/debug.h | 25 ++++++---- fs/xfs/xfs_error.c | 31 ------------ fs/xfs/xfs_error.h | 18 +++---- fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_log_recover.c | 2 +- 7 files changed, 96 insertions(+), 114 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7bb5092d6ae4..ee3cee097e7e 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -18,6 +18,7 @@ #include "xfs.h" #include #include +#include "xfs_error.h" static struct ctl_table_header *xfs_table_header; @@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler( return ret; } + +STATIC int +xfs_panic_mask_proc_handler( + ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int ret, *valp = ctl->data; + + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + if (!ret && write) { + xfs_panic_mask = *valp; +#ifdef DEBUG + xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); +#endif + } + return ret; +} #endif /* CONFIG_PROC_FS */ static ctl_table xfs_table[] = { @@ -77,7 +98,7 @@ static ctl_table xfs_table[] = { .data = &xfs_params.panic_mask.val, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = xfs_panic_mask_proc_handler, .extra1 = &xfs_params.panic_mask.min, .extra2 = &xfs_params.panic_mask.max }, diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 86162e5f9a21..e6cf955ec0fc 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -25,80 +25,71 @@ #include "xfs_mount.h" #include "xfs_error.h" -static char message[1024]; /* keep it off the stack */ -static DEFINE_SPINLOCK(xfs_err_lock); - -/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ -#define XFS_MAX_ERR_LEVEL 7 -#define XFS_ERR_MASK ((1 << 3) - 1) -static const char * const err_level[XFS_MAX_ERR_LEVEL+1] = - {KERN_EMERG, KERN_ALERT, KERN_CRIT, - KERN_ERR, KERN_WARNING, KERN_NOTICE, - KERN_INFO, KERN_DEBUG}; - void -cmn_err(register int level, char *fmt, ...) +cmn_err( + const char *lvl, + const char *fmt, + ...) { - char *fp = fmt; - int len; - ulong flags; - va_list ap; - - level &= XFS_ERR_MASK; - if (level > XFS_MAX_ERR_LEVEL) - level = XFS_MAX_ERR_LEVEL; - spin_lock_irqsave(&xfs_err_lock,flags); - va_start(ap, fmt); - if (*fmt == '!') fp++; - len = vsnprintf(message, sizeof(message), fp, ap); - if (len >= sizeof(message)) - len = sizeof(message) - 1; - if (message[len-1] == '\n') - message[len-1] = 0; - printk("%s%s\n", err_level[level], message); - va_end(ap); - spin_unlock_irqrestore(&xfs_err_lock,flags); - BUG_ON(level == CE_PANIC); + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + printk("%s%pV", lvl, &vaf); + va_end(args); + + BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0); } void -xfs_fs_vcmn_err( - int level, +xfs_fs_cmn_err( + const char *lvl, struct xfs_mount *mp, - char *fmt, - va_list ap) + const char *fmt, + ...) { - unsigned long flags; - int len = 0; + struct va_format vaf; + va_list args; - level &= XFS_ERR_MASK; - if (level > XFS_MAX_ERR_LEVEL) - level = XFS_MAX_ERR_LEVEL; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; - spin_lock_irqsave(&xfs_err_lock,flags); + printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf); + va_end(args); - if (mp) { - len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname); + BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0); +} + +/* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */ +void +xfs_cmn_err( + int panic_tag, + const char *lvl, + struct xfs_mount *mp, + const char *fmt, + ...) +{ + struct va_format vaf; + va_list args; + int panic = 0; - /* - * Skip the printk if we can't print anything useful - * due to an over-long device name. - */ - if (len >= sizeof(message)) - goto out; + if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { + printk(KERN_ALERT "XFS: Transforming an alert into a BUG."); + panic = 1; } - len = vsnprintf(message + len, sizeof(message) - len, fmt, ap); - if (len >= sizeof(message)) - len = sizeof(message) - 1; - if (message[len-1] == '\n') - message[len-1] = 0; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; - printk("%s%s\n", err_level[level], message); - out: - spin_unlock_irqrestore(&xfs_err_lock,flags); + printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf); + va_end(args); - BUG_ON(level == CE_PANIC); + BUG_ON(panic); } void diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index d2d20462fd4f..05699f67d475 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -20,15 +20,22 @@ #include -#define CE_DEBUG 7 /* debug */ -#define CE_CONT 6 /* continuation */ -#define CE_NOTE 5 /* notice */ -#define CE_WARN 4 /* warning */ -#define CE_ALERT 1 /* alert */ -#define CE_PANIC 0 /* panic */ - -extern void cmn_err(int, char *, ...) - __attribute__ ((format (printf, 2, 3))); +struct xfs_mount; + +#define CE_DEBUG KERN_DEBUG +#define CE_CONT KERN_INFO +#define CE_NOTE KERN_NOTICE +#define CE_WARN KERN_WARNING +#define CE_ALERT KERN_ALERT +#define CE_PANIC KERN_EMERG + +void cmn_err(const char *lvl, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp, + const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); +void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp, + const char *fmt, ...) __attribute__ ((format (printf, 4, 5))); + extern void assfail(char *expr, char *f, int l); #define ASSERT_ALWAYS(expr) \ diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c78cc6a3d87c..4c7db74a05f7 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -152,37 +152,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) } #endif /* DEBUG */ - -void -xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - xfs_fs_vcmn_err(level, mp, fmt, ap); - va_end(ap); -} - -void -xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) -{ - va_list ap; - -#ifdef DEBUG - xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); -#endif - - if (xfs_panic_mask && (xfs_panic_mask & panic_tag) - && (level & CE_ALERT)) { - level &= ~CE_ALERT; - level |= CE_PANIC; - cmn_err(CE_ALERT, "XFS: Transforming an alert into a BUG."); - } - va_start(ap, fmt); - xfs_fs_vcmn_err(level, mp, fmt, ap); - va_end(ap); -} - void xfs_error_report( const char *tag, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index f338847f80b8..10dce5475f02 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -136,8 +136,8 @@ extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ (rf)))) -extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); -extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); +extern int xfs_errortag_add(int error_tag, struct xfs_mount *mp); +extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); #else #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) #define xfs_errortag_add(tag, mp) (ENOSYS) @@ -162,21 +162,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); struct xfs_mount; -extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp, - char *fmt, va_list ap) - __attribute__ ((format (printf, 3, 0))); -extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, - char *fmt, ...) - __attribute__ ((format (printf, 4, 5))); -extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); - extern void xfs_hex_dump(void *p, int length); #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) #define xfs_fs_mount_cmn_err(f, fmt, args...) \ - ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args)) + do { \ + if (!(f & XFS_MFSI_QUIET)) \ + cmn_err(CE_WARN, "XFS: " fmt, ## args); \ + } while (0) #endif /* __XFS_ERROR_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0bf24b11d0c4..ae6fef1ff563 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -377,7 +377,7 @@ xfs_log_mount( cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); else { cmn_err(CE_NOTE, - "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", + "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", mp->m_fsname); ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 204d8e5fa7fa..aa0ebb776903 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3800,7 +3800,7 @@ xlog_recover_finish( log->l_flags &= ~XLOG_RECOVERY_NEEDED; } else { cmn_err(CE_DEBUG, - "!Ending clean XFS mount for filesystem: %s\n", + "Ending clean XFS mount for filesystem: %s\n", log->l_mp->m_fsname); } return 0; -- cgit v1.2.3 From f00c9e44ad1a9660fe8cd3ca15b6cd9497172eab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Sep 2010 17:38:58 +0200 Subject: quota: Fix deadlock during path resolution As Al Viro pointed out path resolution during Q_QUOTAON calls to quotactl is prone to deadlocks. We hold s_umount semaphore for reading during the path resolution and resolution itself may need to acquire the semaphore for writing when e. g. autofs mountpoint is passed. Solve the problem by performing the resolution before we get hold of the superblock (and thus s_umount semaphore). The whole thing is complicated by the fact that some filesystems (OCFS2) ignore the path argument. So to distinguish between filesystem which want the path and which do not we introduce new .quota_on_meta callback which does not get the path. OCFS2 then uses this callback instead of old .quota_on. CC: Al Viro CC: Christoph Hellwig CC: Ted Ts'o CC: Joel Becker Signed-off-by: Jan Kara --- fs/ext3/super.c | 25 +++++++------------------ fs/ext4/super.c | 25 +++++++------------------ fs/ocfs2/super.c | 5 ++--- fs/quota/dquot.c | 18 ++---------------- fs/quota/quota.c | 41 +++++++++++++++++++++++++++-------------- fs/reiserfs/super.c | 17 ++++++----------- include/linux/quota.h | 5 ++++- include/linux/quotaops.h | 4 +--- 8 files changed, 56 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index b7d0554631e4..0e0d391626be 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -755,7 +755,7 @@ static int ext3_release_dquot(struct dquot *dquot); static int ext3_mark_dquot_dirty(struct dquot *dquot); static int ext3_write_info(struct super_block *sb, int type); static int ext3_quota_on(struct super_block *sb, int type, int format_id, - char *path); + struct path *path); static int ext3_quota_on_mount(struct super_block *sb, int type); static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -2885,27 +2885,20 @@ static int ext3_quota_on_mount(struct super_block *sb, int type) * Standard function to be called on quota_on */ static int ext3_quota_on(struct super_block *sb, int type, int format_id, - char *name) + struct path *path) { int err; - struct path path; if (!test_opt(sb, QUOTA)) return -EINVAL; - err = kern_path(name, LOOKUP_FOLLOW, &path); - if (err) - return err; - /* Quotafile not on the same filesystem? */ - if (path.mnt->mnt_sb != sb) { - path_put(&path); + if (path->mnt->mnt_sb != sb) return -EXDEV; - } /* Journaling quota? */ if (EXT3_SB(sb)->s_qf_names[type]) { /* Quotafile not of fs root? */ - if (path.dentry->d_parent != sb->s_root) + if (path->dentry->d_parent != sb->s_root) ext3_msg(sb, KERN_WARNING, "warning: Quota file not on filesystem root. " "Journaled quota will not work."); @@ -2915,7 +2908,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, * When we journal data on quota file, we have to flush journal to see * all updates to the file when we bypass pagecache... */ - if (ext3_should_journal_data(path.dentry->d_inode)) { + if (ext3_should_journal_data(path->dentry->d_inode)) { /* * We don't need to lock updates but journal_flush() could * otherwise be livelocked... @@ -2923,15 +2916,11 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, journal_lock_updates(EXT3_SB(sb)->s_journal); err = journal_flush(EXT3_SB(sb)->s_journal); journal_unlock_updates(EXT3_SB(sb)->s_journal); - if (err) { - path_put(&path); + if (err) return err; - } } - err = dquot_quota_on_path(sb, type, format_id, &path); - path_put(&path); - return err; + return dquot_quota_on(sb, type, format_id, path); } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 29c80f6d8b27..0f10ccd6bfc0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1162,7 +1162,7 @@ static int ext4_release_dquot(struct dquot *dquot); static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, - char *path); + struct path *path); static int ext4_quota_off(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, @@ -4566,27 +4566,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) * Standard function to be called on quota_on */ static int ext4_quota_on(struct super_block *sb, int type, int format_id, - char *name) + struct path *path) { int err; - struct path path; if (!test_opt(sb, QUOTA)) return -EINVAL; - err = kern_path(name, LOOKUP_FOLLOW, &path); - if (err) - return err; - /* Quotafile not on the same filesystem? */ - if (path.mnt->mnt_sb != sb) { - path_put(&path); + if (path->mnt->mnt_sb != sb) return -EXDEV; - } /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ - if (path.dentry->d_parent != sb->s_root) + if (path->dentry->d_parent != sb->s_root) ext4_msg(sb, KERN_WARNING, "Quota file not on filesystem root. " "Journaled quota will not work"); @@ -4597,7 +4590,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, * all updates to the file when we bypass pagecache... */ if (EXT4_SB(sb)->s_journal && - ext4_should_journal_data(path.dentry->d_inode)) { + ext4_should_journal_data(path->dentry->d_inode)) { /* * We don't need to lock updates but journal_flush() could * otherwise be livelocked... @@ -4605,15 +4598,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - if (err) { - path_put(&path); + if (err) return err; - } } - err = dquot_quota_on_path(sb, type, format_id, &path); - path_put(&path); - return err; + return dquot_quota_on(sb, type, format_id, path); } static int ext4_quota_off(struct super_block *sb, int type) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 17ff46fa8a10..31c3ffd2f8d0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -993,8 +993,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) } /* Handle quota on quotactl */ -static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, - char *path) +static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) { unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; @@ -1013,7 +1012,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type) } static const struct quotactl_ops ocfs2_quotactl_ops = { - .quota_on = ocfs2_quota_on, + .quota_on_meta = ocfs2_quota_on, .quota_off = ocfs2_quota_off, .quota_sync = dquot_quota_sync, .get_info = dquot_get_dqinfo, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 84becd3e4772..a2a622e079f0 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2189,8 +2189,8 @@ int dquot_resume(struct super_block *sb, int type) } EXPORT_SYMBOL(dquot_resume); -int dquot_quota_on_path(struct super_block *sb, int type, int format_id, - struct path *path) +int dquot_quota_on(struct super_block *sb, int type, int format_id, + struct path *path) { int error = security_quota_on(path->dentry); if (error) @@ -2204,20 +2204,6 @@ int dquot_quota_on_path(struct super_block *sb, int type, int format_id, DQUOT_LIMITS_ENABLED); return error; } -EXPORT_SYMBOL(dquot_quota_on_path); - -int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name) -{ - struct path path; - int error; - - error = kern_path(name, LOOKUP_FOLLOW, &path); - if (!error) { - error = dquot_quota_on_path(sb, type, format_id, &path); - path_put(&path); - } - return error; -} EXPORT_SYMBOL(dquot_quota_on); /* diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b299961e1edb..b34bdb25490c 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -64,18 +64,15 @@ static int quota_sync_all(int type) } static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, - void __user *addr) + struct path *path) { - char *pathname; - int ret = -ENOSYS; - - pathname = getname(addr); - if (IS_ERR(pathname)) - return PTR_ERR(pathname); - if (sb->s_qcop->quota_on) - ret = sb->s_qcop->quota_on(sb, type, id, pathname); - putname(pathname); - return ret; + if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_on_meta) + return -ENOSYS; + if (sb->s_qcop->quota_on_meta) + return sb->s_qcop->quota_on_meta(sb, type, id); + if (IS_ERR(path)) + return PTR_ERR(path); + return sb->s_qcop->quota_on(sb, type, id, path); } static int quota_getfmt(struct super_block *sb, int type, void __user *addr) @@ -241,7 +238,7 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, - void __user *addr) + void __user *addr, struct path *path) { int ret; @@ -256,7 +253,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, switch (cmd) { case Q_QUOTAON: - return quota_quotaon(sb, type, cmd, id, addr); + return quota_quotaon(sb, type, cmd, id, path); case Q_QUOTAOFF: if (!sb->s_qcop->quota_off) return -ENOSYS; @@ -335,6 +332,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, { uint cmds, type; struct super_block *sb = NULL; + struct path path, *pathp = NULL; int ret; cmds = cmd >> SUBCMDSHIFT; @@ -351,12 +349,27 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, return -ENODEV; } + /* + * Path for quotaon has to be resolved before grabbing superblock + * because that gets s_umount sem which is also possibly needed by path + * resolution (think about autofs) and thus deadlocks could arise. + */ + if (cmds == Q_QUOTAON) { + ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path); + if (ret) + pathp = ERR_PTR(ret); + else + pathp = &path; + } + sb = quotactl_block(special); if (IS_ERR(sb)) return PTR_ERR(sb); - ret = do_quotactl(sb, type, cmds, id, addr); + ret = do_quotactl(sb, type, cmds, id, addr, pathp); drop_super(sb); + if (pathp && !IS_ERR(pathp)) + path_put(pathp); return ret; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2575682a9ead..0aab04f46827 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -632,7 +632,7 @@ static int reiserfs_acquire_dquot(struct dquot *); static int reiserfs_release_dquot(struct dquot *); static int reiserfs_mark_dquot_dirty(struct dquot *); static int reiserfs_write_info(struct super_block *, int); -static int reiserfs_quota_on(struct super_block *, int, int, char *); +static int reiserfs_quota_on(struct super_block *, int, int, struct path *); static const struct dquot_operations reiserfs_quota_operations = { .write_dquot = reiserfs_write_dquot, @@ -2048,25 +2048,21 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type) * Standard function to be called on quota_on */ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, - char *name) + struct path *path) { int err; - struct path path; struct inode *inode; struct reiserfs_transaction_handle th; if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) return -EINVAL; - err = kern_path(name, LOOKUP_FOLLOW, &path); - if (err) - return err; /* Quotafile not on the same filesystem? */ - if (path.mnt->mnt_sb != sb) { + if (path->mnt->mnt_sb != sb) { err = -EXDEV; goto out; } - inode = path.dentry->d_inode; + inode = path->dentry->d_inode; /* We must not pack tails for quota files on reiserfs for quota IO to work */ if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { err = reiserfs_unpack(inode, NULL); @@ -2082,7 +2078,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, /* Journaling quota? */ if (REISERFS_SB(sb)->s_qf_names[type]) { /* Quotafile not of fs root? */ - if (path.dentry->d_parent != sb->s_root) + if (path->dentry->d_parent != sb->s_root) reiserfs_warning(sb, "super-6521", "Quota file not on filesystem root. " "Journalled quota will not work."); @@ -2101,9 +2097,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) goto out; } - err = dquot_quota_on_path(sb, type, format_id, &path); + err = dquot_quota_on(sb, type, format_id, path); out: - path_put(&path); return err; } diff --git a/include/linux/quota.h b/include/linux/quota.h index 94c1f03b50eb..9a85412e0db6 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -322,9 +322,12 @@ struct dquot_operations { qsize_t *(*get_reserved_space) (struct inode *); }; +struct path; + /* Operations handling requests from userspace */ struct quotactl_ops { - int (*quota_on)(struct super_block *, int, int, char *); + int (*quota_on)(struct super_block *, int, int, struct path *); + int (*quota_on_meta)(struct super_block *, int, int); int (*quota_off)(struct super_block *, int); int (*quota_sync)(struct super_block *, int, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 223b14cd129c..eb354f6f26b3 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -76,11 +76,9 @@ int dquot_mark_dquot_dirty(struct dquot *dquot); int dquot_file_open(struct inode *inode, struct file *file); -int dquot_quota_on(struct super_block *sb, int type, int format_id, - char *path); int dquot_enable(struct inode *inode, int type, int format_id, unsigned int flags); -int dquot_quota_on_path(struct super_block *sb, int type, int format_id, +int dquot_quota_on(struct super_block *sb, int type, int format_id, struct path *path); int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); -- cgit v1.2.3 From 8a0eebf66e3b1deae036553ba641a9c2bdbae678 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jan 2011 14:15:50 -0500 Subject: NFS: Fix NFSv3 exclusive open semantics Commit c0204fd2b8fe047b18b67e07e1bf2a03691240cd (NFS: Clean up nfs4_proc_create()) broke NFSv3 exclusive open by removing the code that passes the O_EXCL flag down to nfs3_proc_create(). This patch reverts that offending hunk from the original commit. Reported-by: Nick Bowler Signed-off-by: Trond Myklebust Cc: stable@kernel.org [2.6.37] Tested-by: Nick Bowler Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 95b081bc9e25..64ee240f3c80 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1579,6 +1579,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, { struct iattr attr; int error; + int open_flags = 0; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1586,7 +1587,10 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL); + if ((nd->flags & LOOKUP_CREATE) != 0) + open_flags = nd->intent.open.flags; + + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); if (error != 0) goto out_err; return 0; -- cgit v1.2.3 From 81bb8debd0d570dc67dc1e9d8b612632cb941893 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 9 Dec 2010 02:02:29 +0000 Subject: Squashfs: add XZ compression support Add support for reading file systems compressed with the XZ compression algorithm. This patch adds the XZ decompressor wrapper code. Signed-off-by: Phillip Lougher --- fs/squashfs/squashfs_fs.h | 1 + fs/squashfs/xz_wrapper.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 fs/squashfs/xz_wrapper.c (limited to 'fs') diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index c5137fc9ab11..39533feffd6d 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -238,6 +238,7 @@ struct meta_index { #define ZLIB_COMPRESSION 1 #define LZMA_COMPRESSION 2 #define LZO_COMPRESSION 3 +#define XZ_COMPRESSION 4 struct squashfs_super_block { __le32 s_magic; diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c new file mode 100644 index 000000000000..856756ca5ee4 --- /dev/null +++ b/fs/squashfs/xz_wrapper.c @@ -0,0 +1,153 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + * Phillip Lougher + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * xz_wrapper.c + */ + + +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "decompressor.h" + +struct squashfs_xz { + struct xz_dec *state; + struct xz_buf buf; +}; + +static void *squashfs_xz_init(struct squashfs_sb_info *msblk) +{ + int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); + + struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto failed; + + stream->state = xz_dec_init(XZ_PREALLOC, block_size); + if (stream->state == NULL) + goto failed; + + return stream; + +failed: + ERROR("Failed to allocate xz workspace\n"); + kfree(stream); + return NULL; +} + + +static void squashfs_xz_free(void *strm) +{ + struct squashfs_xz *stream = strm; + + if (stream) { + xz_dec_end(stream->state); + kfree(stream); + } +} + + +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, + struct buffer_head **bh, int b, int offset, int length, int srclength, + int pages) +{ + enum xz_ret xz_err; + int avail, total = 0, k = 0, page = 0; + struct squashfs_xz *stream = msblk->stream; + + mutex_lock(&msblk->read_data_mutex); + + xz_dec_reset(stream->state); + stream->buf.in_pos = 0; + stream->buf.in_size = 0; + stream->buf.out_pos = 0; + stream->buf.out_size = PAGE_CACHE_SIZE; + stream->buf.out = buffer[page++]; + + do { + if (stream->buf.in_pos == stream->buf.in_size && k < b) { + avail = min(length, msblk->devblksize - offset); + length -= avail; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto release_mutex; + + if (avail == 0) { + offset = 0; + put_bh(bh[k++]); + continue; + } + + stream->buf.in = bh[k]->b_data + offset; + stream->buf.in_size = avail; + stream->buf.in_pos = 0; + offset = 0; + } + + if (stream->buf.out_pos == stream->buf.out_size + && page < pages) { + stream->buf.out = buffer[page++]; + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; + } + + xz_err = xz_dec_run(stream->state, &stream->buf); + + if (stream->buf.in_pos == stream->buf.in_size && k < b) + put_bh(bh[k++]); + } while (xz_err == XZ_OK); + + if (xz_err != XZ_STREAM_END) { + ERROR("xz_dec_run error, data probably corrupt\n"); + goto release_mutex; + } + + if (k < b) { + ERROR("xz_uncompress error, input remaining\n"); + goto release_mutex; + } + + total += stream->buf.out_pos; + mutex_unlock(&msblk->read_data_mutex); + return total; + +release_mutex: + mutex_unlock(&msblk->read_data_mutex); + + for (; k < b; k++) + put_bh(bh[k]); + + return -EIO; +} + +const struct squashfs_decompressor squashfs_xz_comp_ops = { + .init = squashfs_xz_init, + .free = squashfs_xz_free, + .decompress = squashfs_xz_uncompress, + .id = XZ_COMPRESSION, + .name = "xz", + .supported = 1 +}; -- cgit v1.2.3 From 7a43ae523744c01b6187013e781f44c2281c579c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 9 Dec 2010 02:08:31 +0000 Subject: Squashfs: Add XZ compression configuration option Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 15 +++++++++++++++ fs/squashfs/Makefile | 1 + fs/squashfs/decompressor.c | 7 +++++++ fs/squashfs/decompressor.h | 5 +++++ 4 files changed, 28 insertions(+) (limited to 'fs') diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index e5f63da64d04..50cc4edbca06 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -53,6 +53,21 @@ config SQUASHFS_LZO If unsure, say N. +config SQUASHFS_XZ + bool "Include support for XZ compressed file systems" + depends on SQUASHFS + select XZ_DEC + help + Saying Y here includes support for reading Squashfs file systems + compressed with XZ compresssion. XZ gives better compression than + the default zlib compression, at the expense of greater CPU and + memory overhead. + + XZ is not the standard compression used in Squashfs and so most + file systems will be readable without selecting this option. + + If unsure, say N. + config SQUASHFS_EMBEDDED bool "Additional option for memory-constrained systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 7672bac8d328..cecf2bea07af 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -7,3 +7,4 @@ squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o +squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 24af9ce9722f..482d78197811 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -46,6 +46,12 @@ static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { }; #endif +#ifndef CONFIG_SQUASHFS_XZ +static const struct squashfs_decompressor squashfs_xz_comp_ops = { + NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 +}; +#endif + static const struct squashfs_decompressor squashfs_unknown_comp_ops = { NULL, NULL, NULL, 0, "unknown", 0 }; @@ -58,6 +64,7 @@ static const struct squashfs_decompressor *decompressor[] = { #else &squashfs_lzo_unsupported_comp_ops, #endif + &squashfs_xz_comp_ops, &squashfs_unknown_comp_ops }; diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 7425f80783f6..57e1acb4c6a9 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -52,4 +52,9 @@ static inline int squashfs_decompress(struct squashfs_sb_info *msblk, return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, length, srclength, pages); } + +#ifdef CONFIG_SQUASHFS_XZ +extern const struct squashfs_decompressor squashfs_xz_comp_ops; +#endif + #endif -- cgit v1.2.3 From 170cf02165272dfe026eba183563bad973ca4f05 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 5 Jan 2011 17:52:26 +0000 Subject: Squashfs: remove unnecessary variable in zlib_wrapper Get rid of unnecessary bytes variable, and remove redundant initialisation of zlib_err. Signed-off-by: Phillip Lougher --- fs/squashfs/zlib_wrapper.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 7a603874e483..1f4833b87ea3 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -66,8 +66,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, struct buffer_head **bh, int b, int offset, int length, int srclength, int pages) { - int zlib_err = 0, zlib_init = 0; - int avail, bytes, k = 0, page = 0; + int zlib_err, zlib_init = 0; + int k = 0, page = 0; z_stream *stream = msblk->stream; mutex_lock(&msblk->read_data_mutex); @@ -75,11 +75,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, stream->avail_out = 0; stream->avail_in = 0; - bytes = length; do { if (stream->avail_in == 0 && k < b) { - avail = min(bytes, msblk->devblksize - offset); - bytes -= avail; + int avail = min(length, msblk->devblksize - offset); + length -= avail; wait_on_buffer(bh[k]); if (!buffer_uptodate(bh[k])) goto release_mutex; -- cgit v1.2.3 From e7ee11f0ecd587caed0063c5f68ca20fef699f32 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 5 Jan 2011 18:02:37 +0000 Subject: Squashfs: add missing check in zlib_wrapper On file system corruption zlib can return Z_STREAM_OK with input buffers remaining, which will not be released. Signed-off-by: Phillip Lougher --- fs/squashfs/zlib_wrapper.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 1f4833b87ea3..ab5801f66e26 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -127,6 +127,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, goto release_mutex; } + if (k < b) { + ERROR("zlib_uncompress error, data remaining\n"); + goto release_mutex; + } + length = stream->total_out; mutex_unlock(&msblk->read_data_mutex); return length; -- cgit v1.2.3 From 6197fd86789a28760f8375b5ae8885cd7258042f Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 5 Jan 2011 18:15:58 +0000 Subject: Squashfs: get rid of default n in Kconfig As pointed out by Geert Uytterhoeven, "default n" is the default, no reason to specify it. Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 50cc4edbca06..aa68a8a31518 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -29,7 +29,6 @@ config SQUASHFS config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS - default n help Saying Y here includes support for extended attributes (xattrs). Xattrs are name:value pairs associated with inodes by @@ -40,7 +39,6 @@ config SQUASHFS_XATTR config SQUASHFS_LZO bool "Include support for LZO compressed file systems" depends on SQUASHFS - default n select LZO_DECOMPRESS help Saying Y here includes support for reading Squashfs file systems @@ -71,7 +69,6 @@ config SQUASHFS_XZ config SQUASHFS_EMBEDDED bool "Additional option for memory-constrained systems" depends on SQUASHFS - default n help Saying Y here allows you to specify cache size. -- cgit v1.2.3 From 8fcd97216f45b1691f8f91f35cc108d06e0bfca8 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 6 Jan 2011 06:08:50 +0000 Subject: Squashfs: move squashfs_i() definition from squashfs.h Move squashfs_i() definition out of squashfs.h, this eliminates the need to #include squashfs_fs_i.h from numerous files. Signed-off-by: Phillip Lougher --- fs/squashfs/block.c | 1 - fs/squashfs/cache.c | 1 - fs/squashfs/decompressor.c | 1 - fs/squashfs/fragment.c | 1 - fs/squashfs/id.c | 1 - fs/squashfs/lzo_wrapper.c | 1 - fs/squashfs/squashfs.h | 5 ----- fs/squashfs/squashfs_fs_i.h | 6 ++++++ fs/squashfs/xattr_id.c | 1 - fs/squashfs/zlib_wrapper.c | 1 - 10 files changed, 6 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 653c030eb840..2fb2882f0fa7 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -34,7 +34,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" #include "decompressor.h" diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 57314bee9059..26b15ae34d6f 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -55,7 +55,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" /* diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 482d78197811..50b22d330cec 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -27,7 +27,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "decompressor.h" #include "squashfs.h" diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 7c90bbd6879d..7eef571443c6 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -39,7 +39,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" /* diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index b7f64bcd2b70..d8f32452638e 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -37,7 +37,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" /* diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 5d87789bf1c1..7da759e34c52 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -29,7 +29,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" #include "decompressor.h" diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 5d45569d5f72..18f187fb486b 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -27,11 +27,6 @@ #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) -static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) -{ - return list_entry(inode, struct squashfs_inode_info, vfs_inode); -} - /* block.c */ extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, int, int); diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index d3e3a37f28a1..359baefc01fc 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h @@ -45,4 +45,10 @@ struct squashfs_inode_info { }; struct inode vfs_inode; }; + + +static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) +{ + return list_entry(inode, struct squashfs_inode_info, vfs_inode); +} #endif diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index d33be5dd6c32..05385dbe1465 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -32,7 +32,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" #include "xattr.h" diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index ab5801f66e26..818a5e063faf 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -29,7 +29,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" #include "squashfs.h" #include "decompressor.h" -- cgit v1.2.3 From 01a678c5a2f41663b8faf03d17e2bbdbf44158a9 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 5 Jan 2011 18:23:53 +0000 Subject: Squashfs: simplify CONFIG_SQUASHFS_LZO handling Get rid of messy repeated #if(n)def CONFIG_SQUASHFS_LZO code in decompressor.c Signed-off-by: Phillip Lougher --- fs/squashfs/decompressor.c | 8 ++------ fs/squashfs/decompressor.h | 4 ++++ fs/squashfs/squashfs.h | 3 --- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 50b22d330cec..a5940e54c4dd 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -40,7 +40,7 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { }; #ifndef CONFIG_SQUASHFS_LZO -static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { +static const struct squashfs_decompressor squashfs_lzo_comp_ops = { NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif @@ -57,13 +57,9 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = { static const struct squashfs_decompressor *decompressor[] = { &squashfs_zlib_comp_ops, - &squashfs_lzma_unsupported_comp_ops, -#ifdef CONFIG_SQUASHFS_LZO &squashfs_lzo_comp_ops, -#else - &squashfs_lzo_unsupported_comp_ops, -#endif &squashfs_xz_comp_ops, + &squashfs_lzma_unsupported_comp_ops, &squashfs_unknown_comp_ops }; diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 57e1acb4c6a9..3b305a70f7aa 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -57,4 +57,8 @@ static inline int squashfs_decompress(struct squashfs_sb_info *msblk, extern const struct squashfs_decompressor squashfs_xz_comp_ops; #endif +#ifdef CONFIG_SQUASHFS_LZO +extern const struct squashfs_decompressor squashfs_lzo_comp_ops; +#endif + #endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 18f187fb486b..ba729d808876 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -99,6 +99,3 @@ extern const struct xattr_handler *squashfs_xattr_handlers[]; /* zlib_wrapper.c */ extern const struct squashfs_decompressor squashfs_zlib_comp_ops; - -/* lzo_wrapper.c */ -extern const struct squashfs_decompressor squashfs_lzo_comp_ops; -- cgit v1.2.3 From 1c1266bb916e6a6b362d3be95f2cc7f3c41277a6 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Wed, 12 Jan 2011 16:53:27 -0800 Subject: ceph: fix getattr on directory when using norbytes The norbytes mount option was broken, and when doing getattr on a directory it return the rbytes instead of the number of entities. This commit fixes it. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e791fa34b23d..50001de66c69 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -701,10 +701,6 @@ static int fill_inode(struct inode *inode, ci->i_ceph_flags |= CEPH_I_COMPLETE; ci->i_max_offset = 2; } - - /* it may be better to set st_size in getattr instead? */ - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) - inode->i_size = ci->i_rbytes; break; default: pr_err("fill_inode %llx.%llx BAD mode 0%o\n", @@ -1805,7 +1801,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, else stat->dev = 0; if (S_ISDIR(inode->i_mode)) { - stat->size = ci->i_rbytes; + if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), + RBYTES)) + stat->size = ci->i_rbytes; + else + stat->size = ci->i_files + ci->i_subdirs; stat->blocks = 0; stat->blksize = 65536; } -- cgit v1.2.3 From 17db143fc091238c43ab9f373974ca2224a4c3f8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 Jan 2011 15:27:29 -0800 Subject: ceph: fix xattr rbtree search Fix xattr name comparison in rbtree search for strings that share a prefix. The *name argument is null terminated, but the xattr name is not, so we need to use strncmp, but that means adjusting for the case where name is a prefix of xattr->name. The corresponding case in __set_xattr() already handles this properly (although in that case *name is also not null terminated). Reported-by: Sergiy Kibrik Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 6e12a6ba5f79..8c9eba6ef9df 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; + int name_len = strlen(name); int c; p = &ci->i_xattrs.index.rb_node; @@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, parent = *p; xattr = rb_entry(parent, struct ceph_inode_xattr, node); c = strncmp(name, xattr->name, xattr->name_len); + if (c == 0 && name_len > xattr->name_len) + c = 1; if (c < 0) p = &(*p)->rb_left; else if (c > 0) -- cgit v1.2.3 From 6254b32b5791e47ba1c679d023f26985fa34755a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Jan 2011 17:19:38 -0800 Subject: ecryptfs: fix broken build Stephen Rothwell reports that the vfs merge broke the build of ecryptfs. The breakage comes from commit 66cb76666d69 ("sanitize ecryptfs ->mount()") which was obviously not even build tested. Tssk, tssk, Al. This is the minimal build fixup for the situation, although I don't have a filesystem to actually test it with. Reported-by: Stephen Rothwell Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/ecryptfs/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9ed476906327..d3b28abdd6aa 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -141,13 +141,12 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) return rc; } -static inode *ecryptfs_get_inode(struct inode *lower_inode, +static struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb) { struct inode *inode; int rc = 0; - lower_inode = lower_dentry->d_inode; if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { rc = -EXDEV; goto out; @@ -202,7 +201,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, { struct inode *lower_inode = lower_dentry->d_inode; struct inode *inode = ecryptfs_get_inode(lower_inode, sb); - if (IS_ERR(inode) + if (IS_ERR(inode)) return PTR_ERR(inode); if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) d_add(dentry, inode); -- cgit v1.2.3 From 6585027a5e8cb490e3a761b2f3f3c3acf722aff2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 Jan 2011 15:45:44 -0800 Subject: writeback: integrated background writeback work Check whether background writeback is needed after finishing each work. When bdi flusher thread finishes doing some work check whether any kind of background writeback needs to be done (either because dirty_background_ratio is exceeded or because we need to start flushing old inodes). If so, just do background write back. This way, bdi_start_background_writeback() just needs to wake up the flusher thread. It will do background writeback as soon as there is no other work. This is a preparatory patch for the next patch which stops background writeback as soon as there is other work to do. Signed-off-by: Jan Kara Signed-off-by: Wu Fengguang Cc: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Cc: Jan Engelhardt Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3d06ccc953aa..3a07f6d8bc0b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -84,13 +84,9 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } -static void bdi_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_work *work) +/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ +static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { - trace_writeback_queue(bdi, work); - - spin_lock_bh(&bdi->wb_lock); - list_add_tail(&work->list, &bdi->work_list); if (bdi->wb.task) { wake_up_process(bdi->wb.task); } else { @@ -98,15 +94,26 @@ static void bdi_queue_work(struct backing_dev_info *bdi, * The bdi thread isn't there, wake up the forker thread which * will create and run it. */ - trace_writeback_nothread(bdi, work); wake_up_process(default_backing_dev_info.wb.task); } +} + +static void bdi_queue_work(struct backing_dev_info *bdi, + struct wb_writeback_work *work) +{ + trace_writeback_queue(bdi, work); + + spin_lock_bh(&bdi->wb_lock); + list_add_tail(&work->list, &bdi->work_list); + if (!bdi->wb.task) + trace_writeback_nothread(bdi, work); + bdi_wakeup_flusher(bdi); spin_unlock_bh(&bdi->wb_lock); } static void __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, - bool range_cyclic, bool for_background) + bool range_cyclic) { struct wb_writeback_work *work; @@ -126,7 +133,6 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, work->sync_mode = WB_SYNC_NONE; work->nr_pages = nr_pages; work->range_cyclic = range_cyclic; - work->for_background = for_background; bdi_queue_work(bdi, work); } @@ -144,7 +150,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, */ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { - __bdi_start_writeback(bdi, nr_pages, true, false); + __bdi_start_writeback(bdi, nr_pages, true); } /** @@ -152,13 +158,20 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) * @bdi: the backing device to write from * * Description: - * This does WB_SYNC_NONE background writeback. The IO is only - * started when this function returns, we make no guarentees on - * completion. Caller need not hold sb s_umount semaphore. + * This makes sure WB_SYNC_NONE background writeback happens. When + * this function returns, it is only guaranteed that for given BDI + * some IO is happening if we are over background dirty threshold. + * Caller need not hold sb s_umount semaphore. */ void bdi_start_background_writeback(struct backing_dev_info *bdi) { - __bdi_start_writeback(bdi, LONG_MAX, true, true); + /* + * We just wake up the flusher thread. It will perform background + * writeback as soon as there is no other work to do. + */ + spin_lock_bh(&bdi->wb_lock); + bdi_wakeup_flusher(bdi); + spin_unlock_bh(&bdi->wb_lock); } /* @@ -718,6 +731,23 @@ static unsigned long get_nr_dirty_pages(void) get_nr_dirty_inodes(); } +static long wb_check_background_flush(struct bdi_writeback *wb) +{ + if (over_bground_thresh()) { + + struct wb_writeback_work work = { + .nr_pages = LONG_MAX, + .sync_mode = WB_SYNC_NONE, + .for_background = 1, + .range_cyclic = 1, + }; + + return wb_writeback(wb, &work); + } + + return 0; +} + static long wb_check_old_data_flush(struct bdi_writeback *wb) { unsigned long expired; @@ -787,6 +817,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * Check for periodic writeback, kupdated() style */ wrote += wb_check_old_data_flush(wb); + wrote += wb_check_background_flush(wb); clear_bit(BDI_writeback_running, &wb->bdi->state); return wrote; @@ -873,7 +904,7 @@ void wakeup_flusher_threads(long nr_pages) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - __bdi_start_writeback(bdi, nr_pages, false, false); + __bdi_start_writeback(bdi, nr_pages, false); } rcu_read_unlock(); } -- cgit v1.2.3 From 71927e84e0aebfbe5a91565c3b207af25a4e9162 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 13 Jan 2011 15:45:46 -0800 Subject: writeback: trace wakeup event for background writeback This tracks when balance_dirty_pages() tries to wakeup the flusher thread for background writeback (if it was not started already). Suggested-by: Christoph Hellwig Signed-off-by: Wu Fengguang Cc: Jan Kara Cc: Johannes Weiner Cc: Dave Chinner Cc: Jan Engelhardt Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 1 + include/trace/events/writeback.h | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3a07f6d8bc0b..482de0a92ca7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -169,6 +169,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) * We just wake up the flusher thread. It will perform background * writeback as soon as there is no other work to do. */ + trace_writeback_wake_background(bdi); spin_lock_bh(&bdi->wb_lock); bdi_wakeup_flusher(bdi); spin_unlock_bh(&bdi->wb_lock); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 89a2b2db4375..4e249b927eaa 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -81,6 +81,7 @@ DEFINE_EVENT(writeback_class, name, \ TP_ARGS(bdi)) DEFINE_WRITEBACK_EVENT(writeback_nowork); +DEFINE_WRITEBACK_EVENT(writeback_wake_background); DEFINE_WRITEBACK_EVENT(writeback_wake_thread); DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); -- cgit v1.2.3 From aa373cf550994623efb5d49a4d8775bafd10bbc1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 Jan 2011 15:45:47 -0800 Subject: writeback: stop background/kupdate works from livelocking other works Background writeback is easily livelockable in a loop in wb_writeback() by a process continuously re-dirtying pages (or continuously appending to a file). This is in fact intended as the target of background writeback is to write dirty pages it can find as long as we are over dirty_background_threshold. But the above behavior gets inconvenient at times because no other work queued in the flusher thread's queue gets processed. In particular, since e.g. sync(1) relies on flusher thread to do all the IO for it, sync(1) can hang forever waiting for flusher thread to do the work. Generally, when a flusher thread has some work queued, someone submitted the work to achieve a goal more specific than what background writeback does. Moreover by working on the specific work, we also reduce amount of dirty pages which is exactly the target of background writeout. So it makes sense to give specific work a priority over a generic page cleaning. Thus we interrupt background writeback if there is some other work to do. We return to the background writeback after completing all the queued work. This may delay the writeback of expired inodes for a while, however the expired inodes will eventually be flushed to disk as long as the other works won't livelock. [fengguang.wu@intel.com: update comment] Signed-off-by: Jan Kara Signed-off-by: Wu Fengguang Cc: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Cc: Jan Engelhardt Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 482de0a92ca7..9e72d04e706e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -650,6 +650,16 @@ static long wb_writeback(struct bdi_writeback *wb, if (work->nr_pages <= 0) break; + /* + * Background writeout and kupdate-style writeback may + * run forever. Stop them if there is other work to do + * so that e.g. sync can proceed. They'll be restarted + * after the other works are all done. + */ + if ((work->for_background || work->for_kupdate) && + !list_empty(&wb->bdi->work_list)) + break; + /* * For background writeout, stop when we are below the * background dirty threshold -- cgit v1.2.3 From b9543dac5bbc4aef0a598965b6b34f6259ab9a9b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 Jan 2011 15:45:48 -0800 Subject: writeback: avoid livelocking WB_SYNC_ALL writeback When wb_writeback() is called in WB_SYNC_ALL mode, work->nr_to_write is usually set to LONG_MAX. The logic in wb_writeback() then calls __writeback_inodes_sb() with nr_to_write == MAX_WRITEBACK_PAGES and we easily end up with non-positive nr_to_write after the function returns, if the inode has more than MAX_WRITEBACK_PAGES dirty pages at the moment. When nr_to_write is <= 0 wb_writeback() decides we need another round of writeback but this is wrong in some cases! For example when a single large file is continuously dirtied, we would never finish syncing it because each pass would be able to write MAX_WRITEBACK_PAGES and inode dirty timestamp never gets updated (as inode is never completely clean). Thus __writeback_inodes_sb() would write the redirtied inode again and again. Fix the issue by setting nr_to_write to LONG_MAX in WB_SYNC_ALL mode. We do not need nr_to_write in WB_SYNC_ALL mode anyway since write_cache_pages() does livelock avoidance using page tagging in WB_SYNC_ALL mode. This makes wb_writeback() call __writeback_inodes_sb() only once on WB_SYNC_ALL. The latter function won't livelock because it works on - a finite set of files by doing queue_io() once at the beginning - a finite set of pages by PAGECACHE_TAG_TOWRITE page tagging After this patch, program from http://lkml.org/lkml/2010/10/24/154 is no longer able to stall sync forever. [fengguang.wu@intel.com: fix locking comment] Signed-off-by: Jan Kara Signed-off-by: Wu Fengguang Cc: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Cc: Jan Engelhardt Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9e72d04e706e..e8063c938dd2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -630,6 +630,7 @@ static long wb_writeback(struct bdi_writeback *wb, }; unsigned long oldest_jif; long wrote = 0; + long write_chunk; struct inode *inode; if (wbc.for_kupdate) { @@ -642,6 +643,24 @@ static long wb_writeback(struct bdi_writeback *wb, wbc.range_end = LLONG_MAX; } + /* + * WB_SYNC_ALL mode does livelock avoidance by syncing dirty + * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX + * here avoids calling into writeback_inodes_wb() more than once. + * + * The intended call sequence for WB_SYNC_ALL writeback is: + * + * wb_writeback() + * __writeback_inodes_sb() <== called only once + * write_cache_pages() <== called once for each inode + * (quickly) tag currently dirty pages + * (maybe slowly) sync all tagged pages + */ + if (wbc.sync_mode == WB_SYNC_NONE) + write_chunk = MAX_WRITEBACK_PAGES; + else + write_chunk = LONG_MAX; + wbc.wb_start = jiffies; /* livelock avoidance */ for (;;) { /* @@ -668,7 +687,7 @@ static long wb_writeback(struct bdi_writeback *wb, break; wbc.more_io = 0; - wbc.nr_to_write = MAX_WRITEBACK_PAGES; + wbc.nr_to_write = write_chunk; wbc.pages_skipped = 0; trace_wbc_writeback_start(&wbc, wb->bdi); @@ -678,8 +697,8 @@ static long wb_writeback(struct bdi_writeback *wb, writeback_inodes_wb(wb, &wbc); trace_wbc_writeback_written(&wbc, wb->bdi); - work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; - wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; + work->nr_pages -= write_chunk - wbc.nr_to_write; + wrote += write_chunk - wbc.nr_to_write; /* * If we consumed everything, see if we have more @@ -694,7 +713,7 @@ static long wb_writeback(struct bdi_writeback *wb, /* * Did we write something? Try for more */ - if (wbc.nr_to_write < MAX_WRITEBACK_PAGES) + if (wbc.nr_to_write < write_chunk) continue; /* * Nothing written. Wait for some inode to -- cgit v1.2.3 From c691b9d983d7015d54057034f4cd9b6d8affd976 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 13 Jan 2011 15:45:48 -0800 Subject: sync_inode_metadata: fix comment Use correct function name, remove incorrect apostrophe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e8063c938dd2..05aab263e9aa 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1303,11 +1303,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) EXPORT_SYMBOL(sync_inode); /** - * sync_inode - write an inode to disk + * sync_inode_metadata - write an inode to disk * @inode: the inode to sync * @wait: wait for I/O to complete. * - * Write an inode to disk and adjust it's dirty state after completion. + * Write an inode to disk and adjust its dirty state after completion. * * Note: only writes the actual inode, no associated data or other metadata. */ -- cgit v1.2.3 From c32b0d4b3f19c2f5d29568f8b7b72b61693f1277 Mon Sep 17 00:00:00 2001 From: Hai Shan Date: Thu, 13 Jan 2011 15:45:51 -0800 Subject: fs/mpage.c: consolidate code Merge mpage_end_io_read() and mpage_end_io_write() into mpage_end_io() to eliminate code duplication. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Hai Shan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/mpage.c | 49 +++++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/mpage.c b/fs/mpage.c index fd56ca2ea556..d78455a81ec9 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -40,7 +40,7 @@ * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ -static void mpage_end_io_read(struct bio *bio, int err) +static void mpage_end_io(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -50,44 +50,29 @@ static void mpage_end_io_read(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); - } while (bvec >= bio->bi_io_vec); - bio_put(bio); -} - -static void mpage_end_io_write(struct bio *bio, int err) -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - - do { - struct page *page = bvec->bv_page; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (!uptodate){ - SetPageError(page); - if (page->mapping) - set_bit(AS_EIO, &page->mapping->flags); + if (bio_data_dir(bio) == READ) { + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } else { /* bio_data_dir(bio) == WRITE */ + if (!uptodate) { + SetPageError(page); + if (page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + } + end_page_writeback(page); } - end_page_writeback(page); } while (bvec >= bio->bi_io_vec); bio_put(bio); } static struct bio *mpage_bio_submit(int rw, struct bio *bio) { - bio->bi_end_io = mpage_end_io_read; - if (rw == WRITE) - bio->bi_end_io = mpage_end_io_write; + bio->bi_end_io = mpage_end_io; submit_bio(rw, bio); return NULL; } -- cgit v1.2.3 From 2d90508f638241a2e7422d884767398296ebe720 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Thu, 13 Jan 2011 15:45:53 -0800 Subject: mm: smaps: export mlock information Currently there is no way to find whether a process has locked its pages in memory or not. And which of the memory regions are locked in memory. Add a new field "Locked" to export this information via the smaps file. Signed-off-by: Nikanth Karthikesan Acked-by: Balbir Singh Acked-by: Wu Fengguang Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 3 +++ fs/proc/task_mmu.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 9471225212c4..ef757fca470b 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -375,6 +375,7 @@ Anonymous: 0 kB Swap: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB +Locked: 374 kB The first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. The remaining lines show the size of the mapping @@ -670,6 +671,8 @@ varies by architecture and compile options. The following is from a > cat /proc/meminfo +The "Locked" indicates whether the mapping is locked in memory or not. + MemTotal: 16344972 kB MemFree: 13634064 kB diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c3755bd8dd3e..60b914860f81 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -418,7 +418,8 @@ static int show_smap(struct seq_file *m, void *v) "Anonymous: %8lu kB\n" "Swap: %8lu kB\n" "KernelPageSize: %8lu kB\n" - "MMUPageSize: %8lu kB\n", + "MMUPageSize: %8lu kB\n" + "Locked: %8lu kB\n", (vma->vm_end - vma->vm_start) >> 10, mss.resident >> 10, (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), @@ -430,7 +431,9 @@ static int show_smap(struct seq_file *m, void *v) mss.anonymous >> 10, mss.swap >> 10, vma_kernel_pagesize(vma) >> 10, - vma_mmu_pagesize(vma) >> 10); + vma_mmu_pagesize(vma) >> 10, + (vma->vm_flags & VM_LOCKED) ? + (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; -- cgit v1.2.3 From dabb16f639820267b3850d804571c70bd93d4e07 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Thu, 13 Jan 2011 15:46:05 -0800 Subject: oom: allow a non-CAP_SYS_RESOURCE proces to oom_score_adj down We'd like to be able to oom_score_adj a process up/down as it enters/leaves the foreground. Currently, it is not possible to oom_adj down without CAP_SYS_RESOURCE. This patch allows a task to decrease its oom_score_adj back to the value that a CAP_SYS_RESOURCE thread set it to or its inherited value at fork. Assuming the thread that has forked it has oom_score_adj of 0, each process could decrease it back from 0 upon activation unless a CAP_SYS_RESOURCE thread elevated it to something higher. Alternative considered: * a setuid binary * a daemon with CAP_SYS_RESOURCE Since you don't wan't all processes to be able to reduce their oom_adj, a setuid or daemon implementation would be complex. The alternatives also have much higher overhead. This patch updated from original patch based on feedback from David Rientjes. Signed-off-by: Mandeep Singh Baines Acked-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Rik van Riel Cc: Ying Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 4 ++++ fs/proc/base.c | 4 +++- include/linux/sched.h | 2 ++ kernel/fork.c | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ef757fca470b..23cae6548d3a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1323,6 +1323,10 @@ scaled linearly with /proc//oom_score_adj. Writing to /proc//oom_score_adj or /proc//oom_adj will change the other with its scaled value. +The value of /proc//oom_score_adj may be reduced no lower than the last +value set by a CAP_SYS_RESOURCE process. To reduce the value any lower +requires CAP_SYS_RESOURCE. + NOTICE: /proc//oom_adj is deprecated and will be removed, please see Documentation/feature-removal-schedule.txt. diff --git a/fs/proc/base.c b/fs/proc/base.c index 93f1cdd5d3d7..9d096e82b201 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1151,7 +1151,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, goto err_task_lock; } - if (oom_score_adj < task->signal->oom_score_adj && + if (oom_score_adj < task->signal->oom_score_adj_min && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_sighand; @@ -1164,6 +1164,8 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, atomic_dec(&task->mm->oom_disable_count); } task->signal->oom_score_adj = oom_score_adj; + if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) + task->signal->oom_score_adj_min = oom_score_adj; /* * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is * always attainable. diff --git a/include/linux/sched.h b/include/linux/sched.h index 07402530fc70..f23b5bb6f52e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -634,6 +634,8 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + int oom_score_adj_min; /* OOM kill score adjustment minimum value. + * Only settable by CAP_SYS_RESOURCE. */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations diff --git a/kernel/fork.c b/kernel/fork.c index 76a1fdd80bdf..1499607e4da2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -910,6 +910,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); -- cgit v1.2.3 From 79134171df238171daa4c024a42b77b401ccb00b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 13 Jan 2011 15:46:58 -0800 Subject: thp: transparent hugepage vmstat Add hugepage stat information to /proc/vmstat and /proc/meminfo. Signed-off-by: Andrea Arcangeli Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 14 +++++++++++++- include/linux/mmzone.h | 1 + mm/huge_memory.c | 3 +++ mm/rmap.c | 20 ++++++++++++++++---- mm/vmstat.c | 1 + 5 files changed, 34 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index a65239cfd97e..ed257d141568 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -100,6 +100,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "VmallocChunk: %8lu kB\n" #ifdef CONFIG_MEMORY_FAILURE "HardwareCorrupted: %5lu kB\n" +#endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + "AnonHugePages: %8lu kB\n" #endif , K(i.totalram), @@ -128,7 +131,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeswap), K(global_page_state(NR_FILE_DIRTY)), K(global_page_state(NR_WRITEBACK)), - K(global_page_state(NR_ANON_PAGES)), + K(global_page_state(NR_ANON_PAGES) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * + HPAGE_PMD_NR +#endif + ), K(global_page_state(NR_FILE_MAPPED)), K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + @@ -150,6 +158,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) vmi.largest_chunk >> 10 #ifdef CONFIG_MEMORY_FAILURE ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) +#endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * + HPAGE_PMD_NR) #endif ); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dad3612a7e39..02ecb0189b1d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,6 +114,7 @@ enum zone_stat_item { NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ #endif + NR_ANON_TRANSPARENT_HUGEPAGES, NR_VM_ZONE_STAT_ITEMS }; /* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a313403b3c5e..7101112a5429 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -751,6 +751,9 @@ static void __split_huge_page_refcount(struct page *page) lru_add_page_tail(zone, page, page_tail); } + __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); + ClearPageCompound(page); compound_unlock(page); spin_unlock_irq(&zone->lru_lock); diff --git a/mm/rmap.c b/mm/rmap.c index 92e14dcfe737..3825ae4bc32f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -882,8 +882,13 @@ void do_page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, int exclusive) { int first = atomic_inc_and_test(&page->_mapcount); - if (first) - __inc_zone_page_state(page, NR_ANON_PAGES); + if (first) { + if (!PageTransHuge(page)) + __inc_zone_page_state(page, NR_ANON_PAGES); + else + __inc_zone_page_state(page, + NR_ANON_TRANSPARENT_HUGEPAGES); + } if (unlikely(PageKsm(page))) return; @@ -911,7 +916,10 @@ void page_add_new_anon_rmap(struct page *page, VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); SetPageSwapBacked(page); atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ - __inc_zone_page_state(page, NR_ANON_PAGES); + if (!PageTransHuge(page)) + __inc_zone_page_state(page, NR_ANON_PAGES); + else + __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __page_set_anon_rmap(page, vma, address, 1); if (page_evictable(page, vma)) lru_cache_add_lru(page, LRU_ACTIVE_ANON); @@ -964,7 +972,11 @@ void page_remove_rmap(struct page *page) return; if (PageAnon(page)) { mem_cgroup_uncharge_page(page); - __dec_zone_page_state(page, NR_ANON_PAGES); + if (!PageTransHuge(page)) + __dec_zone_page_state(page, NR_ANON_PAGES); + else + __dec_zone_page_state(page, + NR_ANON_TRANSPARENT_HUGEPAGES); } else { __dec_zone_page_state(page, NR_FILE_MAPPED); mem_cgroup_update_file_mapped(page, -1); diff --git a/mm/vmstat.c b/mm/vmstat.c index 751a65e00aac..0c3b5048773e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -880,6 +880,7 @@ static const char * const vmstat_text[] = { "numa_local", "numa_other", #endif + "nr_anon_transparent_hugepages", "nr_dirty_threshold", "nr_dirty_background_threshold", -- cgit v1.2.3 From 5f24ce5fd34c3ca1b3d10d30da754732da64d5c0 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 13 Jan 2011 15:47:00 -0800 Subject: thp: remove PG_buddy PG_buddy can be converted to _mapcount == -2. So the PG_compound_lock can be added to page->flags without overflowing (because of the sparse section bits increasing) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y. This also has to move the memory hotplug code from _mapcount to lru.next to avoid any risk of clashes. We can't use lru.next for PG_buddy removal, but memory hotplug can use lru.next even more easily than the mapcount instead. Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/page.c | 14 ++++++++------ include/linux/memory_hotplug.h | 14 +++++++++----- include/linux/mm.h | 21 +++++++++++++++++++++ include/linux/page-flags.h | 7 +------ mm/memory_hotplug.c | 14 ++++++++------ mm/page_alloc.c | 7 +++---- mm/sparse.c | 4 ++-- 7 files changed, 52 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/proc/page.c b/fs/proc/page.c index b06c674624e6..6d8e6a9e93ab 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page) if (PageHuge(page)) u |= 1 << KPF_HUGE; - u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); - /* - * Caveats on high order pages: - * PG_buddy will only be set on the head page; SLUB/SLQB do the same - * for PG_slab; SLOB won't set PG_slab at all on compound pages. + * Caveats on high order pages: page->_count will only be set + * -1 on the head page; SLUB/SLQB do the same for PG_slab; + * SLOB won't set PG_slab at all on compound pages. */ + if (PageBuddy(page)) + u |= 1 << KPF_BUDDY; + + u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); + u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); - u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 31c237a00c48..24376fe7ee68 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,12 +13,16 @@ struct mem_section; #ifdef CONFIG_MEMORY_HOTPLUG /* - * Types for free bootmem. - * The normal smallest mapcount is -1. Here is smaller value than it. + * Types for free bootmem stored in page->lru.next. These have to be in + * some random range in unsigned long space for debugging purposes. */ -#define SECTION_INFO (-1 - 1) -#define MIX_SECTION_INFO (-1 - 2) -#define NODE_INFO (-1 - 3) +enum { + MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, + SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, + MIX_SECTION_INFO, + NODE_INFO, + MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, +}; /* * pgdat resizing functions diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ec5138badab..7ab7d2b60041 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -397,6 +397,27 @@ static inline void init_page_count(struct page *page) atomic_set(&page->_count, 1); } +/* + * PageBuddy() indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + */ +static inline int PageBuddy(struct page *page) +{ + return atomic_read(&page->_mapcount) == -2; +} + +static inline void __SetPageBuddy(struct page *page) +{ + VM_BUG_ON(atomic_read(&page->_mapcount) != -1); + atomic_set(&page->_mapcount, -2); +} + +static inline void __ClearPageBuddy(struct page *page) +{ + VM_BUG_ON(!PageBuddy(page)); + atomic_set(&page->_mapcount, -1); +} + void put_page(struct page *page); void put_pages_list(struct list_head *pages); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4ca1241ef94e..0db8037e2725 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -48,9 +48,6 @@ * struct page (these bits with information) are always mapped into kernel * address space... * - * PG_buddy is set to indicate that the page is free and in the buddy system - * (see mm/page_alloc.c). - * * PG_hwpoison indicates that a page got corrupted in hardware and contains * data with incorrect ECC bits that triggered a machine check. Accessing is * not safe since it may cause another machine check. Don't touch! @@ -96,7 +93,6 @@ enum pageflags { PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ - PG_buddy, /* Page is free, on buddy lists */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ #ifdef CONFIG_MMU @@ -233,7 +229,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) * risky: they bypass page accounting. */ TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) -__PAGEFLAG(Buddy, buddy) PAGEFLAG(MappedToDisk, mappedtodisk) /* PG_readahead is only used for file reads; PG_reclaim is only for writes */ @@ -461,7 +456,7 @@ static inline int PageTransCompound(struct page *page) #define PAGE_FLAGS_CHECK_AT_FREE \ (1 << PG_lru | 1 << PG_locked | \ 1 << PG_private | 1 << PG_private_2 | \ - 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ __PG_COMPOUND_LOCK) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a2832c092509..e92f04749fcb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -82,9 +82,10 @@ static void release_memory_resource(struct resource *res) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE #ifndef CONFIG_SPARSEMEM_VMEMMAP -static void get_page_bootmem(unsigned long info, struct page *page, int type) +static void get_page_bootmem(unsigned long info, struct page *page, + unsigned long type) { - atomic_set(&page->_mapcount, type); + page->lru.next = (struct list_head *) type; SetPagePrivate(page); set_page_private(page, info); atomic_inc(&page->_count); @@ -94,15 +95,16 @@ static void get_page_bootmem(unsigned long info, struct page *page, int type) * so use __ref to tell modpost not to generate a warning */ void __ref put_page_bootmem(struct page *page) { - int type; + unsigned long type; - type = atomic_read(&page->_mapcount); - BUG_ON(type >= -1); + type = (unsigned long) page->lru.next; + BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || + type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); if (atomic_dec_return(&page->_count) == 1) { ClearPagePrivate(page); set_page_private(page, 0); - reset_page_mapcount(page); + INIT_LIST_HEAD(&page->lru); __free_pages_bootmem(page, 0); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e7664b9f706c..9dfe49bceff4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -449,8 +449,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order) * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. * - * For recording whether a page is in the buddy system, we use PG_buddy. - * Setting, clearing, and testing PG_buddy is serialized by zone->lock. + * For recording whether a page is in the buddy system, we set ->_mapcount -2. + * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock. * * For recording page's order, we use page_private(page). */ @@ -483,7 +483,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, * as necessary, plus some accounting needed to play nicely with other * parts of the VM system. * At each level, we keep a list of pages, which are heads of continuous - * free pages of length of (1 << order) and marked with PG_buddy. Page's + * free pages of length of (1 << order) and marked with _mapcount -2. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the * other. That is, if we allocate a small block, and both were @@ -5574,7 +5574,6 @@ static struct trace_print_flags pageflag_names[] = { {1UL << PG_swapcache, "swapcache" }, {1UL << PG_mappedtodisk, "mappedtodisk" }, {1UL << PG_reclaim, "reclaim" }, - {1UL << PG_buddy, "buddy" }, {1UL << PG_swapbacked, "swapbacked" }, {1UL << PG_unevictable, "unevictable" }, #ifdef CONFIG_MMU diff --git a/mm/sparse.c b/mm/sparse.c index 95ac219af379..93250207c5cf 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -671,10 +671,10 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) static void free_map_bootmem(struct page *page, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; - int magic; + unsigned long magic; for (i = 0; i < nr_pages; i++, page++) { - magic = atomic_read(&page->_mapcount); + magic = (unsigned long) page->lru.next; BUG_ON(magic == NODE_INFO); -- cgit v1.2.3 From cb9ef8d5e394f70db64bda79c20d3569a20d2574 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 13 Jan 2011 15:47:26 -0800 Subject: fs/fs-writeback.c: fix sync_inodes_sb() return value kernel-doc The sync_inodes_sb() function does not have a return value. Remove the outdated documentation comment. Signed-off-by: Stefan Hajnoczi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05aab263e9aa..59c6e4956786 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1225,7 +1225,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this - * super_block. The number of pages synced is returned. + * super_block. */ void sync_inodes_sb(struct super_block *sb) { -- cgit v1.2.3 From 9ee1ba5402e9d35fb35f8e61c968f4987b5fb443 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Jan 2011 17:08:19 -0500 Subject: nfsd4: initialize cb_per_client Otherwise a callback that is aborted before it runs will result in a list_del on an uninitialized list head. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f1d9dd45553a..209e186386a0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -866,6 +866,7 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) cb->cb_ops = &nfsd4_cb_recall_ops; dp->dl_retries = 1; + INIT_LIST_HEAD(&cb->cb_per_client); cb->cb_done = true; run_nfsd4_cb(&dp->dl_recall); -- cgit v1.2.3 From 9ce137eee4febaabca81143be07d4205d2bd52d4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Jan 2011 14:07:12 -0500 Subject: nfsd: don't support msnfs export option We've long had these pointless #ifdef MSNFS's sprinkled throughout the code--pointless because MSNFS is always defined (and we give no config option to make that easy to change). So we could just remove the ifdef's and compile the resulting code unconditionally. But as long as we're there: why not just rip out this code entirely? The only purpose is to implement the "msnfs" export option which turns on Windows-like behavior in some cases, and: - the export option isn't documented anywhere; - the userland utilities (which would need to be able to parse "msnfs" in an export file) don't support it; - I don't know how to maintain this, as I don't know what the proper behavior is; and - google shows no evidence that anyone has ever used this. Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 4 ---- fs/nfsd/vfs.c | 41 ++--------------------------------------- include/linux/nfsd/export.h | 2 +- 3 files changed, 3 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c0fcb7ab7f6d..8b31e5f8795d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1,4 +1,3 @@ -#define MSNFS /* HACK HACK */ /* * NFS exporting and validation. * @@ -1444,9 +1443,6 @@ static struct flags { { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, { NFSEXP_V4ROOT, {"v4root", ""}}, -#ifdef MSNFS - { NFSEXP_MSNFS, {"msnfs", ""}}, -#endif { 0, {"", ""}} }; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b991125ce4a5..0a01e2fc5dda 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1,4 +1,3 @@ -#define MSNFS /* HACK HACK */ /* * File operations used by nfsd. Some of these have been ripped from * other parts of the kernel because they weren't exported, others @@ -875,15 +874,6 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static inline int svc_msnfs(struct svc_fh *ffhp) -{ -#ifdef MSNFS - return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS); -#else - return 0; -#endif -} - static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) @@ -896,9 +886,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, err = nfserr_perm; inode = file->f_path.dentry->d_inode; - if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) - goto out; - if (file->f_op->splice_read && rqstp->rq_splice_ok) { struct splice_desc sd = { .len = 0, @@ -923,7 +910,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, fsnotify_access(file); } else err = nfserrno(host_err); -out: return err; } @@ -988,14 +974,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int stable = *stablep; int use_wgather; -#ifdef MSNFS - err = nfserr_perm; - - if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt))) - goto out; -#endif - dentry = file->f_path.dentry; inode = dentry->d_inode; exp = fhp->fh_export; @@ -1046,7 +1024,6 @@ out_nfserr: err = 0; else err = nfserrno(host_err); -out: return err; } @@ -1751,13 +1728,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ndentry == trap) goto out_dput_new; - if (svc_msnfs(ffhp) && - ((atomic_read(&odentry->d_count) > 1) - || (atomic_read(&ndentry->d_count) > 1))) { - host_err = -EPERM; - goto out_dput_new; - } - host_err = -EXDEV; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out_dput_new; @@ -1836,17 +1806,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; - if (type != S_IFDIR) { /* It's UNLINK */ -#ifdef MSNFS - if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (atomic_read(&rdentry->d_count) > 1)) { - host_err = -EPERM; - } else -#endif + if (type != S_IFDIR) host_err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ + else host_err = vfs_rmdir(dirp, rdentry); - } dput(rdentry); diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 8ae78a61eea4..bd316159278c 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -35,7 +35,7 @@ #define NFSEXP_NOHIDE 0x0200 #define NFSEXP_NOSUBTREECHECK 0x0400 #define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */ -#define NFSEXP_MSNFS 0x1000 /* do silly things that MS clients expect */ +#define NFSEXP_MSNFS 0x1000 /* do silly things that MS clients expect; no longer supported */ #define NFSEXP_FSID 0x2000 #define NFSEXP_CROSSMOUNT 0x4000 #define NFSEXP_NOACL 0x8000 /* reserved for possible ACL related use */ -- cgit v1.2.3 From 6a76bebefe15d9a08864f824d7f8d5beaf37c997 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Jan 2011 12:54:39 -0500 Subject: nfsd4: break lease on nfsd setattr Leases (delegations) should really be broken on any metadata change, not just on size change. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0a01e2fc5dda..f97d4356431b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -374,14 +374,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, goto out; } - /* - * If we are changing the size of the file, then - * we need to break all leases. - */ - host_err = break_lease(inode, O_WRONLY | O_NONBLOCK); - if (host_err) /* ENOMEM or EWOULDBLOCK */ - goto out_nfserr; - host_err = get_write_access(inode); if (host_err) goto out_nfserr; @@ -422,7 +414,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, err = nfserr_notsync; if (!check_guard || guardtime == inode->i_ctime.tv_sec) { + host_err = break_lease(inode, O_WRONLY | O_NONBLOCK); + if (host_err) + goto out_nfserr; fh_lock(fhp); + host_err = notify_change(dentry, iap); err = nfserrno(host_err); fh_unlock(fhp); -- cgit v1.2.3 From 4795bb37effb7b8fe77e2d2034545d062d3788a8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Jan 2011 13:55:46 -0500 Subject: nfsd: break lease on unlink, link, and rename Any change to any of the links pointing to an entry should also break delegations. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f97d4356431b..a110adbb3673 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -272,6 +272,13 @@ out: return err; } +static int nfsd_break_lease(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + return break_lease(inode, O_WRONLY | O_NONBLOCK); +} + /* * Commit metadata changes to stable storage. */ @@ -414,7 +421,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, err = nfserr_notsync; if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = break_lease(inode, O_WRONLY | O_NONBLOCK); + host_err = nfsd_break_lease(inode); if (host_err) goto out_nfserr; fh_lock(fhp); @@ -1639,6 +1646,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserrno(host_err); goto out_dput; } + err = nfserr_noent; + if (!dold->d_inode) + goto out_drop_write; + host_err = nfsd_break_lease(dold->d_inode); + if (host_err) + goto out_drop_write; host_err = vfs_link(dold, dirp, dnew); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); @@ -1650,6 +1663,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } +out_drop_write: mnt_drop_write(tfhp->fh_export->ex_path.mnt); out_dput: dput(dnew); @@ -1731,15 +1745,17 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (host_err) goto out_dput_new; + host_err = nfsd_break_lease(odentry->d_inode); + if (host_err) + goto out_drop_write; host_err = vfs_rename(fdir, odentry, tdir, ndentry); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) host_err = commit_metadata(ffhp); } - +out_drop_write: mnt_drop_write(ffhp->fh_export->ex_path.mnt); - out_dput_new: dput(ndentry); out_dput_old: @@ -1802,11 +1818,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; + host_err = nfsd_break_lease(rdentry->d_inode); + if (host_err) + goto out_put; if (type != S_IFDIR) host_err = vfs_unlink(dirp, rdentry); else host_err = vfs_rmdir(dirp, rdentry); - +out_put: dput(rdentry); if (!host_err) -- cgit v1.2.3 From bb20c18db6fbb5e6ba499c76473a487d35073467 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 14 Jan 2011 02:35:53 +0000 Subject: fs: force_reval_path drop rcu-walk before d_invalidate d_revalidate can return in rcu-walk mode even when it returns 0. We can't just call any old dcache function on rcu-walk dentry (the dentry is unstable, so even through d_lock can safely be taken, the result may no longer be what we expect -- careful re-checks would be required). So just drop rcu in this case. (I missed this conversion when switching to the rcu-walk convention that Linus suggested) Signed-off-by: Nick Piggin --- fs/namei.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 19433cdba011..0f02359ce685 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -583,6 +583,13 @@ void release_open_intent(struct nameidata *nd) fput(nd->intent.open.file); } +/* + * Call d_revalidate and handle filesystems that request rcu-walk + * to be dropped. This may be called and return in rcu-walk mode, + * regardless of success or error. If -ECHILD is returned, the caller + * must return -ECHILD back up the path walk stack so path walk may + * be restarted in ref-walk mode. + */ static int d_revalidate(struct dentry *dentry, struct nameidata *nd) { int status; @@ -673,6 +680,9 @@ force_reval_path(struct path *path, struct nameidata *nd) return 0; if (!status) { + /* Don't d_invalidate in rcu-walk mode */ + if (nameidata_drop_rcu(nd)) + return -ECHILD; d_invalidate(dentry); status = -ESTALE; } -- cgit v1.2.3 From 90dbb77ba48dddb87445d238e84cd137cf97dd98 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 14 Jan 2011 02:36:19 +0000 Subject: fs: fix dropping of rcu-walk from force_reval_path As J. R. Okajima noted, force_reval_path passes in the same dentry to d_revalidate as the one in the nameidata structure (other callers pass in a child), so the locking breaks. This can oops with a chrooted nfs mount, for example. Similarly there can be other problems with revalidating a dentry which is already in nameidata of the path walk. Signed-off-by: Nick Piggin --- fs/namei.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0f02359ce685..14c73edca9ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -479,6 +479,14 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; + /* + * It can be possible to revalidate the dentry that we started + * the path walk with. force_reval_path may also revalidate the + * dentry already committed to the nameidata. + */ + if (unlikely(parent == dentry)) + return nameidata_drop_rcu(nd); + BUG_ON(!(nd->flags & LOOKUP_RCU)); if (nd->root.mnt) { spin_lock(&fs->lock); -- cgit v1.2.3 From 657e94b673a805b427903c5628e95348235fad06 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 14 Jan 2011 02:48:39 +0000 Subject: nfs: add missing rcu-walk check Signed-off-by: Nick Piggin --- fs/nfs/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d33da530097a..a0d8320bed9c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1410,11 +1410,15 @@ no_open: static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *parent = NULL; - struct inode *inode = dentry->d_inode; + struct inode *inode; struct inode *dir; struct nfs_open_context *ctx; int openflags, ret = 0; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; -- cgit v1.2.3 From f20877d94a74557b7c28b4ed8920d834c31e0ea5 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Fri, 14 Jan 2011 03:56:04 +0000 Subject: fs: fix do_last error case when need_reval_dot When open(2) without O_DIRECTORY opens an existing dir, it should return EISDIR. In do_last(), the variable 'error' is initialized EISDIR, but it is changed by d_revalidate() which returns any positive to represent 'the target dir is valid.' Should we keep and return the initialized 'error' in this case. Signed-off-by: Nick Piggin --- fs/namei.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 14c73edca9ce..bc24894c5f14 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2122,11 +2122,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, dir = nd->path.dentry; case LAST_DOT: if (need_reval_dot(dir)) { - error = d_revalidate(nd->path.dentry, nd); - if (!error) - error = -ESTALE; - if (error < 0) + int status = d_revalidate(nd->path.dentry, nd); + if (!status) + status = -ESTALE; + if (status < 0) { + error = status; goto exit; + } } /* fallthrough */ case LAST_ROOT: -- cgit v1.2.3 From 7b9337aaf98f9941d0927a75217d3ff31afec609 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 14 Jan 2011 08:42:43 +0000 Subject: fs: namei fix ->put_link on wrong inode in do_filp_open J. R. Okajima noticed that ->put_link is being attempted on the wrong inode, and suggested the way to fix it. I changed it a bit according to Al's suggestion to keep an explicit link path around. Signed-off-by: Nick Piggin --- fs/namei.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index bc24894c5f14..9cda4c452a6d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -779,7 +779,8 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) mntput(path->mnt); } -static inline void path_to_nameidata(struct path *path, struct nameidata *nd) +static inline void path_to_nameidata(const struct path *path, + struct nameidata *nd) { if (!(nd->flags & LOOKUP_RCU)) { dput(nd->path.dentry); @@ -791,20 +792,20 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd) } static __always_inline int -__do_follow_link(struct path *path, struct nameidata *nd, void **p) +__do_follow_link(const struct path *link, struct nameidata *nd, void **p) { int error; - struct dentry *dentry = path->dentry; + struct dentry *dentry = link->dentry; - touch_atime(path->mnt, dentry); + touch_atime(link->mnt, dentry); nd_set_link(nd, NULL); - if (path->mnt != nd->path.mnt) { - path_to_nameidata(path, nd); + if (link->mnt != nd->path.mnt) { + path_to_nameidata(link, nd); nd->inode = nd->path.dentry->d_inode; dget(dentry); } - mntget(path->mnt); + mntget(link->mnt); nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); @@ -2347,11 +2348,12 @@ reval: nd.flags = flags; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ - struct path holder; + struct path link = path; + struct inode *linki = link.dentry->d_inode; void *cookie; error = -ELOOP; /* S_ISDIR part is a temporary automount kludge */ - if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(linki->i_mode)) goto exit_dput; if (count++ == 32) goto exit_dput; @@ -2367,23 +2369,22 @@ reval: * just set LAST_BIND. */ nd.flags |= LOOKUP_PARENT; - error = security_inode_follow_link(path.dentry, &nd); + error = security_inode_follow_link(link.dentry, &nd); if (error) goto exit_dput; - error = __do_follow_link(&path, &nd, &cookie); + error = __do_follow_link(&link, &nd, &cookie); if (unlikely(error)) { - if (!IS_ERR(cookie) && nd.inode->i_op->put_link) - nd.inode->i_op->put_link(path.dentry, &nd, cookie); + if (!IS_ERR(cookie) && linki->i_op->put_link) + linki->i_op->put_link(link.dentry, &nd, cookie); /* nd.path had been dropped */ - nd.path = path; + nd.path = link; goto out_path; } - holder = path; nd.flags &= ~LOOKUP_PARENT; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); - if (nd.inode->i_op->put_link) - nd.inode->i_op->put_link(holder.dentry, &nd, cookie); - path_put(&holder); + if (linki->i_op->put_link) + linki->i_op->put_link(link.dentry, &nd, cookie); + path_put(&link); } out: if (nd.root.mnt) -- cgit v1.2.3 From ba28b93a5227cc69ec811507f7d85ac25fa20fe2 Mon Sep 17 00:00:00 2001 From: Akshat Aranya Date: Fri, 14 Jan 2011 16:00:47 +0000 Subject: FS-Cache: Fix operation handling fscache_submit_exclusive_op() adds an operation to the pending list if other operations are pending. Fix the check for pending ops as n_ops must be greater than 0 at the point it is checked as it is incremented immediately before under lock. Signed-off-by: Akshat Aranya Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/fscache/operation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index b9f34eaede09..48a18f184d50 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -101,7 +101,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ - if (object->n_ops > 0) { + if (object->n_ops > 1) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); -- cgit v1.2.3 From 0ad53eeefcbb2620b6a71ffdaad4add20b450b8b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jan 2011 15:56:37 +0000 Subject: afs: add afs_wq and use it instead of the system workqueue flush_scheduled_work() is going away. afs needs to make sure all the works it has queued have finished before being unloaded and there can be arbitrary number of pending works. Add afs_wq and use it as the flush domain instead of the system workqueue. Also, convert cancel_delayed_work() + flush_scheduled_work() to cancel_delayed_work_sync() in afs_mntpt_kill_timer(). Signed-off-by: Tejun Heo Signed-off-by: David Howells Cc: linux-afs@lists.infradead.org Signed-off-by: Linus Torvalds --- fs/afs/cmservice.c | 12 ++++++------ fs/afs/internal.h | 1 + fs/afs/main.c | 13 +++++++++++-- fs/afs/mntpt.c | 11 +++++------ fs/afs/rxrpc.c | 2 +- fs/afs/server.c | 13 +++++++------ fs/afs/vlocation.c | 14 +++++++------- 7 files changed, 38 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index a3bcec75c54a..1c8c6cc6de30 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -289,7 +289,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, call->server = server; INIT_WORK(&call->work, SRXAFSCB_CallBack); - schedule_work(&call->work); + queue_work(afs_wq, &call->work); return 0; } @@ -336,7 +336,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, call->server = server; INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); - schedule_work(&call->work); + queue_work(afs_wq, &call->work); return 0; } @@ -367,7 +367,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, call->server = server; INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); - schedule_work(&call->work); + queue_work(afs_wq, &call->work); return 0; } @@ -400,7 +400,7 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_Probe); - schedule_work(&call->work); + queue_work(afs_wq, &call->work); return 0; } @@ -496,7 +496,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); - schedule_work(&call->work); + queue_work(afs_wq, &call->work); return 0; } @@ -580,6 +580,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself); - schedule_work(&call->work); + queue_work(afs_wq, &call->work); return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ab6db5abaf53..58c633b80246 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -577,6 +577,7 @@ extern int afs_drop_inode(struct inode *); /* * main.c */ +extern struct workqueue_struct *afs_wq; extern struct afs_uuid afs_uuid; /* diff --git a/fs/afs/main.c b/fs/afs/main.c index cfd1cbe25b22..42dd2e499ed8 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -30,6 +30,7 @@ module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); struct afs_uuid afs_uuid; +struct workqueue_struct *afs_wq; /* * get a client UUID @@ -87,10 +88,16 @@ static int __init afs_init(void) if (ret < 0) return ret; + /* create workqueue */ + ret = -ENOMEM; + afs_wq = alloc_workqueue("afs", 0, 0); + if (!afs_wq) + return ret; + /* register the /proc stuff */ ret = afs_proc_init(); if (ret < 0) - return ret; + goto error_proc; #ifdef CONFIG_AFS_FSCACHE /* we want to be able to cache */ @@ -140,6 +147,8 @@ error_cell_init: error_cache: #endif afs_proc_cleanup(); +error_proc: + destroy_workqueue(afs_wq); rcu_barrier(); printk(KERN_ERR "kAFS: failed to register: %d\n", ret); return ret; @@ -163,7 +172,7 @@ static void __exit afs_exit(void) afs_purge_servers(); afs_callback_update_kill(); afs_vlocation_purge(); - flush_scheduled_work(); + destroy_workqueue(afs_wq); afs_cell_purge(); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 6153417caf57..e83c0336e7b5 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -268,8 +268,8 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) path_put(&nd->path); nd->path.mnt = newmnt; nd->path.dentry = dget(newmnt->mnt_root); - schedule_delayed_work(&afs_mntpt_expiry_timer, - afs_mntpt_expiry_timeout * HZ); + queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, + afs_mntpt_expiry_timeout * HZ); break; case -EBUSY: /* someone else made a mount here whilst we were busy */ @@ -295,8 +295,8 @@ static void afs_mntpt_expiry_timed_out(struct work_struct *work) if (!list_empty(&afs_vfsmounts)) { mark_mounts_for_expiry(&afs_vfsmounts); - schedule_delayed_work(&afs_mntpt_expiry_timer, - afs_mntpt_expiry_timeout * HZ); + queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, + afs_mntpt_expiry_timeout * HZ); } _leave(""); @@ -310,6 +310,5 @@ void afs_mntpt_kill_timer(void) _enter(""); ASSERT(list_empty(&afs_vfsmounts)); - cancel_delayed_work(&afs_mntpt_expiry_timer); - flush_scheduled_work(); + cancel_delayed_work_sync(&afs_mntpt_expiry_timer); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 654d8fdbf01f..e45a323aebb4 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -410,7 +410,7 @@ static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID, if (!call) { /* its an incoming call for our callback service */ skb_queue_tail(&afs_incoming_calls, skb); - schedule_work(&afs_collect_incoming_call_work); + queue_work(afs_wq, &afs_collect_incoming_call_work); } else { /* route the messages directly to the appropriate call */ skb_queue_tail(&call->rx_queue, skb); diff --git a/fs/afs/server.c b/fs/afs/server.c index 9fdc7fe3a7bc..d59b7516e943 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -238,8 +238,8 @@ void afs_put_server(struct afs_server *server) if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); server->time_of_death = get_seconds(); - schedule_delayed_work(&afs_server_reaper, - afs_server_timeout * HZ); + queue_delayed_work(afs_wq, &afs_server_reaper, + afs_server_timeout * HZ); } spin_unlock(&afs_server_graveyard_lock); _leave(" [dead]"); @@ -285,10 +285,11 @@ static void afs_reap_server(struct work_struct *work) expiry = server->time_of_death + afs_server_timeout; if (expiry > now) { delay = (expiry - now) * HZ; - if (!schedule_delayed_work(&afs_server_reaper, delay)) { + if (!queue_delayed_work(afs_wq, &afs_server_reaper, + delay)) { cancel_delayed_work(&afs_server_reaper); - schedule_delayed_work(&afs_server_reaper, - delay); + queue_delayed_work(afs_wq, &afs_server_reaper, + delay); } break; } @@ -323,5 +324,5 @@ void __exit afs_purge_servers(void) { afs_server_timeout = 0; cancel_delayed_work(&afs_server_reaper); - schedule_delayed_work(&afs_server_reaper, 0); + queue_delayed_work(afs_wq, &afs_server_reaper, 0); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 9ac260d1361d..431984d2e372 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -507,8 +507,8 @@ void afs_put_vlocation(struct afs_vlocation *vl) _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); vl->time_of_death = get_seconds(); - schedule_delayed_work(&afs_vlocation_reap, - afs_vlocation_timeout * HZ); + queue_delayed_work(afs_wq, &afs_vlocation_reap, + afs_vlocation_timeout * HZ); /* suspend updates on this record */ if (!list_empty(&vl->update)) { @@ -561,11 +561,11 @@ static void afs_vlocation_reaper(struct work_struct *work) if (expiry > now) { delay = (expiry - now) * HZ; _debug("delay %lu", delay); - if (!schedule_delayed_work(&afs_vlocation_reap, - delay)) { + if (!queue_delayed_work(afs_wq, &afs_vlocation_reap, + delay)) { cancel_delayed_work(&afs_vlocation_reap); - schedule_delayed_work(&afs_vlocation_reap, - delay); + queue_delayed_work(afs_wq, &afs_vlocation_reap, + delay); } break; } @@ -620,7 +620,7 @@ void afs_vlocation_purge(void) destroy_workqueue(afs_vlocation_update_worker); cancel_delayed_work(&afs_vlocation_reap); - schedule_delayed_work(&afs_vlocation_reap, 0); + queue_delayed_work(afs_wq, &afs_vlocation_reap, 0); } /* -- cgit v1.2.3 From 49731baa41df404c2c3f44555869ab387363af43 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jan 2011 18:43:57 +0100 Subject: block: restore multiple bd_link_disk_holder() support Commit e09b457b (block: simplify holder symlink handling) incorrectly assumed that there is only one link at maximum. dm may use multiple links and expects block layer to track reference count for each link, which is different from and unrelated to the exclusive device holder identified by @holder when the device is opened. Remove the single holder assumption and automatic removal of the link and revive the per-link reference count tracking. The code essentially behaves the same as before commit e09b457b sans the unnecessary kobject reference count dancing. While at it, note that this facility should not be used by anyone else than the current ones. Sysfs symlinks shouldn't be abused like this and the whole thing doesn't belong in the block layer at all. Signed-off-by: Tejun Heo Reported-by: Milan Broz Cc: Jun'ichi Nomura Cc: Neil Brown Cc: linux-raid@vger.kernel.org Cc: Kay Sievers Signed-off-by: Jens Axboe --- drivers/md/dm-table.c | 1 + drivers/md/md.c | 1 + fs/block_dev.c | 93 +++++++++++++++++++++++++++++++++++++++++---------- include/linux/fs.h | 8 ++++- 4 files changed, 84 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index dffa0ac7c4f0..38e4eb1bb965 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -350,6 +350,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) if (!d->dm_dev.bdev) return; + bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md)); blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index cf8594c5ea21..b76cfc89e1b5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1912,6 +1912,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; diff --git a/fs/block_dev.c b/fs/block_dev.c index fe3f59c14a02..333a7bb4cb9c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -432,6 +432,9 @@ static void init_once(void *foo) mutex_init(&bdev->bd_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); +#ifdef CONFIG_SYSFS + INIT_LIST_HEAD(&bdev->bd_holder_disks); +#endif inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -779,6 +782,23 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, } #ifdef CONFIG_SYSFS +struct bd_holder_disk { + struct list_head list; + struct gendisk *disk; + int refcnt; +}; + +static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, + struct gendisk *disk) +{ + struct bd_holder_disk *holder; + + list_for_each_entry(holder, &bdev->bd_holder_disks, list) + if (holder->disk == disk) + return holder; + return NULL; +} + static int add_symlink(struct kobject *from, struct kobject *to) { return sysfs_create_link(from, to, kobject_name(to)); @@ -794,6 +814,8 @@ static void del_symlink(struct kobject *from, struct kobject *to) * @bdev: the claimed slave bdev * @disk: the holding disk * + * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. + * * This functions creates the following sysfs symlinks. * * - from "slaves" directory of the holder @disk to the claimed @bdev @@ -817,47 +839,83 @@ static void del_symlink(struct kobject *from, struct kobject *to) */ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { + struct bd_holder_disk *holder; int ret = 0; mutex_lock(&bdev->bd_mutex); - WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk); + WARN_ON_ONCE(!bdev->bd_holder); /* FIXME: remove the following once add_disk() handles errors */ if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) goto out_unlock; - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); - if (ret) + holder = bd_find_holder_disk(bdev, disk); + if (holder) { + holder->refcnt++; goto out_unlock; + } - ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); - if (ret) { - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + holder = kzalloc(sizeof(*holder), GFP_KERNEL); + if (!holder) { + ret = -ENOMEM; goto out_unlock; } - bdev->bd_holder_disk = disk; + INIT_LIST_HEAD(&holder->list); + holder->disk = disk; + holder->refcnt = 1; + + ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + if (ret) + goto out_free; + + ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + if (ret) + goto out_del; + + list_add(&holder->list, &bdev->bd_holder_disks); + goto out_unlock; + +out_del: + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); +out_free: + kfree(holder); out_unlock: mutex_unlock(&bdev->bd_mutex); return ret; } EXPORT_SYMBOL_GPL(bd_link_disk_holder); -static void bd_unlink_disk_holder(struct block_device *bdev) +/** + * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder() + * @bdev: the calimed slave bdev + * @disk: the holding disk + * + * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. + * + * CONTEXT: + * Might sleep. + */ +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { - struct gendisk *disk = bdev->bd_holder_disk; + struct bd_holder_disk *holder; - bdev->bd_holder_disk = NULL; - if (!disk) - return; + mutex_lock(&bdev->bd_mutex); - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); - del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + holder = bd_find_holder_disk(bdev, disk); + + if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(bdev->bd_part->holder_dir, + &disk_to_dev(disk)->kobj); + list_del_init(&holder->list); + kfree(holder); + } + + mutex_unlock(&bdev->bd_mutex); } -#else -static inline void bd_unlink_disk_holder(struct block_device *bdev) -{ } +EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif /** @@ -1380,7 +1438,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) * unblock evpoll if it was a write holder. */ if (bdev_free) { - bd_unlink_disk_holder(bdev); if (bdev->bd_write_holder) { disk_unblock_events(bdev->bd_disk); bdev->bd_write_holder = false; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3984f2358d1f..fb2190349cdf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -666,7 +666,7 @@ struct block_device { int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS - struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ + struct list_head bd_holder_disks; #endif struct block_device * bd_contains; unsigned bd_block_size; @@ -2058,12 +2058,18 @@ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +extern void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { return 0; } +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} #endif #endif -- cgit v1.2.3 From 56c24305d1494a7e345c75669dc60e8b231b735b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:25 -0500 Subject: cifs: cFYI the entire error code in map_smb_to_linux_error We currently only print the DOS error part. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/netmisc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 9aad47a2d62f..6783ce6cdc89 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -899,8 +899,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr) } /* else ERRHRD class errors or junk - return EIO */ - cFYI(1, "Mapping smb error code %d to POSIX err %d", - smberrcode, rc); + cFYI(1, "Mapping smb error code 0x%x to POSIX err %d", + le32_to_cpu(smb->Status.CifsError), rc); /* generic corrective action e.g. reconnect SMB session on * ERRbaduid could be added */ -- cgit v1.2.3 From bd7633195581c7665ce9dd80c665ec93466d1b64 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 10:33:24 -0500 Subject: cifs: add cruid= mount option In commit 3e4b3e1f we separated the "uid" mount option such that it no longer determined the owner of the credential cache by default. When we did this, we added a new option to cifs.upcall (--legacy-uid) to try to make it so that it would behave the same was as it did before. This ignored a rather important point -- the kernel has no way to know what options are being passed to cifs.upcall, so it doesn't know what uid it should use to determine whether to match an existing krb5 session. The simplest solution is to simply add a new "cruid=" mount option that only governs the uid owner of the credential cache for the mount. Unfortunately, this means that the --legacy-uid option in cifs.upcall was ill-considered and is now useless, but I don't see a better way to deal with this. A patch for the mount.cifs manpage will follow once this patch has been accepted. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a65d311d163a..9f59887badd2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1113,6 +1113,8 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (!strnicmp(data, "uid", 3) && value && *value) { vol->linux_uid = simple_strtoul(value, &value, 0); uid_specified = true; + } else if (!strnicmp(data, "cruid", 5) && value && *value) { + vol->cred_uid = simple_strtoul(value, &value, 0); } else if (!strnicmp(data, "forceuid", 8)) { override_uid = 1; } else if (!strnicmp(data, "noforceuid", 10)) { -- cgit v1.2.3 From a8f2800b4f7b76cecb7209cb6a7d2b14904fc711 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 14 Jan 2011 14:25:48 -0500 Subject: nfsd4: fix callback restarting Ensure a new callback is added to the client's list of callbacks at most once. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 209e186386a0..ae93c5c83e87 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -639,9 +639,12 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) if (!nfsd41_cb_get_slot(clp, task)) return; } - cb->cb_done = false; spin_lock(&clp->cl_lock); - list_add(&cb->cb_per_client, &clp->cl_callbacks); + if (list_empty(&cb->cb_per_client)) { + /* This is the first call, not a restart */ + cb->cb_done = false; + list_add(&cb->cb_per_client, &clp->cl_callbacks); + } spin_unlock(&clp->cl_lock); rpc_call_start(task); } @@ -678,10 +681,10 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) nfsd4_cb_done(task, calldata); - if (current_rpc_client == NULL) { - /* We're shutting down; give up. */ - /* XXX: err, or is it ok just to fall through - * and rpc_restart_call? */ + if (current_rpc_client != task->tk_client) { + /* We're shutting down or changing cl_cb_client; leave + * it to nfsd4_process_cb_update to restart the call if + * necessary. */ return; } @@ -699,12 +702,6 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) default: /* Network partition? */ nfsd4_mark_cb_down(clp, task->tk_status); - if (current_rpc_client != task->tk_client) { - /* queue a callback on the new connection: */ - atomic_inc(&dp->dl_count); - run_nfsd4_cb(&dp->dl_recall); - return; - } } if (dp->dl_retries--) { rpc_delay(task, 2*HZ); -- cgit v1.2.3 From 6f7f7caab259026234277b659485d22c1dcb1ab4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 14 Jan 2011 13:26:18 -0800 Subject: Turn d_set_d_op() BUG_ON() into WARN_ON_ONCE() It's indicative of a real problem, and it actually triggers with autofs4, but the BUG_ON() is excessive. The autofs4 case is being fixed (to only set d_op in the ->lookup method) but not merged yet. In the meantime this gets the code limping along. Reported-by: Alex Elder Cc: Ian Kent Cc: Nick Piggin Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 0c6d5c549d84..274a22250380 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1357,8 +1357,8 @@ EXPORT_SYMBOL(d_alloc_name); void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) { - BUG_ON(dentry->d_op); - BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + WARN_ON_ONCE(dentry->d_op); + WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE )); -- cgit v1.2.3 From 1a8edf40e7c3eee955e0dd0316a7c9d85e36f597 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Jan 2011 13:12:53 -0500 Subject: do_lookup() fix do_lookup() has a path leading from LOOKUP_RCU case to non-RCU crossing of mountpoints, which breaks things badly. If we hit need_revalidate: and do nothing in there, we need to come back into LOOKUP_RCU half of things, not to done: in non-RCU one. Signed-off-by: Al Viro --- fs/namei.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 8df7a78ace58..529e917ad2fc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1089,6 +1089,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, nd->seq = seq; if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; +done2: path->mnt = mnt; path->dentry = dentry; __follow_mount_rcu(nd, path, inode); @@ -1143,6 +1144,8 @@ need_revalidate: goto need_lookup; if (IS_ERR(dentry)) goto fail; + if (nd->flags & LOOKUP_RCU) + goto done2; goto done; fail: -- cgit v1.2.3 From 9875cf806403fae66b2410a3c2cc820d97731e04 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:21 +0000 Subject: Add a dentry op to handle automounting rather than abusing follow_link() Add a dentry op (d_automount) to handle automounting directories rather than abusing the follow_link() inode operation. The operation is keyed off a new dentry flag (DCACHE_NEED_AUTOMOUNT). This also makes it easier to add an AT_ flag to suppress terminal segment automount during pathwalk and removes the need for the kludge code in the pathwalk algorithm to handle directories with follow_link() semantics. The ->d_automount() dentry operation: struct vfsmount *(*d_automount)(struct path *mountpoint); takes a pointer to the directory to be mounted upon, which is expected to provide sufficient data to determine what should be mounted. If successful, it should return the vfsmount struct it creates (which it should also have added to the namespace using do_add_mount() or similar). If there's a collision with another automount attempt, NULL should be returned. If the directory specified by the parameter should be used directly rather than being mounted upon, -EISDIR should be returned. In any other case, an error code should be returned. The ->d_automount() operation is called with no locks held and may sleep. At this point the pathwalk algorithm will be in ref-walk mode. Within fs/namei.c itself, a new pathwalk subroutine (follow_automount()) is added to handle mountpoints. It will return -EREMOTE if the automount flag was set, but no d_automount() op was supplied, -ELOOP if we've encountered too many symlinks or mountpoints, -EISDIR if the walk point should be used without mounting and 0 if successful. The path will be updated to point to the mounted filesystem if a successful automount took place. __follow_mount() is replaced by follow_managed() which is more generic (especially with the patch that adds ->d_manage()). This handles transits from directories during pathwalk, including automounting and skipping over mountpoints (and holding processes with the next patch). __follow_mount_rcu() will jump out of RCU-walk mode if it encounters an automount point with nothing mounted on it. follow_dotdot*() does not handle automounts as you don't want to trigger them whilst following "..". I've also extracted the mount/don't-mount logic from autofs4 and included it here. It makes the mount go ahead anyway if someone calls open() or creat(), tries to traverse the directory, tries to chdir/chroot/etc. into the directory, or sticks a '/' on the end of the pathname. If they do a stat(), however, they'll only trigger the automount if they didn't also say O_NOFOLLOW. I've also added an inode flag (S_AUTOMOUNT) so that filesystems can mark their inodes as automount points. This flag is automatically propagated to the dentry as DCACHE_NEED_AUTOMOUNT by __d_instantiate(). This saves NFS and could save AFS a private flag bit apiece, but is not strictly necessary. It would be preferable to do the propagation in d_set_d_op(), but that doesn't normally have access to the inode. [AV: fixed breakage in case if __follow_mount_rcu() fails and nameidata_drop_rcu() succeeds in RCU case of do_lookup(); we need to fall through to non-RCU case after that, rather than just returning with ungrabbed *path] Signed-off-by: David Howells Was-Acked-by: Ian Kent Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 + Documentation/filesystems/vfs.txt | 14 +++ fs/dcache.c | 5 +- fs/namei.c | 226 ++++++++++++++++++++++++++++---------- include/linux/dcache.h | 7 +- include/linux/fs.h | 2 + 6 files changed, 198 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 977d8919cc69..5f0c52a07386 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -19,6 +19,7 @@ prototypes: void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); + struct vfsmount *(*d_automount)(struct path *path); locking rules: rename_lock ->d_lock may block rcu-walk @@ -29,6 +30,7 @@ d_delete: no yes no no d_release: no no yes no d_iput: no no yes no d_dname: no no no no +d_automount: no no yes no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index cae6d27c9f5b..726a4f6fa3c9 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -864,6 +864,7 @@ struct dentry_operations { void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); + struct vfsmount *(*d_automount)(struct path *); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -930,6 +931,19 @@ struct dentry_operations { at the end of the buffer, and returns a pointer to the first char. dynamic_dname() helper function is provided to take care of this. + d_automount: called when an automount dentry is to be traversed (optional). + This should create a new VFS mount record, mount it on the directory + and return the record to the caller. The caller is supplied with a + path parameter giving the automount directory to describe the automount + target and the parent VFS mount record to provide inheritable mount + parameters. NULL should be returned if someone else managed to make + the automount first. If the automount failed, then an error code + should be returned. + + This function is only used if DCACHE_NEED_AUTOMOUNT is set on the + dentry. This is set by __d_instantiate() if S_AUTOMOUNT is set on the + inode being added. + Example : static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) diff --git a/fs/dcache.c b/fs/dcache.c index 0c6d5c549d84..51f7bb6463af 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1380,8 +1380,11 @@ EXPORT_SYMBOL(d_set_d_op); static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); - if (inode) + if (inode) { + if (unlikely(IS_AUTOMOUNT(inode))) + dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; list_add(&dentry->d_alias, &inode->i_dentry); + } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); diff --git a/fs/namei.c b/fs/namei.c index 529e917ad2fc..16109da68bbf 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -896,51 +896,120 @@ int follow_up(struct path *path) } /* - * serialization is taken care of in namespace.c + * Perform an automount + * - return -EISDIR to tell follow_managed() to stop and return the path we + * were called with. */ -static void __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode) +static int follow_automount(struct path *path, unsigned flags, + bool *need_mntput) { - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted; - mounted = __lookup_mnt(path->mnt, path->dentry, 1); - if (!mounted) - return; - path->mnt = mounted; - path->dentry = mounted->mnt_root; - nd->seq = read_seqcount_begin(&path->dentry->d_seq); - *inode = path->dentry->d_inode; + struct vfsmount *mnt; + + if (!path->dentry->d_op || !path->dentry->d_op->d_automount) + return -EREMOTE; + + /* We want to mount if someone is trying to open/create a file of any + * type under the mountpoint, wants to traverse through the mountpoint + * or wants to open the mounted directory. + * + * We don't want to mount if someone's just doing a stat and they've + * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and + * appended a '/' to the name. + */ + if (!(flags & LOOKUP_FOLLOW) && + !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE))) + return -EISDIR; + + current->total_link_count++; + if (current->total_link_count >= 40) + return -ELOOP; + + mnt = path->dentry->d_op->d_automount(path); + if (IS_ERR(mnt)) { + /* + * The filesystem is allowed to return -EISDIR here to indicate + * it doesn't want to automount. For instance, autofs would do + * this so that its userspace daemon can mount on this dentry. + * + * However, we can only permit this if it's a terminal point in + * the path being looked up; if it wasn't then the remainder of + * the path is inaccessible and we should say so. + */ + if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE)) + return -EREMOTE; + return PTR_ERR(mnt); } -} + if (!mnt) /* mount collision */ + return 0; -static int __follow_mount(struct path *path) -{ - int res = 0; - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path); - if (!mounted) - break; - dput(path->dentry); - if (res) - mntput(path->mnt); - path->mnt = mounted; - path->dentry = dget(mounted->mnt_root); - res = 1; + if (mnt->mnt_sb == path->mnt->mnt_sb && + mnt->mnt_root == path->dentry) { + mntput(mnt); + return -ELOOP; } - return res; + + dput(path->dentry); + if (*need_mntput) + mntput(path->mnt); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + *need_mntput = true; + return 0; } -static void follow_mount(struct path *path) +/* + * Handle a dentry that is managed in some way. + * - Flagged as mountpoint + * - Flagged as automount point + * + * This may only be called in refwalk mode. + * + * Serialization is taken care of in namespace.c + */ +static int follow_managed(struct path *path, unsigned flags) { - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path); - if (!mounted) - break; - dput(path->dentry); - mntput(path->mnt); - path->mnt = mounted; - path->dentry = dget(mounted->mnt_root); + unsigned managed; + bool need_mntput = false; + int ret; + + /* Given that we're not holding a lock here, we retain the value in a + * local variable for each dentry as we look at it so that we don't see + * the components of that value change under us */ + while (managed = ACCESS_ONCE(path->dentry->d_flags), + managed &= DCACHE_MANAGED_DENTRY, + unlikely(managed != 0)) { + /* Transit to a mounted filesystem. */ + if (managed & DCACHE_MOUNTED) { + struct vfsmount *mounted = lookup_mnt(path); + if (mounted) { + dput(path->dentry); + if (need_mntput) + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); + need_mntput = true; + continue; + } + + /* Something is mounted on this dentry in another + * namespace and/or whatever was mounted there in this + * namespace got unmounted before we managed to get the + * vfsmount_lock */ + } + + /* Handle an automount point */ + if (managed & DCACHE_NEED_AUTOMOUNT) { + ret = follow_automount(path, flags, &need_mntput); + if (ret < 0) + return ret == -EISDIR ? 0 : ret; + continue; + } + + /* We didn't change the current path point */ + break; } + return 0; } int follow_down(struct path *path) @@ -958,13 +1027,37 @@ int follow_down(struct path *path) return 0; } +/* + * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we + * meet an automount point and we're not walking to "..". True is returned to + * continue, false to abort. + */ +static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, + struct inode **inode, bool reverse_transit) +{ + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted; + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + break; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&path->dentry->d_seq); + *inode = path->dentry->d_inode; + } + + if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + return reverse_transit; + return true; +} + static int follow_dotdot_rcu(struct nameidata *nd) { struct inode *inode = nd->inode; set_root_rcu(nd); - while(1) { + while (1) { if (nd->path.dentry == nd->root.dentry && nd->path.mnt == nd->root.mnt) { break; @@ -987,12 +1080,28 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); inode = nd->path.dentry->d_inode; } - __follow_mount_rcu(nd, &nd->path, &inode); + __follow_mount_rcu(nd, &nd->path, &inode, true); nd->inode = inode; return 0; } +/* + * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() + */ +static void follow_mount(struct path *path) +{ + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted = lookup_mnt(path); + if (!mounted) + break; + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); + } +} + static void follow_dotdot(struct nameidata *nd) { set_root(nd); @@ -1057,12 +1166,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; struct inode *dir; + int err; + /* * See if the low-level filesystem might want * to use its own hash.. */ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { - int err = parent->d_op->d_hash(parent, nd->inode, name); + err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; } @@ -1092,20 +1203,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, done2: path->mnt = mnt; path->dentry = dentry; - __follow_mount_rcu(nd, path, inode); - } else { - dentry = __d_lookup(parent, name); - if (!dentry) - goto need_lookup; + if (likely(__follow_mount_rcu(nd, path, inode, false))) + return 0; + if (nameidata_drop_rcu(nd)) + return -ECHILD; + /* fallthru */ + } + dentry = __d_lookup(parent, name); + if (!dentry) + goto need_lookup; found: - if (dentry->d_flags & DCACHE_OP_REVALIDATE) - goto need_revalidate; + if (dentry->d_flags & DCACHE_OP_REVALIDATE) + goto need_revalidate; done: - path->mnt = mnt; - path->dentry = dentry; - __follow_mount(path); - *inode = path->dentry->d_inode; - } + path->mnt = mnt; + path->dentry = dentry; + err = follow_managed(path, nd->flags); + if (unlikely(err < 0)) + return err; + *inode = path->dentry->d_inode; return 0; need_lookup: @@ -2203,11 +2319,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (open_flag & O_EXCL) goto exit_dput; - if (__follow_mount(path)) { - error = -ELOOP; - if (open_flag & O_NOFOLLOW) - goto exit_dput; - } + error = follow_managed(path, nd->flags); + if (error < 0) + goto exit_dput; error = -ENOENT; if (!path->dentry->d_inode) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 59fcd24b1468..ee6c26d142c3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -167,6 +167,7 @@ struct dentry_operations { void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); + struct vfsmount *(*d_automount)(struct path *); } ____cacheline_aligned; /* @@ -205,13 +206,17 @@ struct dentry_operations { #define DCACHE_CANT_MOUNT 0x0100 #define DCACHE_GENOCIDE 0x0200 -#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ #define DCACHE_OP_HASH 0x1000 #define DCACHE_OP_COMPARE 0x2000 #define DCACHE_OP_REVALIDATE 0x4000 #define DCACHE_OP_DELETE 0x8000 +#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ +#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ +#define DCACHE_MANAGED_DENTRY \ + (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT) + extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) diff --git a/include/linux/fs.h b/include/linux/fs.h index 3984f2358d1f..4c00ed53be8f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -242,6 +242,7 @@ struct inodes_stat_t { #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ +#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -277,6 +278,7 @@ struct inodes_stat_t { #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) +#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ -- cgit v1.2.3 From cc53ce53c86924bfe98a12ea20b7465038a08792 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:26 +0000 Subject: Add a dentry op to allow processes to be held during pathwalk transit Add a dentry op (d_manage) to permit a filesystem to hold a process and make it sleep when it tries to transit away from one of that filesystem's directories during a pathwalk. The operation is keyed off a new dentry flag (DCACHE_MANAGE_TRANSIT). The filesystem is allowed to be selective about which processes it holds and which it permits to continue on or prohibits from transiting from each flagged directory. This will allow autofs to hold up client processes whilst letting its userspace daemon through to maintain the directory or the stuff behind it or mounted upon it. The ->d_manage() dentry operation: int (*d_manage)(struct path *path, bool mounting_here); takes a pointer to the directory about to be transited away from and a flag indicating whether the transit is undertaken by do_add_mount() or do_move_mount() skipping through a pile of filesystems mounted on a mountpoint. It should return 0 if successful and to let the process continue on its way; -EISDIR to prohibit the caller from skipping to overmounted filesystems or automounting, and to use this directory; or some other error code to return to the user. ->d_manage() is called with namespace_sem writelocked if mounting_here is true and no other locks held, so it may sleep. However, if mounting_here is true, it may not initiate or wait for a mount or unmount upon the parameter directory, even if the act is actually performed by userspace. Within fs/namei.c, follow_managed() is extended to check with d_manage() first on each managed directory, before transiting away from it or attempting to automount upon it. follow_down() is renamed follow_down_one() and should only be used where the filesystem deliberately intends to avoid management steps (e.g. autofs). A new follow_down() is added that incorporates the loop done by all other callers of follow_down() (do_add/move_mount(), autofs and NFSD; whilst AFS, NFS and CIFS do use it, their use is removed by converting them to use d_automount()). The new follow_down() calls d_manage() as appropriate. It also takes an extra parameter to indicate if it is being called from mount code (with namespace_sem writelocked) which it passes to d_manage(). follow_down() ignores automount points so that it can be used to mount on them. __follow_mount_rcu() is made to abort rcu-walk mode if it hits a directory with DCACHE_MANAGE_TRANSIT set on the basis that we're probably going to have to sleep. It would be possible to enter d_manage() in rcu-walk mode too, and have that determine whether to abort or not itself. That would allow the autofs daemon to continue on in rcu-walk mode. Note that DCACHE_MANAGE_TRANSIT on a directory should be cleared when it isn't required as every tranist from that directory will cause d_manage() to be invoked. It can always be set again when necessary. ========================== WHAT THIS MEANS FOR AUTOFS ========================== Autofs currently uses the lookup() inode op and the d_revalidate() dentry op to trigger the automounting of indirect mounts, and both of these can be called with i_mutex held. autofs knows that the i_mutex will be held by the caller in lookup(), and so can drop it before invoking the daemon - but this isn't so for d_revalidate(), since the lock is only held on _some_ of the code paths that call it. This means that autofs can't risk dropping i_mutex from its d_revalidate() function before it calls the daemon. The bug could manifest itself as, for example, a process that's trying to validate an automount dentry that gets made to wait because that dentry is expired and needs cleaning up: mkdir S ffffffff8014e05a 0 32580 24956 Call Trace: [] :autofs4:autofs4_wait+0x674/0x897 [] avc_has_perm+0x46/0x58 [] autoremove_wake_function+0x0/0x2e [] :autofs4:autofs4_expire_wait+0x41/0x6b [] :autofs4:autofs4_revalidate+0x91/0x149 [] __lookup_hash+0xa0/0x12f [] lookup_create+0x46/0x80 [] sys_mkdirat+0x56/0xe4 versus the automount daemon which wants to remove that dentry, but can't because the normal process is holding the i_mutex lock: automount D ffffffff8014e05a 0 32581 1 32561 Call Trace: [] __mutex_lock_slowpath+0x60/0x9b [] do_path_lookup+0x2ca/0x2f1 [] .text.lock.mutex+0xf/0x14 [] do_rmdir+0x77/0xde [] tracesys+0x71/0xe0 [] tracesys+0xd5/0xe0 which means that the system is deadlocked. This patch allows autofs to hold up normal processes whilst the daemon goes ahead and does things to the dentry tree behind the automouter point without risking a deadlock as almost no locks are held in d_manage() and none in d_automount(). Signed-off-by: David Howells Was-Acked-by: Ian Kent Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 ++ Documentation/filesystems/vfs.txt | 21 +++++++++++- drivers/staging/autofs/dirhash.c | 5 ++- fs/afs/mntpt.c | 5 +-- fs/autofs4/autofs_i.h | 13 ------- fs/autofs4/dev-ioctl.c | 2 +- fs/autofs4/expire.c | 2 +- fs/autofs4/root.c | 11 +++--- fs/cifs/cifs_dfs_ref.c | 5 +-- fs/namei.c | 72 +++++++++++++++++++++++++++++++++++++-- fs/namespace.c | 14 ++++---- fs/nfs/namespace.c | 5 +-- fs/nfsd/vfs.c | 5 +-- include/linux/dcache.h | 11 ++++-- include/linux/namei.h | 3 +- 15 files changed, 126 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 5f0c52a07386..cbf98b989b11 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -20,6 +20,7 @@ prototypes: void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); struct vfsmount *(*d_automount)(struct path *path); + int (*d_manage)(struct dentry *, bool); locking rules: rename_lock ->d_lock may block rcu-walk @@ -31,6 +32,7 @@ d_release: no no yes no d_iput: no no yes no d_dname: no no no no d_automount: no no yes no +d_manage: no no yes no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 726a4f6fa3c9..4682586b147a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -865,6 +865,7 @@ struct dentry_operations { void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); + int (*d_manage)(struct dentry *, bool); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -938,12 +939,30 @@ struct dentry_operations { target and the parent VFS mount record to provide inheritable mount parameters. NULL should be returned if someone else managed to make the automount first. If the automount failed, then an error code - should be returned. + should be returned. If -EISDIR is returned, then the directory will + be treated as an ordinary directory and returned to pathwalk to + continue walking. This function is only used if DCACHE_NEED_AUTOMOUNT is set on the dentry. This is set by __d_instantiate() if S_AUTOMOUNT is set on the inode being added. + d_manage: called to allow the filesystem to manage the transition from a + dentry (optional). This allows autofs, for example, to hold up clients + waiting to explore behind a 'mountpoint' whilst letting the daemon go + past and construct the subtree there. 0 should be returned to let the + calling process continue. -EISDIR can be returned to tell pathwalk to + use this directory as an ordinary directory and to ignore anything + mounted on it and not to check the automount flag. Any other error + code will abort pathwalk completely. + + If the 'mounting_here' parameter is true, then namespace_sem is being + held by the caller and the function should not initiate any mounts or + unmounts that it will then wait for. + + This function is only used if DCACHE_MANAGE_TRANSIT is set on the + dentry being transited from. + Example : static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) diff --git a/drivers/staging/autofs/dirhash.c b/drivers/staging/autofs/dirhash.c index d3f42c8325f7..a08bd7355035 100644 --- a/drivers/staging/autofs/dirhash.c +++ b/drivers/staging/autofs/dirhash.c @@ -88,14 +88,13 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, } path.mnt = mnt; path_get(&path); - if (!follow_down(&path)) { + if (!follow_down_one(&path)) { path_put(&path); DPRINTK(("autofs: not expirable\ (not a mounted directory): %s\n", ent->name)); continue; } - while (d_mountpoint(path.dentry) && follow_down(&path)) - ; + follow_down(&path, false); // TODO: need to check error umount_ok = may_umount(path.mnt); path_put(&path); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index e83c0336e7b5..f3e891d57a2c 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -273,10 +273,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) break; case -EBUSY: /* someone else made a mount here whilst we were busy */ - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path)) - ; - err = 0; + err = follow_down(&nd->path, false); default: mntput(newmnt); break; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 0fffe1c24cec..eb67953452bb 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -229,19 +229,6 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); -static inline int autofs4_follow_mount(struct path *path) -{ - int res = 0; - - while (d_mountpoint(path->dentry)) { - int followed = follow_down(path); - if (!followed) - break; - res = 1; - } - return res; -} - static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) { return new_encode_dev(sbi->sb->s_dev); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index eff9a419469a..1442da4860e5 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -551,7 +551,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, err = have_submounts(path.dentry); - if (follow_down(&path)) + if (follow_down_one(&path)) magic = path.mnt->mnt_sb->s_magic; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cc1d01365905..6a930b90d389 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,7 +56,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) path_get(&path); - if (!follow_down(&path)) + if (!follow_down_one(&path)) goto done; if (is_autofs4_dentry(path.dentry)) { diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 651e4ef563b1..20225636a4e9 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -234,7 +234,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) nd->flags); /* * For an expire of a covered direct or offset mount we need - * to break out of follow_down() at the autofs mount trigger + * to break out of follow_down_one() at the autofs mount trigger * (d_mounted--), so we can see the expiring flag, and manage * the blocking and following here until the expire is completed. */ @@ -243,7 +243,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (ino->flags & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); /* Follow down to our covering mount. */ - if (!follow_down(&nd->path)) + if (!follow_down_one(&nd->path)) goto done; goto follow; } @@ -292,11 +292,10 @@ follow: * multi-mount with no root offset so we don't need * to follow it. */ - if (d_mountpoint(dentry)) { - if (!autofs4_follow_mount(&nd->path)) { - status = -ENOENT; + if (d_managed(dentry)) { + status = follow_down(&nd->path, false); + if (status < 0) goto out_error; - } } done: diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index c68a056f27fd..83479cf63f96 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -273,10 +273,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, break; case -EBUSY: /* someone else made a mount here whilst we were busy */ - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path)) - ; - err = 0; + err = follow_down(&nd->path, false); default: mntput(newmnt); break; diff --git a/fs/namei.c b/fs/namei.c index 16109da68bbf..9d3033dc22e9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -960,6 +960,7 @@ static int follow_automount(struct path *path, unsigned flags, /* * Handle a dentry that is managed in some way. + * - Flagged for transit management (autofs) * - Flagged as mountpoint * - Flagged as automount point * @@ -979,6 +980,16 @@ static int follow_managed(struct path *path, unsigned flags) while (managed = ACCESS_ONCE(path->dentry->d_flags), managed &= DCACHE_MANAGED_DENTRY, unlikely(managed != 0)) { + /* Allow the filesystem to manage the transit without i_mutex + * being held. */ + if (managed & DCACHE_MANAGE_TRANSIT) { + BUG_ON(!path->dentry->d_op); + BUG_ON(!path->dentry->d_op->d_manage); + ret = path->dentry->d_op->d_manage(path->dentry, false); + if (ret < 0) + return ret == -EISDIR ? 0 : ret; + } + /* Transit to a mounted filesystem. */ if (managed & DCACHE_MOUNTED) { struct vfsmount *mounted = lookup_mnt(path); @@ -1012,7 +1023,7 @@ static int follow_managed(struct path *path, unsigned flags) return 0; } -int follow_down(struct path *path) +int follow_down_one(struct path *path) { struct vfsmount *mounted; @@ -1029,14 +1040,19 @@ int follow_down(struct path *path) /* * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we - * meet an automount point and we're not walking to "..". True is returned to + * meet a managed dentry and we're not walking to "..". True is returned to * continue, false to abort. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, struct inode **inode, bool reverse_transit) { + unsigned abort_mask = + reverse_transit ? 0 : DCACHE_MANAGE_TRANSIT; + while (d_mountpoint(path->dentry)) { struct vfsmount *mounted; + if (path->dentry->d_flags & abort_mask) + return true; mounted = __lookup_mnt(path->mnt, path->dentry, 1); if (!mounted) break; @@ -1086,6 +1102,57 @@ static int follow_dotdot_rcu(struct nameidata *nd) return 0; } +/* + * Follow down to the covering mount currently visible to userspace. At each + * point, the filesystem owning that dentry may be queried as to whether the + * caller is permitted to proceed or not. + * + * Care must be taken as namespace_sem may be held (indicated by mounting_here + * being true). + */ +int follow_down(struct path *path, bool mounting_here) +{ + unsigned managed; + int ret; + + while (managed = ACCESS_ONCE(path->dentry->d_flags), + unlikely(managed & DCACHE_MANAGED_DENTRY)) { + /* Allow the filesystem to manage the transit without i_mutex + * being held. + * + * We indicate to the filesystem if someone is trying to mount + * something here. This gives autofs the chance to deny anyone + * other than its daemon the right to mount on its + * superstructure. + * + * The filesystem may sleep at this point. + */ + if (managed & DCACHE_MANAGE_TRANSIT) { + BUG_ON(!path->dentry->d_op); + BUG_ON(!path->dentry->d_op->d_manage); + ret = path->dentry->d_op->d_manage(path->dentry, mounting_here); + if (ret < 0) + return ret == -EISDIR ? 0 : ret; + } + + /* Transit to a mounted filesystem. */ + if (managed & DCACHE_MOUNTED) { + struct vfsmount *mounted = lookup_mnt(path); + if (!mounted) + break; + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); + continue; + } + + /* Don't handle automount points here */ + break; + } + return 0; +} + /* * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() */ @@ -3530,6 +3597,7 @@ const struct inode_operations page_symlink_inode_operations = { }; EXPORT_SYMBOL(user_path_at); +EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ diff --git a/fs/namespace.c b/fs/namespace.c index 3ddfd9046c44..d94ccd6ddafd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1844,9 +1844,10 @@ static int do_move_mount(struct path *path, char *old_name) return err; down_write(&namespace_sem); - while (d_mountpoint(path->dentry) && - follow_down(path)) - ; + err = follow_down(path, true); + if (err < 0) + goto out; + err = -EINVAL; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) goto out; @@ -1940,9 +1941,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, down_write(&namespace_sem); /* Something was mounted here while we slept */ - while (d_mountpoint(path->dentry) && - follow_down(path)) - ; + err = follow_down(path, true); + if (err < 0) + goto unlock; + err = -EINVAL; if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) goto unlock; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 74aaf3963c10..bfcb933e5755 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -176,10 +176,7 @@ out_err: path_put(&nd->path); goto out; out_follow: - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path)) - ; - err = 0; + err = follow_down(&nd->path, false); goto out; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 230b79fbf005..0f79e33a65d7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -88,8 +88,9 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, .dentry = dget(dentry)}; int err = 0; - while (d_mountpoint(path.dentry) && follow_down(&path)) - ; + err = follow_down(&path, false); + if (err < 0) + goto out; exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ee6c26d142c3..1a87760d6532 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -168,6 +168,7 @@ struct dentry_operations { void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); + int (*d_manage)(struct dentry *, bool); } ____cacheline_aligned; /* @@ -214,8 +215,9 @@ struct dentry_operations { #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ +#define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ - (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT) + (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) extern seqlock_t rename_lock; @@ -404,7 +406,12 @@ static inline void dont_mount(struct dentry *dentry) extern void dput(struct dentry *); -static inline int d_mountpoint(struct dentry *dentry) +static inline bool d_managed(struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_MANAGED_DENTRY; +} + +static inline bool d_mountpoint(struct dentry *dentry) { return dentry->d_flags & DCACHE_MOUNTED; } diff --git a/include/linux/namei.h b/include/linux/namei.h index 18d06add0a40..8ef2c789c2a8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -79,7 +79,8 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -extern int follow_down(struct path *); +extern int follow_down_one(struct path *); +extern int follow_down(struct path *, bool); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3 From 6f45b65672c8017d5e210e338bb5858a938ef445 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:31 +0000 Subject: Add an AT_NO_AUTOMOUNT flag to suppress terminal automount Add an AT_NO_AUTOMOUNT flag to suppress terminal automounting of automount point directories. This can be used by fstatat() users to permit the gathering of attributes on an automount point and also prevent mass-automounting of a directory of automount points by ls. Signed-off-by: David Howells Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 6 ++++++ fs/stat.c | 4 +++- include/linux/fcntl.h | 1 + include/linux/namei.h | 2 ++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 9d3033dc22e9..dc50bfb2f5d6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -908,6 +908,12 @@ static int follow_automount(struct path *path, unsigned flags, if (!path->dentry->d_op || !path->dentry->d_op->d_automount) return -EREMOTE; + /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT + * and this is the terminal part of the path. + */ + if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) + return -EISDIR; /* we actually want to stop here */ + /* We want to mount if someone is trying to open/create a file of any * type under the mountpoint, wants to traverse through the mountpoint * or wants to open the mounted directory. diff --git a/fs/stat.c b/fs/stat.c index 12e90e213900..d5c61cf2b703 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -75,11 +75,13 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int error = -EINVAL; int lookup_flags = 0; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) goto out; if (!(flag & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + if (flag & AT_NO_AUTOMOUNT) + lookup_flags |= LOOKUP_NO_AUTOMOUNT; error = user_path_at(dfd, filename, lookup_flags, &path); if (error) diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index afc00af3229b..a562fa5fb4e3 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -45,6 +45,7 @@ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #ifdef __KERNEL__ diff --git a/include/linux/namei.h b/include/linux/namei.h index 8ef2c789c2a8..f276d4fa01fc 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -45,6 +45,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - ending slashes ok even for nonexistent files * - internal "there are more path components" flag * - dentry cache is untrusted; force a real lookup + * - suppress terminal automount */ #define LOOKUP_FOLLOW 0x0001 #define LOOKUP_DIRECTORY 0x0002 @@ -53,6 +54,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 +#define LOOKUP_NO_AUTOMOUNT 0x0080 /* * Intent data */ -- cgit v1.2.3 From d18610b0ce9eb48c60649d8fcbf68374c84349d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 19:04:05 +0000 Subject: AFS: Use d_automount() rather than abusing follow_link() Make AFS use the new d_automount() dentry operation rather than abusing follow_link() on directories. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/afs/dir.c | 1 + fs/afs/inode.c | 3 ++- fs/afs/internal.h | 1 + fs/afs/mntpt.c | 44 +++++++++++++++----------------------------- 4 files changed, 19 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e6a4ab980e31..20c106f24927 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -66,6 +66,7 @@ const struct dentry_operations afs_fs_dentry_operations = { .d_revalidate = afs_d_revalidate, .d_delete = afs_d_delete, .d_release = afs_d_release, + .d_automount = afs_d_automount, }; #define AFS_DIR_HASHTBL_SIZE 128 diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 0747339011c3..db66c5201474 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -184,7 +184,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_generation = 0; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); - inode->i_flags |= S_NOATIME; + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + inode->i_flags |= S_AUTOMOUNT | S_NOATIME; unlock_new_inode(inode); _leave(" = %p", inode); return inode; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 58c633b80246..5a9b6843bac1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -592,6 +592,7 @@ extern const struct inode_operations afs_mntpt_inode_operations; extern const struct inode_operations afs_autocell_inode_operations; extern const struct file_operations afs_mntpt_file_operations; +extern struct vfsmount *afs_d_automount(struct path *); extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index f3e891d57a2c..d23b2e344a78 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -24,7 +24,6 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); static int afs_mntpt_open(struct inode *inode, struct file *file); -static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); static void afs_mntpt_expiry_timed_out(struct work_struct *work); const struct file_operations afs_mntpt_file_operations = { @@ -34,13 +33,11 @@ const struct file_operations afs_mntpt_file_operations = { const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, - .follow_link = afs_mntpt_follow_link, .readlink = page_readlink, .getattr = afs_getattr, }; const struct inode_operations afs_autocell_inode_operations = { - .follow_link = afs_mntpt_follow_link, .getattr = afs_getattr, }; @@ -88,6 +85,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) _debug("symlink is a mountpoint"); spin_lock(&vnode->lock); set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + vnode->vfs_inode.i_flags |= S_AUTOMOUNT; spin_unlock(&vnode->lock); } @@ -238,49 +236,37 @@ error_no_devname: } /* - * follow a link from a mountpoint directory, thus causing it to be mounted + * handle an automount point */ -static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) +struct vfsmount *afs_d_automount(struct path *path) { struct vfsmount *newmnt; int err; - _enter("%p{%s},{%s:%p{%s},}", - dentry, - dentry->d_name.name, - nd->path.mnt->mnt_devname, - dentry, - nd->path.dentry->d_name.name); - - dput(nd->path.dentry); - nd->path.dentry = dget(dentry); + _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name); - newmnt = afs_mntpt_do_automount(nd->path.dentry); - if (IS_ERR(newmnt)) { - path_put(&nd->path); - return (void *)newmnt; - } + newmnt = afs_mntpt_do_automount(path->dentry); + if (IS_ERR(newmnt)) + return newmnt; mntget(newmnt); - err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts); + err = do_add_mount(newmnt, path, MNT_SHRINKABLE, &afs_vfsmounts); switch (err) { case 0: - path_put(&nd->path); - nd->path.mnt = newmnt; - nd->path.dentry = dget(newmnt->mnt_root); queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); - break; + _leave(" = %p {%s}", newmnt, newmnt->mnt_devname); + return newmnt; case -EBUSY: /* someone else made a mount here whilst we were busy */ - err = follow_down(&nd->path, false); + mntput(newmnt); + _leave(" = NULL [EBUSY]"); + return NULL; default: mntput(newmnt); - break; + _leave(" = %d", err); + return ERR_PTR(err); } - - _leave(" = %d", err); - return ERR_PTR(err); } /* -- cgit v1.2.3 From 36d43a43761b004ad1879ac21471d8fc5f3157ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:42 +0000 Subject: NFS: Use d_automount() rather than abusing follow_link() Make NFS use the new d_automount() dentry operation rather than abusing follow_link() on directories. Signed-off-by: David Howells Acked-by: Trond Myklebust Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/nfs/dir.c | 4 ++- fs/nfs/inode.c | 4 +-- fs/nfs/internal.h | 1 + fs/nfs/namespace.c | 84 ++++++++++++++++++++++++-------------------------- include/linux/nfs_fs.h | 1 - 5 files changed, 46 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index df8c03a02161..2c3eb33b904d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -970,7 +970,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) { struct nfs_server *server = NFS_SERVER(inode); - if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags)) + if (IS_AUTOMOUNT(inode)) return 0; if (nd != NULL) { /* VFS wants an on-the-wire revalidation */ @@ -1173,6 +1173,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_revalidate = nfs_lookup_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, + .d_automount = nfs_d_automount, }; static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) @@ -1246,6 +1247,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs_open_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, + .d_automount = nfs_d_automount, }; /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ce00b704452c..d8512423ba72 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -300,7 +300,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; - set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags); + inode->i_flags |= S_AUTOMOUNT; } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; @@ -1208,7 +1208,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update the fsid? */ if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && !nfs_fsid_equal(&server->fsid, &fattr->fsid) && - !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) + !IS_AUTOMOUNT(inode)) server->fsid = fattr->fsid; /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bfa3a34af801..4644f04b4b46 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -252,6 +252,7 @@ extern char *nfs_path(const char *base, const struct dentry *droot, const struct dentry *dentry, char *buffer, ssize_t buflen); +extern struct vfsmount *nfs_d_automount(struct path *path); /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bfcb933e5755..f3fbb1bf3f18 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -97,9 +97,8 @@ Elong: } /* - * nfs_follow_mountpoint - handle crossing a mountpoint on the server - * @dentry - dentry of mountpoint - * @nd - nameidata info + * nfs_d_automount - Handle crossing a mountpoint on the server + * @path - The mountpoint * * When we encounter a mountpoint on the server, we want to set up * a mountpoint on the client too, to prevent inode numbers from @@ -109,84 +108,81 @@ Elong: * situation, and that different filesystems may want to use * different security flavours. */ -static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +struct vfsmount *nfs_d_automount(struct path *path) { struct vfsmount *mnt; - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(path->dentry->d_inode); struct dentry *parent; struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; int err; - dprintk("--> nfs_follow_mountpoint()\n"); + dprintk("--> nfs_d_automount()\n"); - err = -ESTALE; - if (IS_ROOT(dentry)) - goto out_err; + mnt = ERR_PTR(-ESTALE); + if (IS_ROOT(path->dentry)) + goto out_nofree; - err = -ENOMEM; + mnt = ERR_PTR(-ENOMEM); fh = nfs_alloc_fhandle(); fattr = nfs_alloc_fattr(); if (fh == NULL || fattr == NULL) - goto out_err; + goto out; dprintk("%s: enter\n", __func__); - dput(nd->path.dentry); - nd->path.dentry = dget(dentry); - /* Look it up again */ - parent = dget_parent(nd->path.dentry); + /* Look it up again to get its attributes */ + parent = dget_parent(path->dentry); err = server->nfs_client->rpc_ops->lookup(parent->d_inode, - &nd->path.dentry->d_name, + &path->dentry->d_name, fh, fattr); dput(parent); - if (err != 0) - goto out_err; + if (err != 0) { + mnt = ERR_PTR(err); + goto out; + } if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) - mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); + mnt = nfs_do_refmount(path->mnt, path->dentry); else - mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh, - fattr); - err = PTR_ERR(mnt); + mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); if (IS_ERR(mnt)) - goto out_err; + goto out; mntget(mnt); - err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, + err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE, &nfs_automount_list); - if (err < 0) { + switch (err) { + case 0: + dprintk("%s: done, success\n", __func__); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); + break; + case -EBUSY: + /* someone else made a mount here whilst we were busy */ mntput(mnt); - if (err == -EBUSY) - goto out_follow; - goto out_err; + dprintk("%s: done, collision\n", __func__); + mnt = NULL; + break; + default: + mntput(mnt); + dprintk("%s: done, error %d\n", __func__, err); + mnt = ERR_PTR(err); + break; } - path_put(&nd->path); - nd->path.mnt = mnt; - nd->path.dentry = dget(mnt->mnt_root); - schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); + out: nfs_free_fattr(fattr); nfs_free_fhandle(fh); - dprintk("%s: done, returned %d\n", __func__, err); - - dprintk("<-- nfs_follow_mountpoint() = %d\n", err); - return ERR_PTR(err); -out_err: - path_put(&nd->path); - goto out; -out_follow: - err = follow_down(&nd->path, false); - goto out; +out_nofree: + dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); + return mnt; } const struct inode_operations nfs_mountpoint_inode_operations = { - .follow_link = nfs_follow_mountpoint, .getattr = nfs_getattr, }; const struct inode_operations nfs_referral_inode_operations = { - .follow_link = nfs_follow_mountpoint, }; static void nfs_expire_automounts(struct work_struct *work) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0779bb8f95be..6023efa9f5d9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -215,7 +215,6 @@ struct nfs_inode { #define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ -#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ -- cgit v1.2.3 From 01c64feac45cea1317263eabc4f7ee1b240f297f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:47 +0000 Subject: CIFS: Use d_automount() rather than abusing follow_link() Make CIFS use the new d_automount() dentry operation rather than abusing follow_link() on directories. [NOTE: THIS IS UNTESTED!] Signed-off-by: David Howells Cc: Steve French Signed-off-by: Al Viro --- fs/cifs/cifs_dfs_ref.c | 131 +++++++++++++++++++++++++------------------------ fs/cifs/cifsfs.h | 6 +++ fs/cifs/dir.c | 2 + fs/cifs/inode.c | 8 +-- 4 files changed, 80 insertions(+), 67 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 83479cf63f96..0fc163808de3 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -255,32 +255,6 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, } -static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, - struct list_head *mntlist) -{ - /* stolen from afs code */ - int err; - - mntget(newmnt); - err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist); - switch (err) { - case 0: - path_put(&nd->path); - nd->path.mnt = newmnt; - nd->path.dentry = dget(newmnt->mnt_root); - schedule_delayed_work(&cifs_dfs_automount_task, - cifs_dfs_mountpoint_expiry_timeout); - break; - case -EBUSY: - /* someone else made a mount here whilst we were busy */ - err = follow_down(&nd->path, false); - default: - mntput(newmnt); - break; - } - return err; -} - static void dump_referral(const struct dfs_info3_param *ref) { cFYI(1, "DFS: ref path: %s", ref->path_name); @@ -290,45 +264,43 @@ static void dump_referral(const struct dfs_info3_param *ref) ref->path_consumed); } - -static void* -cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +/* + * Create a vfsmount that we can automount + */ +static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) { struct dfs_info3_param *referrals = NULL; unsigned int num_referrals = 0; struct cifs_sb_info *cifs_sb; struct cifsSesInfo *ses; - char *full_path = NULL; + char *full_path; int xid, i; - int rc = 0; - struct vfsmount *mnt = ERR_PTR(-ENOENT); + int rc; + struct vfsmount *mnt; struct tcon_link *tlink; cFYI(1, "in %s", __func__); - BUG_ON(IS_ROOT(dentry)); + BUG_ON(IS_ROOT(mntpt)); xid = GetXid(); - dput(nd->path.dentry); - nd->path.dentry = dget(dentry); - /* * The MSDFS spec states that paths in DFS referral requests and * responses must be prefixed by a single '\' character instead of * the double backslashes usually used in the UNC. This function * gives us the latter, so we must adjust the result. */ - full_path = build_path_from_dentry(dentry); - if (full_path == NULL) { - rc = -ENOMEM; - goto out_err; - } + mnt = ERR_PTR(-ENOMEM); + full_path = build_path_from_dentry(mntpt); + if (full_path == NULL) + goto free_xid; - cifs_sb = CIFS_SB(dentry->d_inode->i_sb); + cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); + mnt = ERR_PTR(-EINVAL); if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - goto out_err; + mnt = ERR_CAST(tlink); + goto free_full_path; } ses = tlink_tcon(tlink)->ses; @@ -338,46 +310,77 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) cifs_put_tlink(tlink); + mnt = ERR_PTR(-ENOENT); for (i = 0; i < num_referrals; i++) { int len; - dump_referral(referrals+i); + dump_referral(referrals + i); /* connect to a node */ len = strlen(referrals[i].node_name); if (len < 2) { cERROR(1, "%s: Net Address path too short: %s", __func__, referrals[i].node_name); - rc = -EINVAL; - goto out_err; + mnt = ERR_PTR(-EINVAL); + break; } mnt = cifs_dfs_do_refmount(cifs_sb, full_path, referrals + i); cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, referrals[i].node_name, mnt); - - /* complete mount procedure if we accured submount */ if (!IS_ERR(mnt)) - break; + goto success; } - /* we need it cause for() above could exit without valid submount */ - rc = PTR_ERR(mnt); - if (IS_ERR(mnt)) - goto out_err; - - rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); + /* no valid submounts were found; return error from get_dfs_path() by + * preference */ + if (rc != 0) + mnt = ERR_PTR(rc); -out: - FreeXid(xid); +success: free_dfs_info_array(referrals, num_referrals); +free_full_path: kfree(full_path); +free_xid: + FreeXid(xid); cFYI(1, "leaving %s" , __func__); - return ERR_PTR(rc); -out_err: - path_put(&nd->path); - goto out; + return mnt; +} + +/* + * Attempt to automount the referral + */ +struct vfsmount *cifs_dfs_d_automount(struct path *path) +{ + struct vfsmount *newmnt; + int err; + + cFYI(1, "in %s", __func__); + + newmnt = cifs_dfs_do_automount(path->dentry); + if (IS_ERR(newmnt)) { + cFYI(1, "leaving %s [automount failed]" , __func__); + return newmnt; + } + + mntget(newmnt); + err = do_add_mount(newmnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE, + &cifs_dfs_automount_list); + switch (err) { + case 0: + schedule_delayed_work(&cifs_dfs_automount_task, + cifs_dfs_mountpoint_expiry_timeout); + cFYI(1, "leaving %s [ok]" , __func__); + return newmnt; + case -EBUSY: + /* someone else made a mount here whilst we were busy */ + mntput(newmnt); + cFYI(1, "leaving %s [EBUSY]" , __func__); + return NULL; + default: + mntput(newmnt); + cFYI(1, "leaving %s [error %d]" , __func__, err); + return ERR_PTR(err); + } } const struct inode_operations cifs_dfs_referral_inode_operations = { - .follow_link = cifs_dfs_follow_mountpoint, }; - diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 897b2b2b28b5..851030f74939 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -93,6 +93,12 @@ extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); extern const struct dentry_operations cifs_dentry_ops; extern const struct dentry_operations cifs_ci_dentry_ops; +#ifdef CONFIG_CIFS_DFS_UPCALL +extern struct vfsmount *cifs_dfs_d_automount(struct path *path); +#else +#define cifs_dfs_d_automount NULL +#endif + /* Functions related to symlinks */ extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); extern void cifs_put_link(struct dentry *direntry, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1e95dd635632..dd5f22918c33 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -675,6 +675,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) const struct dentry_operations cifs_dentry_ops = { .d_revalidate = cifs_d_revalidate, + .d_automount = cifs_dfs_d_automount, /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; @@ -711,4 +712,5 @@ const struct dentry_operations cifs_ci_dentry_ops = { .d_revalidate = cifs_d_revalidate, .d_hash = cifs_ci_hash, .d_compare = cifs_ci_compare, + .d_automount = cifs_dfs_d_automount, }; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b06b60620240..6c9ee8014ff0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -32,7 +32,7 @@ #include "fscache.h" -static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) +static void cifs_set_ops(struct inode *inode) { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -60,7 +60,7 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) break; case S_IFDIR: #ifdef CONFIG_CIFS_DFS_UPCALL - if (is_dfs_referral) { + if (IS_AUTOMOUNT(inode)) { inode->i_op = &cifs_dfs_referral_inode_operations; } else { #else /* NO DFS support, treat as a directory */ @@ -167,7 +167,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) } spin_unlock(&inode->i_lock); - cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL); + if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL) + inode->i_flags |= S_AUTOMOUNT; + cifs_set_ops(inode); } void -- cgit v1.2.3 From db3729153e82ba3ada89681f26c4f1b6d6807a80 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:45:53 +0000 Subject: Remove the automount through follow_link() kludge code from pathwalk Remove the automount through follow_link() kludge code from pathwalk in favour of using d_automount(). Signed-off-by: David Howells Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index dc50bfb2f5d6..61995fba4e21 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1341,17 +1341,6 @@ fail: return PTR_ERR(dentry); } -/* - * This is a temporary kludge to deal with "automount" symlinks; proper - * solution is to trigger them on follow_mount(), so that do_lookup() - * would DTRT. To be killed before 2.6.34-final. - */ -static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) -{ - return inode && unlikely(inode->i_op->follow_link) && - ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode)); -} - /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1490,7 +1479,8 @@ last_component: err = do_lookup(nd, &this, &next, &inode); if (err) break; - if (follow_on_final(inode, lookup_flags)) { + if (inode && unlikely(inode->i_op->follow_link) && + (lookup_flags & LOOKUP_FOLLOW)) { if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) return -ECHILD; BUG_ON(inode != next.dentry->d_inode); @@ -2543,8 +2533,7 @@ reval: struct inode *linki = link.dentry->d_inode; void *cookie; error = -ELOOP; - /* S_ISDIR part is a temporary automount kludge */ - if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(linki->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW)) goto exit_dput; if (count++ == 32) goto exit_dput; -- cgit v1.2.3 From 10584211e48036182212b598cc53331776406d60 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:45:58 +0000 Subject: autofs4: Add d_automount() dentry operation Add a function to use the newly defined ->d_automount() dentry operation for triggering mounts instead of doing the user space callback in ->lookup() and ->d_revalidate(). Note, to be useful the subsequent patch to add the ->d_manage() dentry operation is also needed so the discussion of functionality is deferred to that patch. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 30 ++++++ fs/autofs4/expire.c | 6 ++ fs/autofs4/inode.c | 4 + fs/autofs4/root.c | 261 ++++++++++++++++++++++++++++---------------------- 4 files changed, 189 insertions(+), 112 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index eb67953452bb..1ebfe53872b5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -218,6 +218,36 @@ extern const struct inode_operations autofs4_direct_root_inode_operations; extern const struct file_operations autofs4_dir_operations; extern const struct file_operations autofs4_root_operations; +/* Operations methods */ + +struct vfsmount *autofs4_d_automount(struct path *); + +/* VFS automount flags management functions */ + +static inline void __managed_dentry_set_automount(struct dentry *dentry) +{ + dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; +} + +static inline void managed_dentry_set_automount(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __managed_dentry_set_automount(dentry); + spin_unlock(&dentry->d_lock); +} + +static inline void __managed_dentry_clear_automount(struct dentry *dentry) +{ + dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT; +} + +static inline void managed_dentry_clear_automount(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __managed_dentry_clear_automount(dentry); + spin_unlock(&dentry->d_lock); +} + /* Initializing function */ int autofs4_fill_super(struct super_block *, void *, int); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 6a930b90d389..0571ec8352b7 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -300,6 +300,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, spin_unlock(&root->d_lock); } ino->flags |= AUTOFS_INF_EXPIRING; + managed_dentry_set_automount(root); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; @@ -408,6 +409,7 @@ found: expired, (int)expired->d_name.len, expired->d_name.name); ino = autofs4_dentry_ino(expired); ino->flags |= AUTOFS_INF_EXPIRING; + managed_dentry_set_automount(expired); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); spin_lock(&autofs4_lock); @@ -479,6 +481,8 @@ int autofs4_expire_run(struct super_block *sb, spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); ino->flags &= ~AUTOFS_INF_EXPIRING; + if (!d_unhashed(dentry)) + managed_dentry_clear_automount(dentry); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -516,6 +520,8 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, ino->flags &= ~AUTOFS_INF_MOUNTPOINT; } ino->flags &= ~AUTOFS_INF_EXPIRING; + if (ret) + managed_dentry_clear_automount(dentry); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); dput(dentry); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index a7bdb9dcac84..d0aa38cac302 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -252,6 +252,7 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) } static const struct dentry_operations autofs4_sb_dentry_operations = { + .d_automount = autofs4_d_automount, .d_release = autofs4_dentry_release, }; @@ -320,6 +321,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_dput; } + if (autofs_type_trigger(sbi->type)) + __managed_dentry_set_automount(root); + root_inode->i_fop = &autofs4_root_operations; root_inode->i_op = autofs_type_trigger(sbi->type) ? &autofs4_direct_root_inode_operations : diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 20225636a4e9..27dc53e111fd 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -35,7 +35,6 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); #endif static int autofs4_dir_open(struct inode *inode, struct file *file); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); -static void *autofs4_follow_link(struct dentry *, struct nameidata *); #define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) #define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE) @@ -73,7 +72,6 @@ const struct inode_operations autofs4_direct_root_inode_operations = { .unlink = autofs4_dir_unlink, .mkdir = autofs4_dir_mkdir, .rmdir = autofs4_dir_rmdir, - .follow_link = autofs4_follow_link, }; const struct inode_operations autofs4_dir_inode_operations = { @@ -420,13 +418,12 @@ void autofs4_dentry_release(struct dentry *de) /* For dentries of directories in the root dir */ static const struct dentry_operations autofs4_root_dentry_operations = { - .d_revalidate = autofs4_revalidate, .d_release = autofs4_dentry_release, }; /* For other dentries */ static const struct dentry_operations autofs4_dentry_operations = { - .d_revalidate = autofs4_revalidate, + .d_automount = autofs4_d_automount, .d_release = autofs4_dentry_release, }; @@ -540,50 +537,176 @@ next: return NULL; } +static int autofs4_mount_wait(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + int status; + + if (ino->flags & AUTOFS_INF_PENDING) { + DPRINTK("waiting for mount name=%.*s", + dentry->d_name.len, dentry->d_name.name); + status = autofs4_wait(sbi, dentry, NFY_MOUNT); + DPRINTK("mount wait done status=%d", status); + ino->last_used = jiffies; + return status; + } + return 0; +} + +static int do_expire_wait(struct dentry *dentry) +{ + struct dentry *expiring; + + expiring = autofs4_lookup_expiring(dentry); + if (!expiring) + return autofs4_expire_wait(dentry); + else { + /* + * If we are racing with expire the request might not + * be quite complete, but the directory has been removed + * so it must have been successful, just wait for it. + */ + autofs4_expire_wait(expiring); + autofs4_del_expiring(expiring); + dput(expiring); + } + return 0; +} + +static struct dentry *autofs4_mountpoint_changed(struct path *path) +{ + struct dentry *dentry = path->dentry; + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + /* + * If this is an indirect mount the dentry could have gone away + * as a result of an expire and a new one created. + */ + if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { + struct dentry *parent = dentry->d_parent; + struct dentry *new = d_lookup(parent, &dentry->d_name); + if (!new) + return NULL; + dput(path->dentry); + path->dentry = new; + } + return path->dentry; +} + +struct vfsmount *autofs4_d_automount(struct path *path) +{ + struct dentry *dentry = path->dentry; + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + int status; + + DPRINTK("dentry=%p %.*s", + dentry, dentry->d_name.len, dentry->d_name.name); + + /* The daemon never triggers a mount. */ + if (autofs4_oz_mode(sbi)) + return NULL; + + /* + * If an expire request is pending everyone must wait. + * If the expire fails we're still mounted so continue + * the follow and return. A return of -EAGAIN (which only + * happens with indirect mounts) means the expire completed + * and the directory was removed, so just go ahead and try + * the mount. + */ + status = do_expire_wait(dentry); + if (status && status != -EAGAIN) + return NULL; + + /* Callback to the daemon to perform the mount or wait */ + spin_lock(&sbi->fs_lock); + if (ino->flags & AUTOFS_INF_PENDING) { + spin_unlock(&sbi->fs_lock); + status = autofs4_mount_wait(dentry); + if (status) + return ERR_PTR(status); + spin_lock(&sbi->fs_lock); + goto done; + } + + /* + * If the dentry is a symlink it's equivalent to a directory + * having d_mounted() true, so there's no need to call back + * to the daemon. + */ + if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) + goto done; + spin_lock(&dentry->d_lock); + if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + ino->flags |= AUTOFS_INF_PENDING; + spin_unlock(&dentry->d_lock); + spin_unlock(&sbi->fs_lock); + status = autofs4_mount_wait(dentry); + if (status) + return ERR_PTR(status); + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + goto done; + } + spin_unlock(&dentry->d_lock); +done: + /* + * Any needed mounting has been completed and the path updated + * so turn this into a normal dentry so we don't continually + * call ->d_automount(). + */ + managed_dentry_clear_automount(dentry); + spin_unlock(&sbi->fs_lock); + + /* Mount succeeded, check if we ended up with a new dentry */ + dentry = autofs4_mountpoint_changed(path); + if (!dentry) + return ERR_PTR(-ENOENT); + + return NULL; +} + /* Lookups in the root directory */ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi; struct autofs_info *ino; - struct dentry *expiring, *active; - int oz_mode; + struct dentry *active; - DPRINTK("name = %.*s", - dentry->d_name.len, dentry->d_name.name); + DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name); /* File name too long to exist */ if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); sbi = autofs4_sbi(dir->i_sb); - oz_mode = autofs4_oz_mode(sbi); DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", - current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); + current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); active = autofs4_lookup_active(dentry); if (active) { - dentry = active; - ino = autofs4_dentry_ino(dentry); + return active; } else { - /* - * Mark the dentry incomplete but don't hash it. We do this - * to serialize our inode creation operations (symlink and - * mkdir) which prevents deadlock during the callback to - * the daemon. Subsequent user space lookups for the same - * dentry are placed on the wait queue while the daemon - * itself is allowed passage unresticted so the create - * operation itself can then hash the dentry. Finally, - * we check for the hashed dentry and return the newly - * hashed dentry. - */ d_set_d_op(dentry, &autofs4_root_dentry_operations); /* - * And we need to ensure that the same dentry is used for - * all following lookup calls until it is hashed so that - * the dentry flags are persistent throughout the request. + * A dentry that is not within the root can never trigger a + * mount operation, unless the directory already exists, so we + * can return fail immediately. The daemon however does need + * to create directories within the file system. */ + if (!autofs4_oz_mode(sbi) && !IS_ROOT(dentry->d_parent)) + return ERR_PTR(-ENOENT); + + /* Mark entries in the root as mount triggers */ + if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) { + d_set_d_op(dentry, &autofs4_dentry_operations); + managed_dentry_set_automount(dentry); + } + ino = autofs4_init_ino(NULL, sbi, 0555); if (!ino) return ERR_PTR(-ENOMEM); @@ -595,82 +718,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s d_instantiate(dentry, NULL); } - - if (!oz_mode) { - mutex_unlock(&dir->i_mutex); - expiring = autofs4_lookup_expiring(dentry); - if (expiring) { - /* - * If we are racing with expire the request might not - * be quite complete but the directory has been removed - * so it must have been successful, so just wait for it. - */ - autofs4_expire_wait(expiring); - autofs4_del_expiring(expiring); - dput(expiring); - } - - spin_lock(&sbi->fs_lock); - ino->flags |= AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - if (dentry->d_op && dentry->d_op->d_revalidate) - (dentry->d_op->d_revalidate)(dentry, nd); - mutex_lock(&dir->i_mutex); - } - - /* - * If we are still pending, check if we had to handle - * a signal. If so we can force a restart.. - */ - if (ino->flags & AUTOFS_INF_PENDING) { - /* See if we were interrupted */ - if (signal_pending(current)) { - sigset_t *sigset = ¤t->pending.signal; - if (sigismember (sigset, SIGKILL) || - sigismember (sigset, SIGQUIT) || - sigismember (sigset, SIGINT)) { - if (active) - dput(active); - return ERR_PTR(-ERESTARTNOINTR); - } - } - if (!oz_mode) { - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - } - } - - /* - * If this dentry is unhashed, then we shouldn't honour this - * lookup. Returning ENOENT here doesn't do the right thing - * for all system calls, but it should be OK for the operations - * we permit from an autofs. - */ - if (!oz_mode && d_unhashed(dentry)) { - /* - * A user space application can (and has done in the past) - * remove and re-create this directory during the callback. - * This can leave us with an unhashed dentry, but a - * successful mount! So we need to perform another - * cached lookup in case the dentry now exists. - */ - struct dentry *parent = dentry->d_parent; - struct dentry *new = d_lookup(parent, &dentry->d_name); - if (new != NULL) - dentry = new; - else - dentry = ERR_PTR(-ENOENT); - - if (active) - dput(active); - - return dentry; - } - - if (active) - return active; - return NULL; } @@ -715,11 +762,6 @@ static int autofs4_dir_symlink(struct inode *dir, } d_add(dentry, inode); - if (dir == dir->i_sb->s_root->d_inode) - d_set_d_op(dentry, &autofs4_root_dentry_operations); - else - d_set_d_op(dentry, &autofs4_dentry_operations); - dentry->d_fsdata = ino; ino->dentry = dget(dentry); atomic_inc(&ino->count); @@ -850,11 +892,6 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) } d_add(dentry, inode); - if (dir == dir->i_sb->s_root->d_inode) - d_set_d_op(dentry, &autofs4_root_dentry_operations); - else - d_set_d_op(dentry, &autofs4_dentry_operations); - dentry->d_fsdata = ino; ino->dentry = dget(dentry); atomic_inc(&ino->count); -- cgit v1.2.3 From b5b801779d59165c4ecf1009009109545bd1f642 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:03 +0000 Subject: autofs4: Add d_manage() dentry operation This patch required a previous patch to add the ->d_automount() dentry operation. Add a function to use the newly defined ->d_manage() dentry operation for blocking during mount and expire. Whether the VFS calls the dentry operations d_automount() and d_manage() is controled by the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags. autofs uses the d_automount() operation to callback to user space to request mount operations and the d_manage() operation to block walks into mounts that are under construction or destruction. In order to prevent these functions from being called unnecessarily the DMANAGED_* flags are cleared for cases which would cause this. In the common case the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags are both set for dentrys waiting to be mounted. The DMANAGED_TRANSIT flag is cleared upon successful mount request completion and set during expire runs, both during the dentry expire check, and if selected for expire, is left set until a subsequent successful mount request completes. The exception to this is the so-called rootless multi-mount which has no actual mount at its base. In this case the DMANAGED_AUTOMOUNT flag is cleared upon successful mount request completion as well and set again after a successful expire. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 50 ++++++++++++++++++++++++++- fs/autofs4/expire.c | 51 +++++++++++++-------------- fs/autofs4/inode.c | 3 +- fs/autofs4/root.c | 95 ++++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 159 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 1ebfe53872b5..f0c95e0460cf 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -99,7 +99,6 @@ struct autofs_info { }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ -#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ struct autofs_wait_queue { @@ -221,6 +220,7 @@ extern const struct file_operations autofs4_root_operations; /* Operations methods */ struct vfsmount *autofs4_d_automount(struct path *); +int autofs4_d_manage(struct dentry *, bool); /* VFS automount flags management functions */ @@ -248,6 +248,54 @@ static inline void managed_dentry_clear_automount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } +static inline void __managed_dentry_set_transit(struct dentry *dentry) +{ + dentry->d_flags |= DCACHE_MANAGE_TRANSIT; +} + +static inline void managed_dentry_set_transit(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __managed_dentry_set_transit(dentry); + spin_unlock(&dentry->d_lock); +} + +static inline void __managed_dentry_clear_transit(struct dentry *dentry) +{ + dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT; +} + +static inline void managed_dentry_clear_transit(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __managed_dentry_clear_transit(dentry); + spin_unlock(&dentry->d_lock); +} + +static inline void __managed_dentry_set_managed(struct dentry *dentry) +{ + dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); +} + +static inline void managed_dentry_set_managed(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __managed_dentry_set_managed(dentry); + spin_unlock(&dentry->d_lock); +} + +static inline void __managed_dentry_clear_managed(struct dentry *dentry) +{ + dentry->d_flags &= ~(DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); +} + +static inline void managed_dentry_clear_managed(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __managed_dentry_clear_managed(dentry); + spin_unlock(&dentry->d_lock); +} + /* Initializing function */ int autofs4_fill_super(struct super_block *, void *, int); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 0571ec8352b7..3ed79d76c233 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -26,10 +26,6 @@ static inline int autofs4_can_expire(struct dentry *dentry, if (ino == NULL) return 0; - /* No point expiring a pending mount */ - if (ino->flags & AUTOFS_INF_PENDING) - return 0; - if (!do_now) { /* Too young to die */ if (!timeout || time_after(ino->last_used + timeout, now)) @@ -283,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, unsigned long timeout; struct dentry *root = dget(sb->s_root); int do_now = how & AUTOFS_EXP_IMMEDIATE; + struct autofs_info *ino; if (!root) return NULL; @@ -291,20 +288,21 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, timeout = sbi->exp_timeout; spin_lock(&sbi->fs_lock); + ino = autofs4_dentry_ino(root); + /* No point expiring a pending mount */ + if (ino->flags & AUTOFS_INF_PENDING) { + spin_unlock(&sbi->fs_lock); + return NULL; + } + managed_dentry_set_transit(root); if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { struct autofs_info *ino = autofs4_dentry_ino(root); - if (d_mountpoint(root)) { - ino->flags |= AUTOFS_INF_MOUNTPOINT; - spin_lock(&root->d_lock); - root->d_flags &= ~DCACHE_MOUNTED; - spin_unlock(&root->d_lock); - } ino->flags |= AUTOFS_INF_EXPIRING; - managed_dentry_set_automount(root); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; } + managed_dentry_clear_transit(root); spin_unlock(&sbi->fs_lock); dput(root); @@ -341,6 +339,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, while ((dentry = get_next_positive_dentry(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); + /* No point expiring a pending mount */ + if (ino->flags & AUTOFS_INF_PENDING) + goto cont; + managed_dentry_set_transit(dentry); /* * Case 1: (i) indirect mount or top level pseudo direct mount @@ -400,6 +402,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } } next: + managed_dentry_clear_transit(dentry); +cont: spin_unlock(&sbi->fs_lock); } return NULL; @@ -409,7 +413,6 @@ found: expired, (int)expired->d_name.len, expired->d_name.name); ino = autofs4_dentry_ino(expired); ino->flags |= AUTOFS_INF_EXPIRING; - managed_dentry_set_automount(expired); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); spin_lock(&autofs4_lock); @@ -482,7 +485,7 @@ int autofs4_expire_run(struct super_block *sb, ino = autofs4_dentry_ino(dentry); ino->flags &= ~AUTOFS_INF_EXPIRING; if (!d_unhashed(dentry)) - managed_dentry_clear_automount(dentry); + managed_dentry_clear_transit(dentry); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -508,20 +511,18 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); spin_lock(&sbi->fs_lock); - if (ino->flags & AUTOFS_INF_MOUNTPOINT) { - spin_lock(&sb->s_root->d_lock); - /* - * If we haven't been expired away, then reset - * mounted status. - */ - if (mnt->mnt_parent != mnt) - sb->s_root->d_flags |= DCACHE_MOUNTED; - spin_unlock(&sb->s_root->d_lock); - ino->flags &= ~AUTOFS_INF_MOUNTPOINT; - } ino->flags &= ~AUTOFS_INF_EXPIRING; + spin_lock(&dentry->d_lock); if (ret) - managed_dentry_clear_automount(dentry); + __managed_dentry_clear_transit(dentry); + else { + if ((IS_ROOT(dentry) || + (autofs_type_indirect(sbi->type) && + IS_ROOT(dentry->d_parent))) && + !(dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + __managed_dentry_set_automount(dentry); + } + spin_unlock(&dentry->d_lock); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); dput(dentry); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index d0aa38cac302..75c1ed8e2fb9 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -253,6 +253,7 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) static const struct dentry_operations autofs4_sb_dentry_operations = { .d_automount = autofs4_d_automount, + .d_manage = autofs4_d_manage, .d_release = autofs4_dentry_release, }; @@ -322,7 +323,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } if (autofs_type_trigger(sbi->type)) - __managed_dentry_set_automount(root); + __managed_dentry_set_managed(root); root_inode->i_fop = &autofs4_root_operations; root_inode->i_op = autofs_type_trigger(sbi->type) ? diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 27dc53e111fd..f1076b91a0fa 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -424,6 +424,7 @@ static const struct dentry_operations autofs4_root_dentry_operations = { /* For other dentries */ static const struct dentry_operations autofs4_dentry_operations = { .d_automount = autofs4_d_automount, + .d_manage = autofs4_d_manage, .d_release = autofs4_dentry_release, }; @@ -604,6 +605,18 @@ struct vfsmount *autofs4_d_automount(struct path *path) DPRINTK("dentry=%p %.*s", dentry, dentry->d_name.len, dentry->d_name.name); + /* + * Someone may have manually umounted this or it was a submount + * that has gone away. + */ + spin_lock(&dentry->d_lock); + if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) && + (dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + __managed_dentry_set_transit(path->dentry); + } + spin_unlock(&dentry->d_lock); + /* The daemon never triggers a mount. */ if (autofs4_oz_mode(sbi)) return NULL; @@ -633,31 +646,63 @@ struct vfsmount *autofs4_d_automount(struct path *path) /* * If the dentry is a symlink it's equivalent to a directory - * having d_mounted() true, so there's no need to call back + * having d_mountpoint() true, so there's no need to call back * to the daemon. */ if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) goto done; - spin_lock(&dentry->d_lock); - if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + if (!d_mountpoint(dentry)) { + /* + * It's possible that user space hasn't removed directories + * after umounting a rootless multi-mount, although it + * should. For v5 have_submounts() is sufficient to handle + * this because the leaves of the directory tree under the + * mount never trigger mounts themselves (they have an autofs + * trigger mount mounted on them). But v4 pseudo direct mounts + * do need the leaves to to trigger mounts. In this case we + * have no choice but to use the list_empty() check and + * require user space behave. + */ + if (sbi->version > 4) { + if (have_submounts(dentry)) + goto done; + } else { + spin_lock(&dentry->d_lock); + if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); + goto done; + } + spin_unlock(&dentry->d_lock); + } ino->flags |= AUTOFS_INF_PENDING; - spin_unlock(&dentry->d_lock); spin_unlock(&sbi->fs_lock); status = autofs4_mount_wait(dentry); if (status) return ERR_PTR(status); spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_PENDING; - goto done; } - spin_unlock(&dentry->d_lock); done: - /* - * Any needed mounting has been completed and the path updated - * so turn this into a normal dentry so we don't continually - * call ->d_automount(). - */ - managed_dentry_clear_automount(dentry); + if (!(ino->flags & AUTOFS_INF_EXPIRING)) { + /* + * Any needed mounting has been completed and the path updated + * so turn this into a normal dentry so we don't continually + * call ->d_automount() and ->d_manage(). + */ + spin_lock(&dentry->d_lock); + __managed_dentry_clear_transit(dentry); + /* + * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and + * symlinks as in all other cases the dentry will be covered by + * an actual mount so ->d_automount() won't be called during + * the follow. + */ + if ((!d_mountpoint(dentry) && + !list_empty(&dentry->d_subdirs)) || + (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) + __managed_dentry_clear_automount(dentry); + spin_unlock(&dentry->d_lock); + } spin_unlock(&sbi->fs_lock); /* Mount succeeded, check if we ended up with a new dentry */ @@ -668,6 +713,30 @@ done: return NULL; } +int autofs4_d_manage(struct dentry *dentry, bool mounting_here) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + DPRINTK("dentry=%p %.*s", + dentry, dentry->d_name.len, dentry->d_name.name); + + /* The daemon never waits. */ + if (autofs4_oz_mode(sbi) || mounting_here) { + if (!d_mountpoint(dentry)) + return -EISDIR; + return 0; + } + + /* Wait for pending expires */ + do_expire_wait(dentry); + + /* + * This dentry may be under construction so wait on mount + * completion. + */ + return autofs4_mount_wait(dentry); +} + /* Lookups in the root directory */ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -704,7 +773,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s /* Mark entries in the root as mount triggers */ if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) { d_set_d_op(dentry, &autofs4_dentry_operations); - managed_dentry_set_automount(dentry); + __managed_dentry_set_managed(dentry); } ino = autofs4_init_ino(NULL, sbi, 0555); -- cgit v1.2.3 From 8c13a676d5a56495c350f3141824a5ef6c6b4606 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:08 +0000 Subject: autofs4: Remove unused code Remove code that is not used due to the use of ->d_automount() and ->d_manage(). Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 7 -- fs/autofs4/root.c | 243 -------------------------------------------------- 2 files changed, 250 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f0c95e0460cf..1ee3b9afbe9e 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -175,13 +175,6 @@ static inline int autofs4_ispending(struct dentry *dentry) return 0; } -static inline void autofs4_copy_atime(struct file *src, struct file *dst) -{ - dst->f_path.dentry->d_inode->i_atime = - src->f_path.dentry->d_inode->i_atime; - return; -} - struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *); void autofs4_free_ino(struct autofs_info *); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f1076b91a0fa..b2498c8cb0a7 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -36,9 +36,6 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); static int autofs4_dir_open(struct inode *inode, struct file *file); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); -#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) -#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE) - const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, .release = dcache_dir_close, @@ -114,14 +111,6 @@ static void autofs4_del_active(struct dentry *dentry) return; } -static unsigned int autofs4_need_mount(unsigned int flags) -{ - unsigned int res = 0; - if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS)) - res = 1; - return res; -} - static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_path.dentry; @@ -156,238 +145,6 @@ out: return dcache_dir_open(inode, file); } -static int try_to_fill_dentry(struct dentry *dentry, int flags) -{ - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct autofs_info *ino = autofs4_dentry_ino(dentry); - int status; - - DPRINTK("dentry=%p %.*s ino=%p", - dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - - /* - * Wait for a pending mount, triggering one if there - * isn't one already - */ - if (dentry->d_inode == NULL) { - DPRINTK("waiting for mount name=%.*s", - dentry->d_name.len, dentry->d_name.name); - - status = autofs4_wait(sbi, dentry, NFY_MOUNT); - - DPRINTK("mount done status=%d", status); - - /* Turn this into a real negative dentry? */ - if (status == -ENOENT) { - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - return status; - } else if (status) { - /* Return a negative dentry, but leave it "pending" */ - return status; - } - /* Trigger mount for path component or follow link */ - } else if (ino->flags & AUTOFS_INF_PENDING || - autofs4_need_mount(flags)) { - DPRINTK("waiting for mount name=%.*s", - dentry->d_name.len, dentry->d_name.name); - - spin_lock(&sbi->fs_lock); - ino->flags |= AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - status = autofs4_wait(sbi, dentry, NFY_MOUNT); - - DPRINTK("mount done status=%d", status); - - if (status) { - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - return status; - } - } - - /* Initialize expiry counter after successful mount */ - ino->last_used = jiffies; - - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - - return 0; -} - -/* For autofs direct mounts the follow link triggers the mount */ -static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct autofs_info *ino = autofs4_dentry_ino(dentry); - int oz_mode = autofs4_oz_mode(sbi); - unsigned int lookup_type; - int status; - - DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", - dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, - nd->flags); - /* - * For an expire of a covered direct or offset mount we need - * to break out of follow_down_one() at the autofs mount trigger - * (d_mounted--), so we can see the expiring flag, and manage - * the blocking and following here until the expire is completed. - */ - if (oz_mode) { - spin_lock(&sbi->fs_lock); - if (ino->flags & AUTOFS_INF_EXPIRING) { - spin_unlock(&sbi->fs_lock); - /* Follow down to our covering mount. */ - if (!follow_down_one(&nd->path)) - goto done; - goto follow; - } - spin_unlock(&sbi->fs_lock); - goto done; - } - - /* If an expire request is pending everyone must wait. */ - autofs4_expire_wait(dentry); - - /* We trigger a mount for almost all flags */ - lookup_type = autofs4_need_mount(nd->flags); - spin_lock(&sbi->fs_lock); - spin_lock(&autofs4_lock); - spin_lock(&dentry->d_lock); - if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); - spin_unlock(&sbi->fs_lock); - goto follow; - } - - /* - * If the dentry contains directories then it is an autofs - * multi-mount with no root mount offset. So don't try to - * mount it again. - */ - if (ino->flags & AUTOFS_INF_PENDING || - (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); - spin_unlock(&sbi->fs_lock); - - status = try_to_fill_dentry(dentry, nd->flags); - if (status) - goto out_error; - - goto follow; - } - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); - spin_unlock(&sbi->fs_lock); -follow: - /* - * If there is no root mount it must be an autofs - * multi-mount with no root offset so we don't need - * to follow it. - */ - if (d_managed(dentry)) { - status = follow_down(&nd->path, false); - if (status < 0) - goto out_error; - } - -done: - return NULL; - -out_error: - path_put(&nd->path); - return ERR_PTR(status); -} - -/* - * Revalidate is called on every cache lookup. Some of those - * cache lookups may actually happen while the dentry is not - * yet completely filled in, and revalidate has to delay such - * lookups.. - */ -static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *dir; - struct autofs_sb_info *sbi; - int oz_mode; - int flags = nd ? nd->flags : 0; - int status = 1; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - dir = dentry->d_parent->d_inode; - sbi = autofs4_sbi(dir->i_sb); - oz_mode = autofs4_oz_mode(sbi); - - /* Pending dentry */ - spin_lock(&sbi->fs_lock); - if (autofs4_ispending(dentry)) { - /* The daemon never causes a mount to trigger */ - spin_unlock(&sbi->fs_lock); - - if (oz_mode) - return 1; - - /* - * If the directory has gone away due to an expire - * we have been called as ->d_revalidate() and so - * we need to return false and proceed to ->lookup(). - */ - if (autofs4_expire_wait(dentry) == -EAGAIN) - return 0; - - /* - * A zero status is success otherwise we have a - * negative error code. - */ - status = try_to_fill_dentry(dentry, flags); - if (status == 0) - return 1; - - return status; - } - spin_unlock(&sbi->fs_lock); - - /* Negative dentry.. invalidate if "old" */ - if (dentry->d_inode == NULL) - return 0; - - /* Check for a non-mountpoint directory with no contents */ - spin_lock(&autofs4_lock); - spin_lock(&dentry->d_lock); - if (S_ISDIR(dentry->d_inode->i_mode) && - !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { - DPRINTK("dentry=%p %.*s, emptydir", - dentry, dentry->d_name.len, dentry->d_name.name); - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); - - /* The daemon never causes a mount to trigger */ - if (oz_mode) - return 1; - - /* - * A zero status is success otherwise we have a - * negative error code. - */ - status = try_to_fill_dentry(dentry, flags); - if (status == 0) - return 1; - - return status; - } - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); - - return 1; -} - void autofs4_dentry_release(struct dentry *de) { struct autofs_info *inf; -- cgit v1.2.3 From e61da20a50d21725ff27571a6dff9468e4fb7146 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:14 +0000 Subject: autofs4: Clean up inode operations Since the use of ->follow_link() has been eliminated there is no need to separate the indirect and direct inode operations. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 3 --- fs/autofs4/inode.c | 4 +--- fs/autofs4/root.c | 15 --------------- 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 1ee3b9afbe9e..f4b4030cf406 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -204,9 +204,6 @@ void autofs_dev_ioctl_exit(void); extern const struct inode_operations autofs4_symlink_inode_operations; extern const struct inode_operations autofs4_dir_inode_operations; -extern const struct inode_operations autofs4_root_inode_operations; -extern const struct inode_operations autofs4_indirect_root_inode_operations; -extern const struct inode_operations autofs4_direct_root_inode_operations; extern const struct file_operations autofs4_dir_operations; extern const struct file_operations autofs4_root_operations; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 75c1ed8e2fb9..dac3dc79ccb4 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -326,9 +326,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) __managed_dentry_set_managed(root); root_inode->i_fop = &autofs4_root_operations; - root_inode->i_op = autofs_type_trigger(sbi->type) ? - &autofs4_direct_root_inode_operations : - &autofs4_indirect_root_inode_operations; + root_inode->i_op = &autofs4_dir_inode_operations; /* Couldn't this be tested earlier? */ if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index b2498c8cb0a7..52af410a831d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -56,21 +56,6 @@ const struct file_operations autofs4_dir_operations = { .llseek = dcache_dir_lseek, }; -const struct inode_operations autofs4_indirect_root_inode_operations = { - .lookup = autofs4_lookup, - .unlink = autofs4_dir_unlink, - .symlink = autofs4_dir_symlink, - .mkdir = autofs4_dir_mkdir, - .rmdir = autofs4_dir_rmdir, -}; - -const struct inode_operations autofs4_direct_root_inode_operations = { - .lookup = autofs4_lookup, - .unlink = autofs4_dir_unlink, - .mkdir = autofs4_dir_mkdir, - .rmdir = autofs4_dir_rmdir, -}; - const struct inode_operations autofs4_dir_inode_operations = { .lookup = autofs4_lookup, .unlink = autofs4_dir_unlink, -- cgit v1.2.3 From 71e469db242c2eeb00faf9caf7d9e00150c00a6e Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:19 +0000 Subject: autofs4: Clean up dentry operations There are now two distinct dentry operations uses. One for dentrys that trigger mounts and one for dentrys that do not. Rationalize the use of these dentry operations and rename them to reflect their function. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 7 ++----- fs/autofs4/inode.c | 12 ++++-------- fs/autofs4/root.c | 36 ++++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f4b4030cf406..c28085cb82a5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -206,11 +206,8 @@ extern const struct inode_operations autofs4_symlink_inode_operations; extern const struct inode_operations autofs4_dir_inode_operations; extern const struct file_operations autofs4_dir_operations; extern const struct file_operations autofs4_root_operations; - -/* Operations methods */ - -struct vfsmount *autofs4_d_automount(struct path *); -int autofs4_d_manage(struct dentry *, bool); +extern const struct dentry_operations autofs4_dentry_operations; +extern const struct dentry_operations autofs4_mount_dentry_operations; /* VFS automount flags management functions */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index dac3dc79ccb4..427c35746340 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -251,12 +251,6 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) return ino; } -static const struct dentry_operations autofs4_sb_dentry_operations = { - .d_automount = autofs4_d_automount, - .d_manage = autofs4_d_manage, - .d_release = autofs4_dentry_release, -}; - int autofs4_fill_super(struct super_block *s, void *data, int silent) { struct inode * root_inode; @@ -311,7 +305,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - d_set_d_op(root, &autofs4_sb_dentry_operations); + d_set_d_op(root, &autofs4_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ @@ -322,8 +316,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_dput; } - if (autofs_type_trigger(sbi->type)) + if (autofs_type_trigger(sbi->type)) { + d_set_d_op(root, &autofs4_mount_dentry_operations); __managed_dentry_set_managed(root); + } root_inode->i_fop = &autofs4_root_operations; root_inode->i_op = &autofs4_dir_inode_operations; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 52af410a831d..62c1229a4d31 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -35,6 +35,8 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); #endif static int autofs4_dir_open(struct inode *inode, struct file *file); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); +static struct vfsmount *autofs4_d_automount(struct path *); +static int autofs4_d_manage(struct dentry *, bool); const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -64,6 +66,18 @@ const struct inode_operations autofs4_dir_inode_operations = { .rmdir = autofs4_dir_rmdir, }; +/* For dentries that don't initiate mounting */ +const struct dentry_operations autofs4_dentry_operations = { + .d_release = autofs4_dentry_release, +}; + +/* For dentries that do initiate mounting */ +const struct dentry_operations autofs4_mount_dentry_operations = { + .d_automount = autofs4_d_automount, + .d_manage = autofs4_d_manage, + .d_release = autofs4_dentry_release, +}; + static void autofs4_add_active(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -158,18 +172,6 @@ void autofs4_dentry_release(struct dentry *de) } } -/* For dentries of directories in the root dir */ -static const struct dentry_operations autofs4_root_dentry_operations = { - .d_release = autofs4_dentry_release, -}; - -/* For other dentries */ -static const struct dentry_operations autofs4_dentry_operations = { - .d_automount = autofs4_d_automount, - .d_manage = autofs4_d_manage, - .d_release = autofs4_dentry_release, -}; - static struct dentry *autofs4_lookup_active(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -337,7 +339,7 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) return path->dentry; } -struct vfsmount *autofs4_d_automount(struct path *path) +static struct vfsmount *autofs4_d_automount(struct path *path) { struct dentry *dentry = path->dentry; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -501,7 +503,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s if (active) { return active; } else { - d_set_d_op(dentry, &autofs4_root_dentry_operations); + d_set_d_op(dentry, &autofs4_dentry_operations); /* * A dentry that is not within the root can never trigger a @@ -514,7 +516,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s /* Mark entries in the root as mount triggers */ if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) { - d_set_d_op(dentry, &autofs4_dentry_operations); + d_set_d_op(dentry, &autofs4_mount_dentry_operations); __managed_dentry_set_managed(dentry); } @@ -573,6 +575,8 @@ static int autofs4_dir_symlink(struct inode *dir, } d_add(dentry, inode); + d_set_d_op(dentry, &autofs4_dentry_operations); + dentry->d_fsdata = ino; ino->dentry = dget(dentry); atomic_inc(&ino->count); @@ -791,7 +795,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) int is_autofs4_dentry(struct dentry *dentry) { return dentry && dentry->d_inode && - (dentry->d_op == &autofs4_root_dentry_operations || + (dentry->d_op == &autofs4_mount_dentry_operations || dentry->d_op == &autofs4_dentry_operations) && dentry->d_fsdata != NULL; } -- cgit v1.2.3 From 6651149371b842715906311b4631b8489cebf7e8 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:24 +0000 Subject: autofs4: Clean up autofs4_free_ino() When this function is called the local reference count does't need to be updated since the dentry is going away and dput definitely must not be called here. Also the autofs info struct field inode isn't used so remove it. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/inode.c | 13 ------------- fs/autofs4/root.c | 9 --------- 2 files changed, 22 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 427c35746340..3ecd2e2bcdbd 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -45,7 +45,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, if (!reinit) { ino->flags = 0; - ino->inode = NULL; ino->dentry = NULL; ino->size = 0; INIT_LIST_HEAD(&ino->active); @@ -76,19 +75,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, void autofs4_free_ino(struct autofs_info *ino) { - struct autofs_info *p_ino; - if (ino->dentry) { ino->dentry->d_fsdata = NULL; - if (ino->dentry->d_inode) { - struct dentry *parent = ino->dentry->d_parent; - if (atomic_dec_and_test(&ino->count)) { - p_ino = autofs4_dentry_ino(parent); - if (p_ino && parent != ino->dentry) - atomic_dec(&p_ino->count); - } - dput(ino->dentry); - } ino->dentry = NULL; } if (ino->free) @@ -390,7 +378,6 @@ struct inode *autofs4_get_inode(struct super_block *sb, if (inode == NULL) return NULL; - inf->inode = inode; inode->i_mode = inf->mode; if (sb->s_root) { inode->i_uid = sb->s_root->d_inode->i_uid; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 62c1229a4d31..8315565ed7d4 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -151,11 +151,8 @@ void autofs4_dentry_release(struct dentry *de) DPRINTK("releasing %p", de); inf = autofs4_dentry_ino(de); - de->d_fsdata = NULL; - if (inf) { struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); - if (sbi) { spin_lock(&sbi->lookup_lock); if (!list_empty(&inf->active)) @@ -164,10 +161,6 @@ void autofs4_dentry_release(struct dentry *de) list_del(&inf->expiring); spin_unlock(&sbi->lookup_lock); } - - inf->dentry = NULL; - inf->inode = NULL; - autofs4_free_ino(inf); } } @@ -583,7 +576,6 @@ static int autofs4_dir_symlink(struct inode *dir, p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) atomic_inc(&p_ino->count); - ino->inode = inode; ino->u.symlink = cp; dir->i_mtime = CURRENT_TIME; @@ -713,7 +705,6 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) atomic_inc(&p_ino->count); - ino->inode = inode; inc_nlink(dir); dir->i_mtime = CURRENT_TIME; -- cgit v1.2.3 From 9e3fea16ba386fa549a0b2de8a203e5d412997a0 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:30 +0000 Subject: autofs4: Fix wait validation It is possible for the check in wait.c:validate_request() to return an incorrect result if the dentry that was mounted upon has changed during the callback. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/waitq.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index c5f8459c905e..56010056b2e6 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -309,6 +309,9 @@ static int validate_request(struct autofs_wait_queue **wait, * completed while we waited on the mutex ... */ if (notify == NFY_MOUNT) { + struct dentry *new = NULL; + int valid = 1; + /* * If the dentry was successfully mounted while we slept * on the wait queue mutex we can return success. If it @@ -316,8 +319,20 @@ static int validate_request(struct autofs_wait_queue **wait, * a multi-mount with no mount at it's base) we can * continue on and create a new request. */ + if (!IS_ROOT(dentry)) { + if (dentry->d_inode && d_unhashed(dentry)) { + struct dentry *parent = dentry->d_parent; + new = d_lookup(parent, &dentry->d_name); + if (new) + dentry = new; + } + } if (have_submounts(dentry)) - return 0; + valid = 0; + + if (new) + dput(new); + return valid; } return 1; -- cgit v1.2.3 From dd89f90d2deb9aa5bc8e1b15d726ff5c0bb2b623 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 14 Jan 2011 18:46:35 +0000 Subject: autofs4: Add v4 pseudo direct mount support Version 4 of autofs provides a pseudo direct mount implementation that relies on directories at the leaves of a directory tree under an indirect mount to trigger mounts. This patch adds support for that functionality. Signed-off-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/root.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 8315565ed7d4..9194e274f849 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -630,6 +630,58 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) return 0; } +/* + * Version 4 of autofs provides a pseudo direct mount implementation + * that relies on directories at the leaves of a directory tree under + * an indirect mount to trigger mounts. To allow for this we need to + * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves + * of the directory tree. There is no need to clear the automount flag + * following a mount or restore it after an expire because these mounts + * are always covered. However, it is neccessary to ensure that these + * flags are clear on non-empty directories to avoid unnecessary calls + * during path walks. + */ +static void autofs_set_leaf_automount_flags(struct dentry *dentry) +{ + struct dentry *parent; + + /* root and dentrys in the root are already handled */ + if (IS_ROOT(dentry->d_parent)) + return; + + managed_dentry_set_managed(dentry); + + parent = dentry->d_parent; + /* only consider parents below dentrys in the root */ + if (IS_ROOT(parent->d_parent)) + return; + managed_dentry_clear_managed(parent); + return; +} + +static void autofs_clear_leaf_automount_flags(struct dentry *dentry) +{ + struct list_head *d_child; + struct dentry *parent; + + /* flags for dentrys in the root are handled elsewhere */ + if (IS_ROOT(dentry->d_parent)) + return; + + managed_dentry_clear_managed(dentry); + + parent = dentry->d_parent; + /* only consider parents below dentrys in the root */ + if (IS_ROOT(parent->d_parent)) + return; + d_child = &dentry->d_u.d_child; + /* Set parent managed if it's becoming empty */ + if (d_child->next == &parent->d_subdirs && + d_child->prev == &parent->d_subdirs) + managed_dentry_set_managed(parent); + return; +} + static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); @@ -657,6 +709,9 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); spin_unlock(&autofs4_lock); + if (sbi->version < 5) + autofs_clear_leaf_automount_flags(dentry); + if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) @@ -699,6 +754,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) } d_add(dentry, inode); + if (sbi->version < 5) + autofs_set_leaf_automount_flags(dentry); + dentry->d_fsdata = ino; ino->dentry = dget(dentry); atomic_inc(&ino->count); -- cgit v1.2.3 From 87556ef19926e97464e0163a7840140527ae6615 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:46:46 +0000 Subject: Remove a further kludge from __do_follow_link() Remove a further kludge from __do_follow_link() as it's no longer required with the automount code. This reverts the non-helper-function parts of 051d381259eb57d6074d02a6ba6e90e744f1a29f, which breaks union mounts. Reported-by: vaurora@redhat.com Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 61995fba4e21..373852012713 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -800,12 +800,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) touch_atime(link->mnt, dentry); nd_set_link(nd, NULL); - if (link->mnt != nd->path.mnt) { - path_to_nameidata(link, nd); - nd->inode = nd->path.dentry->d_inode; - dget(dentry); - } - mntget(link->mnt); + if (link->mnt == nd->path.mnt) + mntget(link->mnt); nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); -- cgit v1.2.3 From ab90911ff90cdab59b31c045c3f0ae480d14f29d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 18:46:51 +0000 Subject: Allow d_manage() to be used in RCU-walk mode Allow d_manage() to be called from pathwalk when it is in RCU-walk mode as well as when it is in Ref-walk mode. This permits __follow_mount_rcu() to call d_manage() directly. d_manage() needs a parameter to indicate that it is in RCU-walk mode as it isn't allowed to sleep if in that mode (but should return -ECHILD instead). autofs4_d_manage() can then be set to retain RCU-walk mode if the daemon accesses it and otherwise request dropping back to ref-walk mode. Signed-off-by: David Howells Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 7 ++++++- fs/autofs4/root.c | 8 ++++++-- fs/namei.c | 15 ++++++++------- include/linux/dcache.h | 2 +- 5 files changed, 22 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index cbf98b989b11..39707748ed2d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -32,7 +32,7 @@ d_release: no no yes no d_iput: no no yes no d_dname: no no no no d_automount: no no yes no -d_manage: no no yes no +d_manage: no no yes (ref-walk) maybe --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 4682586b147a..3c4b2f1b64d0 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -865,7 +865,7 @@ struct dentry_operations { void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); - int (*d_manage)(struct dentry *, bool); + int (*d_manage)(struct dentry *, bool, bool); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -960,6 +960,11 @@ struct dentry_operations { held by the caller and the function should not initiate any mounts or unmounts that it will then wait for. + If the 'rcu_walk' parameter is true, then the caller is doing a + pathwalk in RCU-walk mode. Sleeping is not permitted in this mode, + and the caller can be asked to leave it and call again by returing + -ECHILD. + This function is only used if DCACHE_MANAGE_TRANSIT is set on the dentry being transited from. diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 9194e274f849..dbd95512808c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -36,7 +36,7 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); static int autofs4_dir_open(struct inode *inode, struct file *file); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); static struct vfsmount *autofs4_d_automount(struct path *); -static int autofs4_d_manage(struct dentry *, bool); +static int autofs4_d_manage(struct dentry *, bool, bool); const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -450,7 +450,7 @@ done: return NULL; } -int autofs4_d_manage(struct dentry *dentry, bool mounting_here) +int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -464,6 +464,10 @@ int autofs4_d_manage(struct dentry *dentry, bool mounting_here) return 0; } + /* We need to sleep, so we need pathwalk to be in ref-mode */ + if (rcu_walk) + return -ECHILD; + /* Wait for pending expires */ do_expire_wait(dentry); diff --git a/fs/namei.c b/fs/namei.c index 373852012713..5c89695ae1e4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -987,7 +987,8 @@ static int follow_managed(struct path *path, unsigned flags) if (managed & DCACHE_MANAGE_TRANSIT) { BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); - ret = path->dentry->d_op->d_manage(path->dentry, false); + ret = path->dentry->d_op->d_manage(path->dentry, + false, false); if (ret < 0) return ret == -EISDIR ? 0 : ret; } @@ -1048,13 +1049,12 @@ int follow_down_one(struct path *path) static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, struct inode **inode, bool reverse_transit) { - unsigned abort_mask = - reverse_transit ? 0 : DCACHE_MANAGE_TRANSIT; - while (d_mountpoint(path->dentry)) { struct vfsmount *mounted; - if (path->dentry->d_flags & abort_mask) - return true; + if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && + !reverse_transit && + path->dentry->d_op->d_manage(path->dentry, false, true) < 0) + return false; mounted = __lookup_mnt(path->mnt, path->dentry, 1); if (!mounted) break; @@ -1132,7 +1132,8 @@ int follow_down(struct path *path, bool mounting_here) if (managed & DCACHE_MANAGE_TRANSIT) { BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); - ret = path->dentry->d_op->d_manage(path->dentry, mounting_here); + ret = path->dentry->d_op->d_manage( + path->dentry, mounting_here, false); if (ret < 0) return ret == -EISDIR ? 0 : ret; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1a87760d6532..f958c19e3ca5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -168,7 +168,7 @@ struct dentry_operations { void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); - int (*d_manage)(struct dentry *, bool); + int (*d_manage)(struct dentry *, bool, bool); } ____cacheline_aligned; /* -- cgit v1.2.3 From ea5b778a8b98c85a87d66bf844904f9c3802b869 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2011 19:10:03 +0000 Subject: Unexport do_add_mount() and add in follow_automount(), not ->d_automount() Unexport do_add_mount() and make ->d_automount() return the vfsmount to be added rather than calling do_add_mount() itself. follow_automount() will then do the addition. This slightly complicates things as ->d_automount() normally wants to add the new vfsmount to an expiration list and start an expiration timer. The problem with that is that the vfsmount will be deleted if it has a refcount of 1 and the timer will not repeat if the expiration list is empty. To this end, we require the vfsmount to be returned from d_automount() with a refcount of (at least) 2. One of these refs will be dropped unconditionally. In addition, follow_automount() must get a 3rd ref around the call to do_add_mount() lest it eat a ref and return an error, leaving the mount we have open to being expired as we would otherwise have only 1 ref on it. d_automount() should also add the the vfsmount to the expiration list (by calling mnt_set_expiry()) and start the expiration timer before returning, if this mechanism is to be used. The vfsmount will be unlinked from the expiration list by follow_automount() if do_add_mount() fails. This patch also fixes the call to do_add_mount() for AFS to propagate the mount flags from the parent vfsmount. Signed-off-by: David Howells Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 23 ++++++++++++--------- fs/afs/mntpt.c | 25 ++++++----------------- fs/cifs/cifs_dfs_ref.c | 26 ++++++------------------ fs/internal.h | 2 ++ fs/namei.c | 42 ++++++++++++++++++++++++++++++++------- fs/namespace.c | 41 ++++++++++++++++++++++++++++++-------- fs/nfs/namespace.c | 24 ++++------------------ include/linux/mount.h | 7 +------ 8 files changed, 101 insertions(+), 89 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 3c4b2f1b64d0..94cf97b901d7 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -933,15 +933,20 @@ struct dentry_operations { dynamic_dname() helper function is provided to take care of this. d_automount: called when an automount dentry is to be traversed (optional). - This should create a new VFS mount record, mount it on the directory - and return the record to the caller. The caller is supplied with a - path parameter giving the automount directory to describe the automount - target and the parent VFS mount record to provide inheritable mount - parameters. NULL should be returned if someone else managed to make - the automount first. If the automount failed, then an error code - should be returned. If -EISDIR is returned, then the directory will - be treated as an ordinary directory and returned to pathwalk to - continue walking. + This should create a new VFS mount record and return the record to the + caller. The caller is supplied with a path parameter giving the + automount directory to describe the automount target and the parent + VFS mount record to provide inheritable mount parameters. NULL should + be returned if someone else managed to make the automount first. If + the vfsmount creation failed, then an error code should be returned. + If -EISDIR is returned, then the directory will be treated as an + ordinary directory and returned to pathwalk to continue walking. + + If a vfsmount is returned, the caller will attempt to mount it on the + mountpoint and will remove the vfsmount from its expiration list in + the case of failure. The vfsmount should be returned with 2 refs on + it to prevent automatic expiration - the caller will clean up the + additional ref. This function is only used if DCACHE_NEED_AUTOMOUNT is set on the dentry. This is set by __d_instantiate() if S_AUTOMOUNT is set on the diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index d23b2e344a78..aa59184151d0 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -241,7 +241,6 @@ error_no_devname: struct vfsmount *afs_d_automount(struct path *path) { struct vfsmount *newmnt; - int err; _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name); @@ -249,24 +248,12 @@ struct vfsmount *afs_d_automount(struct path *path) if (IS_ERR(newmnt)) return newmnt; - mntget(newmnt); - err = do_add_mount(newmnt, path, MNT_SHRINKABLE, &afs_vfsmounts); - switch (err) { - case 0: - queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, - afs_mntpt_expiry_timeout * HZ); - _leave(" = %p {%s}", newmnt, newmnt->mnt_devname); - return newmnt; - case -EBUSY: - /* someone else made a mount here whilst we were busy */ - mntput(newmnt); - _leave(" = NULL [EBUSY]"); - return NULL; - default: - mntput(newmnt); - _leave(" = %d", err); - return ERR_PTR(err); - } + mntget(newmnt); /* prevent immediate expiration */ + mnt_set_expiry(newmnt, &afs_vfsmounts); + queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, + afs_mntpt_expiry_timeout * HZ); + _leave(" = %p {%s}", newmnt, newmnt->mnt_devname); + return newmnt; } /* diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 0fc163808de3..7ed36536e754 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -351,7 +351,6 @@ free_xid: struct vfsmount *cifs_dfs_d_automount(struct path *path) { struct vfsmount *newmnt; - int err; cFYI(1, "in %s", __func__); @@ -361,25 +360,12 @@ struct vfsmount *cifs_dfs_d_automount(struct path *path) return newmnt; } - mntget(newmnt); - err = do_add_mount(newmnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE, - &cifs_dfs_automount_list); - switch (err) { - case 0: - schedule_delayed_work(&cifs_dfs_automount_task, - cifs_dfs_mountpoint_expiry_timeout); - cFYI(1, "leaving %s [ok]" , __func__); - return newmnt; - case -EBUSY: - /* someone else made a mount here whilst we were busy */ - mntput(newmnt); - cFYI(1, "leaving %s [EBUSY]" , __func__); - return NULL; - default: - mntput(newmnt); - cFYI(1, "leaving %s [error %d]" , __func__, err); - return ERR_PTR(err); - } + mntget(newmnt); /* prevent immediate expiration */ + mnt_set_expiry(newmnt, &cifs_dfs_automount_list); + schedule_delayed_work(&cifs_dfs_automount_task, + cifs_dfs_mountpoint_expiry_timeout); + cFYI(1, "leaving %s [ok]" , __func__); + return newmnt; } const struct inode_operations cifs_dfs_referral_inode_operations = { diff --git a/fs/internal.h b/fs/internal.h index 9687c2ee2735..4931060fd089 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -70,6 +70,8 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, extern void release_mounts(struct list_head *); extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); +extern int do_add_mount(struct vfsmount *, struct path *, int); +extern void mnt_clear_expiry(struct vfsmount *); extern void __init mnt_init(void); diff --git a/fs/namei.c b/fs/namei.c index 5c89695ae1e4..c2e37727e3ab 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -900,6 +900,7 @@ static int follow_automount(struct path *path, unsigned flags, bool *need_mntput) { struct vfsmount *mnt; + int err; if (!path->dentry->d_op || !path->dentry->d_op->d_automount) return -EREMOTE; @@ -942,22 +943,49 @@ static int follow_automount(struct path *path, unsigned flags, return -EREMOTE; return PTR_ERR(mnt); } + if (!mnt) /* mount collision */ return 0; + /* The new mount record should have at least 2 refs to prevent it being + * expired before we get a chance to add it + */ + BUG_ON(mnt_get_count(mnt) < 2); + if (mnt->mnt_sb == path->mnt->mnt_sb && mnt->mnt_root == path->dentry) { + mnt_clear_expiry(mnt); + mntput(mnt); mntput(mnt); return -ELOOP; } - dput(path->dentry); - if (*need_mntput) - mntput(path->mnt); - path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); - *need_mntput = true; - return 0; + /* We need to add the mountpoint to the parent. The filesystem may + * have placed it on an expiry list, and so we need to make sure it + * won't be expired under us if do_add_mount() fails (do_add_mount() + * will eat a reference unconditionally). + */ + mntget(mnt); + err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); + switch (err) { + case -EBUSY: + /* Someone else made a mount here whilst we were busy */ + err = 0; + default: + mnt_clear_expiry(mnt); + mntput(mnt); + mntput(mnt); + return err; + case 0: + mntput(mnt); + dput(path->dentry); + if (*need_mntput) + mntput(path->mnt); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + *need_mntput = true; + return 0; + } } /* diff --git a/fs/namespace.c b/fs/namespace.c index d94ccd6ddafd..bfcb701f9490 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1925,15 +1925,14 @@ static int do_new_mount(struct path *path, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, path, mnt_flags, NULL); + return do_add_mount(mnt, path, mnt_flags); } /* * add a mount into a namespace's mount tree - * - provide the option of adding the new mount to an expiration list + * - this unconditionally eats one of the caller's references to newmnt. */ -int do_add_mount(struct vfsmount *newmnt, struct path *path, - int mnt_flags, struct list_head *fslist) +int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) { int err; @@ -1963,9 +1962,6 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, if ((err = graft_tree(newmnt, path))) goto unlock; - if (fslist) /* add to the specified expiration list */ - list_add_tail(&newmnt->mnt_expire, fslist); - up_write(&namespace_sem); return 0; @@ -1975,7 +1971,36 @@ unlock: return err; } -EXPORT_SYMBOL_GPL(do_add_mount); +/** + * mnt_set_expiry - Put a mount on an expiration list + * @mnt: The mount to list. + * @expiry_list: The list to add the mount to. + */ +void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) +{ + down_write(&namespace_sem); + br_write_lock(vfsmount_lock); + + list_add_tail(&mnt->mnt_expire, expiry_list); + + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); +} +EXPORT_SYMBOL(mnt_set_expiry); + +/* + * Remove a vfsmount from any expiration list it may be on + */ +void mnt_clear_expiry(struct vfsmount *mnt) +{ + if (!list_empty(&mnt->mnt_expire)) { + down_write(&namespace_sem); + br_write_lock(vfsmount_lock); + list_del_init(&mnt->mnt_expire); + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); + } +} /* * process a list of expirable mountpoints with the intent of discarding any diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index f3fbb1bf3f18..f32b8603dca8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -149,26 +149,10 @@ struct vfsmount *nfs_d_automount(struct path *path) if (IS_ERR(mnt)) goto out; - mntget(mnt); - err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE, - &nfs_automount_list); - switch (err) { - case 0: - dprintk("%s: done, success\n", __func__); - schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); - break; - case -EBUSY: - /* someone else made a mount here whilst we were busy */ - mntput(mnt); - dprintk("%s: done, collision\n", __func__); - mnt = NULL; - break; - default: - mntput(mnt); - dprintk("%s: done, error %d\n", __func__, err); - mnt = ERR_PTR(err); - break; - } + dprintk("%s: done, success\n", __func__); + mntget(mnt); /* prevent immediate expiration */ + mnt_set_expiry(mnt, &nfs_automount_list); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: nfs_free_fattr(fattr); diff --git a/include/linux/mount.h b/include/linux/mount.h index 1869ea24a739..af4765ea8fde 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -110,12 +110,7 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); -struct nameidata; - -struct path; -extern int do_add_mount(struct vfsmount *newmnt, struct path *path, - int mnt_flags, struct list_head *fslist); - +extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); extern dev_t name_to_dev_t(char *name); -- cgit v1.2.3 From b650c858c26bd9ba29ebc82d30f09355845a294a Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 15 Jan 2011 10:51:57 +0000 Subject: autofs4: Merge the remaining dentry ops tables Merge the remaining autofs4 dentry ops tables. It doesn't matter if d_automount and d_manage are present on something that's not mountable or holdable as these ops are only used if the appropriate flags are set in dentry->d_flags. [AV] switch to ->s_d_op, since now _everything_ on autofs4 is using the same dentry_operations. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 1 - fs/autofs4/inode.c | 6 ++---- fs/autofs4/root.c | 17 ++--------------- 3 files changed, 4 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c28085cb82a5..1f016bfb42d5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -207,7 +207,6 @@ extern const struct inode_operations autofs4_dir_inode_operations; extern const struct file_operations autofs4_dir_operations; extern const struct file_operations autofs4_root_operations; extern const struct dentry_operations autofs4_dentry_operations; -extern const struct dentry_operations autofs4_mount_dentry_operations; /* VFS automount flags management functions */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3ecd2e2bcdbd..9e1a9dad23e1 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -276,6 +276,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) s->s_blocksize_bits = 10; s->s_magic = AUTOFS_SUPER_MAGIC; s->s_op = &autofs4_sops; + s->s_d_op = &autofs4_dentry_operations; s->s_time_gran = 1; /* @@ -293,7 +294,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - d_set_d_op(root, &autofs4_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ @@ -304,10 +304,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_dput; } - if (autofs_type_trigger(sbi->type)) { - d_set_d_op(root, &autofs4_mount_dentry_operations); + if (autofs_type_trigger(sbi->type)) __managed_dentry_set_managed(root); - } root_inode->i_fop = &autofs4_root_operations; root_inode->i_op = &autofs4_dir_inode_operations; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index dbd95512808c..1dba035fc376 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -66,13 +66,7 @@ const struct inode_operations autofs4_dir_inode_operations = { .rmdir = autofs4_dir_rmdir, }; -/* For dentries that don't initiate mounting */ const struct dentry_operations autofs4_dentry_operations = { - .d_release = autofs4_dentry_release, -}; - -/* For dentries that do initiate mounting */ -const struct dentry_operations autofs4_mount_dentry_operations = { .d_automount = autofs4_d_automount, .d_manage = autofs4_d_manage, .d_release = autofs4_dentry_release, @@ -500,8 +494,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s if (active) { return active; } else { - d_set_d_op(dentry, &autofs4_dentry_operations); - /* * A dentry that is not within the root can never trigger a * mount operation, unless the directory already exists, so we @@ -512,10 +504,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s return ERR_PTR(-ENOENT); /* Mark entries in the root as mount triggers */ - if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) { - d_set_d_op(dentry, &autofs4_mount_dentry_operations); + if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) __managed_dentry_set_managed(dentry); - } ino = autofs4_init_ino(NULL, sbi, 0555); if (!ino) @@ -572,8 +562,6 @@ static int autofs4_dir_symlink(struct inode *dir, } d_add(dentry, inode); - d_set_d_op(dentry, &autofs4_dentry_operations); - dentry->d_fsdata = ino; ino->dentry = dget(dentry); atomic_inc(&ino->count); @@ -848,8 +836,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) int is_autofs4_dentry(struct dentry *dentry) { return dentry && dentry->d_inode && - (dentry->d_op == &autofs4_mount_dentry_operations || - dentry->d_op == &autofs4_dentry_operations) && + dentry->d_op == &autofs4_dentry_operations && dentry->d_fsdata != NULL; } -- cgit v1.2.3 From f580eb0931fbcb6dc3916f094f471671facd1daa Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 12 Jan 2011 09:30:42 +0000 Subject: fs/btrfs: Fix build of ctree CC [M] fs/btrfs/ctree.o In file included from fs/btrfs/ctree.c:21:0: fs/btrfs/ctree.h:1003:17: error: field <91>super_kobj<92> has incomplete type fs/btrfs/ctree.h:1074:17: error: field <91>root_kobj<92> has incomplete type make[2]: *** [fs/btrfs/ctree.o] Error 1 make[1]: *** [fs/btrfs] Error 2 make: *** [fs] Error 2 We need to include kobject.h here. Reported-by: Jeff Garzik Fix-suggested-by: Li Zefan Signed-off-by: Stefan Schmidt Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4acd4c611efa..0cb322cc4fc0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "extent_io.h" #include "extent_map.h" -- cgit v1.2.3 From 299a08b1c34f9397797946a0fa215c5fd145c5cf Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 5 Jan 2011 10:07:15 +0000 Subject: btrfs: fix wrong data space statistics Josef has implemented mixed data/metadata chunks, we must add those chunks' space just like data chunks. Signed-off-by: Miao Xie Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a1a76b2a61f9..caa5bcc62f16 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -790,11 +790,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { - if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | - BTRFS_BLOCK_GROUP_SYSTEM)) - total_used_data += found->disk_total; - else + if (found->flags & BTRFS_BLOCK_GROUP_DATA) total_used_data += found->disk_used; + else + total_used_data += found->disk_total; total_used += found->disk_used; } rcu_read_unlock(); -- cgit v1.2.3 From d52a5b5f1fa40804f681cf9868d4a8f90661bdf3 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 5 Jan 2011 10:07:18 +0000 Subject: btrfs: try to reclaim some space when chunk allocation fails We cannot write data into files when when there is tiny space in the filesystem. Reproduce steps: # mkfs.btrfs /dev/sda1 # mount /dev/sda1 /mnt # dd if=/dev/zero of=/mnt/tmpfile0 bs=4K count=1 # dd if=/dev/zero of=/mnt/tmpfile1 bs=4K count=99999999999999 (fill the filesystem) # umount /mnt # mount /dev/sda1 /mnt # rm -f /mnt/tmpfile0 # dd if=/dev/zero of=/mnt/tmpfile0 bs=4K count=1 (failed with nospec) But if we do the last step again, we can write data successfully. The reason of the problem is that btrfs didn't try to commit the current transaction and reclaim some space when chunk allocation failed. This patch fixes it by committing the current transaction to reclaim some space when chunk allocation fails. Signed-off-by: Miao Xie Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b180efdc8b68..3c71d95111fe 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3162,8 +3162,12 @@ alloc: bytes + 2 * 1024 * 1024, alloc_target, 0); btrfs_end_transaction(trans, root); - if (ret < 0) - return ret; + if (ret < 0) { + if (ret != -ENOSPC) + return ret; + else + goto commit_trans; + } if (!data_sinfo) { btrfs_set_inode_space_info(root, inode); @@ -3174,6 +3178,7 @@ alloc: spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ +commit_trans: if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; trans = btrfs_join_transaction(root, 1); -- cgit v1.2.3 From 1974a3b42d8cf7a9c74f1e0310c593023617037a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 5 Jan 2011 10:07:24 +0000 Subject: btrfs: fix wrong calculation of stripe size There are two tiny problem: - One is When we check the chunk size is greater than the max chunk size or not, we should take mirrors into account, but the original code didn't. - The other is btrfs shouldn't use the size of the residual free space as the length of of a dup chunk when doing chunk allocation. It is because the device space that a dup chunk needs is twice as large as the chunk size, if we use the size of the residual free space as the length of a dup chunk, we can not get enough free space. Fix it. Signed-off-by: Miao Xie Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 177b73179590..c50a85e0d08f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2177,6 +2177,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int num_stripes = 1; int min_stripes = 1; int sub_stripes = 0; + int ncopies = 1; int looped = 0; int ret; int index; @@ -2197,12 +2198,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & (BTRFS_BLOCK_GROUP_DUP)) { num_stripes = 2; min_stripes = 2; + ncopies = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { if (fs_devices->rw_devices < 2) return -ENOSPC; num_stripes = 2; min_stripes = 2; + ncopies = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { num_stripes = fs_devices->rw_devices; @@ -2210,6 +2213,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return -ENOSPC; num_stripes &= ~(u32)1; sub_stripes = 2; + ncopies = 2; min_stripes = 4; } @@ -2239,8 +2243,8 @@ again: map->num_stripes = num_stripes; } - if (calc_size * num_stripes > max_chunk_size) { - calc_size = max_chunk_size; + if (calc_size * num_stripes > max_chunk_size * ncopies) { + calc_size = max_chunk_size * ncopies; do_div(calc_size, num_stripes); do_div(calc_size, stripe_len); calc_size *= stripe_len; @@ -2321,6 +2325,8 @@ again: if (!looped && max_avail > 0) { looped = 1; calc_size = max_avail; + if (type & BTRFS_BLOCK_GROUP_DUP) + do_div(calc_size, 2); goto again; } kfree(map); -- cgit v1.2.3 From 7bfc837df935d850fe996dfe92ef48975cd4170a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 5 Jan 2011 10:07:26 +0000 Subject: btrfs: restructure find_free_dev_extent() - make it return the start position and length of the max free space when it can not find a suitable free space. - make it more readability Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/volumes.c | 155 ++++++++++++++++++++++++++++--------------------- 2 files changed, 91 insertions(+), 68 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3c71d95111fe..1e1c9a177626 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8099,7 +8099,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) mutex_lock(&root->fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { u64 min_free = btrfs_block_group_used(&block_group->item); - u64 dev_offset, max_avail; + u64 dev_offset; /* * check to make sure we can actually find a chunk with enough @@ -8107,7 +8107,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) */ if (device->total_bytes > device->bytes_used + min_free) { ret = find_free_dev_extent(NULL, device, min_free, - &dev_offset, &max_avail); + &dev_offset, NULL); if (!ret) break; ret = -1; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c50a85e0d08f..4838bd395e49 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -729,58 +729,82 @@ error: } /* + * find_free_dev_extent - find free space in the specified device + * @trans: transaction handler + * @device: the device which we search the free space in + * @num_bytes: the size of the free space that we need + * @start: store the start of the free space. + * @len: the size of the free space. that we find, or the size of the max + * free space if we don't find suitable free space + * * this uses a pretty simple search, the expectation is that it is * called very infrequently and that a given device has a small number * of extents + * + * @start is used to store the start of the free space if we find. But if we + * don't find suitable free space, it will be used to store the start position + * of the max free space. + * + * @len is used to store the size of the free space that we find. + * But if we don't find suitable free space, it is used to store the size of + * the max free space. */ int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, - u64 *start, u64 *max_avail) + u64 *start, u64 *len) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; - struct btrfs_dev_extent *dev_extent = NULL; + struct btrfs_dev_extent *dev_extent; struct btrfs_path *path; - u64 hole_size = 0; - u64 last_byte = 0; - u64 search_start = 0; + u64 hole_size; + u64 max_hole_start; + u64 max_hole_size; + u64 extent_end; + u64 search_start; u64 search_end = device->total_bytes; int ret; - int slot = 0; - int start_found; + int slot; struct extent_buffer *l; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = 2; - start_found = 0; - /* FIXME use last free of some kind */ /* we don't want to overwrite the superblock on the drive, * so we make sure to start at an offset of at least 1MB */ - search_start = max((u64)1024 * 1024, search_start); + search_start = 1024 * 1024; - if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) + if (root->fs_info->alloc_start + num_bytes <= search_end) search_start = max(root->fs_info->alloc_start, search_start); + max_hole_start = search_start; + max_hole_size = 0; + + if (search_start >= search_end) { + ret = -ENOSPC; + goto error; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto error; + } + path->reada = 2; + key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); if (ret < 0) - goto error; + goto out; if (ret > 0) { ret = btrfs_previous_item(root, path, key.objectid, key.type); if (ret < 0) - goto error; - if (ret > 0) - start_found = 1; + goto out; } - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); + while (1) { l = path->nodes[0]; slot = path->slots[0]; @@ -789,24 +813,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, if (ret == 0) continue; if (ret < 0) - goto error; -no_more_items: - if (!start_found) { - if (search_start >= search_end) { - ret = -ENOSPC; - goto error; - } - *start = search_start; - start_found = 1; - goto check_pending; - } - *start = last_byte > search_start ? - last_byte : search_start; - if (search_end <= *start) { - ret = -ENOSPC; - goto error; - } - goto check_pending; + goto out; + + break; } btrfs_item_key_to_cpu(l, &key, slot); @@ -814,48 +823,62 @@ no_more_items: goto next; if (key.objectid > device->devid) - goto no_more_items; + break; - if (key.offset >= search_start && key.offset > last_byte && - start_found) { - if (last_byte < search_start) - last_byte = search_start; - hole_size = key.offset - last_byte; + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + goto next; - if (hole_size > *max_avail) - *max_avail = hole_size; + if (key.offset > search_start) { + hole_size = key.offset - search_start; - if (key.offset > last_byte && - hole_size >= num_bytes) { - *start = last_byte; - goto check_pending; + if (hole_size > max_hole_size) { + max_hole_start = search_start; + max_hole_size = hole_size; + } + + /* + * If this free space is greater than which we need, + * it must be the max free space that we have found + * until now, so max_hole_start must point to the start + * of this free space and the length of this free space + * is stored in max_hole_size. Thus, we return + * max_hole_start and max_hole_size and go back to the + * caller. + */ + if (hole_size >= num_bytes) { + ret = 0; + goto out; } } - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) - goto next; - start_found = 1; dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); - last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); + extent_end = key.offset + btrfs_dev_extent_length(l, + dev_extent); + if (extent_end > search_start) + search_start = extent_end; next: path->slots[0]++; cond_resched(); } -check_pending: - /* we have to make sure we didn't find an extent that has already - * been allocated by the map tree or the original allocation - */ - BUG_ON(*start < search_start); - if (*start + num_bytes > search_end) { - ret = -ENOSPC; - goto error; + hole_size = search_end- search_start; + if (hole_size > max_hole_size) { + max_hole_start = search_start; + max_hole_size = hole_size; } - /* check for pending inserts here */ - ret = 0; -error: + /* See above. */ + if (hole_size < num_bytes) + ret = -ENOSPC; + else + ret = 0; + +out: btrfs_free_path(path); +error: + *start = max_hole_start; + if (len && max_hole_size > *len) + *len = max_hole_size; return ret; } -- cgit v1.2.3 From b2117a39fa96cf4814e7cab8c11494149ba6f29d Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 5 Jan 2011 10:07:28 +0000 Subject: btrfs: make the chunk allocator utilize the devices better With this patch, we change the handling method when we can not get enough free extents with default size. Implementation: 1. Look up the suitable free extent on each device and keep the search result. If not find a suitable free extent, keep the max free extent 2. If we get enough suitable free extents with default size, chunk allocation succeeds. 3. If we can not get enough free extents, but the number of the extent with default size is >= min_stripes, we just change the mapping information (reduce the number of stripes in the extent map), and chunk allocation succeeds. 4. If the number of the extent with default size is < min_stripes, sort the devices by its max free extent's size descending 5. Use the size of the max free extent on the (num_stripes - 1)th device as the stripe size to allocate the device space By this way, the chunk allocator can allocate chunks as large as possible when the devices' space is not enough and make full use of the devices. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 379 ++++++++++++++++++++++++++++++++++++++--------------- fs/btrfs/volumes.h | 24 ++++ 2 files changed, 300 insertions(+), 103 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4838bd395e49..c22784b989b7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -877,7 +877,7 @@ out: btrfs_free_path(path); error: *start = max_hole_start; - if (len && max_hole_size > *len) + if (len) *len = max_hole_size; return ret; } @@ -2176,70 +2176,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, return calc_size * num_stripes; } -static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct map_lookup **map_ret, - u64 *num_bytes, u64 *stripe_size, - u64 start, u64 type) +/* Used to sort the devices by max_avail(descending sort) */ +int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { - struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_device *device = NULL; - struct btrfs_fs_devices *fs_devices = info->fs_devices; - struct list_head *cur; - struct map_lookup *map = NULL; - struct extent_map_tree *em_tree; - struct extent_map *em; - struct list_head private_devs; - int min_stripe_size = 1 * 1024 * 1024; - u64 calc_size = 1024 * 1024 * 1024; - u64 max_chunk_size = calc_size; - u64 min_free; - u64 avail; - u64 max_avail = 0; - u64 dev_offset; - int num_stripes = 1; - int min_stripes = 1; - int sub_stripes = 0; - int ncopies = 1; - int looped = 0; - int ret; - int index; - int stripe_len = 64 * 1024; + if (((struct btrfs_device_info *)dev_info1)->max_avail > + ((struct btrfs_device_info *)dev_info2)->max_avail) + return -1; + else if (((struct btrfs_device_info *)dev_info1)->max_avail < + ((struct btrfs_device_info *)dev_info2)->max_avail) + return 1; + else + return 0; +} - if ((type & BTRFS_BLOCK_GROUP_RAID1) && - (type & BTRFS_BLOCK_GROUP_DUP)) { - WARN_ON(1); - type &= ~BTRFS_BLOCK_GROUP_DUP; - } - if (list_empty(&fs_devices->alloc_list)) - return -ENOSPC; +static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, + int *num_stripes, int *min_stripes, + int *sub_stripes) +{ + *num_stripes = 1; + *min_stripes = 1; + *sub_stripes = 0; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = fs_devices->rw_devices; - min_stripes = 2; + *num_stripes = fs_devices->rw_devices; + *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { - num_stripes = 2; - min_stripes = 2; - ncopies = 2; + *num_stripes = 2; + *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { if (fs_devices->rw_devices < 2) return -ENOSPC; - num_stripes = 2; - min_stripes = 2; - ncopies = 2; + *num_stripes = 2; + *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = fs_devices->rw_devices; - if (num_stripes < 4) + *num_stripes = fs_devices->rw_devices; + if (*num_stripes < 4) return -ENOSPC; - num_stripes &= ~(u32)1; - sub_stripes = 2; - ncopies = 2; - min_stripes = 4; + *num_stripes &= ~(u32)1; + *sub_stripes = 2; + *min_stripes = 4; } + return 0; +} + +static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, + u64 proposed_size, u64 type, + int num_stripes, int small_stripe) +{ + int min_stripe_size = 1 * 1024 * 1024; + u64 calc_size = proposed_size; + u64 max_chunk_size = calc_size; + int ncopies = 1; + + if (type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID10)) + ncopies = 2; + if (type & BTRFS_BLOCK_GROUP_DATA) { max_chunk_size = 10 * calc_size; min_stripe_size = 64 * 1024 * 1024; @@ -2256,51 +2253,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size); -again: - max_avail = 0; - if (!map || map->num_stripes != num_stripes) { - kfree(map); - map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); - if (!map) - return -ENOMEM; - map->num_stripes = num_stripes; - } - if (calc_size * num_stripes > max_chunk_size * ncopies) { calc_size = max_chunk_size * ncopies; do_div(calc_size, num_stripes); - do_div(calc_size, stripe_len); - calc_size *= stripe_len; + do_div(calc_size, BTRFS_STRIPE_LEN); + calc_size *= BTRFS_STRIPE_LEN; } /* we don't want tiny stripes */ - if (!looped) + if (!small_stripe) calc_size = max_t(u64, min_stripe_size, calc_size); /* - * we're about to do_div by the stripe_len so lets make sure + * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure * we end up with something bigger than a stripe */ - calc_size = max_t(u64, calc_size, stripe_len * 4); + calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); + + do_div(calc_size, BTRFS_STRIPE_LEN); + calc_size *= BTRFS_STRIPE_LEN; + + return calc_size; +} + +static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, + int num_stripes) +{ + struct map_lookup *new; + size_t len = map_lookup_size(num_stripes); + + BUG_ON(map->num_stripes < num_stripes); + + if (map->num_stripes == num_stripes) + return map; + + new = kmalloc(len, GFP_NOFS); + if (!new) { + /* just change map->num_stripes */ + map->num_stripes = num_stripes; + return map; + } + + memcpy(new, map, len); + new->num_stripes = num_stripes; + kfree(map); + return new; +} + +/* + * helper to allocate device space from btrfs_device_info, in which we stored + * max free space information of every device. It is used when we can not + * allocate chunks by default size. + * + * By this helper, we can allocate a new chunk as larger as possible. + */ +static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_devices *fs_devices, + struct btrfs_device_info *devices, + int nr_device, u64 type, + struct map_lookup **map_lookup, + int min_stripes, u64 *stripe_size) +{ + int i, index, sort_again = 0; + int min_devices = min_stripes; + u64 max_avail, min_free; + struct map_lookup *map = *map_lookup; + int ret; + + if (nr_device < min_stripes) + return -ENOSPC; + + btrfs_descending_sort_devices(devices, nr_device); + + max_avail = devices[0].max_avail; + if (!max_avail) + return -ENOSPC; + + for (i = 0; i < nr_device; i++) { + /* + * if dev_offset = 0, it means the free space of this device + * is less than what we need, and we didn't search max avail + * extent on this device, so do it now. + */ + if (!devices[i].dev_offset) { + ret = find_free_dev_extent(trans, devices[i].dev, + max_avail, + &devices[i].dev_offset, + &devices[i].max_avail); + if (ret != 0 && ret != -ENOSPC) + return ret; + sort_again = 1; + } + } + + /* we update the max avail free extent of each devices, sort again */ + if (sort_again) + btrfs_descending_sort_devices(devices, nr_device); + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_devices = 1; + + if (!devices[min_devices - 1].max_avail) + return -ENOSPC; + + max_avail = devices[min_devices - 1].max_avail; + if (type & BTRFS_BLOCK_GROUP_DUP) + do_div(max_avail, 2); - do_div(calc_size, stripe_len); - calc_size *= stripe_len; + max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, + min_stripes, 1); + if (type & BTRFS_BLOCK_GROUP_DUP) + min_free = max_avail * 2; + else + min_free = max_avail; + + if (min_free > devices[min_devices - 1].max_avail) + return -ENOSPC; + + map = __shrink_map_lookup_stripes(map, min_stripes); + *stripe_size = max_avail; + + index = 0; + for (i = 0; i < min_stripes; i++) { + map->stripes[i].dev = devices[index].dev; + map->stripes[i].physical = devices[index].dev_offset; + if (type & BTRFS_BLOCK_GROUP_DUP) { + i++; + map->stripes[i].dev = devices[index].dev; + map->stripes[i].physical = devices[index].dev_offset + + max_avail; + } + index++; + } + *map_lookup = map; + + return 0; +} + +static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct map_lookup **map_ret, + u64 *num_bytes, u64 *stripe_size, + u64 start, u64 type) +{ + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_device *device = NULL; + struct btrfs_fs_devices *fs_devices = info->fs_devices; + struct list_head *cur; + struct map_lookup *map; + struct extent_map_tree *em_tree; + struct extent_map *em; + struct btrfs_device_info *devices_info; + struct list_head private_devs; + u64 calc_size = 1024 * 1024 * 1024; + u64 min_free; + u64 avail; + u64 dev_offset; + int num_stripes; + int min_stripes; + int sub_stripes; + int min_devices; /* the min number of devices we need */ + int i; + int ret; + int index; + + if ((type & BTRFS_BLOCK_GROUP_RAID1) && + (type & BTRFS_BLOCK_GROUP_DUP)) { + WARN_ON(1); + type &= ~BTRFS_BLOCK_GROUP_DUP; + } + if (list_empty(&fs_devices->alloc_list)) + return -ENOSPC; + + ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, + &min_stripes, &sub_stripes); + if (ret) + return ret; + + devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, + GFP_NOFS); + if (!devices_info) + return -ENOMEM; + + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + ret = -ENOMEM; + goto error; + } + map->num_stripes = num_stripes; cur = fs_devices->alloc_list.next; index = 0; + i = 0; - if (type & BTRFS_BLOCK_GROUP_DUP) + calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, + num_stripes, 0); + + if (type & BTRFS_BLOCK_GROUP_DUP) { min_free = calc_size * 2; - else + min_devices = 1; + } else { min_free = calc_size; - - /* - * we add 1MB because we never use the first 1MB of the device, unless - * we've looped, then we are likely allocating the maximum amount of - * space left already - */ - if (!looped) - min_free += 1024 * 1024; + min_devices = min_stripes; + } INIT_LIST_HEAD(&private_devs); while (index < num_stripes) { @@ -2313,27 +2468,39 @@ again: cur = cur->next; if (device->in_fs_metadata && avail >= min_free) { - ret = find_free_dev_extent(trans, device, - min_free, &dev_offset, - &max_avail); + ret = find_free_dev_extent(trans, device, min_free, + &devices_info[i].dev_offset, + &devices_info[i].max_avail); if (ret == 0) { list_move_tail(&device->dev_alloc_list, &private_devs); map->stripes[index].dev = device; - map->stripes[index].physical = dev_offset; + map->stripes[index].physical = + devices_info[i].dev_offset; index++; if (type & BTRFS_BLOCK_GROUP_DUP) { map->stripes[index].dev = device; map->stripes[index].physical = - dev_offset + calc_size; + devices_info[i].dev_offset + + calc_size; index++; } - } - } else if (device->in_fs_metadata && avail > max_avail) - max_avail = avail; + } else if (ret != -ENOSPC) + goto error; + + devices_info[i].dev = device; + i++; + } else if (device->in_fs_metadata && + avail >= BTRFS_STRIPE_LEN) { + devices_info[i].dev = device; + devices_info[i].max_avail = avail; + i++; + } + if (cur == &fs_devices->alloc_list) break; } + list_splice(&private_devs, &fs_devices->alloc_list); if (index < num_stripes) { if (index >= min_stripes) { @@ -2342,36 +2509,36 @@ again: num_stripes /= sub_stripes; num_stripes *= sub_stripes; } - looped = 1; - goto again; - } - if (!looped && max_avail > 0) { - looped = 1; - calc_size = max_avail; - if (type & BTRFS_BLOCK_GROUP_DUP) - do_div(calc_size, 2); - goto again; + + map = __shrink_map_lookup_stripes(map, num_stripes); + } else if (i >= min_devices) { + ret = __btrfs_alloc_tiny_space(trans, fs_devices, + devices_info, i, type, + &map, min_stripes, + &calc_size); + if (ret) + goto error; + } else { + ret = -ENOSPC; + goto error; } - kfree(map); - return -ENOSPC; } map->sector_size = extent_root->sectorsize; - map->stripe_len = stripe_len; - map->io_align = stripe_len; - map->io_width = stripe_len; + map->stripe_len = BTRFS_STRIPE_LEN; + map->io_align = BTRFS_STRIPE_LEN; + map->io_width = BTRFS_STRIPE_LEN; map->type = type; - map->num_stripes = num_stripes; map->sub_stripes = sub_stripes; *map_ret = map; *stripe_size = calc_size; *num_bytes = chunk_bytes_by_type(type, calc_size, - num_stripes, sub_stripes); + map->num_stripes, sub_stripes); em = alloc_extent_map(GFP_NOFS); if (!em) { - kfree(map); - return -ENOMEM; + ret = -ENOMEM; + goto error; } em->bdev = (struct block_device *)map; em->start = start; @@ -2404,7 +2571,13 @@ again: index++; } + kfree(devices_info); return 0; + +error: + kfree(map); + kfree(devices_info); + return ret; } static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a668c0116982..a5cfedf393f9 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -20,8 +20,11 @@ #define __BTRFS_VOLUMES_ #include +#include #include "async-thread.h" +#define BTRFS_STRIPE_LEN (64 * 1024) + struct buffer_head; struct btrfs_pending_bios { struct bio *head; @@ -137,6 +140,27 @@ struct btrfs_multi_bio { struct btrfs_bio_stripe stripes[]; }; +struct btrfs_device_info { + struct btrfs_device *dev; + u64 dev_offset; + u64 max_avail; +}; + +/* Used to sort the devices by max_avail(descending sort) */ +int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); + +/* + * sort the devices by max_avail, in which max free extent size of each device + * is stored.(Descending Sort) + */ +static inline void btrfs_descending_sort_devices( + struct btrfs_device_info *devices, + size_t nr_devices) +{ + sort(devices, nr_devices, sizeof(struct btrfs_device_info), + btrfs_cmp_device_free_bytes, NULL); +} + #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ (sizeof(struct btrfs_bio_stripe) * (n))) -- cgit v1.2.3 From 6d07bcec969af335d4e35b3921131b7929bd634e Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 5 Jan 2011 10:07:31 +0000 Subject: btrfs: fix wrong free space information of btrfs When we store data by raid profile in btrfs with two or more different size disks, df command shows there is some free space in the filesystem, but the user can not write any data in fact, df command shows the wrong free space information of btrfs. # mkfs.btrfs -d raid1 /dev/sda9 /dev/sda10 # btrfs-show Label: none uuid: a95cd49e-6e33-45b8-8741-a36153ce4b64 Total devices 2 FS bytes used 28.00KB devid 1 size 5.01GB used 2.03GB path /dev/sda9 devid 2 size 10.00GB used 2.01GB path /dev/sda10 # btrfs device scan /dev/sda9 /dev/sda10 # mount /dev/sda9 /mnt # dd if=/dev/zero of=tmpfile0 bs=4K count=9999999999 (fill the filesystem) # sync # df -TH Filesystem Type Size Used Avail Use% Mounted on /dev/sda9 btrfs 17G 8.6G 5.4G 62% /mnt # btrfs-show Label: none uuid: a95cd49e-6e33-45b8-8741-a36153ce4b64 Total devices 2 FS bytes used 3.99GB devid 1 size 5.01GB used 5.01GB path /dev/sda9 devid 2 size 10.00GB used 4.99GB path /dev/sda10 It is because btrfs cannot allocate chunks when one of the pairing disks has no space, the free space on the other disks can not be used for ever, and should be subtracted from the total space, but btrfs doesn't subtract this space from the total. It is strange to the user. This patch fixes it by calcing the free space that can be used to allocate chunks. Implementation: 1. get all the devices free space, and align them by stripe length. 2. sort the devices by the free space. 3. check the free space of the devices, 3.1. if it is not zero, and then check the number of the devices that has more free space than this device, if the number of the devices is beyond the min stripe number, the free space can be used, and add into total free space. if the number of the devices is below the min stripe number, we can not use the free space, the check ends. 3.2. if the free space is zero, check the next devices, goto 3.1 This implementation is just likely fake chunk allocation. After appling this patch, df can show correct space information: # df -TH Filesystem Type Size Used Avail Use% Mounted on /dev/sda9 btrfs 17G 8.6G 0 100% /mnt Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 58 +++++++++++++++++++- fs/btrfs/super.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/volumes.c | 84 ++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 3 + 5 files changed, 286 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0cb322cc4fc0..0995f4f68d7a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2158,6 +2158,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_data_free_space(struct inode *inode, u64 bytes); @@ -2201,6 +2202,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); +u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1e1c9a177626..04bfc3a2bd9f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3090,7 +3090,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) return btrfs_reduce_alloc_profile(root, flags); } -static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) +u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) { u64 flags; @@ -8019,6 +8019,62 @@ out: return ret; } +/* + * helper to account the unused space of all the readonly block group in the + * list. takes mirrors into account. + */ +static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) +{ + struct btrfs_block_group_cache *block_group; + u64 free_bytes = 0; + int factor; + + list_for_each_entry(block_group, groups_list, list) { + spin_lock(&block_group->lock); + + if (!block_group->ro) { + spin_unlock(&block_group->lock); + continue; + } + + if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) + factor = 2; + else + factor = 1; + + free_bytes += (block_group->key.offset - + btrfs_block_group_used(&block_group->item)) * + factor; + + spin_unlock(&block_group->lock); + } + + return free_bytes; +} + +/* + * helper to account the unused space of all the readonly block group in the + * space_info. takes mirrors into account. + */ +u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) +{ + int i; + u64 free_bytes = 0; + + spin_lock(&sinfo->lock); + + for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) + if (!list_empty(&sinfo->block_groups[i])) + free_bytes += __btrfs_get_ro_block_group_free_space( + &sinfo->block_groups[i]); + + spin_unlock(&sinfo->lock); + + return free_bytes; +} + int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index caa5bcc62f16..2963376e77f4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -777,6 +777,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) return 0; } +/* + * The helper to calc the free space on the devices that can be used to store + * file data. + */ +static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_device_info *devices_info; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_device *device; + u64 skip_space; + u64 type; + u64 avail_space; + u64 used_space; + u64 min_stripe_size; + int min_stripes = 1; + int i = 0, nr_devices; + int ret; + + nr_devices = fs_info->fs_devices->rw_devices; + BUG_ON(!nr_devices); + + devices_info = kmalloc(sizeof(*devices_info) * nr_devices, + GFP_NOFS); + if (!devices_info) + return -ENOMEM; + + /* calc min stripe number for data space alloction */ + type = btrfs_get_alloc_profile(root, 1); + if (type & BTRFS_BLOCK_GROUP_RAID0) + min_stripes = 2; + else if (type & BTRFS_BLOCK_GROUP_RAID1) + min_stripes = 2; + else if (type & BTRFS_BLOCK_GROUP_RAID10) + min_stripes = 4; + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_stripe_size = 2 * BTRFS_STRIPE_LEN; + else + min_stripe_size = BTRFS_STRIPE_LEN; + + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { + if (!device->in_fs_metadata) + continue; + + avail_space = device->total_bytes - device->bytes_used; + + /* align with stripe_len */ + do_div(avail_space, BTRFS_STRIPE_LEN); + avail_space *= BTRFS_STRIPE_LEN; + + /* + * In order to avoid overwritting the superblock on the drive, + * btrfs starts at an offset of at least 1MB when doing chunk + * allocation. + */ + skip_space = 1024 * 1024; + + /* user can set the offset in fs_info->alloc_start. */ + if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= + device->total_bytes) + skip_space = max(fs_info->alloc_start, skip_space); + + /* + * btrfs can not use the free space in [0, skip_space - 1], + * we must subtract it from the total. In order to implement + * it, we account the used space in this range first. + */ + ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, + &used_space); + if (ret) { + kfree(devices_info); + return ret; + } + + /* calc the free space in [0, skip_space - 1] */ + skip_space -= used_space; + + /* + * we can use the free space in [0, skip_space - 1], subtract + * it from the total. + */ + if (avail_space && avail_space >= skip_space) + avail_space -= skip_space; + else + avail_space = 0; + + if (avail_space < min_stripe_size) + continue; + + devices_info[i].dev = device; + devices_info[i].max_avail = avail_space; + + i++; + } + + nr_devices = i; + + btrfs_descending_sort_devices(devices_info, nr_devices); + + i = nr_devices - 1; + avail_space = 0; + while (nr_devices >= min_stripes) { + if (devices_info[i].max_avail >= min_stripe_size) { + int j; + u64 alloc_size; + + avail_space += devices_info[i].max_avail * min_stripes; + alloc_size = devices_info[i].max_avail; + for (j = i + 1 - min_stripes; j <= i; j++) + devices_info[j].max_avail -= alloc_size; + } + i--; + nr_devices--; + } + + kfree(devices_info); + *free_bytes = avail_space; + return 0; +} + static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); @@ -784,16 +905,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct list_head *head = &root->fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; - u64 total_used_data = 0; + u64 total_free_data = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)root->fs_info->fsid; + int ret; + /* holding chunk_muext to avoid allocating new chunks */ + mutex_lock(&root->fs_info->chunk_mutex); rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { - if (found->flags & BTRFS_BLOCK_GROUP_DATA) - total_used_data += found->disk_used; - else - total_used_data += found->disk_total; + if (found->flags & BTRFS_BLOCK_GROUP_DATA) { + total_free_data += found->disk_total - found->disk_used; + total_free_data -= + btrfs_account_ro_block_groups_free_space(found); + } + total_used += found->disk_used; } rcu_read_unlock(); @@ -801,9 +927,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; buf->f_bfree = buf->f_blocks - (total_used >> bits); - buf->f_bavail = buf->f_blocks - (total_used_data >> bits); buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; + buf->f_bavail = total_free_data; + ret = btrfs_calc_avail_data_space(root, &total_free_data); + if (ret) { + mutex_unlock(&root->fs_info->chunk_mutex); + return ret; + } + buf->f_bavail += total_free_data; + buf->f_bavail = buf->f_bavail >> bits; + mutex_unlock(&root->fs_info->chunk_mutex); /* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c22784b989b7..0c7f478cf645 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -728,6 +728,90 @@ error: return ret; } +/* helper to account the used device space in the range */ +int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, + u64 end, u64 *length) +{ + struct btrfs_key key; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent; + struct btrfs_path *path; + u64 extent_end; + int ret; + int slot; + struct extent_buffer *l; + + *length = 0; + + if (start >= device->total_bytes) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 2; + + key.objectid = device->devid; + key.offset = start; + key.type = BTRFS_DEV_EXTENT_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = btrfs_previous_item(root, path, key.objectid, key.type); + if (ret < 0) + goto out; + } + + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto out; + + break; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + + if (key.objectid > device->devid) + break; + + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + goto next; + + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + extent_end = key.offset + btrfs_dev_extent_length(l, + dev_extent); + if (key.offset <= start && extent_end > end) { + *length = end - start + 1; + break; + } else if (key.offset <= start && extent_end > start) + *length += extent_end - start; + else if (key.offset > start && extent_end <= end) + *length += extent_end - key.offset; + else if (key.offset > start && key.offset <= end) { + *length += end - key.offset + 1; + break; + } else if (key.offset > end) + break; + +next: + path->slots[0]++; + } + ret = 0; +out: + btrfs_free_path(path); + return ret; +} + /* * find_free_dev_extent - find free space in the specified device * @trans: transaction handler diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a5cfedf393f9..7af6144a7954 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -161,6 +161,9 @@ static inline void btrfs_descending_sort_devices( btrfs_cmp_device_free_bytes, NULL); } +int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, + u64 end, u64 *length); + #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ (sizeof(struct btrfs_bio_stripe) * (n))) -- cgit v1.2.3 From 42838bb265b9cff3de9587fcacc398b5112dc2d9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 6 Jan 2011 21:45:21 +0000 Subject: btrfs: Mem leak in btrfs_get_acl() It seems to me that we leak the memory allocated to 'value' in btrfs_get_acl() if the call to posix_acl_from_xattr() fails. Here's a patch that attempts to correct that problem. Signed-off-by: Jesper Juhl Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 2222d161c7b6..6d1410e392d3 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, value, size); if (size > 0) { acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) + if (IS_ERR(acl)) { + kfree(value); return acl; + } set_cached_acl(inode, type, acl); } kfree(value); -- cgit v1.2.3 From 20b450773d17e325190c158e10bfdb25dc21d2d6 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sat, 8 Jan 2011 10:09:13 +0000 Subject: btrfs: mount failure return value fix I happened to pass swap partition as root partition in cmdline, then kernel panic and tell me about "Cannot open root device". It is not correct, in fact it is a fs type mismatch instead of 'no device'. Eventually I found btrfs mounting failed with -EIO, it should be -EINVAL. The logic in init/do_mounts.c: for (p = fs_names; *p; p += strlen(p)+1) { int err = do_mount_root(name, p, flags, root_mount_data); switch (err) { case 0: goto out; case -EACCES: flags |= MS_RDONLY; goto retry; case -EINVAL: continue; } print "Cannot open root device" panic } SO fs type after btrfs will have no chance to mount Here fix the return value as -EINVAL Signed-off-by: Dave Young Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- fs/btrfs/volumes.c | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f88eb2ce7919..f9efb68fc2e3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1713,8 +1713,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info, BTRFS_ROOT_TREE_OBJECTID); bh = btrfs_read_dev_super(fs_devices->latest_bdev); - if (!bh) + if (!bh) { + err = -EINVAL; goto fail_iput; + } memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); memcpy(&fs_info->super_for_commit, &fs_info->super_copy, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0c7f478cf645..e8be478178aa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -600,8 +600,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); - if (!bh) + if (!bh) { + ret = -EINVAL; goto error_close; + } disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); @@ -702,7 +704,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, goto error_close; bh = btrfs_read_dev_super(bdev); if (!bh) { - ret = -EIO; + ret = -EINVAL; goto error_close; } disk_super = (struct btrfs_super_block *)bh->b_data; @@ -1302,7 +1304,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); if (!bh) { - ret = -EIO; + ret = -EINVAL; goto error_close; } disk_super = (struct btrfs_super_block *)bh->b_data; -- cgit v1.2.3 From ff175d57f057f77d2d3031d674c2af9167a4af02 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 25 Dec 2010 21:22:30 +0000 Subject: btrfs: Don't pass NULL ptr to func that may deref it. Hi, In fs/btrfs/inode.c::fixup_tree_root_location() we have this code: ... if (!path) { err = -ENOMEM; goto out; } ... out: btrfs_free_path(path); return err; btrfs_free_path() passes its argument on to other functions and some of them end up dereferencing the pointer. In the code above that pointer is clearly NULL, so btrfs_free_path() will eventually cause a NULL dereference. There are many ways to cut this cake (fix the bug). The one I chose was to make btrfs_free_path() deal gracefully with NULL pointers. If you disagree, feel free to come up with an alternative patch. Signed-off-by: Jesper Juhl Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ac171599258..99599f1c1554 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, /* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { + if (!p) + return; btrfs_release_path(NULL, p); kmem_cache_free(btrfs_path_cachep, p); } -- cgit v1.2.3 From 91ca338d776e0cefb255bf2979b6448febd880f5 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Wed, 5 Jan 2011 02:32:22 +0000 Subject: btrfs: check NULL or not Should check if functions returns NULL or not. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 ++++++ fs/btrfs/disk-io.c | 8 ++++++++ fs/btrfs/extent_io.c | 2 ++ 3 files changed, 16 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 99599f1c1554..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2516,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); right = read_node_slot(root, upper, slot + 1); + if (right == NULL) + return 1; + btrfs_tree_lock(right); btrfs_set_lock_blocking(right); @@ -2766,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); left = read_node_slot(root, path->nodes[1], slot - 1); + if (left == NULL) + return 1; + btrfs_tree_lock(left); btrfs_set_lock_blocking(left); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f9efb68fc2e3..a0c37b2ee9ed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -353,6 +353,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(len == 0); eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + if (eb == NULL) { + WARN_ON(1); + goto out; + } ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); @@ -427,6 +431,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, WARN_ON(len == 0); eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + if (eb == NULL) { + ret = -EIO; + goto out; + } found_start = btrfs_header_bytenr(eb); if (found_start != start) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f1d198128959..8b8d3d99ae68 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3075,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, #endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); + if (eb == NULL) + return NULL; eb->start = start; eb->len = len; spin_lock_init(&eb->lock); -- cgit v1.2.3 From 5e540f7715b8cd83b8e60beaaa525b125cc122de Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 27 Dec 2010 06:53:10 +0000 Subject: btrfs: Fix memory leak in btrfs_read_fs_root_no_radix() In btrfs_read_fs_root_no_radix(), 'root' is not freed if btrfs_search_slot() returns error. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a0c37b2ee9ed..9b1dd4138072 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1153,6 +1153,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, } btrfs_free_path(path); if (ret) { + kfree(root); if (ret > 0) ret = -ENOENT; return ERR_PTR(ret); -- cgit v1.2.3 From f690efb1aa2a961dd6655529c1797fcac60ad6d9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 12 Jan 2011 21:04:22 +0000 Subject: Btrfs: don't warn if we get ENOSPC in btrfs_block_rsv_check If we run low on space we could get a bunch of warnings out of btrfs_block_rsv_check, but this is mostly just called via the transaction code to see if we need to end the transaction, it expects to see failures, so let's not WARN and freak everybody out for no reason. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 04bfc3a2bd9f..055b837eab19 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3727,11 +3727,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, return 0; } - WARN_ON(1); - printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", - block_rsv->size, block_rsv->reserved, - block_rsv->freed[0], block_rsv->freed[1]); - return -ENOSPC; } -- cgit v1.2.3 From 6f88a4403def422bd8e276ddf6863d6ac71435d2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Dec 2010 14:55:03 +0000 Subject: btrfs: Require CAP_SYS_ADMIN for filesystem rebalance Filesystem rebalancing (BTRFS_IOC_BALANCE) affects the entire filesystem and may run uninterruptibly for a long time. This does not seem to be something that an unprivileged user should be able to do. Reported-by: Aron Xu Signed-off-by: Ben Hutchings Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e8be478178aa..f2d2f4ccc738 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -2024,6 +2025,9 @@ int btrfs_balance(struct btrfs_root *dev_root) if (dev_root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + mutex_lock(&dev_root->fs_info->volume_mutex); dev_root = dev_root->fs_info->dev_root; -- cgit v1.2.3 From 7b8a53fd815deb39542085897743fa0063f9fe06 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Jan 2011 20:08:44 -0500 Subject: fix old umount_tree() breakage Expiry-related code calls umount_tree() several times with the same list to collect vfsmounts to. Which is fine, except that umount_tree() implicitly assumed that the list would be empty on each call - it moves the victims over there and then iterates through the list kicking them out. It's *almost* idempotent, so everything nearly worked. However, mnt->ghosts handling (and thus expirability checks) had been broken - that part was not idempotent... The fix is trivial - use local temporary list, splice it to the the collector list when we are through. Signed-off-by: Al Viro --- fs/namespace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index bfcb701f9490..d7fc05fac753 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1226,15 +1226,16 @@ void release_mounts(struct list_head *head) */ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { + LIST_HEAD(tmp_list); struct vfsmount *p; for (p = mnt; p; p = next_mnt(p, mnt)) - list_move(&p->mnt_hash, kill); + list_move(&p->mnt_hash, &tmp_list); if (propagate) - propagate_umount(kill); + propagate_umount(&tmp_list); - list_for_each_entry(p, kill, mnt_hash) { + list_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); @@ -1246,6 +1247,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) } change_mnt_propagation(p, MS_PRIVATE); } + list_splice(&tmp_list, kill); } static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); -- cgit v1.2.3 From f03c65993b98eeb909a4012ce7833c5857d74755 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Jan 2011 22:30:21 -0500 Subject: sanitize vfsmount refcounting changes Instead of splitting refcount between (per-cpu) mnt_count and (SMP-only) mnt_longrefs, make all references contribute to mnt_count again and keep track of how many are longterm ones. Accounting rules for longterm count: * 1 for each fs_struct.root.mnt * 1 for each fs_struct.pwd.mnt * 1 for having non-NULL ->mnt_ns * decrement to 0 happens only under vfsmount lock exclusive That allows nice common case for mntput() - since we can't drop the final reference until after mnt_longterm has reached 0 due to the rules above, mntput() can grab vfsmount lock shared and check mnt_longterm. If it turns out to be non-zero (which is the common case), we know that this is not the final mntput() and can just blindly decrement percpu mnt_count. Otherwise we grab vfsmount lock exclusive and do usual decrement-and-check of percpu mnt_count. For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm(); namespace.c uses the latter in places where we don't already hold vfsmount lock exclusive and opencodes a few remaining spots where we need to manipulate mnt_longterm. Note that we mostly revert the code outside of fs/namespace.c back to what we used to have; in particular, normal code doesn't need to care about two kinds of references, etc. And we get to keep the optimization Nick's variant had bought us... Signed-off-by: Al Viro --- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 35 ++++++++++----- fs/internal.h | 3 ++ fs/namei.c | 24 ----------- fs/namespace.c | 116 +++++++++++++++++++------------------------------- fs/pipe.c | 2 +- fs/super.c | 2 +- include/linux/mount.h | 4 +- include/linux/path.h | 2 - 10 files changed, 75 insertions(+), 117 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index ee4bb3330bdf..98240575a18d 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput_long(mtd_inode_mnt); + mntput(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index cbe57f3c4d89..c5567cb78432 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -233,7 +233,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput_long(anon_inode_mnt); + mntput(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 68ca487bedb1..78b519c13536 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -4,6 +4,19 @@ #include #include #include +#include "internal.h" + +static inline void path_get_longterm(struct path *path) +{ + path_get(path); + mnt_make_longterm(path->mnt); +} + +static inline void path_put_longterm(struct path *path) +{ + mnt_make_shortterm(path->mnt); + path_put(path); +} /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. @@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get_long(path); + path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put_long(&old_root); + path_put_longterm(&old_root); } /* @@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get_long(path); + path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put_long(&old_pwd); + path_put_longterm(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get_long(new_root); + path_get_longterm(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get_long(new_root); + path_get_longterm(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put_long(old_root); + path_put_longterm(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put_long(&fs->root); - path_put_long(&fs->pwd); + path_put_longterm(&fs->root); + path_put_longterm(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock(&old->lock); fs->root = old->root; - path_get_long(&fs->root); + path_get_longterm(&fs->root); fs->pwd = old->pwd; - path_get_long(&fs->pwd); + path_get_longterm(&fs->pwd); spin_unlock(&old->lock); } return fs; diff --git a/fs/internal.h b/fs/internal.h index 4931060fd089..12ccb86edef7 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern int do_add_mount(struct vfsmount *, struct path *, int); extern void mnt_clear_expiry(struct vfsmount *); +extern void mnt_make_longterm(struct vfsmount *); +extern void mnt_make_shortterm(struct vfsmount *); + extern void __init mnt_init(void); DECLARE_BRLOCK(vfsmount_lock); diff --git a/fs/namei.c b/fs/namei.c index c2e37727e3ab..8f7b41a14882 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -367,18 +367,6 @@ void path_get(struct path *path) } EXPORT_SYMBOL(path_get); -/** - * path_get_long - get a long reference to a path - * @path: path to get the reference to - * - * Given a path increment the reference count to the dentry and the vfsmount. - */ -void path_get_long(struct path *path) -{ - mntget_long(path->mnt); - dget(path->dentry); -} - /** * path_put - put a reference to a path * @path: path to put the reference to @@ -392,18 +380,6 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); -/** - * path_put_long - put a long reference to a path - * @path: path to put the reference to - * - * Given a path decrement the reference count to the dentry and the vfsmount. - */ -void path_put_long(struct path *path) -{ - dput(path->dentry); - mntput_long(path->mnt); -} - /** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop diff --git a/fs/namespace.c b/fs/namespace.c index d7fc05fac753..48809e21f270 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt) unsigned int mnt_get_count(struct vfsmount *mnt) { #ifdef CONFIG_SMP - unsigned int count = atomic_read(&mnt->mnt_longrefs); + unsigned int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) if (!mnt->mnt_pcp) goto out_free_devname; - atomic_set(&mnt->mnt_longrefs, 1); + this_cpu_add(mnt->mnt_pcp->mnt_count, 1); #else mnt->mnt_count = 1; mnt->mnt_writers = 0; @@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) + list_for_each_entry(m, &head, mnt_list) { m->mnt_ns = n; + atomic_inc(&m->mnt_longterm); + } + list_splice(&head, n->list.prev); list_add_tail(&mnt->mnt_hash, mount_hashtable + @@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt) deactivate_super(sb); } -#ifdef CONFIG_SMP -static inline void __mntput(struct vfsmount *mnt, int longrefs) +static void mntput_no_expire(struct vfsmount *mnt) { - if (!longrefs) { put_again: - br_read_lock(vfsmount_lock); - if (likely(atomic_read(&mnt->mnt_longrefs))) { - mnt_dec_count(mnt); - br_read_unlock(vfsmount_lock); - return; - } +#ifdef CONFIG_SMP + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longterm))) { + mnt_dec_count(mnt); br_read_unlock(vfsmount_lock); - } else { - BUG_ON(!atomic_read(&mnt->mnt_longrefs)); - if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) - return; + return; } + br_read_unlock(vfsmount_lock); br_write_lock(vfsmount_lock); - if (!longrefs) - mnt_dec_count(mnt); - else - atomic_dec(&mnt->mnt_longrefs); + mnt_dec_count(mnt); if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); - mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto put_again; - } - br_write_unlock(vfsmount_lock); - mntfree(mnt); -} #else -static inline void __mntput(struct vfsmount *mnt, int longrefs) -{ -put_again: mnt_dec_count(mnt); if (likely(mnt_get_count(mnt))) return; br_write_lock(vfsmount_lock); +#endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; @@ -789,12 +771,6 @@ put_again: br_write_unlock(vfsmount_lock); mntfree(mnt); } -#endif - -static void mntput_no_expire(struct vfsmount *mnt) -{ - __mntput(mnt, 0); -} void mntput(struct vfsmount *mnt) { @@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt) /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ if (unlikely(mnt->mnt_expiry_mark)) mnt->mnt_expiry_mark = 0; - __mntput(mnt, 0); + mntput_no_expire(mnt); } } EXPORT_SYMBOL(mntput); @@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -void mntput_long(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - if (mnt) { - /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ - if (unlikely(mnt->mnt_expiry_mark)) - mnt->mnt_expiry_mark = 0; - __mntput(mnt, 1); - } -#else - mntput(mnt); -#endif -} -EXPORT_SYMBOL(mntput_long); - -struct vfsmount *mntget_long(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - if (mnt) - atomic_inc(&mnt->mnt_longrefs); - return mnt; -#else - return mntget(mnt); -#endif -} -EXPORT_SYMBOL(mntget_long); - void mnt_pin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); @@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput_long(mnt); + mntput(mnt); } } @@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; + atomic_dec(&p->mnt_longterm); list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; @@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) unlock: up_write(&namespace_sem); - mntput_long(newmnt); + mntput(newmnt); return err; } @@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void) return new_ns; } +void mnt_make_longterm(struct vfsmount *mnt) +{ + atomic_inc(&mnt->mnt_longterm); +} + +void mnt_make_shortterm(struct vfsmount *mnt) +{ + if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) + return; + br_write_lock(vfsmount_lock); + atomic_dec(&mnt->mnt_longterm); + br_write_unlock(vfsmount_lock); +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new_ns->root; while (p) { q->mnt_ns = new_ns; + atomic_inc(&q->mnt_longterm); if (fs) { if (p == fs->root.mnt) { + fs->root.mnt = mntget(q); + atomic_inc(&q->mnt_longterm); + mnt_make_shortterm(p); rootmnt = p; - fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { + fs->pwd.mnt = mntget(q); + atomic_inc(&q->mnt_longterm); + mnt_make_shortterm(p); pwdmnt = p; - fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput_long(rootmnt); + mntput(rootmnt); if (pwdmnt) - mntput_long(pwdmnt); + mntput(pwdmnt); return new_ns; } @@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) new_ns = alloc_mnt_ns(); if (!IS_ERR(new_ns)) { mnt->mnt_ns = new_ns; + atomic_inc(&mnt->mnt_longterm); new_ns->root = mnt; list_add(&new_ns->list, &new_ns->root->mnt_list); } diff --git a/fs/pipe.c b/fs/pipe.c index e2e95fb46a1e..89e9e19b1b2e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput_long(pipe_mnt); + mntput(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/super.c b/fs/super.c index 4f6a3571a634..74e149efed81 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput_long(mnt); + mntput(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index af4765ea8fde..604f122a2326 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -60,7 +60,7 @@ struct vfsmount { struct super_block *mnt_sb; /* pointer to superblock */ #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; - atomic_t mnt_longrefs; + atomic_t mnt_longterm; /* how many of the refs are longterm */ #else int mnt_count; int mnt_writers; @@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern void mntput_long(struct vfsmount *mnt); -extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); diff --git a/include/linux/path.h b/include/linux/path.h index a581e8c06533..edc98dec6266 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,9 +10,7 @@ struct path { }; extern void path_get(struct path *); -extern void path_get_long(struct path *); extern void path_put(struct path *); -extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { -- cgit v1.2.3 From f8b18087fd3277e424a24e13ce0edf30abe97ce0 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 12 Jan 2011 10:30:42 +0100 Subject: fs/btrfs: Fix build of ctree Fix the build failure in some configurations: CC [M] fs/btrfs/ctree.o In file included from fs/btrfs/ctree.c:21:0: fs/btrfs/ctree.h:1003:17: error: field 'super_kobj' has incomplete type fs/btrfs/ctree.h:1074:17: error: field 'root_kobj' has incomplete type make[2]: *** [fs/btrfs/ctree.o] Error 1 make[1]: *** [fs/btrfs] Error 2 make: *** [fs] Error 2 caused by commit 57cc7215b708 ("headers: kobject.h redux") We need to include kobject.h here. Reported-by: Jeff Garzik Fix-suggested-by: Li Zefan Signed-off-by: Stefan Schmidt Signed-off-by: Linus Torvalds --- fs/btrfs/ctree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a142d204b526..b875d445ea81 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "extent_io.h" #include "extent_map.h" -- cgit v1.2.3 From e205117285d6035af135c9d6c34a30ee6b8d1f2e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 16 Jan 2011 21:11:25 +0000 Subject: configfs: change depends -> select SYSFS This patch changes configfs to select SYSFS to fix the following: warning: (TARGET_CORE && GFS2_FS) selects CONFIGFS_FS which has unmet direct dependencies (SYSFS) Reported-by: Randy Dunlap Signed-off-by: Nicholas A. Bellinger Acked-by: Joel Becker --- fs/configfs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig index 13587cc97a0b..9febcdefdfdc 100644 --- a/fs/configfs/Kconfig +++ b/fs/configfs/Kconfig @@ -1,8 +1,8 @@ config CONFIGFS_FS tristate "Userspace-driven configuration filesystem" - depends on SYSFS + select SYSFS help - configfs is a ram-based filesystem that provides the converse + configfs is a RAM-based filesystem that provides the converse of sysfs's functionality. Where sysfs is a filesystem-based view of kernel objects, configfs is a filesystem-based manager of kernel objects, or config_items. -- cgit v1.2.3 From 86c747d2a4f028fe2fdf091c3a81d0e187827682 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 16 Jan 2011 21:14:52 +0000 Subject: dlm: Make DLM depend on CONFIGFS_FS This patch fixes the following kconfig error after changing CONFIGFS_FS -> select SYSFS: fs/sysfs/Kconfig:1:error: recursive dependency detected! fs/sysfs/Kconfig:1: symbol SYSFS is selected by CONFIGFS_FS fs/configfs/Kconfig:1: symbol CONFIGFS_FS is selected by DLM fs/dlm/Kconfig:1: symbol DLM depends on SYSFS Signed-off-by: Nicholas A. Bellinger Cc: Joel Becker Cc: Randy Dunlap Cc: Stephen Rothwell Cc: James Bottomley --- fs/dlm/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 2dbb422e8116..1897eb1b4b6a 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -1,8 +1,7 @@ menuconfig DLM tristate "Distributed Lock Manager (DLM)" depends on EXPERIMENTAL && INET - depends on SYSFS && (IPV6 || IPV6=n) - select CONFIGFS_FS + depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) select IP_SCTP help A general purpose distributed lock manager for kernel or userspace -- cgit v1.2.3 From 7b1fff7e4fdf2805fce7afd6247912588d72d604 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 16 Jan 2011 21:16:07 +0000 Subject: ocfs2: Make OCFS2_FS depend on CONFIGFS_FS This patch fixes the following kconfig error after changing CONFIGFS_FS -> select SYSFS: fs/sysfs/Kconfig:1:error: recursive dependency detected! fs/sysfs/Kconfig:1: symbol SYSFS is selected by CONFIGFS_FS fs/configfs/Kconfig:1: symbol CONFIGFS_FS is selected by OCFS2_FS fs/ocfs2/Kconfig:1: symbol OCFS2_FS depends on SYSFS Signed-off-by: Nicholas A. Bellinger Cc: Joel Becker Cc: Randy Dunlap Cc: Stephen Rothwell Cc: James Bottomley --- fs/ocfs2/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig index ab152c00cd3a..77a8de5f7119 100644 --- a/fs/ocfs2/Kconfig +++ b/fs/ocfs2/Kconfig @@ -1,7 +1,6 @@ config OCFS2_FS tristate "OCFS2 file system support" - depends on NET && SYSFS - select CONFIGFS_FS + depends on NET && SYSFS && CONFIGFS_FS select JBD2 select CRC32 select QUOTA -- cgit v1.2.3 From 7e3d0eb0b028ed9e9384e6afcae2f22993bbdf25 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 16:32:11 -0500 Subject: VFS: Fix UP compile error in fs/namespace.c mnt_longterm is there only on SMP Reported-and-tested-by: Joachim Eastwood Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/namespace.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 48809e21f270..9f544f35ed34 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -611,6 +611,21 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); } +static inline void __mnt_make_longterm(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + atomic_inc(&mnt->mnt_longterm); +#endif +} + +/* needs vfsmount lock for write */ +static inline void __mnt_make_shortterm(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + atomic_dec(&mnt->mnt_longterm); +#endif +} + /* * vfsmount lock must be held for write */ @@ -626,7 +641,7 @@ static void commit_tree(struct vfsmount *mnt) list_add_tail(&head, &mnt->mnt_list); list_for_each_entry(m, &head, mnt_list) { m->mnt_ns = n; - atomic_inc(&m->mnt_longterm); + __mnt_make_longterm(m); } list_splice(&head, n->list.prev); @@ -1189,7 +1204,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; - atomic_dec(&p->mnt_longterm); + __mnt_make_shortterm(p); list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; @@ -2243,16 +2258,18 @@ static struct mnt_namespace *alloc_mnt_ns(void) void mnt_make_longterm(struct vfsmount *mnt) { - atomic_inc(&mnt->mnt_longterm); + __mnt_make_longterm(mnt); } void mnt_make_shortterm(struct vfsmount *mnt) { +#ifdef CONFIG_SMP if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; br_write_lock(vfsmount_lock); atomic_dec(&mnt->mnt_longterm); br_write_unlock(vfsmount_lock); +#endif } /* @@ -2292,17 +2309,17 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new_ns->root; while (p) { q->mnt_ns = new_ns; - atomic_inc(&q->mnt_longterm); + __mnt_make_longterm(q); if (fs) { if (p == fs->root.mnt) { fs->root.mnt = mntget(q); - atomic_inc(&q->mnt_longterm); + __mnt_make_longterm(q); mnt_make_shortterm(p); rootmnt = p; } if (p == fs->pwd.mnt) { fs->pwd.mnt = mntget(q); - atomic_inc(&q->mnt_longterm); + __mnt_make_longterm(q); mnt_make_shortterm(p); pwdmnt = p; } @@ -2348,7 +2365,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) new_ns = alloc_mnt_ns(); if (!IS_ERR(new_ns)) { mnt->mnt_ns = new_ns; - atomic_inc(&mnt->mnt_longterm); + __mnt_make_longterm(mnt); new_ns->root = mnt; list_add(&new_ns->list, &new_ns->root->mnt_list); } -- cgit v1.2.3 From 19a167af7c97248ec646552ebc9140bc6aa3552a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Jan 2011 01:35:23 -0500 Subject: Take the completion of automount into new helper ... and shift it from namei.c to namespace.c Signed-off-by: Al Viro --- fs/internal.h | 1 + fs/namei.c | 31 +++++-------------------------- fs/namespace.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index 12ccb86edef7..e8a0b245177d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -70,6 +70,7 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, extern void release_mounts(struct list_head *); extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); +extern int finish_automount(struct vfsmount *, struct path *); extern int do_add_mount(struct vfsmount *, struct path *, int); extern void mnt_clear_expiry(struct vfsmount *); diff --git a/fs/namei.c b/fs/namei.c index 8f7b41a14882..b753192d8c3f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -923,37 +923,13 @@ static int follow_automount(struct path *path, unsigned flags, if (!mnt) /* mount collision */ return 0; - /* The new mount record should have at least 2 refs to prevent it being - * expired before we get a chance to add it - */ - BUG_ON(mnt_get_count(mnt) < 2); - - if (mnt->mnt_sb == path->mnt->mnt_sb && - mnt->mnt_root == path->dentry) { - mnt_clear_expiry(mnt); - mntput(mnt); - mntput(mnt); - return -ELOOP; - } + err = finish_automount(mnt, path); - /* We need to add the mountpoint to the parent. The filesystem may - * have placed it on an expiry list, and so we need to make sure it - * won't be expired under us if do_add_mount() fails (do_add_mount() - * will eat a reference unconditionally). - */ - mntget(mnt); - err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); switch (err) { case -EBUSY: /* Someone else made a mount here whilst we were busy */ - err = 0; - default: - mnt_clear_expiry(mnt); - mntput(mnt); - mntput(mnt); - return err; + return 0; case 0: - mntput(mnt); dput(path->dentry); if (*need_mntput) mntput(path->mnt); @@ -961,7 +937,10 @@ static int follow_automount(struct path *path, unsigned flags, path->dentry = dget(mnt->mnt_root); *need_mntput = true; return 0; + default: + return err; } + } /* diff --git a/fs/namespace.c b/fs/namespace.c index 9f544f35ed34..bec51e4e0549 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1895,6 +1895,39 @@ static int do_new_mount(struct path *path, char *type, int flags, return do_add_mount(mnt, path, mnt_flags); } +int finish_automount(struct vfsmount *m, struct path *path) +{ + int err; + /* The new mount record should have at least 2 refs to prevent it being + * expired before we get a chance to add it + */ + BUG_ON(mnt_get_count(m) < 2); + + if (m->mnt_sb == path->mnt->mnt_sb && + m->mnt_root == path->dentry) { + mnt_clear_expiry(m); + mntput(m); + mntput(m); + return -ELOOP; + } + + /* We need to add the mountpoint to the parent. The filesystem may + * have placed it on an expiry list, and so we need to make sure it + * won't be expired under us if do_add_mount() fails (do_add_mount() + * will eat a reference unconditionally). + */ + mntget(m); + err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); + if (err) { + mnt_clear_expiry(m); + mntput(m); + mntput(m); + } else { + mntput(m); + } + return err; +} + /* * add a mount into a namespace's mount tree * - this unconditionally eats one of the caller's references to newmnt. -- cgit v1.2.3 From 15f9a3f3e199647fe0cac19302c5033cf031372d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Jan 2011 01:41:58 -0500 Subject: don't drop newmnt on error in do_add_mount() That gets rid of the kludge in finish_automount() - we need to keep refcount on the vfsmount as-is until we evict it from expiry list. Signed-off-by: Al Viro --- fs/namespace.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index bec51e4e0549..31aefc8e5fa6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1880,6 +1880,7 @@ static int do_new_mount(struct path *path, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; + int err; if (!type) return -EINVAL; @@ -1892,7 +1893,10 @@ static int do_new_mount(struct path *path, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, path, mnt_flags); + err = do_add_mount(mnt, path, mnt_flags); + if (err) + mntput(mnt); + return err; } int finish_automount(struct vfsmount *m, struct path *path) @@ -1911,26 +1915,17 @@ int finish_automount(struct vfsmount *m, struct path *path) return -ELOOP; } - /* We need to add the mountpoint to the parent. The filesystem may - * have placed it on an expiry list, and so we need to make sure it - * won't be expired under us if do_add_mount() fails (do_add_mount() - * will eat a reference unconditionally). - */ - mntget(m); err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); if (err) { mnt_clear_expiry(m); mntput(m); mntput(m); - } else { - mntput(m); } return err; } /* * add a mount into a namespace's mount tree - * - this unconditionally eats one of the caller's references to newmnt. */ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) { @@ -1967,7 +1962,6 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) unlock: up_write(&namespace_sem); - mntput(newmnt); return err; } -- cgit v1.2.3 From b1e75df45a3d8a490b8648e44632debc5eea04b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Jan 2011 01:47:59 -0500 Subject: tidy up around finish_automount() do_add_mount() and mnt_clear_expiry() are not needed outside of namespace.c anymore, now that namei has finish_automount() to use. Signed-off-by: Al Viro --- fs/internal.h | 2 -- fs/namespace.c | 46 ++++++++++++++++++---------------------------- 2 files changed, 18 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index e8a0b245177d..0663568b1247 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -71,8 +71,6 @@ extern void release_mounts(struct list_head *); extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern int finish_automount(struct vfsmount *, struct path *); -extern int do_add_mount(struct vfsmount *, struct path *, int); -extern void mnt_clear_expiry(struct vfsmount *); extern void mnt_make_longterm(struct vfsmount *); extern void mnt_make_shortterm(struct vfsmount *); diff --git a/fs/namespace.c b/fs/namespace.c index 31aefc8e5fa6..7b0b95371696 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1872,6 +1872,8 @@ out: return err; } +static int do_add_mount(struct vfsmount *, struct path *, int); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -1909,25 +1911,31 @@ int finish_automount(struct vfsmount *m, struct path *path) if (m->mnt_sb == path->mnt->mnt_sb && m->mnt_root == path->dentry) { - mnt_clear_expiry(m); - mntput(m); - mntput(m); - return -ELOOP; + err = -ELOOP; + goto fail; } err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); - if (err) { - mnt_clear_expiry(m); - mntput(m); - mntput(m); + if (!err) + return 0; +fail: + /* remove m from any expiration list it may be on */ + if (!list_empty(&m->mnt_expire)) { + down_write(&namespace_sem); + br_write_lock(vfsmount_lock); + list_del_init(&m->mnt_expire); + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); } + mntput(m); + mntput(m); return err; } /* * add a mount into a namespace's mount tree */ -int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) +static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) { int err; @@ -1954,11 +1962,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, path))) - goto unlock; - - up_write(&namespace_sem); - return 0; + err = graft_tree(newmnt, path); unlock: up_write(&namespace_sem); @@ -1982,20 +1986,6 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) } EXPORT_SYMBOL(mnt_set_expiry); -/* - * Remove a vfsmount from any expiration list it may be on - */ -void mnt_clear_expiry(struct vfsmount *mnt) -{ - if (!list_empty(&mnt->mnt_expire)) { - down_write(&namespace_sem); - br_write_lock(vfsmount_lock); - list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); - up_write(&namespace_sem); - } -} - /* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came -- cgit v1.2.3 From 64c23e86873ee410554d6d1c76b60da47025e96f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jan 2011 13:07:30 +0100 Subject: make the feature checks in ->fallocate future proof Instead of various home grown checks that might need updates for new flags just check for any bit outside the mask of the features supported by the filesystem. This makes the check future proof for any newly added flag. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/btrfs/inode.c | 2 +- fs/ext4/extents.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/ocfs2/file.c | 2 ++ fs/xfs/linux-2.6/xfs_iops.c | 3 +++ 5 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a3798a3aa0d2..64daf2acd0d5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7116,7 +7116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, alloc_end = (offset + len + mask) & ~mask; /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode && (mode != FALLOC_FL_KEEP_SIZE)) + if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; /* diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c4068f6abf03..4bdd160854eb 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3645,7 +3645,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) unsigned int credits, blkbits = inode->i_blkbits; /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode && (mode != FALLOC_FL_KEEP_SIZE)) + if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; /* diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 040b5a2e6556..c09528c07f3d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1426,7 +1426,7 @@ static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, next = (next + 1) << sdp->sd_sb.sb_bsize_shift; /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode && (mode != FALLOC_FL_KEEP_SIZE)) + if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; offset = (offset >> sdp->sd_sb.sb_bsize_shift) << diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 63e3fca266e0..cf254ce8c941 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1997,6 +1997,8 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, int change_size = 1; int cmd = OCFS2_IOC_RESVSP64; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; if (!ocfs2_writes_unwritten_extents(osb)) return -EOPNOTSUPP; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index da54403633b6..a4ecc2188a09 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -518,6 +518,9 @@ xfs_vn_fallocate( xfs_inode_t *ip = XFS_I(inode); int cmd = XFS_IOC_RESVSP; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + /* preallocation on directories not yet supported */ error = -ENODEV; if (S_ISDIR(inode->i_mode)) -- cgit v1.2.3 From 2fe17c1075836b66678ed2a305fd09b6773883aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jan 2011 13:07:43 +0100 Subject: fallocate should be a file operation Currently all filesystems except XFS implement fallocate asynchronously, while XFS forced a commit. Both of these are suboptimal - in case of O_SYNC I/O we really want our allocation on disk, especially for the !KEEP_SIZE case where we actually grow the file with user-visible zeroes. On the other hand always commiting the transaction is a bad idea for fast-path uses of fallocate like for example in recent Samba versions. Given that block allocation is a data plane operation anyway change it from an inode operation to a file operation so that we have the file structure available that lets us check for O_SYNC. This also includes moving the code around for a few of the filesystems, and remove the already unnedded S_ISDIR checks given that we only wire up fallocate for regular files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +- fs/btrfs/file.c | 113 +++++++++++++++++ fs/btrfs/inode.c | 111 ---------------- fs/ext4/ext4.h | 2 +- fs/ext4/extents.c | 9 +- fs/ext4/file.c | 2 +- fs/gfs2/file.c | 258 ++++++++++++++++++++++++++++++++++++++ fs/gfs2/ops_inode.c | 258 -------------------------------------- fs/ocfs2/file.c | 8 +- fs/open.c | 4 +- fs/xfs/linux-2.6/xfs_file.c | 56 +++++++++ fs/xfs/linux-2.6/xfs_iops.c | 60 --------- include/linux/fs.h | 4 +- 13 files changed, 440 insertions(+), 448 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 651d5237c155..4471a416c274 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -60,7 +60,6 @@ ata *); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); locking rules: @@ -88,7 +87,6 @@ getxattr: no listxattr: no removexattr: yes truncate_range: yes -fallocate: no fiemap: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. @@ -437,6 +435,7 @@ prototypes: ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); + long (*fallocate)(struct file *, int, loff_t, loff_t); }; locking rules: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 66836d85763b..a9e0a4eaf3d9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1237,6 +1238,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } +static long btrfs_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct extent_state *cached_state = NULL; + u64 cur_offset; + u64 last_byte; + u64 alloc_start; + u64 alloc_end; + u64 alloc_hint = 0; + u64 locked_end; + u64 mask = BTRFS_I(inode)->root->sectorsize - 1; + struct extent_map *em; + int ret; + + alloc_start = offset & ~mask; + alloc_end = (offset + len + mask) & ~mask; + + /* We only support the FALLOC_FL_KEEP_SIZE mode */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + /* + * wait for ordered IO before we have any locks. We'll loop again + * below with the locks held. + */ + btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); + + mutex_lock(&inode->i_mutex); + ret = inode_newsize_ok(inode, alloc_end); + if (ret) + goto out; + + if (alloc_start > inode->i_size) { + ret = btrfs_cont_expand(inode, alloc_start); + if (ret) + goto out; + } + + ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); + if (ret) + goto out; + + locked_end = alloc_end - 1; + while (1) { + struct btrfs_ordered_extent *ordered; + + /* the extent lock is ordered inside the running + * transaction + */ + lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, + locked_end, 0, &cached_state, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, + alloc_end - 1); + if (ordered && + ordered->file_offset + ordered->len > alloc_start && + ordered->file_offset < alloc_end) { + btrfs_put_ordered_extent(ordered); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + alloc_start, locked_end, + &cached_state, GFP_NOFS); + /* + * we can't wait on the range with the transaction + * running or with the extent lock held + */ + btrfs_wait_ordered_range(inode, alloc_start, + alloc_end - alloc_start); + } else { + if (ordered) + btrfs_put_ordered_extent(ordered); + break; + } + } + + cur_offset = alloc_start; + while (1) { + em = btrfs_get_extent(inode, NULL, 0, cur_offset, + alloc_end - cur_offset, 0); + BUG_ON(IS_ERR(em) || !em); + last_byte = min(extent_map_end(em), alloc_end); + last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE || + (cur_offset >= inode->i_size && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + ret = btrfs_prealloc_file_range(inode, mode, cur_offset, + last_byte - cur_offset, + 1 << inode->i_blkbits, + offset + len, + &alloc_hint); + if (ret < 0) { + free_extent_map(em); + break; + } + } + free_extent_map(em); + + cur_offset = last_byte; + if (cur_offset >= alloc_end) { + ret = 0; + break; + } + } + unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, + &cached_state, GFP_NOFS); + + btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + const struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -1248,6 +1360,7 @@ const struct file_operations btrfs_file_operations = { .open = generic_file_open, .release = btrfs_release_file, .fsync = btrfs_sync_file, + .fallocate = btrfs_fallocate, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 64daf2acd0d5..902afbf50811 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7098,116 +7098,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, min_size, actual_len, alloc_hint, trans); } -static long btrfs_fallocate(struct inode *inode, int mode, - loff_t offset, loff_t len) -{ - struct extent_state *cached_state = NULL; - u64 cur_offset; - u64 last_byte; - u64 alloc_start; - u64 alloc_end; - u64 alloc_hint = 0; - u64 locked_end; - u64 mask = BTRFS_I(inode)->root->sectorsize - 1; - struct extent_map *em; - int ret; - - alloc_start = offset & ~mask; - alloc_end = (offset + len + mask) & ~mask; - - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - - /* - * wait for ordered IO before we have any locks. We'll loop again - * below with the locks held. - */ - btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); - - mutex_lock(&inode->i_mutex); - ret = inode_newsize_ok(inode, alloc_end); - if (ret) - goto out; - - if (alloc_start > inode->i_size) { - ret = btrfs_cont_expand(inode, alloc_start); - if (ret) - goto out; - } - - ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); - if (ret) - goto out; - - locked_end = alloc_end - 1; - while (1) { - struct btrfs_ordered_extent *ordered; - - /* the extent lock is ordered inside the running - * transaction - */ - lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, - locked_end, 0, &cached_state, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, - alloc_end - 1); - if (ordered && - ordered->file_offset + ordered->len > alloc_start && - ordered->file_offset < alloc_end) { - btrfs_put_ordered_extent(ordered); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - alloc_start, locked_end, - &cached_state, GFP_NOFS); - /* - * we can't wait on the range with the transaction - * running or with the extent lock held - */ - btrfs_wait_ordered_range(inode, alloc_start, - alloc_end - alloc_start); - } else { - if (ordered) - btrfs_put_ordered_extent(ordered); - break; - } - } - - cur_offset = alloc_start; - while (1) { - em = btrfs_get_extent(inode, NULL, 0, cur_offset, - alloc_end - cur_offset, 0); - BUG_ON(IS_ERR(em) || !em); - last_byte = min(extent_map_end(em), alloc_end); - last_byte = (last_byte + mask) & ~mask; - if (em->block_start == EXTENT_MAP_HOLE || - (cur_offset >= inode->i_size && - !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { - ret = btrfs_prealloc_file_range(inode, mode, cur_offset, - last_byte - cur_offset, - 1 << inode->i_blkbits, - offset + len, - &alloc_hint); - if (ret < 0) { - free_extent_map(em); - break; - } - } - free_extent_map(em); - - cur_offset = last_byte; - if (cur_offset >= alloc_end) { - ret = 0; - break; - } - } - unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - &cached_state, GFP_NOFS); - - btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); -out: - mutex_unlock(&inode->i_mutex); - return ret; -} - static int btrfs_set_page_dirty(struct page *page) { return __set_page_dirty_nobuffers(page); @@ -7310,7 +7200,6 @@ static const struct inode_operations btrfs_file_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .permission = btrfs_permission, - .fallocate = btrfs_fallocate, .fiemap = btrfs_fiemap, }; static const struct inode_operations btrfs_special_inode_operations = { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1de65f572033..0c8d97b56f34 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2065,7 +2065,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, extern void ext4_ext_truncate(struct inode *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); -extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, +extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ssize_t len); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4bdd160854eb..63a75810b7c3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3627,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode, } /* - * preallocate space for a file. This implements ext4's fallocate inode + * preallocate space for a file. This implements ext4's fallocate file * operation, which gets called from sys_fallocate system call. * For block-mapped files, posix_fallocate should fall back to the method * of writing zeroes to the required new blocks (the same behavior which is * expected for file systems which do not support fallocate() system call). */ -long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) +long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { + struct inode *inode = file->f_path.dentry->d_inode; handle_t *handle; loff_t new_size; unsigned int max_blocks; @@ -3655,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; - /* preallocation to directories is currently not supported */ - if (S_ISDIR(inode->i_mode)) - return -ENODEV; - map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because diff --git a/fs/ext4/file.c b/fs/ext4/file.c index bb003dc9ffff..2e8322c8aa88 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -210,6 +210,7 @@ const struct file_operations ext4_file_operations = { .fsync = ext4_sync_file, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, + .fallocate = ext4_fallocate, }; const struct inode_operations ext4_file_inode_operations = { @@ -223,7 +224,6 @@ const struct inode_operations ext4_file_inode_operations = { .removexattr = generic_removexattr, #endif .check_acl = ext4_check_acl, - .fallocate = ext4_fallocate, .fiemap = ext4_fiemap, }; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fca6689e12e6..7cfdcb913363 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_write(iocb, iov, nr_segs, pos); } +static void empty_write_end(struct page *page, unsigned from, + unsigned to) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + + page_zero_new_buffers(page, from, to); + flush_dcache_page(page); + mark_page_accessed(page); + + if (!gfs2_is_writeback(ip)) + gfs2_page_add_databufs(ip, page, from, to); + + block_commit_write(page, from, to); +} + +static int write_empty_blocks(struct page *page, unsigned from, unsigned to) +{ + unsigned start, end, next; + struct buffer_head *bh, *head; + int error; + + if (!page_has_buffers(page)) { + error = __block_write_begin(page, from, to - from, gfs2_block_map); + if (unlikely(error)) + return error; + + empty_write_end(page, from, to); + return 0; + } + + bh = head = page_buffers(page); + next = end = 0; + while (next < from) { + next += bh->b_size; + bh = bh->b_this_page; + } + start = next; + do { + next += bh->b_size; + if (buffer_mapped(bh)) { + if (end) { + error = __block_write_begin(page, start, end - start, + gfs2_block_map); + if (unlikely(error)) + return error; + empty_write_end(page, start, end); + end = 0; + } + start = next; + } + else + end = next; + bh = bh->b_this_page; + } while (next < to); + + if (end) { + error = __block_write_begin(page, start, end - start, gfs2_block_map); + if (unlikely(error)) + return error; + empty_write_end(page, start, end); + } + + return 0; +} + +static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, + int mode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *dibh; + int error; + u64 start = offset >> PAGE_CACHE_SHIFT; + unsigned int start_offset = offset & ~PAGE_CACHE_MASK; + u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + pgoff_t curr; + struct page *page; + unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; + unsigned int from, to; + + if (!end_offset) + end_offset = PAGE_CACHE_SIZE; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(error)) + goto out; + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, NULL); + if (unlikely(error)) + goto out; + } + + curr = start; + offset = start << PAGE_CACHE_SHIFT; + from = start_offset; + to = PAGE_CACHE_SIZE; + while (curr <= end) { + page = grab_cache_page_write_begin(inode->i_mapping, curr, + AOP_FLAG_NOFS); + if (unlikely(!page)) { + error = -ENOMEM; + goto out; + } + + if (curr == end) + to = end_offset; + error = write_empty_blocks(page, from, to); + if (!error && offset + to > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + i_size_write(inode, offset + to); + } + unlock_page(page); + page_cache_release(page); + if (error) + goto out; + curr++; + offset += PAGE_CACHE_SIZE; + from = 0; + } + + gfs2_dinode_out(ip, dibh->b_data); + mark_inode_dirty(inode); + + brelse(dibh); + +out: + return error; +} + +static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, + unsigned int *data_blocks, unsigned int *ind_blocks) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; + unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); + + for (tmp = max_data; tmp > sdp->sd_diptrs;) { + tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); + max_data -= tmp; + } + /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, + so it might end up with fewer data blocks */ + if (max_data <= *data_blocks) + return; + *data_blocks = max_data; + *ind_blocks = max_blocks - max_data; + *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; + if (*len > max) { + *len = max; + gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); + } +} + +static long gfs2_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *ip = GFS2_I(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + loff_t bytes, max_bytes; + struct gfs2_alloc *al; + int error; + loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; + next = (next + 1) << sdp->sd_sb.sb_bsize_shift; + + /* We only support the FALLOC_FL_KEEP_SIZE mode */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + offset = (offset >> sdp->sd_sb.sb_bsize_shift) << + sdp->sd_sb.sb_bsize_shift; + + len = next - offset; + bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; + if (!bytes) + bytes = UINT_MAX; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (unlikely(error)) + goto out_uninit; + + if (!gfs2_write_alloc_required(ip, offset, len)) + goto out_unlock; + + while (len > 0) { + if (len < bytes) + bytes = len; + al = gfs2_alloc_get(ip); + if (!al) { + error = -ENOMEM; + goto out_unlock; + } + + error = gfs2_quota_lock_check(ip); + if (error) + goto out_alloc_put; + +retry: + gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + + al->al_requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) { + if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { + bytes >>= 1; + goto retry; + } + goto out_qunlock; + } + max_bytes = bytes; + calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); + al->al_requested = data_blocks + ind_blocks; + + rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + + RES_RG_HDR + gfs2_rg_blocks(al); + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + + error = gfs2_trans_begin(sdp, rblocks, + PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); + if (error) + goto out_trans_fail; + + error = fallocate_chunk(inode, offset, max_bytes, mode); + gfs2_trans_end(sdp); + + if (error) + goto out_trans_fail; + + len -= max_bytes; + offset += max_bytes; + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + goto out_unlock; + +out_trans_fail: + gfs2_inplace_release(ip); +out_qunlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = { .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .setlease = gfs2_setlease, + .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops = { @@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = { .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .setlease = generic_setlease, + .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops_nolock = { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c09528c07f3d..d8b26ac2e20b 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include "gfs2.h" @@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) return ret; } -static void empty_write_end(struct page *page, unsigned from, - unsigned to) -{ - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - - page_zero_new_buffers(page, from, to); - flush_dcache_page(page); - mark_page_accessed(page); - - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); - - block_commit_write(page, from, to); -} - - -static int write_empty_blocks(struct page *page, unsigned from, unsigned to) -{ - unsigned start, end, next; - struct buffer_head *bh, *head; - int error; - - if (!page_has_buffers(page)) { - error = __block_write_begin(page, from, to - from, gfs2_block_map); - if (unlikely(error)) - return error; - - empty_write_end(page, from, to); - return 0; - } - - bh = head = page_buffers(page); - next = end = 0; - while (next < from) { - next += bh->b_size; - bh = bh->b_this_page; - } - start = next; - do { - next += bh->b_size; - if (buffer_mapped(bh)) { - if (end) { - error = __block_write_begin(page, start, end - start, - gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - end = 0; - } - start = next; - } - else - end = next; - bh = bh->b_this_page; - } while (next < to); - - if (end) { - error = __block_write_begin(page, start, end - start, gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - } - - return 0; -} - -static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, - int mode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *dibh; - int error; - u64 start = offset >> PAGE_CACHE_SHIFT; - unsigned int start_offset = offset & ~PAGE_CACHE_MASK; - u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; - pgoff_t curr; - struct page *page; - unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; - unsigned int from, to; - - if (!end_offset) - end_offset = PAGE_CACHE_SIZE; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(error)) - goto out; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (unlikely(error)) - goto out; - } - - curr = start; - offset = start << PAGE_CACHE_SHIFT; - from = start_offset; - to = PAGE_CACHE_SIZE; - while (curr <= end) { - page = grab_cache_page_write_begin(inode->i_mapping, curr, - AOP_FLAG_NOFS); - if (unlikely(!page)) { - error = -ENOMEM; - goto out; - } - - if (curr == end) - to = end_offset; - error = write_empty_blocks(page, from, to); - if (!error && offset + to > inode->i_size && - !(mode & FALLOC_FL_KEEP_SIZE)) { - i_size_write(inode, offset + to); - } - unlock_page(page); - page_cache_release(page); - if (error) - goto out; - curr++; - offset += PAGE_CACHE_SIZE; - from = 0; - } - - gfs2_dinode_out(ip, dibh->b_data); - mark_inode_dirty(inode); - - brelse(dibh); - -out: - return error; -} - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) -{ - const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; - unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); - - for (tmp = max_data; tmp > sdp->sd_diptrs;) { - tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); - max_data -= tmp; - } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; - *data_blocks = max_data; - *ind_blocks = max_blocks - max_data; - *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; - if (*len > max) { - *len = max; - gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); - } -} - -static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, - loff_t len) -{ - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_inode *ip = GFS2_I(inode); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; - struct gfs2_alloc *al; - int error; - loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; - next = (next + 1) << sdp->sd_sb.sb_bsize_shift; - - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - - offset = (offset >> sdp->sd_sb.sb_bsize_shift) << - sdp->sd_sb.sb_bsize_shift; - - len = next - offset; - bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; - if (!bytes) - bytes = UINT_MAX; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - - if (!gfs2_write_alloc_required(ip, offset, len)) - goto out_unlock; - - while (len > 0) { - if (len < bytes) - bytes = len; - al = gfs2_alloc_get(ip); - if (!al) { - error = -ENOMEM; - goto out_unlock; - } - - error = gfs2_quota_lock_check(ip); - if (error) - goto out_alloc_put; - -retry: - gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - goto retry; - } - goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - - rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(al); - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = fallocate_chunk(inode, offset, max_bytes, mode); - gfs2_trans_end(sdp); - - if (error) - goto out_trans_fail; - - len -= max_bytes; - offset += max_bytes; - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - goto out_unlock; - -out_trans_fail: - gfs2_inplace_release(ip); -out_qunlock: - gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_alloc_put(ip); -out_unlock: - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - - static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = { .getxattr = gfs2_getxattr, .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, - .fallocate = gfs2_fallocate, .fiemap = gfs2_fiemap, }; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index cf254ce8c941..a6651956482e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1989,9 +1989,10 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); } -static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, +static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { + struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_space_resv sr; int change_size = 1; @@ -2002,9 +2003,6 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, if (!ocfs2_writes_unwritten_extents(osb)) return -EOPNOTSUPP; - if (S_ISDIR(inode->i_mode)) - return -ENODEV; - if (mode & FALLOC_FL_KEEP_SIZE) change_size = 0; @@ -2612,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = { .getxattr = generic_getxattr, .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, - .fallocate = ocfs2_fallocate, .fiemap = ocfs2_fiemap, }; @@ -2644,6 +2641,7 @@ const struct file_operations ocfs2_fops = { .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, .splice_write = ocfs2_file_splice_write, + .fallocate = ocfs2_fallocate, }; const struct file_operations ocfs2_dops = { diff --git a/fs/open.c b/fs/open.c index 5b6ef7e2859e..e52389e1f05b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) return -EFBIG; - if (!inode->i_op->fallocate) + if (!file->f_op->fallocate) return -EOPNOTSUPP; - return inode->i_op->fallocate(inode, mode, offset, len); + return file->f_op->fallocate(file, mode, offset, len); } SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ef51eb43e137..a55c1b46b219 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -37,6 +37,7 @@ #include "xfs_trace.h" #include +#include static const struct vm_operations_struct xfs_file_vm_ops; @@ -882,6 +883,60 @@ out_unlock: return ret; } +STATIC long +xfs_file_fallocate( + struct file *file, + int mode, + loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + int cmd = XFS_IOC_RESVSP; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + if (mode & FALLOC_FL_PUNCH_HOLE) + cmd = XFS_IOC_UNRESVSP; + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + + error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); + if (error) + goto out_unlock; + + /* Change file size if needed */ + if (new_size) { + struct iattr iattr; + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = new_size; + error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); + } + +out_unlock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; +} + + STATIC int xfs_file_open( struct inode *inode, @@ -1000,6 +1055,7 @@ const struct file_operations xfs_file_operations = { .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, + .fallocate = xfs_file_fallocate, }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index a4ecc2188a09..bd5727852fd6 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include @@ -505,64 +504,6 @@ xfs_vn_setattr( return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } -STATIC long -xfs_vn_fallocate( - struct inode *inode, - int mode, - loff_t offset, - loff_t len) -{ - long error; - loff_t new_size = 0; - xfs_flock64_t bf; - xfs_inode_t *ip = XFS_I(inode); - int cmd = XFS_IOC_RESVSP; - - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return -EOPNOTSUPP; - - /* preallocation on directories not yet supported */ - error = -ENODEV; - if (S_ISDIR(inode->i_mode)) - goto out_error; - - bf.l_whence = 0; - bf.l_start = offset; - bf.l_len = len; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - if (mode & FALLOC_FL_PUNCH_HOLE) - cmd = XFS_IOC_UNRESVSP; - - /* check the new inode size is valid before allocating */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = inode_newsize_ok(inode, new_size); - if (error) - goto out_unlock; - } - - error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); - if (error) - goto out_unlock; - - /* Change file size if needed */ - if (new_size) { - struct iattr iattr; - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = new_size; - error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); - } - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); -out_error: - return error; -} - #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) /* @@ -656,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, - .fallocate = xfs_vn_fallocate, .fiemap = xfs_vn_fiemap, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 177b4ddea418..09b5bd6a7c6b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1552,6 +1552,8 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); + long (*fallocate)(struct file *file, int mode, loff_t offset, + loff_t len); }; #define IPERM_FLAG_RCU 0x0001 @@ -1582,8 +1584,6 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, - loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); } ____cacheline_aligned; -- cgit v1.2.3 From 3bc0ba4305fa99b32caac8c60df84a2f14fce228 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Dec 2010 19:38:09 -0500 Subject: fs: Remove unlikely() from fget_light() There's an unlikely() in fget_light() that assumes the file ref count will be 1. Running the annotate branch profiler on a desktop that is performing daily tasks (running firefox, evolution, xchat and is also part of a distcc farm), it shows that the ref count is not 1 that often. correct incorrect % Function File Line ------- --------- - -------- ---- ---- 1035099358 6209599193 85 fget_light file_table.c 315 Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Steven Rostedt Signed-off-by: Al Viro --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/file_table.c b/fs/file_table.c index c3dee381f1b4..c3e89adf53c0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -311,7 +311,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) struct files_struct *files = current->files; *fput_needed = 0; - if (likely((atomic_read(&files->count) == 1))) { + if (atomic_read(&files->count) == 1) { file = fcheck_files(files, fd); } else { rcu_read_lock(); -- cgit v1.2.3 From 16ebe911eba8afa88879213dc4388f2dd701561e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 2 Jan 2011 14:44:00 -0800 Subject: fs: FS_POSIX_ACL does not depend on BLOCK - Fix a kconfig unmet dependency warning. - Remove the comment that identifies which filesystems use POSIX ACL utility routines. - Move the FS_POSIX_ACL symbol outside of the BLOCK symbol if/endif block because its functions do not depend on BLOCK and some of the filesystems that use it do not depend on BLOCK. warning: (GENERIC_ACL && JFFS2_FS_POSIX_ACL && NFSD_V4 && NFS_ACL_SUPPORT && 9P_FS_POSIX_ACL) selects FS_POSIX_ACL which has unmet direct dependencies (BLOCK) Signed-off-by: Randy Dunlap Cc: Al Viro Signed-off-by: Al Viro --- fs/Kconfig | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 771f457402d4..9a7921ae4763 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -30,15 +30,6 @@ config FS_MBCACHE source "fs/reiserfs/Kconfig" source "fs/jfs/Kconfig" -config FS_POSIX_ACL -# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4) -# -# NOTE: you can implement Posix ACLs without these helpers (XFS does). -# Never use this symbol for ifdefs. -# - bool - default n - source "fs/xfs/Kconfig" source "fs/gfs2/Kconfig" source "fs/ocfs2/Kconfig" @@ -47,6 +38,14 @@ source "fs/nilfs2/Kconfig" endif # BLOCK +# Posix ACL utility routines +# +# Note: Posix ACLs can be implemented without these helpers. Never use +# this symbol for ifdefs in core code. +# +config FS_POSIX_ACL + def_bool n + config EXPORTFS tristate -- cgit v1.2.3 From 6a5640f10255a8941a3a57396dda20af7a5c9a9e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 27 Dec 2010 01:41:52 +0900 Subject: compat: remove unnecessary assignment in compat_rw_copy_check_uvector() *@ret_pointer is initialized to @fast_pointer thus the assignment is redundant. Signed-off-by: Namhyung Kim Cc: Jeff Moyer Signed-off-by: Al Viro --- fs/compat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index eb1740ac8c0a..d717442c4133 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -597,10 +597,8 @@ ssize_t compat_rw_copy_check_uvector(int type, if (nr_segs > fast_segs) { ret = -ENOMEM; iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) { - *ret_pointer = fast_pointer; + if (iov == NULL) goto out; - } } *ret_pointer = iov; -- cgit v1.2.3 From 974d879e8070bbb132bd4e79ef314703853d0b82 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 27 Dec 2010 01:41:53 +0900 Subject: compat: update comment of compat statfs syscalls The commit 7ed1ee6118ae ("Take statfs variants to fs/statfs.c") separates out statfs syscalls from fs/open.c. Thus the comment should be changed also. Signed-off-by: Namhyung Kim Cc: Jiri Kosina Signed-off-by: Al Viro --- fs/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index d717442c4133..c62b5e6a1c15 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -257,7 +257,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * } /* - * The following statfs calls are copies of code from fs/open.c and + * The following statfs calls are copies of code from fs/statfs.c and * should be checked against those from time to time */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) -- cgit v1.2.3 From e0bb6bda43e20aa1db5774c73a519cd52c463a55 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 27 Dec 2010 01:41:54 +0900 Subject: compat: copy missing fields in compat_statfs64 to user f_flags and f_spare fields were not copied to userspace when compat_sys_[f]statfs64 called. Signed-off-by: Namhyung Kim Cc: Christoph Hellwig Signed-off-by: Al Viro --- fs/compat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index c62b5e6a1c15..f6fd0a00e6cc 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -320,7 +320,9 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat __put_user(kbuf->f_namelen, &ubuf->f_namelen) || __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || - __put_user(kbuf->f_frsize, &ubuf->f_frsize)) + __put_user(kbuf->f_frsize, &ubuf->f_frsize) || + __put_user(kbuf->f_flags, &ubuf->f_flags) || + __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) return -EFAULT; return 0; } -- cgit v1.2.3 From 274052ef0bac011249925f6616d147b1491fc601 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 13 Dec 2010 17:09:52 +0000 Subject: hpfs_setattr error case avoids unlock_kernel This fixed a case that 'sparse' spotted where hpfs_setattr has an error return that didn't go through it's path that unlocks. This is against git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git version 6313e3c21743cc88bb5bd8aa72948ee1e83937b6. Build tested only, I don't have an hpfs file system to test. Dave Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Al Viro --- fs/hpfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 56f0da1cfd10..1ae35baa539e 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -281,7 +281,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_size != i_size_read(inode)) { error = vmtruncate(inode, attr->ia_size); if (error) - return error; + goto out_unlock; } setattr_copy(inode, attr); -- cgit v1.2.3 From 27eaa1c90c608aa907336c2743d5ecf35c469440 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 Dec 2010 00:06:25 +0900 Subject: aio: check return value of create_workqueue() Signed-off-by: Namhyung Kim Signed-off-by: Al Viro --- fs/aio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 5e00f15c54aa..fc557a3be0a9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -87,7 +87,7 @@ static int __init aio_setup(void) aio_wq = create_workqueue("aio"); abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); - BUG_ON(!abe_pool); + BUG_ON(!aio_wq || !abe_pool); pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); -- cgit v1.2.3 From ecf5632dd189ab4c366cef853d6e5fe7adfe52e5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 16 Jan 2011 23:28:17 +0900 Subject: fs: fix address space warnings in ioctl_fiemap() The fi_extents_start field of struct fiemap_extent_info is a user pointer but was not marked as __user. This makes sparse emit following warnings: CHECK fs/ioctl.c fs/ioctl.c:114:26: warning: incorrect type in argument 1 (different address spaces) fs/ioctl.c:114:26: expected void [noderef] *dst fs/ioctl.c:114:26: got struct fiemap_extent *[assigned] dest fs/ioctl.c:202:14: warning: incorrect type in argument 1 (different address spaces) fs/ioctl.c:202:14: expected void const volatile [noderef] * fs/ioctl.c:202:14: got struct fiemap_extent *[assigned] fi_extents_start fs/ioctl.c:212:27: warning: incorrect type in argument 1 (different address spaces) fs/ioctl.c:212:27: expected void [noderef] *dst fs/ioctl.c:212:27: got char * Also add 'ufiemap' variable to eliminate unnecessary casts. Signed-off-by: Namhyung Kim Signed-off-by: Al Viro --- fs/ioctl.c | 10 +++++----- include/linux/fs.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ioctl.c b/fs/ioctl.c index d6cc16476620..a59635e295fa 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -86,7 +86,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, u64 phys, u64 len, u32 flags) { struct fiemap_extent extent; - struct fiemap_extent *dest = fieinfo->fi_extents_start; + struct fiemap_extent __user *dest = fieinfo->fi_extents_start; /* only count the extents */ if (fieinfo->fi_extents_max == 0) { @@ -173,6 +173,7 @@ static int fiemap_check_ranges(struct super_block *sb, static int ioctl_fiemap(struct file *filp, unsigned long arg) { struct fiemap fiemap; + struct fiemap __user *ufiemap = (struct fiemap __user *) arg; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = filp->f_path.dentry->d_inode; struct super_block *sb = inode->i_sb; @@ -182,8 +183,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) if (!inode->i_op->fiemap) return -EOPNOTSUPP; - if (copy_from_user(&fiemap, (struct fiemap __user *)arg, - sizeof(struct fiemap))) + if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) @@ -196,7 +196,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; - fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); + fieinfo.fi_extents_start = ufiemap->fm_extents; if (fiemap.fm_extent_count != 0 && !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, @@ -209,7 +209,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; - if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) + if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 09b5bd6a7c6b..32b38cd829d3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1483,8 +1483,8 @@ struct fiemap_extent_info { unsigned int fi_flags; /* Flags as passed from user */ unsigned int fi_extents_mapped; /* Number of mapped extents */ unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent - * array */ + struct fiemap_extent __user *fi_extents_start; /* Start of + fiemap_extent array */ }; int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, u64 phys, u64 len, u32 flags); -- cgit v1.2.3 From 38a708d7759476318d0eec64af174513032ec67a Mon Sep 17 00:00:00 2001 From: Edward Shishkin Date: Sat, 30 Oct 2010 00:11:50 +0200 Subject: ecryptfs: fix truncation error in ecryptfs_read_update_atime This is similar to the bug found in direct-io not so long ago. Fix up truncation (ssize_t->int). This only matters with >2G reads/writes, which the kernel doesn't permit. Signed-off-by: Edward Shishkin Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Eric Sandeen Signed-off-by: Tyler Hicks --- fs/ecryptfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 91da02987bff..679817e82484 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -47,7 +47,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - int rc; + ssize_t rc; struct dentry *lower_dentry; struct vfsmount *lower_vfsmount; struct file *file = iocb->ki_filp; -- cgit v1.2.3 From 2a8652f4e0d11ee27b1d2870c600fd1300661a6e Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 3 Nov 2010 11:11:15 +0100 Subject: ecryptfs: moved ECRYPTFS_SUPER_MAGIC definition to linux/magic.h The definition of ECRYPTFS_SUPER_MAGIC has been moved to the include file 'linux/magic.h' to become available to other kernel subsystems. Signed-off-by: Roberto Sassu Signed-off-by: Tyler Hicks --- fs/ecryptfs/ecryptfs_kernel.h | 1 - include/linux/magic.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 413a3c48f0bb..bc530a81e4ce 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -192,7 +192,6 @@ ecryptfs_get_key_payload_data(struct key *key) (((struct user_key_payload*)key->payload.data)->data); } -#define ECRYPTFS_SUPER_MAGIC 0xf15f #define ECRYPTFS_MAX_KEYSET_SIZE 1024 #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 diff --git a/include/linux/magic.h b/include/linux/magic.h index ff690d05f129..62730ea2b56e 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -16,6 +16,7 @@ #define TMPFS_MAGIC 0x01021994 #define HUGETLBFS_MAGIC 0x958458f6 /* some random number */ #define SQUASHFS_MAGIC 0x73717368 +#define ECRYPTFS_SUPER_MAGIC 0xf15f #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 -- cgit v1.2.3 From 070baa51286e5cf59dde6be52fa23647ffb5d32d Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 3 Nov 2010 11:11:22 +0100 Subject: ecryptfs: missing initialization of the superblock 'magic' field This patch initializes the 'magic' field of ecryptfs filesystems to ECRYPTFS_SUPER_MAGIC. Signed-off-by: Roberto Sassu [tyhicks: merge with 66cb76666d69] Signed-off-by: Tyler Hicks --- fs/ecryptfs/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d3b28abdd6aa..19f04504f625 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ecryptfs_kernel.h" /** @@ -564,6 +565,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_superblock_lower(s, path.dentry->d_sb); s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; + s->s_magic = ECRYPTFS_SUPER_MAGIC; inode = ecryptfs_get_inode(path.dentry->d_inode, s); rc = PTR_ERR(inode); -- cgit v1.2.3 From 27992890b02d340198a3a22fc210d13684a41564 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 3 Nov 2010 11:11:28 +0100 Subject: ecryptfs: test lower_file pointer when lower_file_mutex is locked This patch prevents the lower_file pointer in the 'ecryptfs_inode_info' structure to be checked when the mutex 'lower_file_mutex' is not locked. Signed-off-by: Roberto Sassu Signed-off-by: Tyler Hicks --- fs/ecryptfs/file.c | 16 +++++++--------- fs/ecryptfs/inode.c | 32 ++++++++++++++------------------ 2 files changed, 21 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 679817e82484..99259f850e58 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -191,15 +191,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - if (!ecryptfs_inode_to_private(inode)->lower_file) { - rc = ecryptfs_init_persistent_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the persistent file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out_free; - } + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out_free; } if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) && !(file->f_flags & O_RDONLY)) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 64ff02330752..bd33f87a1907 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -185,15 +185,13 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) "context; rc = [%d]\n", rc); goto out; } - if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { - rc = ecryptfs_init_persistent_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the persistent file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out; - } + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out; } rc = ecryptfs_write_metadata(ecryptfs_dentry); if (rc) { @@ -302,15 +300,13 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, rc = -ENOMEM; goto out; } - if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { - rc = ecryptfs_init_persistent_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the persistent file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out_free_kmem; - } + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out_free_kmem; } crypt_stat = &ecryptfs_inode_to_private( ecryptfs_dentry->d_inode)->crypt_stat; -- cgit v1.2.3 From 0abe1169470571c473ee720c35fe5b3481c46c46 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 3 Nov 2010 11:11:34 +0100 Subject: ecryptfs: fixed testing of file descriptor flags This patch replaces the check (lower_file->f_flags & O_RDONLY) with ((lower_file & O_ACCMODE) == O_RDONLY). Signed-off-by: Roberto Sassu Signed-off-by: Tyler Hicks --- fs/ecryptfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 99259f850e58..e069e786da43 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -199,8 +199,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file) ecryptfs_dentry->d_name.name, rc); goto out_free; } - if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) - && !(file->f_flags & O_RDONLY)) { + if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) + == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { rc = -EPERM; printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " "file must hence be opened RO\n", __func__); -- cgit v1.2.3 From 888d57bbc91ebd031451d4ab1c669baee826a06c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 10 Nov 2010 15:46:16 -0800 Subject: fs/ecryptfs: Add printf format/argument verification and fix fallout Add __attribute__((format... to __ecryptfs_printk Make formats and arguments match. Add casts to (unsigned long long) for %llu. Signed-off-by: Joe Perches [tyhicks: 80 columns cleanup and fixed typo] Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 26 ++++++++++++-------------- fs/ecryptfs/ecryptfs_kernel.h | 1 + fs/ecryptfs/file.c | 6 +++--- fs/ecryptfs/keystore.c | 6 +++--- fs/ecryptfs/main.c | 7 ++++--- fs/ecryptfs/mmap.c | 13 +++++++------ 6 files changed, 30 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cbadc1bee6e7..57bdd7a13207 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -413,10 +413,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "derive IV for extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " + "extent [0x%.16llx]; rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); goto out; } if (unlikely(ecryptfs_verbosity > 0)) { @@ -443,9 +442,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, } rc = 0; if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; " + "rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " "encryption:\n"); ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); @@ -540,10 +539,9 @@ static int ecryptfs_decrypt_extent(struct page *page, rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "derive IV for extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " + "extent [0x%.16llx]; rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); goto out; } if (unlikely(ecryptfs_verbosity > 0)) { @@ -571,9 +569,9 @@ static int ecryptfs_decrypt_extent(struct page *page, } rc = 0; if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; " + "rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " "decryption:\n"); ecryptfs_dump_hex((char *)(page_address(page) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index bc530a81e4ce..dbc84ed96336 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -583,6 +583,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) #define ecryptfs_printk(type, fmt, arg...) \ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); +__attribute__ ((format(printf, 1, 2))) void __ecryptfs_printk(const char *fmt, ...); extern const struct file_operations ecryptfs_main_fops; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index e069e786da43..81e10e6a9443 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -241,9 +241,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) } } mutex_unlock(&crypt_stat->cs_mutex); - ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " - "size: [0x%.16x]\n", inode, inode->i_ino, - i_size_read(inode)); + ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " + "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, + (unsigned long long)i_size_read(inode)); goto out; out_free: kmem_cache_free(ecryptfs_file_info_cache, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index b1f6858a5223..25fd7f595c99 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -59,7 +59,7 @@ static int process_request_key_err(long err_code) break; default: ecryptfs_printk(KERN_WARNING, "Unknown error code: " - "[0x%.16x]\n", err_code); + "[0x%.16lx]\n", err_code); rc = -EINVAL; } return rc; @@ -1864,8 +1864,8 @@ found_matching_auth_tok: "session key for authentication token with sig " "[%.*s]; rc = [%d]. Removing auth tok " "candidate from the list and searching for " - "the next match.\n", candidate_auth_tok_sig, - ECRYPTFS_SIG_SIZE_HEX, rc); + "the next match.\n", ECRYPTFS_SIG_SIZE_HEX, + candidate_auth_tok_sig, rc); list_for_each_entry_safe(auth_tok_list_item, auth_tok_list_item_tmp, &auth_tok_list, list) { diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 19f04504f625..758323a0f09a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -810,9 +810,10 @@ static int __init ecryptfs_init(void) ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " "larger than the host's page size, and so " "eCryptfs cannot run on this system. The " - "default eCryptfs extent size is [%d] bytes; " - "the page size is [%d] bytes.\n", - ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE); + "default eCryptfs extent size is [%u] bytes; " + "the page size is [%lu] bytes.\n", + ECRYPTFS_DEFAULT_EXTENT_SIZE, + (unsigned long)PAGE_CACHE_SIZE); goto out; } rc = ecryptfs_init_kmem_caches(); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index b1d82756544b..4b9011392736 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -65,7 +65,7 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting " - "page (upper index [0x%.16x])\n", page->index); + "page (upper index [0x%.16lx])\n", page->index); ClearPageUptodate(page); goto out; } @@ -237,7 +237,7 @@ out: ClearPageUptodate(page); else SetPageUptodate(page); - ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", + ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16lx]\n", page->index); unlock_page(page); return rc; @@ -488,7 +488,7 @@ static int ecryptfs_write_end(struct file *file, } else ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" - "(page w/ index = [0x%.16x], to = [%d])\n", index, to); + "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0, to); @@ -503,19 +503,20 @@ static int ecryptfs_write_end(struct file *file, rc = fill_zeros_to_end_of_page(page, to); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to fill " - "zeros in page with index = [0x%.16x]\n", index); + "zeros in page with index = [0x%.16lx]\n", index); goto out; } rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " - "index [0x%.16x])\n", index); + "index [0x%.16lx])\n", index); goto out; } if (pos + copied > i_size_read(ecryptfs_inode)) { i_size_write(ecryptfs_inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " - "[0x%.16x]\n", i_size_read(ecryptfs_inode)); + "[0x%.16llx]\n", + (unsigned long long)i_size_read(ecryptfs_inode)); } rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); if (rc) -- cgit v1.2.3 From f24b38874e1e37bb70291bbc4c5c3c13f5f9dac8 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 15 Nov 2010 17:36:38 -0600 Subject: ecryptfs: Fix ecryptfs_printk() size_t warnings Commit cb55d21f6fa19d8c6c2680d90317ce88c1f57269 revealed a number of missing 'z' length modifiers in calls to ecryptfs_printk() when printing variables of type size_t. This patch fixes those compiler warnings. Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 4 ++-- fs/ecryptfs/keystore.c | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 57bdd7a13207..bfd8b680e648 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -348,7 +348,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, BUG_ON(!crypt_stat || !crypt_stat->tfm || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", + ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); @@ -778,7 +778,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) } ecryptfs_printk(KERN_DEBUG, "Initializing cipher [%s]; strlen = [%d]; " - "key_size_bits = [%d]\n", + "key_size_bits = [%zd]\n", crypt_stat->cipher, (int)strlen(crypt_stat->cipher), crypt_stat->key_size << 3); if (crypt_stat->tfm) { diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 25fd7f595c99..c1436cff6f2d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -130,7 +130,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size, } else { rc = -EINVAL; ecryptfs_printk(KERN_WARNING, - "Unsupported packet size: [%d]\n", size); + "Unsupported packet size: [%zd]\n", size); } return rc; } @@ -1672,7 +1672,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, auth_tok->session_key.decrypted_key_size); crypt_stat->flags |= ECRYPTFS_KEY_VALID; if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n", + ecryptfs_printk(KERN_DEBUG, "FEK of size [%zd]:\n", crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); @@ -1754,7 +1754,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) { ecryptfs_printk(KERN_ERR, "Expected " "signature of size [%d]; " - "read size [%d]\n", + "read size [%zd]\n", ECRYPTFS_SIG_SIZE, tag_11_contents_size); rc = -EIO; @@ -1787,8 +1787,8 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, goto out_wipe_list; break; default: - ecryptfs_printk(KERN_DEBUG, "No packet at offset " - "[%d] of the file header; hex value of " + ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] " + "of the file header; hex value of " "character is [0x%.2x]\n", i, src[i]); next_packet_is_auth_tok_packet = 0; } @@ -2168,7 +2168,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, if (encrypted_session_key_valid) { ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; " "using auth_tok->session_key.encrypted_key, " - "where key_rec->enc_key_size = [%d]\n", + "where key_rec->enc_key_size = [%zd]\n", key_rec->enc_key_size); memcpy(key_rec->enc_key, auth_tok->session_key.encrypted_key, @@ -2198,7 +2198,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, if (rc < 1 || rc > 2) { ecryptfs_printk(KERN_ERR, "Error generating scatterlist " "for crypt_stat session key; expected rc = 1; " - "got rc = [%d]. key_rec->enc_key_size = [%d]\n", + "got rc = [%d]. key_rec->enc_key_size = [%zd]\n", rc, key_rec->enc_key_size); rc = -ENOMEM; goto out; @@ -2209,7 +2209,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, ecryptfs_printk(KERN_ERR, "Error generating scatterlist " "for crypt_stat encrypted session key; " "expected rc = 1; got rc = [%d]. " - "key_rec->enc_key_size = [%d]\n", rc, + "key_rec->enc_key_size = [%zd]\n", rc, key_rec->enc_key_size); rc = -ENOMEM; goto out; @@ -2224,7 +2224,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, goto out; } rc = 0; - ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", + ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n", crypt_stat->key_size); rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, (*key_rec).enc_key_size); @@ -2235,7 +2235,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); if (ecryptfs_verbosity > 0) { - ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n", + ecryptfs_printk(KERN_DEBUG, "EFEK of size [%zd]:\n", key_rec->enc_key_size); ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size); -- cgit v1.2.3 From 24562486be76cf223b8d911f45e1d26eb3364b13 Mon Sep 17 00:00:00 2001 From: Frank Swiderski Date: Mon, 15 Nov 2010 10:43:22 -0800 Subject: ecryptfs: remove unnecessary decrypt when extending a file Removes an unecessary page decrypt from ecryptfs_begin_write when the page is beyond the current file size. Previously, the call to ecryptfs_decrypt_page would result in a read of 0 bytes, but still attempt to decrypt an entire page. This patch detects that case and merely zeros the page before marking it up-to-date. Signed-off-by: Frank Swiderski Signed-off-by: Tyler Hicks --- fs/ecryptfs/mmap.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 4b9011392736..cc64fca89f8d 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -290,6 +290,7 @@ static int ecryptfs_write_begin(struct file *file, return -ENOMEM; *pagep = page; + prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); if (!PageUptodate(page)) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(mapping->host)->crypt_stat; @@ -335,18 +336,23 @@ static int ecryptfs_write_begin(struct file *file, SetPageUptodate(page); } } else { - rc = ecryptfs_decrypt_page(page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting page " - "at index [%ld]; rc = [%d]\n", - __func__, page->index, rc); - ClearPageUptodate(page); - goto out; + if (prev_page_end_size + >= i_size_read(page->mapping->host)) { + zero_user(page, 0, PAGE_CACHE_SIZE); + } else { + rc = ecryptfs_decrypt_page(page); + if (rc) { + printk(KERN_ERR "%s: Error decrypting " + "page at index [%ld]; " + "rc = [%d]\n", + __func__, page->index, rc); + ClearPageUptodate(page); + goto out; + } } SetPageUptodate(page); } } - prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); /* If creating a page or more of holes, zero them out via truncate. * Note, this will increase i_size. */ if (index != 0) { -- cgit v1.2.3 From acce952b0263825da32cf10489413dec78053347 Mon Sep 17 00:00:00 2001 From: liubo Date: Thu, 6 Jan 2011 19:30:25 +0800 Subject: Btrfs: forced readonly mounts on errors This patch comes from "Forced readonly mounts on errors" ideas. As we know, this is the first step in being more fault tolerant of disk corruptions instead of just using BUG() statements. The major content: - add a framework for generating errors that should result in filesystems going readonly. - keep FS state in disk super block. - make sure that all of resource will be freed and released at umount time. - make sure that fter FS is forced readonly on error, there will be no more disk change before FS is corrected. For this, we should stop write operation. After this patch is applied, the conversion from BUG() to such a framework can happen incrementally. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 24 +++ fs/btrfs/disk-io.c | 391 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 11 ++ fs/btrfs/file.c | 11 ++ fs/btrfs/super.c | 84 +++++++++++ fs/btrfs/transaction.c | 3 + 7 files changed, 523 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0995f4f68d7a..72195378bef9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) #define BTRFS_FSID_SIZE 16 #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) + +/* + * File system states + */ + +/* Errors detected */ +#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) + #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) @@ -1058,6 +1066,9 @@ struct btrfs_fs_info { unsigned metadata_ratio; void *bdev_holder; + + /* filesystem state */ + u64 fs_state; }; /* @@ -2203,6 +2214,11 @@ int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); +int btrfs_error_unpin_extent_range(struct btrfs_root *root, + u64 start, u64 end); +int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes); + /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -2556,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* super.c */ int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, + unsigned int line, int errno); + +#define btrfs_std_error(fs_info, errno) \ +do { \ + if ((errno)) \ + __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ +} while (0) /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b1dd4138072..1a3af9e8e0c4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,20 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, + int read_only); +static int btrfs_destroy_ordered_operations(struct btrfs_root *root); +static int btrfs_destroy_ordered_extents(struct btrfs_root *root); +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root); +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); +static int btrfs_destroy_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages, + int mark); +static int btrfs_destroy_pinned_extent(struct btrfs_root *root, + struct extent_io_tree *pinned_extents); +static int btrfs_cleanup_transaction(struct btrfs_root *root); /* * end_io_wq structs are used to do processing in task context when an IO is @@ -1738,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_iput; + /* check FS state, whether FS is broken. */ + fs_info->fs_state |= btrfs_super_flags(disk_super); + + btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); + ret = btrfs_parse_options(tree_root, options); if (ret) { err = ret; @@ -1968,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_set_opt(fs_info->mount_opt, SSD); } - if (btrfs_super_log_root(disk_super) != 0) { + /* do not make disk changes in broken FS */ + if (btrfs_super_log_root(disk_super) != 0 && + !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { @@ -2464,8 +2485,28 @@ int close_ctree(struct btrfs_root *root) smp_mb(); btrfs_put_block_group_cache(fs_info); + + /* + * Here come 2 situations when btrfs is broken to flip readonly: + * + * 1. when btrfs flips readonly somewhere else before + * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, + * and btrfs will skip to write sb directly to keep + * ERROR state on disk. + * + * 2. when btrfs flips readonly just in btrfs_commit_super, + * and in such case, btrfs cannnot write sb via btrfs_commit_super, + * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, + * btrfs will cleanup all FS resources first and write sb then. + */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { - ret = btrfs_commit_super(root); + ret = btrfs_commit_super(root); + if (ret) + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); + } + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + ret = btrfs_error_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } @@ -2641,6 +2682,352 @@ out: return 0; } +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, + int read_only) +{ + if (read_only) + return; + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + printk(KERN_WARNING "warning: mount fs with errors, " + "running btrfsck is recommended\n"); +} + +int btrfs_error_commit_super(struct btrfs_root *root) +{ + int ret; + + mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + + down_write(&root->fs_info->cleanup_work_sem); + up_write(&root->fs_info->cleanup_work_sem); + + /* cleanup FS via transaction */ + btrfs_cleanup_transaction(root); + + ret = write_ctree_super(NULL, root, 0); + + return ret; +} + +static int btrfs_destroy_ordered_operations(struct btrfs_root *root) +{ + struct btrfs_inode *btrfs_inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + mutex_lock(&root->fs_info->ordered_operations_mutex); + spin_lock(&root->fs_info->ordered_extent_lock); + + list_splice_init(&root->fs_info->ordered_operations, &splice); + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + ordered_operations); + + list_del_init(&btrfs_inode->ordered_operations); + + btrfs_invalidate_inodes(btrfs_inode->root); + } + + spin_unlock(&root->fs_info->ordered_extent_lock); + mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return 0; +} + +static int btrfs_destroy_ordered_extents(struct btrfs_root *root) +{ + struct list_head splice; + struct btrfs_ordered_extent *ordered; + struct inode *inode; + + INIT_LIST_HEAD(&splice); + + spin_lock(&root->fs_info->ordered_extent_lock); + + list_splice_init(&root->fs_info->ordered_extents, &splice); + while (!list_empty(&splice)) { + ordered = list_entry(splice.next, struct btrfs_ordered_extent, + root_extent_list); + + list_del_init(&ordered->root_extent_list); + atomic_inc(&ordered->refs); + + /* the inode may be getting freed (in sys_unlink path). */ + inode = igrab(ordered->inode); + + spin_unlock(&root->fs_info->ordered_extent_lock); + if (inode) + iput(inode); + + atomic_set(&ordered->refs, 1); + btrfs_put_ordered_extent(ordered); + + spin_lock(&root->fs_info->ordered_extent_lock); + } + + spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; +} + +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) +{ + struct rb_node *node; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_node *ref; + int ret = 0; + + delayed_refs = &trans->delayed_refs; + + spin_lock(&delayed_refs->lock); + if (delayed_refs->num_entries == 0) { + printk(KERN_INFO "delayed_refs has NO entry\n"); + return ret; + } + + node = rb_first(&delayed_refs->root); + while (node) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_next(node); + + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + + atomic_set(&ref->refs, 1); + if (btrfs_delayed_ref_is_head(ref)) { + struct btrfs_delayed_ref_head *head; + + head = btrfs_delayed_node_to_head(ref); + mutex_lock(&head->mutex); + kfree(head->extent_op); + delayed_refs->num_heads--; + if (list_empty(&head->cluster)) + delayed_refs->num_heads_ready--; + list_del_init(&head->cluster); + mutex_unlock(&head->mutex); + } + + spin_unlock(&delayed_refs->lock); + btrfs_put_delayed_ref(ref); + + cond_resched(); + spin_lock(&delayed_refs->lock); + } + + spin_unlock(&delayed_refs->lock); + + return ret; +} + +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +{ + struct btrfs_pending_snapshot *snapshot; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&t->pending_snapshots, &splice); + + while (!list_empty(&splice)) { + snapshot = list_entry(splice.next, + struct btrfs_pending_snapshot, + list); + + list_del_init(&snapshot->list); + + kfree(snapshot); + } + + return 0; +} + +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) +{ + struct btrfs_inode *btrfs_inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&root->fs_info->delalloc_inodes, &splice); + + spin_lock(&root->fs_info->delalloc_lock); + + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + delalloc_inodes); + + list_del_init(&btrfs_inode->delalloc_inodes); + + btrfs_invalidate_inodes(btrfs_inode->root); + } + + spin_unlock(&root->fs_info->delalloc_lock); + + return 0; +} + +static int btrfs_destroy_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages, + int mark) +{ + int ret; + struct page *page; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + u64 start = 0; + u64 end; + u64 offset; + unsigned long index; + + while (1) { + ret = find_first_extent_bit(dirty_pages, start, &start, &end, + mark); + if (ret) + break; + + clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); + while (start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; + page = find_get_page(btree_inode->i_mapping, index); + if (!page) + continue; + offset = page_offset(page); + + spin_lock(&dirty_pages->buffer_lock); + eb = radix_tree_lookup( + &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, + offset >> PAGE_CACHE_SHIFT); + spin_unlock(&dirty_pages->buffer_lock); + if (eb) { + ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, + &eb->bflags); + atomic_set(&eb->refs, 1); + } + if (PageWriteback(page)) + end_page_writeback(page); + + lock_page(page); + if (PageDirty(page)) { + clear_page_dirty_for_io(page); + spin_lock_irq(&page->mapping->tree_lock); + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irq(&page->mapping->tree_lock); + } + + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + } + } + + return ret; +} + +static int btrfs_destroy_pinned_extent(struct btrfs_root *root, + struct extent_io_tree *pinned_extents) +{ + struct extent_io_tree *unpin; + u64 start; + u64 end; + int ret; + + unpin = pinned_extents; + while (1) { + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + + /* opt_discard */ + ret = btrfs_error_discard_extent(root, start, end + 1 - start); + + clear_extent_dirty(unpin, start, end, GFP_NOFS); + btrfs_error_unpin_extent_range(root, start, end); + cond_resched(); + } + + return 0; +} + +static int btrfs_cleanup_transaction(struct btrfs_root *root) +{ + struct btrfs_transaction *t; + LIST_HEAD(list); + + WARN_ON(1); + + mutex_lock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + list_splice_init(&root->fs_info->trans_list, &list); + while (!list_empty(&list)) { + t = list_entry(list.next, struct btrfs_transaction, list); + if (!t) + break; + + btrfs_destroy_ordered_operations(root); + + btrfs_destroy_ordered_extents(root); + + btrfs_destroy_delayed_refs(t, root); + + btrfs_block_rsv_release(root, + &root->fs_info->trans_block_rsv, + t->dirty_pages.dirty_bytes); + + /* FIXME: cleanup wait for commit */ + t->in_commit = 1; + t->blocked = 1; + if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) + wake_up(&root->fs_info->transaction_blocked_wait); + + t->blocked = 0; + if (waitqueue_active(&root->fs_info->transaction_wait)) + wake_up(&root->fs_info->transaction_wait); + mutex_unlock(&root->fs_info->trans_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + t->commit_done = 1; + if (waitqueue_active(&t->commit_wait)) + wake_up(&t->commit_wait); + mutex_unlock(&root->fs_info->trans_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + + btrfs_destroy_pending_snapshots(t); + + btrfs_destroy_delalloc_inodes(root); + + spin_lock(&root->fs_info->new_trans_lock); + root->fs_info->running_transaction = NULL; + spin_unlock(&root->fs_info->new_trans_lock); + + btrfs_destroy_marked_extents(root, &t->dirty_pages, + EXTENT_DIRTY); + + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); + + t->use_count = 0; + list_del_init(&t->list); + memset(t, 0, sizeof(*t)); + kmem_cache_free(btrfs_transaction_cachep, t); + } + + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + mutex_unlock(&root->fs_info->trans_mutex); + + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); +int btrfs_error_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 055b837eab19..bcf303204f7f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8642,3 +8642,14 @@ out: btrfs_free_path(path); return ret; } + +int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +{ + return unpin_extent_range(root, start, end); +} + +int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + return btrfs_discard_extent(root, bytenr, num_bytes); +} diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 05df688c96f4..f903433f5bdf 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -892,6 +892,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, if (err) goto out; + /* + * If BTRFS flips readonly due to some impossible error + * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), + * although we have opened a file as writable, we have + * to stop this write operation to ensure FS consistency. + */ + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + err = -EROFS; + goto out; + } + file_update_time(file); BTRFS_I(inode)->sequence++; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2963376e77f4..52e903b0a293 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -54,6 +54,90 @@ static const struct super_operations btrfs_super_ops; +static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, + char nbuf[16]) +{ + char *errstr = NULL; + + switch (errno) { + case -EIO: + errstr = "IO failure"; + break; + case -ENOMEM: + errstr = "Out of memory"; + break; + case -EROFS: + errstr = "Readonly filesystem"; + break; + default: + if (nbuf) { + if (snprintf(nbuf, 16, "error %d", -errno) >= 0) + errstr = nbuf; + } + break; + } + + return errstr; +} + +static void __save_error_info(struct btrfs_fs_info *fs_info) +{ + /* + * today we only save the error info into ram. Long term we'll + * also send it down to the disk + */ + fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; +} + +/* NOTE: + * We move write_super stuff at umount in order to avoid deadlock + * for umount hold all lock. + */ +static void save_error_info(struct btrfs_fs_info *fs_info) +{ + __save_error_info(fs_info); +} + +/* btrfs handle error by forcing the filesystem readonly */ +static void btrfs_handle_error(struct btrfs_fs_info *fs_info) +{ + struct super_block *sb = fs_info->sb; + + if (sb->s_flags & MS_RDONLY) + return; + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + sb->s_flags |= MS_RDONLY; + printk(KERN_INFO "btrfs is forced readonly\n"); + } +} + +/* + * __btrfs_std_error decodes expected errors from the caller and + * invokes the approciate error response. + */ +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, + unsigned int line, int errno) +{ + struct super_block *sb = fs_info->sb; + char nbuf[16]; + const char *errstr; + + /* + * Special case: if the error is EROFS, and we're already + * under MS_RDONLY, then it is safe here. + */ + if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) + return; + + errstr = btrfs_decode_error(fs_info, errno, nbuf); + printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); + save_error_info(fs_info); + + btrfs_handle_error(fs_info); +} + static void btrfs_put_super(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 29e30d832ec9..bae5c7b8bbe2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; int ret; + + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + return ERR_PTR(-EROFS); again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) -- cgit v1.2.3 From cf78859f520f8275318f47d7864f4459d940cb6b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 17 Jan 2011 21:21:14 +0100 Subject: xfs: Do not name variables "panic" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On platforms that call panic() inside their BUG() macro (m68k/sun3, and all platforms that don't set HAVE_ARCH_BUG), compilation fails with: | fs/xfs/support/debug.c: In function ‘xfs_cmn_err’: | fs/xfs/support/debug.c:92: error: called object ‘panic’ is not a function as the local variable "panic" conflicts with the "panic()" function. Rename the local variable to resolve this. Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/xfs/support/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index e6cf955ec0fc..0df88897ef84 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -75,11 +75,11 @@ xfs_cmn_err( { struct va_format vaf; va_list args; - int panic = 0; + int do_panic = 0; if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { printk(KERN_ALERT "XFS: Transforming an alert into a BUG."); - panic = 1; + do_panic = 1; } va_start(args, fmt); @@ -89,7 +89,7 @@ xfs_cmn_err( printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf); va_end(args); - BUG_ON(panic); + BUG_ON(do_panic); } void -- cgit v1.2.3 From c14cc63a63e94d490ac6517a555113c30d420db4 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 18 Jan 2011 12:06:04 +0800 Subject: autofs4 - fix get_next_positive_dentry() The initialization condition in fs/autofs4/expire.c:get_next_positive_dentry() appears to be incorrect. If prev == NULL I believe that root should be returned. Further down, at the current dentry check for it being simple_positive() it looks like the d_lock for dentry p should be dropped instead of dentry ret, otherwise when p is assinged to ret we end up with no lock on p and a lost lock on ret, which leads to a deadlock. Signed-off-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/expire.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 3ed79d76c233..f43100b9662b 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -96,7 +96,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, struct dentry *p, *ret; if (prev == NULL) - return dget(prev); + return dget(root); spin_lock(&autofs4_lock); relock: @@ -133,7 +133,7 @@ again: spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); /* Negative dentry - try next */ if (!simple_positive(ret)) { - spin_unlock(&ret->d_lock); + spin_unlock(&p->d_lock); p = ret; goto again; } -- cgit v1.2.3 From 8931221411f9ff950de8fd686dc5ab881394cb9a Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 18 Jan 2011 12:06:10 +0800 Subject: vfs - fix dentry ref count in do_lookup() There is a ref count problem in fs/namei.c:do_lookup(). When walking in ref-walk mode, if follow_managed() returns a fail we need to drop dentry and possibly vfsmount. Clean up properly, as we do in the other caller of follow_managed(). Signed-off-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index b753192d8c3f..7d77f24d32a9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1272,8 +1272,10 @@ done: path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd->flags); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + path_put_conditional(path, nd); return err; + } *inode = path->dentry->d_inode; return 0; -- cgit v1.2.3 From c0bcc9d55252012805300ca01b9b7a143b4daf85 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 18 Jan 2011 12:06:15 +0800 Subject: autofs4 - fix debug print in autofs4_lookup() oz_mode isn't defined any more, use autofs4_oz_mode(sbi) instead. Signed-off-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/root.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 1dba035fc376..427129ab5292 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -488,7 +488,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s sbi = autofs4_sbi(dir->i_sb); DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", - current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); + current->pid, task_pgrp_nr(current), sbi->catatonic, + autofs4_oz_mode(sbi)); active = autofs4_lookup_active(dentry); if (active) { -- cgit v1.2.3 From 292c5ee802e9b969b84ee671a5e3001d94230f5b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Jan 2011 00:47:38 -0500 Subject: autofs4: keep symlink body in inode->i_private gets rid of all ->free()/->u.symlink machinery in autofs; we simply keep symlink bodies in inode->i_private and free them in ->evict_inode(). Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 5 ----- fs/autofs4/inode.c | 27 +++++++-------------------- fs/autofs4/root.c | 2 +- fs/autofs4/symlink.c | 3 +-- 4 files changed, 9 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 1f016bfb42d5..99a4af8d9c83 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -91,11 +91,6 @@ struct autofs_info { mode_t mode; size_t size; - - void (*free)(struct autofs_info *); - union { - const char *symlink; - } u; }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 9e1a9dad23e1..cf8abc793d50 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -22,14 +22,6 @@ #include "autofs_i.h" #include -static void ino_lnkfree(struct autofs_info *ino) -{ - if (ino->u.symlink) { - kfree(ino->u.symlink); - ino->u.symlink = NULL; - } -} - struct autofs_info *autofs4_init_ino(struct autofs_info *ino, struct autofs_sb_info *sbi, mode_t mode) { @@ -60,16 +52,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, ino->sbi = sbi; - if (reinit && ino->free) - (ino->free)(ino); - - memset(&ino->u, 0, sizeof(ino->u)); - - ino->free = NULL; - - if (S_ISLNK(mode)) - ino->free = ino_lnkfree; - return ino; } @@ -79,8 +61,6 @@ void autofs4_free_ino(struct autofs_info *ino) ino->dentry->d_fsdata = NULL; ino->dentry = NULL; } - if (ino->free) - (ino->free)(ino); kfree(ino); } @@ -136,9 +116,16 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } +static void autofs4_evict_inode(struct inode *inode) +{ + end_writeback(inode); + kfree(inode->i_private); +} + static const struct super_operations autofs4_sops = { .statfs = simple_statfs, .show_options = autofs4_show_options, + .evict_inode = autofs4_evict_inode, }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 427129ab5292..f47aceabf58f 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -561,6 +561,7 @@ static int autofs4_dir_symlink(struct inode *dir, kfree(ino); return -ENOMEM; } + inode->i_private = cp; d_add(dentry, inode); dentry->d_fsdata = ino; @@ -570,7 +571,6 @@ static int autofs4_dir_symlink(struct inode *dir, if (p_ino && dentry->d_parent != dentry) atomic_inc(&p_ino->count); - ino->u.symlink = cp; dir->i_mtime = CURRENT_TIME; return 0; diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index b4ea82934d2e..f27c094a1919 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c @@ -14,8 +14,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); - nd_set_link(nd, (char *)ino->u.symlink); + nd_set_link(nd, dentry->d_inode->i_private); return NULL; } -- cgit v1.2.3 From 14a2f00bde7668fe18d1c8355d26c7c96961e1f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 17:13:33 -0500 Subject: autofs4: autofs4_mkroot() is not different from autofs4_init_ino() Kill it. Mind you, it's been an obfuscated call of autofs4_init_ino() ever since 2.3.99pre6-4... Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/inode.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index cf8abc793d50..7421b47b1bb9 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -215,17 +215,6 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, return (*pipefd < 0); } -static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) -{ - struct autofs_info *ino; - - ino = autofs4_init_ino(NULL, sbi, S_IFDIR | 0755); - if (!ino) - return NULL; - - return ino; -} - int autofs4_fill_super(struct super_block *s, void *data, int silent) { struct inode * root_inode; @@ -269,7 +258,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) /* * Get the root inode and dentry, but defer checking for errors. */ - ino = autofs4_mkroot(sbi); + ino = autofs4_init_ino(NULL, sbi, S_IFDIR | 0755); if (!ino) goto fail_free; root_inode = autofs4_get_inode(s, ino); -- cgit v1.2.3 From 09f12c03fa699ce7d030c47add60577138927d4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 17:20:23 -0500 Subject: autofs4: pass mode to autofs4_get_inode() explicitly In all cases we'd set inf->mode to know value just before passing it to autofs4_get_inode(). That kills the need to store it in autofs_info and pass it to autofs_init_ino() Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 5 ++--- fs/autofs4/inode.c | 16 ++++++++-------- fs/autofs4/root.c | 10 +++++----- 3 files changed, 15 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 99a4af8d9c83..c6d66db67ff1 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -89,7 +89,6 @@ struct autofs_info { uid_t uid; gid_t gid; - mode_t mode; size_t size; }; @@ -170,7 +169,7 @@ static inline int autofs4_ispending(struct dentry *dentry) return 0; } -struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *); +struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *, mode_t); void autofs4_free_ino(struct autofs_info *); /* Expiration */ @@ -280,7 +279,7 @@ static inline void managed_dentry_clear_managed(struct dentry *dentry) /* Initializing function */ int autofs4_fill_super(struct super_block *, void *, int); -struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode); +struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi); /* Queue management functions */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7421b47b1bb9..6b6f43f00c46 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -23,7 +23,7 @@ #include struct autofs_info *autofs4_init_ino(struct autofs_info *ino, - struct autofs_sb_info *sbi, mode_t mode) + struct autofs_sb_info *sbi) { int reinit = 1; @@ -47,7 +47,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, ino->uid = 0; ino->gid = 0; - ino->mode = mode; ino->last_used = jiffies; ino->sbi = sbi; @@ -258,10 +257,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) /* * Get the root inode and dentry, but defer checking for errors. */ - ino = autofs4_init_ino(NULL, sbi, S_IFDIR | 0755); + ino = autofs4_init_ino(NULL, sbi); if (!ino) goto fail_free; - root_inode = autofs4_get_inode(s, ino); + root_inode = autofs4_get_inode(s, ino, S_IFDIR | 0755); if (!root_inode) goto fail_ino; @@ -345,14 +344,15 @@ fail_unlock: } struct inode *autofs4_get_inode(struct super_block *sb, - struct autofs_info *inf) + struct autofs_info *inf, + mode_t mode) { struct inode *inode = new_inode(sb); if (inode == NULL) return NULL; - inode->i_mode = inf->mode; + inode->i_mode = mode; if (sb->s_root) { inode->i_uid = sb->s_root->d_inode->i_uid; inode->i_gid = sb->s_root->d_inode->i_gid; @@ -360,11 +360,11 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_ino = get_next_ino(); - if (S_ISDIR(inf->mode)) { + if (S_ISDIR(mode)) { inode->i_nlink = 2; inode->i_op = &autofs4_dir_inode_operations; inode->i_fop = &autofs4_dir_operations; - } else if (S_ISLNK(inf->mode)) { + } else if (S_ISLNK(mode)) { inode->i_size = inf->size; inode->i_op = &autofs4_symlink_inode_operations; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f47aceabf58f..e55dcdbeb450 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -508,7 +508,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) __managed_dentry_set_managed(dentry); - ino = autofs4_init_ino(NULL, sbi, 0555); + ino = autofs4_init_ino(NULL, sbi); if (!ino) return ERR_PTR(-ENOMEM); @@ -538,7 +538,7 @@ static int autofs4_dir_symlink(struct inode *dir, if (!autofs4_oz_mode(sbi)) return -EACCES; - ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); + ino = autofs4_init_ino(ino, sbi); if (!ino) return -ENOMEM; @@ -554,7 +554,7 @@ static int autofs4_dir_symlink(struct inode *dir, strcpy(cp, symname); - inode = autofs4_get_inode(dir->i_sb, ino); + inode = autofs4_get_inode(dir->i_sb, ino, S_IFLNK | 0555); if (!inode) { kfree(cp); if (!dentry->d_fsdata) @@ -733,13 +733,13 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) DPRINTK("dentry %p, creating %.*s", dentry, dentry->d_name.len, dentry->d_name.name); - ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); + ino = autofs4_init_ino(ino, sbi); if (!ino) return -ENOMEM; autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, ino); + inode = autofs4_get_inode(dir->i_sb, ino, S_IFDIR | 0555); if (!inode) { if (!dentry->d_fsdata) kfree(ino); -- cgit v1.2.3 From 0bf71d4d005176f6b6587ba64a377f9798213f21 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 17:39:15 -0500 Subject: autofs4: kill ->size in autofs_info It's used only to pass the length of symlink body to autofs4_get_inode() in autofs4_dir_symlink(). We can bloody well set inode->i_size in autofs4_dir_symlink() directly and be done with that. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 2 -- fs/autofs4/inode.c | 2 -- fs/autofs4/root.c | 5 +++-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c6d66db67ff1..0925bacb5c3c 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -88,8 +88,6 @@ struct autofs_info { uid_t uid; gid_t gid; - - size_t size; }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 6b6f43f00c46..ac1a99ce820b 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -38,7 +38,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, if (!reinit) { ino->flags = 0; ino->dentry = NULL; - ino->size = 0; INIT_LIST_HEAD(&ino->active); ino->active_count = 0; INIT_LIST_HEAD(&ino->expiring); @@ -365,7 +364,6 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_op = &autofs4_dir_inode_operations; inode->i_fop = &autofs4_dir_operations; } else if (S_ISLNK(mode)) { - inode->i_size = inf->size; inode->i_op = &autofs4_symlink_inode_operations; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e55dcdbeb450..1ad3c6ca9b03 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -530,6 +530,7 @@ static int autofs4_dir_symlink(struct inode *dir, struct autofs_info *ino = autofs4_dentry_ino(dentry); struct autofs_info *p_ino; struct inode *inode; + size_t size = strlen(symname); char *cp; DPRINTK("%s <- %.*s", symname, @@ -544,8 +545,7 @@ static int autofs4_dir_symlink(struct inode *dir, autofs4_del_active(dentry); - ino->size = strlen(symname); - cp = kmalloc(ino->size + 1, GFP_KERNEL); + cp = kmalloc(size + 1, GFP_KERNEL); if (!cp) { if (!dentry->d_fsdata) kfree(ino); @@ -562,6 +562,7 @@ static int autofs4_dir_symlink(struct inode *dir, return -ENOMEM; } inode->i_private = cp; + inode->i_size = size; d_add(dentry, inode); dentry->d_fsdata = ino; -- cgit v1.2.3 From 726a5e0688fd344110d8f2979d87f243a4ba1a48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 17:43:52 -0500 Subject: autofs4: autofs4_get_inode() doesn't need autofs_info * argument anymore Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 2 +- fs/autofs4/inode.c | 6 ++---- fs/autofs4/root.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 0925bacb5c3c..8f15162f1672 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -167,7 +167,7 @@ static inline int autofs4_ispending(struct dentry *dentry) return 0; } -struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *, mode_t); +struct inode *autofs4_get_inode(struct super_block *, mode_t); void autofs4_free_ino(struct autofs_info *); /* Expiration */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac1a99ce820b..b3f9477c9745 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -259,7 +259,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) ino = autofs4_init_ino(NULL, sbi); if (!ino) goto fail_free; - root_inode = autofs4_get_inode(s, ino, S_IFDIR | 0755); + root_inode = autofs4_get_inode(s, S_IFDIR | 0755); if (!root_inode) goto fail_ino; @@ -342,9 +342,7 @@ fail_unlock: return -EINVAL; } -struct inode *autofs4_get_inode(struct super_block *sb, - struct autofs_info *inf, - mode_t mode) +struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode) { struct inode *inode = new_inode(sb); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 1ad3c6ca9b03..83e5379c5ade 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -554,7 +554,7 @@ static int autofs4_dir_symlink(struct inode *dir, strcpy(cp, symname); - inode = autofs4_get_inode(dir->i_sb, ino, S_IFLNK | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555); if (!inode) { kfree(cp); if (!dentry->d_fsdata) @@ -740,7 +740,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, ino, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); if (!inode) { if (!dentry->d_fsdata) kfree(ino); -- cgit v1.2.3 From 5a37db302e698a83209eff22ca8f3fd05eb1d84b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 18:29:35 -0500 Subject: autofs4: mkdir and symlink always get a dentry that had passed lookup ... so ->d_fsdata will have been set up before we get there Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/root.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 83e5379c5ade..a5b93e8f49b5 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -539,18 +539,15 @@ static int autofs4_dir_symlink(struct inode *dir, if (!autofs4_oz_mode(sbi)) return -EACCES; - ino = autofs4_init_ino(ino, sbi); - if (!ino) - return -ENOMEM; + BUG_ON(!ino); + + autofs4_init_ino(ino, sbi); autofs4_del_active(dentry); cp = kmalloc(size + 1, GFP_KERNEL); - if (!cp) { - if (!dentry->d_fsdata) - kfree(ino); + if (!cp) return -ENOMEM; - } strcpy(cp, symname); @@ -565,8 +562,7 @@ static int autofs4_dir_symlink(struct inode *dir, inode->i_size = size; d_add(dentry, inode); - dentry->d_fsdata = ino; - ino->dentry = dget(dentry); + dget(dentry); atomic_inc(&ino->count); p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) @@ -734,25 +730,21 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) DPRINTK("dentry %p, creating %.*s", dentry, dentry->d_name.len, dentry->d_name.name); - ino = autofs4_init_ino(ino, sbi); - if (!ino) - return -ENOMEM; + BUG_ON(!ino); + + autofs4_init_ino(ino, sbi); autofs4_del_active(dentry); inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); - if (!inode) { - if (!dentry->d_fsdata) - kfree(ino); + if (!inode) return -ENOMEM; - } d_add(dentry, inode); if (sbi->version < 5) autofs_set_leaf_automount_flags(dentry); - dentry->d_fsdata = ino; - ino->dentry = dget(dentry); + dget(dentry); atomic_inc(&ino->count); p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) -- cgit v1.2.3 From 26e6c910670171410577c7df2aebe94cef76e150 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 18:43:40 -0500 Subject: autofs4: split autofs4_init_ino() split init_ino into new_ino and clean_ino; the former is what used to be init_ino(NULL, sbi), the latter is for cases where we passed non-NULL ino. Lose unused arguments. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 3 ++- fs/autofs4/inode.c | 32 ++++++++++---------------------- fs/autofs4/root.c | 6 +++--- 3 files changed, 15 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 8f15162f1672..bfa0c6e542f2 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -277,7 +277,8 @@ static inline void managed_dentry_clear_managed(struct dentry *dentry) /* Initializing function */ int autofs4_fill_super(struct super_block *, void *, int); -struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi); +struct autofs_info *autofs4_new_ino(struct autofs_sb_info *); +void autofs4_clean_ino(struct autofs_info *); /* Queue management functions */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index b3f9477c9745..0df0c7c46fa2 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -22,35 +22,23 @@ #include "autofs_i.h" #include -struct autofs_info *autofs4_init_ino(struct autofs_info *ino, - struct autofs_sb_info *sbi) +struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) { - int reinit = 1; - - if (ino == NULL) { - reinit = 0; - ino = kmalloc(sizeof(*ino), GFP_KERNEL); - } - - if (ino == NULL) - return NULL; - - if (!reinit) { - ino->flags = 0; - ino->dentry = NULL; + struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL); + if (ino) { INIT_LIST_HEAD(&ino->active); - ino->active_count = 0; INIT_LIST_HEAD(&ino->expiring); - atomic_set(&ino->count, 0); + ino->last_used = jiffies; + ino->sbi = sbi; } + return ino; +} +void autofs4_clean_ino(struct autofs_info *ino) +{ ino->uid = 0; ino->gid = 0; ino->last_used = jiffies; - - ino->sbi = sbi; - - return ino; } void autofs4_free_ino(struct autofs_info *ino) @@ -256,7 +244,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) /* * Get the root inode and dentry, but defer checking for errors. */ - ino = autofs4_init_ino(NULL, sbi); + ino = autofs4_new_ino(sbi); if (!ino) goto fail_free; root_inode = autofs4_get_inode(s, S_IFDIR | 0755); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a5b93e8f49b5..f7c97c084c1f 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -508,7 +508,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) __managed_dentry_set_managed(dentry); - ino = autofs4_init_ino(NULL, sbi); + ino = autofs4_new_ino(sbi); if (!ino) return ERR_PTR(-ENOMEM); @@ -541,7 +541,7 @@ static int autofs4_dir_symlink(struct inode *dir, BUG_ON(!ino); - autofs4_init_ino(ino, sbi); + autofs4_clean_ino(ino); autofs4_del_active(dentry); @@ -732,7 +732,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) BUG_ON(!ino); - autofs4_init_ino(ino, sbi); + autofs4_clean_ino(ino); autofs4_del_active(dentry); -- cgit v1.2.3 From b89b12b46211d971d75e5ca8249817bc9e11c453 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jan 2011 21:42:32 -0500 Subject: autofs4: clean ->d_release() and autofs4_free_ino() up The latter is called only when both ino and dentry are about to be freed, so cleaning ->d_fsdata and ->dentry is pointless. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 1 - fs/autofs4/inode.c | 4 ---- fs/autofs4/root.c | 30 ++++++++++++++++-------------- 3 files changed, 16 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index bfa0c6e542f2..54f923792728 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -338,5 +338,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry) return; } -void autofs4_dentry_release(struct dentry *); extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 0df0c7c46fa2..180fa2425e49 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -43,10 +43,6 @@ void autofs4_clean_ino(struct autofs_info *ino) void autofs4_free_ino(struct autofs_info *ino) { - if (ino->dentry) { - ino->dentry->d_fsdata = NULL; - ino->dentry = NULL; - } kfree(ino); } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f7c97c084c1f..014e7aba3b08 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -37,6 +37,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file); static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); static struct vfsmount *autofs4_d_automount(struct path *); static int autofs4_d_manage(struct dentry *, bool, bool); +static void autofs4_dentry_release(struct dentry *); const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -138,25 +139,26 @@ out: return dcache_dir_open(inode, file); } -void autofs4_dentry_release(struct dentry *de) +static void autofs4_dentry_release(struct dentry *de) { - struct autofs_info *inf; + struct autofs_info *ino = autofs4_dentry_ino(de); + struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); DPRINTK("releasing %p", de); - inf = autofs4_dentry_ino(de); - if (inf) { - struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); - if (sbi) { - spin_lock(&sbi->lookup_lock); - if (!list_empty(&inf->active)) - list_del(&inf->active); - if (!list_empty(&inf->expiring)) - list_del(&inf->expiring); - spin_unlock(&sbi->lookup_lock); - } - autofs4_free_ino(inf); + if (!ino) + return; + + if (sbi) { + spin_lock(&sbi->lookup_lock); + if (!list_empty(&ino->active)) + list_del(&ino->active); + if (!list_empty(&ino->expiring)) + list_del(&ino->expiring); + spin_unlock(&sbi->lookup_lock); } + + autofs4_free_ino(ino); } static struct dentry *autofs4_lookup_active(struct dentry *dentry) -- cgit v1.2.3 From 23c3010808de86f21436eb822aacfa551bfc17e4 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 14 Jan 2011 22:39:16 -0600 Subject: GFS2: remove iopen glocks from cache on failed deletes When a file gets deleted on GFS2, if a node can't get an exclusive lock on the file's iopen glock, it punts on actually freeing up the space, because another node is using the file. When it does this, it needs to drop the iopen glock from its cache so that the other node can get an exclusive lock on it. Now, gfs2_delete_inode() sets GL_NOCACHE before dropping the shared lock on the iopen glock in preparation for grabbing it in the exclusive state. Since the node needs the glock in the exclusive state, dropping the shared lock from the cache doesn't slow down the case where no other nodes are using the file. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 16c2ecac7eb7..ec73ed70bae1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1336,6 +1336,7 @@ static void gfs2_evict_inode(struct inode *inode) if (error) goto out_truncate; + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(&ip->i_iopen_gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); error = gfs2_glock_nq(&ip->i_iopen_gh); -- cgit v1.2.3 From 24d9765fc18c7838ccdbb0d71fb706321d9b824c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 18 Jan 2011 14:49:08 +0000 Subject: GFS2: Fix error path in gfs2_lookup_by_inum() In the (impossible, except if there is fs corruption) error path in gfs2_lookup_by_inum() if the call to gfs2_inode_refresh() fails, it was leaving the function by calling iput() rather than iget_failed(). This would cause future lookups of the same inode to block forever. This patch fixes the problem by moving the call to gfs2_inode_refresh() into gfs2_inode_lookup() where iget_failed() is part of the error path already. Also this cleans up some unreachable code and makes gfs2_set_iop() static. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 72 ++++++++++++++++++--------------------------------------- fs/gfs2/inode.h | 1 - 2 files changed, 22 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2232b3c780bd..7aa7d4f8984a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -74,16 +74,14 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) } /** - * GFS2 lookup code fills in vfs inode contents based on info obtained - * from directory entry inside gfs2_inode_lookup(). This has caused issues - * with NFS code path since its get_dentry routine doesn't have the relevant - * directory entry when gfs2_inode_lookup() is invoked. Part of the code - * segment inside gfs2_inode_lookup code needs to get moved around. + * gfs2_set_iop - Sets inode operations + * @inode: The inode with correct i_mode filled in * - * Clears I_NEW as well. - **/ + * GFS2 lookup code fills in vfs inode contents based on info obtained + * from directory entry inside gfs2_inode_lookup(). + */ -void gfs2_set_iop(struct inode *inode) +static void gfs2_set_iop(struct inode *inode) { struct gfs2_sbd *sdp = GFS2_SB(inode); umode_t mode = inode->i_mode; @@ -106,8 +104,6 @@ void gfs2_set_iop(struct inode *inode) inode->i_op = &gfs2_file_iops; init_special_inode(inode, inode->i_mode, inode->i_rdev); } - - unlock_new_inode(inode); } /** @@ -119,10 +115,8 @@ void gfs2_set_iop(struct inode *inode) * Returns: A VFS inode, or an error */ -struct inode *gfs2_inode_lookup(struct super_block *sb, - unsigned int type, - u64 no_addr, - u64 no_formal_ino) +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, + u64 no_addr, u64 no_formal_ino) { struct inode *inode; struct gfs2_inode *ip; @@ -152,51 +146,37 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail_iopen; - ip->i_iopen_gh.gh_gl->gl_object = ip; + ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); io_gl = NULL; - if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) - goto gfs2_nfsbypass; - - inode->i_mode = DT2IF(type); - - /* - * We must read the inode in order to work out its type in - * this case. Note that this doesn't happen often as we normally - * know the type beforehand. This code path only occurs during - * unlinked inode recovery (where it is safe to do this glock, - * which is not true in the general case). - */ if (type == DT_UNKNOWN) { - struct gfs2_holder gh; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (unlikely(error)) - goto fail_glock; - /* Inode is now uptodate */ - gfs2_glock_dq_uninit(&gh); + /* Inode glock must be locked already */ + error = gfs2_inode_refresh(GFS2_I(inode)); + if (error) + goto fail_refresh; + } else { + inode->i_mode = DT2IF(type); } gfs2_set_iop(inode); + unlock_new_inode(inode); } -gfs2_nfsbypass: return inode; -fail_glock: - gfs2_glock_dq(&ip->i_iopen_gh); + +fail_refresh: + ip->i_iopen_gh.gh_gl->gl_object = NULL; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_iopen: if (io_gl) gfs2_glock_put(io_gl); fail_put: - if (inode->i_state & I_NEW) - ip->i_gl->gl_object = NULL; + ip->i_gl->gl_object = NULL; gfs2_glock_put(ip->i_gl); fail: - if (inode->i_state & I_NEW) - iget_failed(inode); - else - iput(inode); + iget_failed(inode); return ERR_PTR(error); } @@ -221,14 +201,6 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, if (IS_ERR(inode)) goto fail; - error = gfs2_inode_refresh(GFS2_I(inode)); - if (error) - goto fail_iput; - - /* Pick up the works we bypass in gfs2_inode_lookup */ - if (inode->i_state & I_NEW) - gfs2_set_iop(inode); - /* Two extra checks for NFS only */ if (no_formal_ino) { error = -ESTALE; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 732a183efdb3..3e00a66e7cbd 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -96,7 +96,6 @@ err: return -EIO; } -extern void gfs2_set_iop(struct inode *inode); extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, u64 no_addr, u64 no_formal_ino); extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, -- cgit v1.2.3 From 50aac4fec503960380ab594a93a6fbfdf3f8915f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 07:59:40 -0800 Subject: ceph: fix cap_wanted_delay_{min,max} mount option initialization These were initialized to 0 instead of the default, fallout from the RBD refactor in 3d14c5d2b6e15c21d8e5467dc62d33127c23a644. Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bf6f0f34082a..9c5085465a63 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; -- cgit v1.2.3 From 24be0c481067560b11441e794e27f166a3568863 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 08:48:06 -0800 Subject: ceph: fix erroneous cap flush to non-auth mds The int flushing is global and not clear on each iteration of the loop, which can cause a second flush of caps to any MDSs with ids greater than the auth. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 60d27bc9eb83..f654c7e933ac 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1658,6 +1658,8 @@ ack: if (cap == ci->i_auth_cap && ci->i_dirty_caps) flushing = __mark_caps_flushing(inode, session); + else + flushing = 0; mds = cap->mds; /* remember mds, so we don't repeat */ sent++; -- cgit v1.2.3 From 088b3f5e9ee2649f5cfc2f08d8ce654e3eeba310 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 08:56:01 -0800 Subject: ceph: fix flushing of caps vs cap import If we are mid-flush and a cap is migrated to another node, we need to resend the cap flush message to the new MDS, and do so with the original flush_seq to avoid leaking across a sync boundary. Previously we didn't redo the flush (we only flushed newly dirty data), which would cause a later sync to hang forever. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f654c7e933ac..7def3f5903dd 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1560,9 +1560,10 @@ retry_locked: /* NOTE: no side-effects allowed, until we take s_mutex */ revoking = cap->implemented & ~cap->issued; - if (revoking) - dout(" mds%d revoking %s\n", cap->mds, - ceph_cap_string(revoking)); + dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented), + ceph_cap_string(revoking)); if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { @@ -1942,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, } } +static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap *cap; + int delayed = 0; + + spin_lock(&inode->i_lock); + cap = ci->i_auth_cap; + dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, + ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); + __ceph_flush_snaps(ci, &session, 1); + if (ci->i_flushing_caps) { + delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + cap->issued | cap->implemented, + ci->i_flushing_caps, NULL); + if (delayed) { + spin_lock(&inode->i_lock); + __cap_delay_requeue(mdsc, ci); + spin_unlock(&inode->i_lock); + } + } else { + spin_unlock(&inode->i_lock); + } +} + /* * Take references to capabilities we hold, so that we don't release @@ -2689,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ceph_add_cap(inode, session, cap_id, -1, issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, NULL /* no caps context */); - try_flush_caps(inode, session, NULL); + kick_flushing_inode_caps(mdsc, session, inode); up_read(&mdsc->snap_rwsem); /* make sure we re-request max_size, if necessary */ -- cgit v1.2.3 From 7e57b81c7688c762bc9e775bc83f9fc17946f527 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 09:00:01 -0800 Subject: ceph: avoid immediate cap check after import The NODELAY flag avoids the heuristics that delay cap (issued/wanted) release. There's no reason for that after we import a cap, and it kills whatever benefit we get from those delays. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7def3f5903dd..6b61ded701e1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2817,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, snaptrace_len); - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, - session); + ceph_check_caps(ceph_inode(inode), 0, session); goto done_unlocked; } -- cgit v1.2.3 From 12fed00de963433128b5366a21a55808fab2f756 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 17 Jan 2011 20:15:44 +0300 Subject: CIFS: Fix oplock break handling (try #2) When we get oplock break notification we should set the appropriate value of OplockLevel field in oplock break acknowledge according to the oplock level held by the client in this time. As we only can have level II oplock or no oplock in the case of oplock break, we should be aware only about clientCanCacheRead field in cifsInodeInfo structure. Also fix bug connected with wrong interpretation of OplockLevel field during oplock break notification processing. Signed-off-by: Pavel Shilovsky Cc: Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 2 +- fs/cifs/cifssmb.c | 4 +++- fs/cifs/file.c | 21 +++++++++++---------- fs/cifs/misc.c | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e6d1481b16c1..95d5dbbb4c7a 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -347,7 +347,7 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 netfid, const __u64 len, const __u64 offset, const __u32 numUnlock, const __u32 numLock, const __u8 lockType, - const bool waitFlag); + const bool waitFlag, const __u8 oplock_level); extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, struct file_lock *, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2f6795e524d3..3652cc60314c 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1663,7 +1663,8 @@ int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const __u64 len, const __u64 offset, const __u32 numUnlock, - const __u32 numLock, const __u8 lockType, const bool waitFlag) + const __u32 numLock, const __u8 lockType, + const bool waitFlag, const __u8 oplock_level) { int rc = 0; LOCK_REQ *pSMB = NULL; @@ -1691,6 +1692,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, pSMB->NumberOfLocks = cpu_to_le16(numLock); pSMB->NumberOfUnlocks = cpu_to_le16(numUnlock); pSMB->LockType = lockType; + pSMB->OplockLevel = oplock_level; pSMB->AndXCommand = 0xFF; /* none */ pSMB->Fid = smb_file_id; /* netfid stays le */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d843631c028d..af371910f543 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -726,12 +726,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) /* BB we could chain these into one lock request BB */ rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, - 0, 1, lockType, 0 /* wait flag */ ); + 0, 1, lockType, 0 /* wait flag */, 0); if (rc == 0) { rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, 1 /* numUnlock */ , 0 /* numLock */ , lockType, - 0 /* wait flag */ ); + 0 /* wait flag */, 0); pfLock->fl_type = F_UNLCK; if (rc != 0) cERROR(1, "Error unlocking previously locked " @@ -748,13 +748,13 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, 0, 1, lockType | LOCKING_ANDX_SHARED_LOCK, - 0 /* wait flag */); + 0 /* wait flag */, 0); if (rc == 0) { rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, 1, 0, lockType | LOCKING_ANDX_SHARED_LOCK, - 0 /* wait flag */); + 0 /* wait flag */, 0); pfLock->fl_type = F_RDLCK; if (rc != 0) cERROR(1, "Error unlocking " @@ -797,8 +797,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) if (numLock) { rc = CIFSSMBLock(xid, tcon, netfid, length, - pfLock->fl_start, - 0, numLock, lockType, wait_flag); + pfLock->fl_start, 0, numLock, lockType, + wait_flag, 0); if (rc == 0) { /* For Windows locks we must store them. */ @@ -818,9 +818,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) (pfLock->fl_start + length) >= (li->offset + li->length)) { stored_rc = CIFSSMBLock(xid, tcon, - netfid, - li->length, li->offset, - 1, 0, li->type, false); + netfid, li->length, + li->offset, 1, 0, + li->type, false, 0); if (stored_rc) rc = stored_rc; else { @@ -2192,7 +2192,8 @@ void cifs_oplock_break(struct work_struct *work) */ if (!cfile->oplock_break_cancelled) { rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0, - 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false); + 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false, + cinode->clientCanCacheRead ? 1 : 0); cFYI(1, "Oplock release rc = %d", rc); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 43f10281bc19..09bfcf08a90f 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -571,7 +571,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pCifsInode = CIFS_I(netfile->dentry->d_inode); cifs_set_oplock_level(pCifsInode, - pSMB->OplockLevel); + pSMB->OplockLevel ? OPLOCK_READ : 0); /* * cifs_oplock_break_put() can't be called * from here. Get reference after queueing -- cgit v1.2.3 From 941b853d779de3298e39f1eb4e252984464eaea8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:01 -0500 Subject: cifs: don't fail writepages on -EAGAIN errors If CIFSSMBWrite2 returns -EAGAIN, then the error should be considered temporary. CIFS should retry the write instead of setting an error on the mapping and returning. For WB_SYNC_ALL, just retry the write immediately. In the WB_SYNC_NONE case, call redirty_page_for_writeback on all of the pages that didn't get written out and then move on. Also, fix up the handling of a short write with a successful return code. MS-CIFS says that 0 bytes_written means ENOSPC or EFBIG. It doesn't mention what a short, but non-zero write means, so for now treat it as we would an -EAGAIN return. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/file.c | 49 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index af371910f543..cfa2e5ebcafe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1377,6 +1377,7 @@ retry: break; } if (n_iov) { +retry_write: open_file = find_writable_file(CIFS_I(mapping->host), false); if (!open_file) { @@ -1389,31 +1390,55 @@ retry: &bytes_written, iov, n_iov, long_op); cifsFileInfo_put(open_file); - cifs_update_eof(cifsi, offset, bytes_written); } - if (rc || bytes_written < bytes_to_write) { - cERROR(1, "Write2 ret %d, wrote %d", - rc, bytes_written); - mapping_set_error(mapping, rc); - } else { + cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written); + + /* + * For now, treat a short write as if nothing got + * written. A zero length write however indicates + * ENOSPC or EFBIG. We have no way to know which + * though, so call it ENOSPC for now. EFBIG would + * get translated to AS_EIO anyway. + * + * FIXME: make it take into account the data that did + * get written + */ + if (rc == 0) { + if (bytes_written == 0) + rc = -ENOSPC; + else if (bytes_written < bytes_to_write) + rc = -EAGAIN; + } + + /* retry on data-integrity flush */ + if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) + goto retry_write; + + /* fix the stats and EOF */ + if (bytes_written > 0) { cifs_stats_bytes_written(tcon, bytes_written); + cifs_update_eof(cifsi, offset, bytes_written); } for (i = 0; i < n_iov; i++) { page = pvec.pages[first + i]; - /* Should we also set page error on - success rc but too little data written? */ - /* BB investigate retry logic on temporary - server crash cases and how recovery works - when page marked as error */ - if (rc) + /* on retryable write error, redirty page */ + if (rc == -EAGAIN) + redirty_page_for_writepage(wbc, page); + else if (rc != 0) SetPageError(page); kunmap(page); unlock_page(page); end_page_writeback(page); page_cache_release(page); } + + if (rc != -EAGAIN) + mapping_set_error(mapping, rc); + else + rc = 0; + if ((wbc->nr_to_write -= n_iov) <= 0) done = 1; index = next; -- cgit v1.2.3 From 9d78315b03fc91228306db42edc533efa69cb518 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:01 -0500 Subject: cifs: no need to mark smb_ses_list as cifs_demultiplex_thread is exiting The TCP_Server_Info is refcounted and every SMB session holds a reference to it. Thus, smb_ses_list is always going to be empty when cifsd is coming down. This is dead code. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 44 +++----------------------------------------- 1 file changed, 3 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9f59887badd2..75b538f50b12 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -346,7 +346,6 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) struct kvec iov; struct socket *csocket = server->ssocket; struct list_head *tmp; - struct cifsSesInfo *ses; struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; char temp; @@ -677,44 +676,19 @@ multi_t2_fnd: if (smallbuf) /* no sense logging a debug message if NULL */ cifs_small_buf_release(smallbuf); - /* - * BB: we shouldn't have to do any of this. It shouldn't be - * possible to exit from the thread with active SMB sessions - */ - spin_lock(&cifs_tcp_ses_lock); - if (list_empty(&server->pending_mid_q)) { - /* loop through server session structures attached to this and - mark them dead */ - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifsSesInfo, - smb_ses_list); - ses->status = CifsExiting; - ses->server = NULL; - } - spin_unlock(&cifs_tcp_ses_lock); - } else { - /* although we can not zero the server struct pointer yet, - since there are active requests which may depnd on them, - mark the corresponding SMB sessions as exiting too */ - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifsSesInfo, - smb_ses_list); - ses->status = CifsExiting; - } - + if (!list_empty(&server->pending_mid_q)) { spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { - mid_entry = list_entry(tmp, struct mid_q_entry, qhead); + mid_entry = list_entry(tmp, struct mid_q_entry, qhead); if (mid_entry->midState == MID_REQUEST_SUBMITTED) { cFYI(1, "Clearing Mid 0x%x - waking up ", - mid_entry->mid); + mid_entry->mid); task_to_wake = mid_entry->tsk; if (task_to_wake) wake_up_process(task_to_wake); } } spin_unlock(&GlobalMid_Lock); - spin_unlock(&cifs_tcp_ses_lock); /* 1/8th of sec is more than enough time for them to exit */ msleep(125); } @@ -732,18 +706,6 @@ multi_t2_fnd: coming home not much else we can do but free the memory */ } - /* last chance to mark ses pointers invalid - if there are any pointing to this (e.g - if a crazy root user tried to kill cifsd - kernel thread explicitly this might happen) */ - /* BB: This shouldn't be necessary, see above */ - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); - ses->server = NULL; - } - spin_unlock(&cifs_tcp_ses_lock); - kfree(server->hostname); task_to_wake = xchg(&server->tsk, NULL); kfree(server); -- cgit v1.2.3 From c5797a945cac4c470f0113fc839c521aab0d799d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:01 -0500 Subject: cifs: make wait_for_free_request take a TCP_Server_Info pointer The cifsSesInfo pointer is only used to get at the server. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 59ca81b16919..9a14f77e0ab2 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -244,31 +244,31 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, return smb_sendv(server, &iov, 1); } -static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) +static int wait_for_free_request(struct TCP_Server_Info *server, + const int long_op) { if (long_op == CIFS_ASYNC_OP) { /* oplock breaks must not be held up */ - atomic_inc(&ses->server->inFlight); + atomic_inc(&server->inFlight); return 0; } spin_lock(&GlobalMid_Lock); while (1) { - if (atomic_read(&ses->server->inFlight) >= - cifs_max_pending){ + if (atomic_read(&server->inFlight) >= cifs_max_pending) { spin_unlock(&GlobalMid_Lock); #ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->num_waiters); + atomic_inc(&server->num_waiters); #endif - wait_event(ses->server->request_q, - atomic_read(&ses->server->inFlight) + wait_event(server->request_q, + atomic_read(&server->inFlight) < cifs_max_pending); #ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->num_waiters); + atomic_dec(&server->num_waiters); #endif spin_lock(&GlobalMid_Lock); } else { - if (ses->server->tcpStatus == CifsExiting) { + if (server->tcpStatus == CifsExiting) { spin_unlock(&GlobalMid_Lock); return -ENOENT; } @@ -278,7 +278,7 @@ static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) /* update # of requests on the wire to server */ if (long_op != CIFS_BLOCKING_OP) - atomic_inc(&ses->server->inFlight); + atomic_inc(&server->inFlight); spin_unlock(&GlobalMid_Lock); break; } @@ -413,7 +413,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, to the same server. We may make this configurable later or use ses->maxReq */ - rc = wait_for_free_request(ses, long_op); + rc = wait_for_free_request(ses->server, long_op); if (rc) { cifs_small_buf_release(in_buf); return rc; @@ -610,7 +610,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, return -EIO; } - rc = wait_for_free_request(ses, long_op); + rc = wait_for_free_request(ses->server, long_op); if (rc) return rc; @@ -845,7 +845,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, return -EIO; } - rc = wait_for_free_request(ses, CIFS_BLOCKING_OP); + rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP); if (rc) return rc; -- cgit v1.2.3 From 8097531a5cb55c6472118da094dc88caf9be66ac Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:02 -0500 Subject: cifs: clean up accesses to midCount It's an atomic_t and the code accesses the "counter" field in it directly instead of using atomic_read(). It also is sometimes accessed under a spinlock and sometimes not. Move it out of the spinlock since we don't need belt-and-suspenders for something that's just informational. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 2 +- fs/cifs/connect.c | 2 +- fs/cifs/transport.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index ede98300a8cd..e2d0d5d455fa 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -331,7 +331,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) atomic_read(&totSmBufAllocCount)); #endif /* CONFIG_CIFS_STATS2 */ - seq_printf(m, "Operations (MIDs): %d\n", midCount.counter); + seq_printf(m, "Operations (MIDs): %d\n", atomic_read(&midCount)); seq_printf(m, "\n%d session %d share reconnects\n", tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 75b538f50b12..465ecad6d7cc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -628,7 +628,7 @@ multi_t2_fnd: } else if (!is_valid_oplock_break(smb_buffer, server) && !isMultiRsp) { cERROR(1, "No task to wake, unknown frame received! " - "NumMids %d", midCount.counter); + "NumMids %d", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", (char *)smb_buffer, sizeof(struct smb_hdr)); #ifdef CONFIG_CIFS_DEBUG2 diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 9a14f77e0ab2..b9eb0cffa003 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -61,10 +61,10 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp->tsk = current; } - spin_lock(&GlobalMid_Lock); - list_add_tail(&temp->qhead, &server->pending_mid_q); atomic_inc(&midCount); temp->midState = MID_REQUEST_ALLOCATED; + spin_lock(&GlobalMid_Lock); + list_add_tail(&temp->qhead, &server->pending_mid_q); spin_unlock(&GlobalMid_Lock); return temp; } @@ -78,8 +78,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) spin_lock(&GlobalMid_Lock); midEntry->midState = MID_FREE; list_del(&midEntry->qhead); - atomic_dec(&midCount); spin_unlock(&GlobalMid_Lock); + atomic_dec(&midCount); if (midEntry->largeBuf) cifs_buf_release(midEntry->resp_buf); else -- cgit v1.2.3 From ddc8cf8fc718587a3788330bf4f32b379f08b250 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:02 -0500 Subject: cifs: move locked sections out of DeleteMidQEntry and AllocMidQEntry In later patches, we're going to need to have finer-grained control over the addition and removal of these structs from the pending_mid_q and we'll need to be able to call the destructor while holding the spinlock. Move the locked sections out of both routines and into the callers. Fix up current callers of DeleteMidQEntry to call a new routine that dequeues the entry and then destroys it. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b9eb0cffa003..801726b11e1e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -63,9 +63,6 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) atomic_inc(&midCount); temp->midState = MID_REQUEST_ALLOCATED; - spin_lock(&GlobalMid_Lock); - list_add_tail(&temp->qhead, &server->pending_mid_q); - spin_unlock(&GlobalMid_Lock); return temp; } @@ -75,10 +72,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) #ifdef CONFIG_CIFS_STATS2 unsigned long now; #endif - spin_lock(&GlobalMid_Lock); midEntry->midState = MID_FREE; - list_del(&midEntry->qhead); - spin_unlock(&GlobalMid_Lock); atomic_dec(&midCount); if (midEntry->largeBuf) cifs_buf_release(midEntry->resp_buf); @@ -103,6 +97,16 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) mempool_free(midEntry, cifs_mid_poolp); } +static void +delete_mid(struct mid_q_entry *mid) +{ + spin_lock(&GlobalMid_Lock); + list_del(&mid->qhead); + spin_unlock(&GlobalMid_Lock); + + DeleteMidQEntry(mid); +} + static int smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) { @@ -308,6 +312,9 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, *ppmidQ = AllocMidQEntry(in_buf, ses->server); if (*ppmidQ == NULL) return -ENOMEM; + spin_lock(&GlobalMid_Lock); + list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); return 0; } @@ -508,7 +515,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } } spin_unlock(&GlobalMid_Lock); - DeleteMidQEntry(midQ); + delete_mid(midQ); /* Update # of requests on wire to server */ atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); @@ -564,14 +571,14 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, if ((flags & CIFS_NO_RESP) == 0) midQ->resp_buf = NULL; /* mark it so buf will not be freed by - DeleteMidQEntry */ + delete_mid */ } else { rc = -EIO; cFYI(1, "Bad MID state?"); } out: - DeleteMidQEntry(midQ); + delete_mid(midQ); atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); @@ -699,7 +706,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } } spin_unlock(&GlobalMid_Lock); - DeleteMidQEntry(midQ); + delete_mid(midQ); /* Update # of requests on wire to server */ atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); @@ -755,7 +762,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } out: - DeleteMidQEntry(midQ); + delete_mid(midQ); atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); @@ -863,7 +870,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); if (rc) { - DeleteMidQEntry(midQ); + delete_mid(midQ); mutex_unlock(&ses->server->srv_mutex); return rc; } @@ -880,7 +887,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { - DeleteMidQEntry(midQ); + delete_mid(midQ); return rc; } @@ -902,7 +909,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, rc = send_nt_cancel(tcon, in_buf, midQ); if (rc) { - DeleteMidQEntry(midQ); + delete_mid(midQ); return rc; } } else { @@ -914,7 +921,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, /* If we get -ENOLCK back the lock may have already been removed. Don't exit in this case. */ if (rc && rc != -ENOLCK) { - DeleteMidQEntry(midQ); + delete_mid(midQ); return rc; } } @@ -951,7 +958,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } } spin_unlock(&GlobalMid_Lock); - DeleteMidQEntry(midQ); + delete_mid(midQ); return rc; } @@ -1001,7 +1008,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); out: - DeleteMidQEntry(midQ); + delete_mid(midQ); if (rstart && rc == -EACCES) return -ERESTARTSYS; return rc; -- cgit v1.2.3 From 053d50344568e5a4054266b44040297531125281 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:02 -0500 Subject: cifs: move mid result processing into common function Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 121 +++++++++++++++++++--------------------------------- 1 file changed, 43 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 801726b11e1e..15059c7ef2ae 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -389,6 +389,42 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses, return rc; } +static int +sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) +{ + int rc = 0; + + spin_lock(&GlobalMid_Lock); + + if (mid->resp_buf) { + spin_unlock(&GlobalMid_Lock); + return rc; + } + + cERROR(1, "No response to cmd %d mid %d", mid->command, mid->mid); + if (mid->midState == MID_REQUEST_SUBMITTED) { + if (server->tcpStatus == CifsExiting) + rc = -EHOSTDOWN; + else { + server->tcpStatus = CifsNeedReconnect; + mid->midState = MID_RETRY_NEEDED; + } + } + + if (rc != -EHOSTDOWN) { + if (mid->midState == MID_RETRY_NEEDED) { + rc = -EAGAIN; + cFYI(1, "marking request for retry"); + } else { + rc = -EIO; + } + } + spin_unlock(&GlobalMid_Lock); + + delete_mid(mid); + return rc; +} + int SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, struct kvec *iov, int n_vec, int *pRespBufType /* ret */, @@ -492,37 +528,13 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, /* No user interrupts in wait - wreaks havoc with performance */ wait_for_response(ses, midQ, timeout, 10 * HZ); - spin_lock(&GlobalMid_Lock); - - if (midQ->resp_buf == NULL) { - cERROR(1, "No response to cmd %d mid %d", - midQ->command, midQ->mid); - if (midQ->midState == MID_REQUEST_SUBMITTED) { - if (ses->server->tcpStatus == CifsExiting) - rc = -EHOSTDOWN; - else { - ses->server->tcpStatus = CifsNeedReconnect; - midQ->midState = MID_RETRY_NEEDED; - } - } - - if (rc != -EHOSTDOWN) { - if (midQ->midState == MID_RETRY_NEEDED) { - rc = -EAGAIN; - cFYI(1, "marking request for retry"); - } else { - rc = -EIO; - } - } - spin_unlock(&GlobalMid_Lock); - delete_mid(midQ); - /* Update # of requests on wire to server */ + rc = sync_mid_result(midQ, ses->server); + if (rc != 0) { atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; } - spin_unlock(&GlobalMid_Lock); receive_len = midQ->resp_buf->smb_buf_length; if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { @@ -684,36 +696,13 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, /* No user interrupts in wait - wreaks havoc with performance */ wait_for_response(ses, midQ, timeout, 10 * HZ); - spin_lock(&GlobalMid_Lock); - if (midQ->resp_buf == NULL) { - cERROR(1, "No response for cmd %d mid %d", - midQ->command, midQ->mid); - if (midQ->midState == MID_REQUEST_SUBMITTED) { - if (ses->server->tcpStatus == CifsExiting) - rc = -EHOSTDOWN; - else { - ses->server->tcpStatus = CifsNeedReconnect; - midQ->midState = MID_RETRY_NEEDED; - } - } - - if (rc != -EHOSTDOWN) { - if (midQ->midState == MID_RETRY_NEEDED) { - rc = -EAGAIN; - cFYI(1, "marking request for retry"); - } else { - rc = -EIO; - } - } - spin_unlock(&GlobalMid_Lock); - delete_mid(midQ); - /* Update # of requests on wire to server */ + rc = sync_mid_result(midQ, ses->server); + if (rc != 0) { atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; } - spin_unlock(&GlobalMid_Lock); receive_len = midQ->resp_buf->smb_buf_length; if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { @@ -933,35 +922,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } } - spin_lock(&GlobalMid_Lock); - if (midQ->resp_buf) { - spin_unlock(&GlobalMid_Lock); - receive_len = midQ->resp_buf->smb_buf_length; - } else { - cERROR(1, "No response for cmd %d mid %d", - midQ->command, midQ->mid); - if (midQ->midState == MID_REQUEST_SUBMITTED) { - if (ses->server->tcpStatus == CifsExiting) - rc = -EHOSTDOWN; - else { - ses->server->tcpStatus = CifsNeedReconnect; - midQ->midState = MID_RETRY_NEEDED; - } - } - - if (rc != -EHOSTDOWN) { - if (midQ->midState == MID_RETRY_NEEDED) { - rc = -EAGAIN; - cFYI(1, "marking request for retry"); - } else { - rc = -EIO; - } - } - spin_unlock(&GlobalMid_Lock); - delete_mid(midQ); + rc = sync_mid_result(midQ, ses->server); + if (rc != 0) return rc; - } + receive_len = midQ->resp_buf->smb_buf_length; if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { cERROR(1, "Frame too large received. Length: %d Xid: %d", receive_len, xid); -- cgit v1.2.3 From 1cd3508d5eab6a88fa643119cedd34b04215cffe Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 19 Jan 2011 17:53:44 +0000 Subject: [CIFS] Update cifs version number Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 851030f74939..4739a531cded 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -118,5 +118,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.68" +#define CIFS_VERSION "1.69" #endif /* _CIFSFS_H */ -- cgit v1.2.3 From 540b2e377797d8715469d408b887baa9310c5f3e Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Tue, 18 Jan 2011 22:33:54 -0600 Subject: cifs: Fix regression during share-level security mounts (Repost) NTLM response length was changed to 16 bytes instead of 24 bytes that are sent in Tree Connection Request during share-level security share mounts. Revert it back to 24 bytes. Reported-and-Tested-by: Grzegorz Ozanski Acked-by: Jeff Layton Signed-off-by: Shirish Pargaonkar Acked-by: Suresh Jayaraman Cc: stable@kernel.org Signed-off-by: Steve French --- fs/cifs/connect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 465ecad6d7cc..5c7f8450dbe0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2927,7 +2927,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; /* skip password */ /* already aligned so no need to do it below */ } else { - pSMB->PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); + pSMB->PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); /* BB FIXME add code to fail this if NTLMv2 or Kerberos specified as required (when that support is added to the vfs in the future) as only NTLM or the much @@ -2945,7 +2945,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, #endif /* CIFS_WEAK_PW_HASH */ SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr); - bcc_ptr += CIFS_SESS_KEY_SIZE; + bcc_ptr += CIFS_AUTH_RESP_SIZE; if (ses->capabilities & CAP_UNICODE) { /* must align unicode strings */ *bcc_ptr = 0; /* null byte password */ -- cgit v1.2.3 From 0da2a4ac33c291728d8be5bdb865467dcb078d13 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 19 Jan 2011 14:18:50 -0500 Subject: NFS: fix handling of malloc failure during nfs_flush_multi() Cleanup of the allocated list entries should not call put_nfs_open_context() on each entry, as the context will always be NULL, causing an oops. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 10d648ea128b..c8278f4046cb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -932,7 +932,7 @@ out_bad: while (!list_empty(&list)) { data = list_entry(list.next, struct nfs_write_data, pages); list_del(&data->pages); - nfs_writedata_release(data); + nfs_writedata_free(data); } nfs_redirty_request(req); return -ENOMEM; -- cgit v1.2.3 From 2fbc2f1729e785a7b2faf9d8d60926bb1ff62af0 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Mon, 6 Dec 2010 14:56:46 -0600 Subject: cifs: Use mask of ACEs for SID Everyone to calculate all three permissions user, group, and other If a DACL has entries for ACEs for SID Everyone and Authenticated Users, factor in mask in respective entries during calculation of permissions for all three, user, group, and other. http://technet.microsoft.com/en-us/library/bb463216.aspx Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index a437ec391a01..1e7636b145a8 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -41,9 +41,12 @@ static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { ; -/* security id for everyone */ +/* security id for everyone/world system group */ static const struct cifs_sid sid_everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; +/* security id for Authenticated Users system group */ +static const struct cifs_sid sid_authusers = { + 1, 1, {0, 0, 0, 0, 0, 5}, {11} }; /* group users */ static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; @@ -365,7 +368,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, if (num_aces > 0) { umode_t user_mask = S_IRWXU; umode_t group_mask = S_IRWXG; - umode_t other_mask = S_IRWXO; + umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), GFP_KERNEL); @@ -390,6 +393,12 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, ppace[i]->type, &fattr->cf_mode, &other_mask); + if (compare_sids(&(ppace[i]->sid), &sid_authusers)) + access_flags_to_mode(ppace[i]->access_req, + ppace[i]->type, + &fattr->cf_mode, + &other_mask); + /* memcpy((void *)(&(cifscred->aces[i])), (void *)ppace[i], -- cgit v1.2.3 From 0ade640e9cda805692dbf688f4bb69e94719275a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:02 -0500 Subject: cifs: wait indefinitely for responses The client should not be timing out on individual SMB requests. Too much of the state between client and server is tied to the state of the socket. If we time out requests and issue spurious disconnects then that comprimises data integrity. Instead of doing this complicated dance where we try to decide how long to wait for a response for particular requests, have the client instead wait indefinitely for a response. Also, use a TASK_KILLABLE sleep here so that fatal signals will break out of this waiting. Later patches will add support for detecting dead peers and forcing reconnects based on that. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 110 ++++++++-------------------------------------------- 1 file changed, 17 insertions(+), 93 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 15059c7ef2ae..c41c9c4f0a79 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -318,48 +318,17 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, return 0; } -static int wait_for_response(struct cifsSesInfo *ses, - struct mid_q_entry *midQ, - unsigned long timeout, - unsigned long time_to_wait) +static int +wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { - unsigned long curr_timeout; - - for (;;) { - curr_timeout = timeout + jiffies; - wait_event_timeout(ses->server->response_q, - midQ->midState != MID_REQUEST_SUBMITTED, timeout); - - if (time_after(jiffies, curr_timeout) && - (midQ->midState == MID_REQUEST_SUBMITTED) && - ((ses->server->tcpStatus == CifsGood) || - (ses->server->tcpStatus == CifsNew))) { - - unsigned long lrt; + int error; - /* We timed out. Is the server still - sending replies ? */ - spin_lock(&GlobalMid_Lock); - lrt = ses->server->lstrp; - spin_unlock(&GlobalMid_Lock); + error = wait_event_killable(server->response_q, + midQ->midState != MID_REQUEST_SUBMITTED); + if (error < 0) + return -ERESTARTSYS; - /* Calculate time_to_wait past last receive time. - Although we prefer not to time out if the - server is still responding - we will time - out if the server takes more than 15 (or 45 - or 180) seconds to respond to this request - and has not responded to any request from - other threads on the client within 10 seconds */ - lrt += time_to_wait; - if (time_after(jiffies, lrt)) { - /* No replies for time_to_wait. */ - cERROR(1, "server not responding"); - return -1; - } - } else { - return 0; - } - } + return 0; } @@ -433,7 +402,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, int rc = 0; int long_op; unsigned int receive_len; - unsigned long timeout; struct mid_q_entry *midQ; struct smb_hdr *in_buf = iov[0].iov_base; @@ -500,33 +468,12 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, if (rc < 0) goto out; - if (long_op == CIFS_STD_OP) - timeout = 15 * HZ; - else if (long_op == CIFS_VLONG_OP) /* e.g. slow writes past EOF */ - timeout = 180 * HZ; - else if (long_op == CIFS_LONG_OP) - timeout = 45 * HZ; /* should be greater than - servers oplock break timeout (about 43 seconds) */ - else if (long_op == CIFS_ASYNC_OP) - goto out; - else if (long_op == CIFS_BLOCKING_OP) - timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */ - else { - cERROR(1, "unknown timeout flag %d", long_op); - rc = -EIO; + if (long_op == CIFS_ASYNC_OP) goto out; - } - - /* wait for 15 seconds or until woken up due to response arriving or - due to last connection to this server being unmounted */ - if (signal_pending(current)) { - /* if signal pending do not hold up user for full smb timeout - but we still give response a chance to complete */ - timeout = 2 * HZ; - } - /* No user interrupts in wait - wreaks havoc with performance */ - wait_for_response(ses, midQ, timeout, 10 * HZ); + rc = wait_for_response(ses->server, midQ); + if (rc != 0) + goto out; rc = sync_mid_result(midQ, ses->server); if (rc != 0) { @@ -604,7 +551,6 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, { int rc = 0; unsigned int receive_len; - unsigned long timeout; struct mid_q_entry *midQ; if (ses == NULL) { @@ -668,33 +614,12 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, if (rc < 0) goto out; - if (long_op == CIFS_STD_OP) - timeout = 15 * HZ; - /* wait for 15 seconds or until woken up due to response arriving or - due to last connection to this server being unmounted */ - else if (long_op == CIFS_ASYNC_OP) - goto out; - else if (long_op == CIFS_VLONG_OP) /* writes past EOF can be slow */ - timeout = 180 * HZ; - else if (long_op == CIFS_LONG_OP) - timeout = 45 * HZ; /* should be greater than - servers oplock break timeout (about 43 seconds) */ - else if (long_op == CIFS_BLOCKING_OP) - timeout = 0x7FFFFFFF; /* large but no so large as to wrap */ - else { - cERROR(1, "unknown timeout flag %d", long_op); - rc = -EIO; + if (long_op == CIFS_ASYNC_OP) goto out; - } - if (signal_pending(current)) { - /* if signal pending do not hold up user for full smb timeout - but we still give response a chance to complete */ - timeout = 2 * HZ; - } - - /* No user interrupts in wait - wreaks havoc with performance */ - wait_for_response(ses, midQ, timeout, 10 * HZ); + rc = wait_for_response(ses->server, midQ); + if (rc != 0) + goto out; rc = sync_mid_result(midQ, ses->server); if (rc != 0) { @@ -915,8 +840,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } } - /* Wait 5 seconds for the response. */ - if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ) == 0) { + if (wait_for_response(ses->server, midQ) == 0) { /* We got the response - restart system call. */ rstart = 1; } -- cgit v1.2.3 From dad255b182363db1d1124458cd3fb0a4deac0d0f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:02 -0500 Subject: cifs: don't reconnect server when we don't get a response We only want to force a reconnect to the server under very limited and specific circumstances. Now that we have processes waiting indefinitely for responses, we shouldn't reach this point unless a reconnect is already in process. Thus, there's no reason to re-mark the server for reconnect here. Reviewed-by: Suresh Jayaraman Reviewed-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c41c9c4f0a79..f65cdec042e4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -374,10 +374,8 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) if (mid->midState == MID_REQUEST_SUBMITTED) { if (server->tcpStatus == CifsExiting) rc = -EHOSTDOWN; - else { - server->tcpStatus = CifsNeedReconnect; + else mid->midState = MID_RETRY_NEEDED; - } } if (rc != -EHOSTDOWN) { -- cgit v1.2.3 From 74dd92a881b62014ca3c754db6868e1f142f2fb9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:02 -0500 Subject: cifs: clean up sync_mid_result Make it use a switch statement based on the value of the midStatus. If the resp_buf is set, then MID_RESPONSE_RECEIVED is too. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index f65cdec042e4..6abd1445c983 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -363,28 +363,29 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) { int rc = 0; - spin_lock(&GlobalMid_Lock); + cFYI(1, "%s: cmd=%d mid=%d state=%d", __func__, mid->command, + mid->mid, mid->midState); - if (mid->resp_buf) { + spin_lock(&GlobalMid_Lock); + switch (mid->midState) { + case MID_RESPONSE_RECEIVED: spin_unlock(&GlobalMid_Lock); return rc; - } - - cERROR(1, "No response to cmd %d mid %d", mid->command, mid->mid); - if (mid->midState == MID_REQUEST_SUBMITTED) { - if (server->tcpStatus == CifsExiting) + case MID_REQUEST_SUBMITTED: + /* socket is going down, reject all calls */ + if (server->tcpStatus == CifsExiting) { + cERROR(1, "%s: canceling mid=%d cmd=0x%x state=%d", + __func__, mid->mid, mid->command, mid->midState); rc = -EHOSTDOWN; - else - mid->midState = MID_RETRY_NEEDED; - } - - if (rc != -EHOSTDOWN) { - if (mid->midState == MID_RETRY_NEEDED) { - rc = -EAGAIN; - cFYI(1, "marking request for retry"); - } else { - rc = -EIO; + break; } + case MID_RETRY_NEEDED: + rc = -EAGAIN; + break; + default: + cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, + mid->mid, mid->midState); + rc = -EIO; } spin_unlock(&GlobalMid_Lock); -- cgit v1.2.3 From 2b84a36c5529da136d28b268e75268892d09869c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:21 -0500 Subject: cifs: allow for different handling of received response In order to incorporate async requests, we need to allow for a more general way to do things on receive, rather than just waking up a process. Turn the task pointer in the mid_q_entry into a callback function and a generic data pointer. When a response comes in, or the socket is reconnected, cifsd can call the callback function in order to wake up the process. The default is to just wake up the current process which should mean no change in behavior for existing code. Also, clean up the locking in cifs_reconnect. There doesn't seem to be any need to hold both the srv_mutex and GlobalMid_Lock when walking the list of mids. Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 8 ++++---- fs/cifs/cifsglob.h | 15 ++++++++++++++- fs/cifs/connect.c | 53 +++++++++++++++++++++++++--------------------------- fs/cifs/transport.c | 19 +++++++++++++++++-- 4 files changed, 60 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index e2d0d5d455fa..65829d32128c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -79,11 +79,11 @@ void cifs_dump_mids(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - cERROR(1, "State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", + cERROR(1, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %d", mid_entry->midState, (int)mid_entry->command, mid_entry->pid, - mid_entry->tsk, + mid_entry->callback_data, mid_entry->mid); #ifdef CONFIG_CIFS_STATS2 cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld", @@ -218,11 +218,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) mid_entry = list_entry(tmp3, struct mid_q_entry, qhead); seq_printf(m, "\tState: %d com: %d pid:" - " %d tsk: %p mid %d\n", + " %d cbdata: %p mid %d\n", mid_entry->midState, (int)mid_entry->command, mid_entry->pid, - mid_entry->tsk, + mid_entry->callback_data, mid_entry->mid); } spin_unlock(&GlobalMid_Lock); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 606ca8bb7102..4de737575959 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -508,6 +508,18 @@ static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon, #endif +struct mid_q_entry; + +/* + * This is the prototype for the mid callback function. When creating one, + * take special care to avoid deadlocks. Things to bear in mind: + * + * - it will be called by cifsd + * - the GlobalMid_Lock will be held + * - the mid will be removed from the pending_mid_q list + */ +typedef void (mid_callback_t)(struct mid_q_entry *mid); + /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ @@ -519,7 +531,8 @@ struct mid_q_entry { unsigned long when_sent; /* time when smb send finished */ unsigned long when_received; /* when demux complete (taken off wire) */ #endif - struct task_struct *tsk; /* task waiting for response */ + mid_callback_t *callback; /* call completion callback */ + void *callback_data; /* general purpose pointer for callback */ struct smb_hdr *resp_buf; /* response buffer */ int midState; /* wish this were enum but can not pass to wait_event */ __u8 command; /* smb command code */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5c7f8450dbe0..aa66de1db5f5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -152,6 +152,7 @@ cifs_reconnect(struct TCP_Server_Info *server) /* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they are not used until reconnected */ + cFYI(1, "%s: marking sessions and tcons for reconnect", __func__); spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); @@ -163,7 +164,9 @@ cifs_reconnect(struct TCP_Server_Info *server) } } spin_unlock(&cifs_tcp_ses_lock); + /* do not want to be sending data on a socket we are freeing */ + cFYI(1, "%s: tearing down socket", __func__); mutex_lock(&server->srv_mutex); if (server->ssocket) { cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state, @@ -180,22 +183,19 @@ cifs_reconnect(struct TCP_Server_Info *server) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; + mutex_unlock(&server->srv_mutex); + /* mark submitted MIDs for retry and issue callback */ + cFYI(1, "%s: issuing mid callbacks", __func__); spin_lock(&GlobalMid_Lock); - list_for_each(tmp, &server->pending_mid_q) { - mid_entry = list_entry(tmp, struct - mid_q_entry, - qhead); - if (mid_entry->midState == MID_REQUEST_SUBMITTED) { - /* Mark other intransit requests as needing - retry so we do not immediately mark the - session bad again (ie after we reconnect - below) as they timeout too */ + list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { + mid_entry = list_entry(tmp, struct mid_q_entry, qhead); + if (mid_entry->midState == MID_REQUEST_SUBMITTED) mid_entry->midState = MID_RETRY_NEEDED; - } + list_del_init(&mid_entry->qhead); + mid_entry->callback(mid_entry); } spin_unlock(&GlobalMid_Lock); - mutex_unlock(&server->srv_mutex); while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) { @@ -212,10 +212,9 @@ cifs_reconnect(struct TCP_Server_Info *server) if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsGood; spin_unlock(&GlobalMid_Lock); - /* atomic_set(&server->inFlight,0);*/ - wake_up(&server->response_q); } } + return rc; } @@ -345,7 +344,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) struct msghdr smb_msg; struct kvec iov; struct socket *csocket = server->ssocket; - struct list_head *tmp; + struct list_head *tmp, *tmp2; struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; char temp; @@ -558,10 +557,9 @@ incomplete_rcv: continue; } - - task_to_wake = NULL; + mid_entry = NULL; spin_lock(&GlobalMid_Lock); - list_for_each(tmp, &server->pending_mid_q) { + list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); if ((mid_entry->mid == smb_buffer->Mid) && @@ -602,8 +600,9 @@ incomplete_rcv: mid_entry->resp_buf = smb_buffer; mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: - task_to_wake = mid_entry->tsk; mid_entry->midState = MID_RESPONSE_RECEIVED; + list_del_init(&mid_entry->qhead); + mid_entry->callback(mid_entry); #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif @@ -613,9 +612,11 @@ multi_t2_fnd: server->lstrp = jiffies; break; } + mid_entry = NULL; } spin_unlock(&GlobalMid_Lock); - if (task_to_wake) { + + if (mid_entry != NULL) { /* Was previous buf put in mpx struct for multi-rsp? */ if (!isMultiRsp) { /* smb buffer will be freed by user thread */ @@ -624,7 +625,6 @@ multi_t2_fnd: else smallbuf = NULL; } - wake_up_process(task_to_wake); } else if (!is_valid_oplock_break(smb_buffer, server) && !isMultiRsp) { cERROR(1, "No task to wake, unknown frame received! " @@ -678,15 +678,12 @@ multi_t2_fnd: if (!list_empty(&server->pending_mid_q)) { spin_lock(&GlobalMid_Lock); - list_for_each(tmp, &server->pending_mid_q) { + list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if (mid_entry->midState == MID_REQUEST_SUBMITTED) { - cFYI(1, "Clearing Mid 0x%x - waking up ", - mid_entry->mid); - task_to_wake = mid_entry->tsk; - if (task_to_wake) - wake_up_process(task_to_wake); - } + cFYI(1, "Clearing Mid 0x%x - issuing callback", + mid_entry->mid); + list_del_init(&mid_entry->qhead); + mid_entry->callback(mid_entry); } spin_unlock(&GlobalMid_Lock); /* 1/8th of sec is more than enough time for them to exit */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 6abd1445c983..d77b6154cf22 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -36,6 +36,12 @@ extern mempool_t *cifs_mid_poolp; +static void +wake_up_task(struct mid_q_entry *mid) +{ + wake_up_process(mid->callback_data); +} + static struct mid_q_entry * AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) { @@ -58,7 +64,13 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ /* when mid allocated can be before when sent */ temp->when_alloc = jiffies; - temp->tsk = current; + + /* + * The default is for the mid to be synchronous, so the + * default callback just wakes up the current task. + */ + temp->callback = wake_up_task; + temp->callback_data = current; } atomic_inc(&midCount); @@ -367,6 +379,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) mid->mid, mid->midState); spin_lock(&GlobalMid_Lock); + /* ensure that it's no longer on the pending_mid_q */ + list_del_init(&mid->qhead); + switch (mid->midState) { case MID_RESPONSE_RECEIVED: spin_unlock(&GlobalMid_Lock); @@ -389,7 +404,7 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) } spin_unlock(&GlobalMid_Lock); - delete_mid(mid); + DeleteMidQEntry(mid); return rc; } -- cgit v1.2.3 From a6827c184ea9f5452e4aaa7c799dd3c7cc9ba05e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:21 -0500 Subject: cifs: add cifs_call_async Add a function that will send a request, and set up the mid for an async reply. Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 5 +++++ fs/cifs/transport.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 95d5dbbb4c7a..7f2988bb8929 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -61,6 +61,11 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, const char *fullpath, const struct dfs_info3_param *ref, char **devname); /* extern void renew_parental_timestamps(struct dentry *direntry);*/ +extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, + struct TCP_Server_Info *server); +extern int cifs_call_async(struct TCP_Server_Info *server, + struct smb_hdr *in_buf, mid_callback_t *callback, + void *cbdata); extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index d77b6154cf22..166b65a3763e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -42,7 +42,7 @@ wake_up_task(struct mid_q_entry *mid) wake_up_process(mid->callback_data); } -static struct mid_q_entry * +struct mid_q_entry * AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) { struct mid_q_entry *temp; @@ -344,6 +344,62 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) } +/* + * Send a SMB request and set the callback function in the mid to handle + * the result. Caller is responsible for dealing with timeouts. + */ +int +cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, + mid_callback_t *callback, void *cbdata) +{ + int rc; + struct mid_q_entry *mid; + + rc = wait_for_free_request(server, CIFS_ASYNC_OP); + if (rc) + return rc; + + mutex_lock(&server->srv_mutex); + mid = AllocMidQEntry(in_buf, server); + if (mid == NULL) { + mutex_unlock(&server->srv_mutex); + return -ENOMEM; + } + + /* put it on the pending_mid_q */ + spin_lock(&GlobalMid_Lock); + list_add_tail(&mid->qhead, &server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); + + rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); + if (rc) { + mutex_unlock(&server->srv_mutex); + goto out_err; + } + + mid->callback = callback; + mid->callback_data = cbdata; + mid->midState = MID_REQUEST_SUBMITTED; +#ifdef CONFIG_CIFS_STATS2 + atomic_inc(&server->inSend); +#endif + rc = smb_send(server, in_buf, in_buf->smb_buf_length); +#ifdef CONFIG_CIFS_STATS2 + atomic_dec(&server->inSend); + mid->when_sent = jiffies; +#endif + mutex_unlock(&server->srv_mutex); + if (rc) + goto out_err; + + return rc; +out_err: + delete_mid(mid); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); + return rc; +} + /* * * Send an SMB Request. No response info (other than return code) -- cgit v1.2.3 From 766fdbb57fdb1e53bc34c431103e95383d7f13ba Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:21 -0500 Subject: cifs: add ability to send an echo request Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 15 +++++++++++++++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/cifssmb.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/transport.c | 2 +- 4 files changed, 65 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index de36b09763a8..ea205b4fcad2 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -50,6 +50,7 @@ #define SMB_COM_SETATTR 0x09 /* trivial response */ #define SMB_COM_LOCKING_ANDX 0x24 /* trivial response */ #define SMB_COM_COPY 0x29 /* trivial rsp, fail filename ignrd*/ +#define SMB_COM_ECHO 0x2B /* echo request */ #define SMB_COM_OPEN_ANDX 0x2D /* Legacy open for old servers */ #define SMB_COM_READ_ANDX 0x2E #define SMB_COM_WRITE_ANDX 0x2F @@ -760,6 +761,20 @@ typedef struct smb_com_tconx_rsp_ext { * */ +typedef struct smb_com_echo_req { + struct smb_hdr hdr; + __le16 EchoCount; + __le16 ByteCount; + char Data[1]; +} __attribute__((packed)) ECHO_REQ; + +typedef struct smb_com_echo_rsp { + struct smb_hdr hdr; + __le16 SequenceNumber; + __le16 ByteCount; + char Data[1]; +} __attribute__((packed)) ECHO_RSP; + typedef struct smb_com_logoff_andx_req { struct smb_hdr hdr; /* wct = 2 */ __u8 AndXCommand; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 7f2988bb8929..982895fa7615 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -63,6 +63,7 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, /* extern void renew_parental_timestamps(struct dentry *direntry);*/ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); +extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern int cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, mid_callback_t *callback, void *cbdata); @@ -358,6 +359,7 @@ extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u64 len, struct file_lock *, const __u16 lock_type, const bool waitFlag); extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); +extern int CIFSSMBEcho(struct TCP_Server_Info *server); extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); extern struct cifsSesInfo *sesInfoAlloc(void); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3652cc60314c..54b9f5d8d1db 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -706,6 +706,53 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) return rc; } +/* + * This is a no-op for now. We're not really interested in the reply, but + * rather in the fact that the server sent one and that server->lstrp + * gets updated. + * + * FIXME: maybe we should consider checking that the reply matches request? + */ +static void +cifs_echo_callback(struct mid_q_entry *mid) +{ + struct TCP_Server_Info *server = mid->callback_data; + + DeleteMidQEntry(mid); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); +} + +int +CIFSSMBEcho(struct TCP_Server_Info *server) +{ + ECHO_REQ *smb; + int rc = 0; + + cFYI(1, "In echo request"); + + rc = small_smb_init(SMB_COM_ECHO, 0, NULL, (void **)&smb); + if (rc) + return rc; + + /* set up echo request */ + smb->hdr.Tid = cpu_to_le16(0xffff); + smb->hdr.WordCount = cpu_to_le16(1); + smb->EchoCount = cpu_to_le16(1); + smb->ByteCount = cpu_to_le16(1); + smb->Data[0] = 'a'; + smb->hdr.smb_buf_length += 3; + + rc = cifs_call_async(server, (struct smb_hdr *)smb, + cifs_echo_callback, server); + if (rc) + cFYI(1, "Echo request failed: %d", rc); + + cifs_small_buf_release(smb); + + return rc; +} + int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 166b65a3763e..a0cef4960516 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -78,7 +78,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } -static void +void DeleteMidQEntry(struct mid_q_entry *midEntry) { #ifdef CONFIG_CIFS_STATS2 -- cgit v1.2.3 From c74093b694998d30105d9904686da5e3576497c4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:23 -0500 Subject: cifs: set up recurring workqueue job to do SMB echo requests Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4de737575959..9c728dd5b146 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -218,6 +218,7 @@ struct TCP_Server_Info { bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_ntlmssp; /* supports NTLMSSP */ bool session_estab; /* mark when very first sess is established */ + struct delayed_work echo; /* echo ping workqueue job */ #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index aa66de1db5f5..f38ca084c9d2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -52,6 +52,9 @@ #define CIFS_PORT 445 #define RFC1001_PORT 139 +/* SMB echo "timeout" -- FIXME: tunable? */ +#define SMB_ECHO_INTERVAL (60 * HZ) + extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); @@ -333,6 +336,26 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) } +static void +cifs_echo_request(struct work_struct *work) +{ + int rc; + struct TCP_Server_Info *server = container_of(work, + struct TCP_Server_Info, echo.work); + + /* no need to ping if we got a response recently */ + if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) + goto requeue_echo; + + rc = CIFSSMBEcho(server); + if (rc) + cFYI(1, "Unable to send echo request to server: %s", + server->hostname); + +requeue_echo: + queue_delayed_work(system_nrt_wq, &server->echo, SMB_ECHO_INTERVAL); +} + static int cifs_demultiplex_thread(struct TCP_Server_Info *server) { @@ -1571,6 +1594,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); + cancel_delayed_work_sync(&server->echo); + spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); @@ -1662,6 +1687,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->sequence_number = 0; INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); + INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); /* * at this point we are the only ones with the pointer @@ -1710,6 +1736,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cifs_fscache_get_client_cookie(tcp_ses); + /* queue echo request delayed work */ + queue_delayed_work(system_nrt_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL); + return tcp_ses; out_err_crypto_release: -- cgit v1.2.3 From fda3594362184383e73f0a2a5fa5b38ac0e04fd8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 20 Jan 2011 18:06:34 +0000 Subject: [CIFS] cifs: reconnect unresponsive servers If the server isn't responding to echoes, we don't want to leave tasks hung waiting for it to reply. At that point, we'll want to reconnect so that soft mounts can return an error to userspace quickly. If the client hasn't received a reply after a specified number of echo intervals, assume that the transport is down and attempt to reconnect the socket. The number of echo_intervals to wait before attempting to reconnect is tunable via a module parameter. Setting it to 0, means that the client will never attempt to reconnect. The default is 5. Signed-off-by: Jeff Layton --- fs/cifs/cifsfs.c | 6 +++++- fs/cifs/cifsglob.h | 3 +++ fs/cifs/connect.c | 21 +++++++++++++++++---- 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d9f652a522a6..99d777a03dd0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -77,7 +77,11 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ; module_param(cifs_max_pending, int, 0); MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " "Default: 50 Range: 2 to 256"); - +unsigned short echo_retries = 5; +module_param(echo_retries, ushort, 0644); +MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " + "reconnecting server. Default: 5. 0 means " + "never reconnect."); extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 9c728dd5b146..7040abc638fa 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -804,6 +804,9 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ +/* reconnect after this many failed echo attempts */ +GLOBAL_EXTERN unsigned short echo_retries; + void cifs_oplock_break(struct work_struct *work); void cifs_oplock_break_get(struct cifsFileInfo *cfile); void cifs_oplock_break_put(struct cifsFileInfo *cfile); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f38ca084c9d2..f5d7b59a3553 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -186,6 +186,7 @@ cifs_reconnect(struct TCP_Server_Info *server) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; + server->lstrp = jiffies; mutex_unlock(&server->srv_mutex); /* mark submitted MIDs for retry and issue callback */ @@ -420,7 +421,20 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; pdu_length = 4; /* enough to get RFC1001 header */ + incomplete_rcv: + if (echo_retries > 0 && + time_after(jiffies, server->lstrp + + (echo_retries * SMB_ECHO_INTERVAL))) { + cERROR(1, "Server %s has not responded in %d seconds. " + "Reconnecting...", server->hostname, + (echo_retries * SMB_ECHO_INTERVAL / HZ)); + cifs_reconnect(server); + csocket = server->ssocket; + wake_up(&server->response_q); + continue; + } + length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length, 0 /* BB other flags? */); @@ -581,6 +595,8 @@ incomplete_rcv: } mid_entry = NULL; + server->lstrp = jiffies; + spin_lock(&GlobalMid_Lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); @@ -629,10 +645,6 @@ multi_t2_fnd: #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif - /* so we do not time out requests to server - which is still responding (since server could - be busy but not dead) */ - server->lstrp = jiffies; break; } mid_entry = NULL; @@ -1685,6 +1697,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); tcp_ses->session_estab = false; tcp_ses->sequence_number = 0; + tcp_ses->lstrp = jiffies; INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); -- cgit v1.2.3 From 7749981ec31aa40e28a1ef5687e46bc1aa278fae Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:23 -0500 Subject: cifs: remove code for setting timeouts on requests Since we don't time out individual requests anymore, remove the code that we used to use for setting timeouts on different requests. Reviewed-by: Pavel Shilovsky Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 9 +++------ fs/cifs/cifssmb.c | 8 ++++---- fs/cifs/connect.c | 2 +- fs/cifs/file.c | 44 +++++++------------------------------------- fs/cifs/sess.c | 2 +- fs/cifs/transport.c | 2 +- 6 files changed, 17 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7040abc638fa..571132c95231 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -636,12 +636,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define CIFS_IOVEC 4 /* array of response buffers */ /* Type of Request to SendReceive2 */ -#define CIFS_STD_OP 0 /* normal request timeout */ -#define CIFS_LONG_OP 1 /* long op (up to 45 sec, oplock time) */ -#define CIFS_VLONG_OP 2 /* sloow op - can take up to 180 seconds */ -#define CIFS_BLOCKING_OP 4 /* operation can block */ -#define CIFS_ASYNC_OP 8 /* do not wait for response */ -#define CIFS_TIMEOUT_MASK 0x00F /* only one of 5 above set in req */ +#define CIFS_BLOCKING_OP 1 /* operation can block */ +#define CIFS_ASYNC_OP 2 /* do not wait for response */ +#define CIFS_TIMEOUT_MASK 0x003 /* only one of above set in req */ #define CIFS_LOG_ERROR 0x010 /* log NT STATUS if non-zero */ #define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */ #define CIFS_NO_RESP 0x040 /* no response buffer required */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 54b9f5d8d1db..37113450757b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1240,7 +1240,7 @@ OldOpenRetry: pSMB->ByteCount = cpu_to_le16(count); /* long_op set to 1 to allow for oplock break timeouts */ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); + (struct smb_hdr *)pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->num_opens); if (rc) { cFYI(1, "Error in Open = %d", rc); @@ -1353,7 +1353,7 @@ openRetry: pSMB->ByteCount = cpu_to_le16(count); /* long_op set to 1 to allow for oplock break timeouts */ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); + (struct smb_hdr *)pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->num_opens); if (rc) { cFYI(1, "Error in Open = %d", rc); @@ -1435,7 +1435,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid, iov[0].iov_base = (char *)pSMB; iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, - &resp_buf_type, CIFS_STD_OP | CIFS_LOG_ERROR); + &resp_buf_type, CIFS_LOG_ERROR); cifs_stats_inc(&tcon->num_reads); pSMBr = (READ_RSP *)iov[0].iov_base; if (rc) { @@ -3136,7 +3136,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, - CIFS_STD_OP); + 0); cifs_stats_inc(&tcon->num_acl_get); if (rc) { cFYI(1, "Send error in QuerySecDesc = %d", rc); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f5d7b59a3553..8d4657596301 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3022,7 +3022,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, pSMB->ByteCount = cpu_to_le16(count); rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, - CIFS_STD_OP); + 0); /* above now done in SendReceive */ if ((rc == 0) && (tcon != NULL)) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cfa2e5ebcafe..bd2a028af833 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -839,29 +839,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) return rc; } -/* - * Set the timeout on write requests past EOF. For some servers (Windows) - * these calls can be very long. - * - * If we're writing >10M past the EOF we give a 180s timeout. Anything less - * than that gets a 45s timeout. Writes not past EOF get 15s timeouts. - * The 10M cutoff is totally arbitrary. A better scheme for this would be - * welcome if someone wants to suggest one. - * - * We may be able to do a better job with this if there were some way to - * declare that a file should be sparse. - */ -static int -cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset) -{ - if (offset <= cifsi->server_eof) - return CIFS_STD_OP; - else if (offset > (cifsi->server_eof + (10 * 1024 * 1024))) - return CIFS_VLONG_OP; - else - return CIFS_LONG_OP; -} - /* update the file size (if needed) after a write */ static void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, @@ -882,7 +859,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, unsigned int total_written; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - int xid, long_op; + int xid; struct cifsFileInfo *open_file; struct cifsInodeInfo *cifsi = CIFS_I(inode); @@ -903,7 +880,6 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, xid = GetXid(); - long_op = cifs_write_timeout(cifsi, *poffset); for (total_written = 0; write_size > total_written; total_written += bytes_written) { rc = -EAGAIN; @@ -931,7 +907,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, min_t(const int, cifs_sb->wsize, write_size - total_written), *poffset, &bytes_written, - NULL, write_data + total_written, long_op); + NULL, write_data + total_written, 0); } if (rc || (bytes_written == 0)) { if (total_written) @@ -944,8 +920,6 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, cifs_update_eof(cifsi, *poffset, bytes_written); *poffset += bytes_written; } - long_op = CIFS_STD_OP; /* subsequent writes fast - - 15 seconds is plenty */ } cifs_stats_bytes_written(pTcon, total_written); @@ -974,7 +948,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, unsigned int total_written; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - int xid, long_op; + int xid; struct dentry *dentry = open_file->dentry; struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); @@ -987,7 +961,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, xid = GetXid(); - long_op = cifs_write_timeout(cifsi, *poffset); for (total_written = 0; write_size > total_written; total_written += bytes_written) { rc = -EAGAIN; @@ -1017,7 +990,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len, *poffset, &bytes_written, - iov, 1, long_op); + iov, 1, 0); } else rc = CIFSSMBWrite(xid, pTcon, open_file->netfid, @@ -1025,7 +998,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, write_size - total_written), *poffset, &bytes_written, write_data + total_written, - NULL, long_op); + NULL, 0); } if (rc || (bytes_written == 0)) { if (total_written) @@ -1038,8 +1011,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, cifs_update_eof(cifsi, *poffset, bytes_written); *poffset += bytes_written; } - long_op = CIFS_STD_OP; /* subsequent writes fast - - 15 seconds is plenty */ } cifs_stats_bytes_written(pTcon, total_written); @@ -1239,7 +1210,7 @@ static int cifs_writepages(struct address_space *mapping, struct pagevec pvec; int rc = 0; int scanned = 0; - int xid, long_op; + int xid; cifs_sb = CIFS_SB(mapping->host->i_sb); @@ -1384,11 +1355,10 @@ retry_write: cERROR(1, "No writable handles for inode"); rc = -EBADF; } else { - long_op = cifs_write_timeout(cifsi, offset); rc = CIFSSMBWrite2(xid, tcon, open_file->netfid, bytes_to_write, offset, &bytes_written, iov, n_iov, - long_op); + 0); cifsFileInfo_put(open_file); } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index eb746486e49e..1cffd82c4f13 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -879,7 +879,7 @@ ssetup_ntlmssp_authenticate: BCC_LE(smb_buf) = cpu_to_le16(count); rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, - CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); + CIFS_LOG_ERROR); /* SMB request buf freed in SendReceive2 */ pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index a0cef4960516..fe92c4cb75f5 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -798,7 +798,7 @@ send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, pSMB->hdr.Mid = GetNextMid(ses->server); return SendReceive(xid, ses, in_buf, out_buf, - &bytes_returned, CIFS_STD_OP); + &bytes_returned, 0); } int -- cgit v1.2.3 From 76dcc26f1d7f1c98c3f595379dcd9562f01bf38d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Jan 2011 07:24:24 -0500 Subject: cifs: mangle existing header for SMB_COM_NT_CANCEL The NT_CANCEL command looks just like the original command, except for a few small differences. The send_nt_cancel function however currently takes a tcon, which we don't have in SendReceive and SendReceive2. Instead of "respinning" the entire header for an NT_CANCEL, just mangle the existing header by replacing just the fields we need. This means we don't need a tcon and allows us to call it from other places. Reviewed-by: Pavel Shilovsky Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/transport.c | 63 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index fe92c4cb75f5..c8e2808cd5e6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -464,6 +464,43 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) return rc; } +/* + * An NT cancel request header looks just like the original request except: + * + * The Command is SMB_COM_NT_CANCEL + * The WordCount is zeroed out + * The ByteCount is zeroed out + * + * This function mangles an existing request buffer into a + * SMB_COM_NT_CANCEL request and then sends it. + */ +static int +send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf, + struct mid_q_entry *mid) +{ + int rc = 0; + + /* -4 for RFC1001 length and +2 for BCC field */ + in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; + in_buf->Command = SMB_COM_NT_CANCEL; + in_buf->WordCount = 0; + BCC_LE(in_buf) = 0; + + mutex_lock(&server->srv_mutex); + rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); + if (rc) { + mutex_unlock(&server->srv_mutex); + return rc; + } + rc = smb_send(server, in_buf, in_buf->smb_buf_length); + mutex_unlock(&server->srv_mutex); + + cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", + in_buf->Mid, rc); + + return rc; +} + int SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, struct kvec *iov, int n_vec, int *pRespBufType /* ret */, @@ -753,29 +790,6 @@ out: return rc; } -/* Send an NT_CANCEL SMB to cause the POSIX blocking lock to return. */ - -static int -send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, - struct mid_q_entry *midQ) -{ - int rc = 0; - struct cifsSesInfo *ses = tcon->ses; - __u16 mid = in_buf->Mid; - - header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); - in_buf->Mid = mid; - mutex_lock(&ses->server->srv_mutex); - rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); - if (rc) { - mutex_unlock(&ses->server->srv_mutex); - return rc; - } - rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); - mutex_unlock(&ses->server->srv_mutex); - return rc; -} - /* We send a LOCKINGX_CANCEL_LOCK to cause the Windows blocking lock to return. */ @@ -890,8 +904,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, if (in_buf->Command == SMB_COM_TRANSACTION2) { /* POSIX lock. We send a NT_CANCEL SMB to cause the blocking lock to return. */ - - rc = send_nt_cancel(tcon, in_buf, midQ); + rc = send_nt_cancel(ses->server, in_buf, midQ); if (rc) { delete_mid(midQ); return rc; -- cgit v1.2.3 From 4f8ba8a0c095933dd54a2c281750c8a85b329b26 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 21 Nov 2010 22:36:12 +0300 Subject: CIFS: Make cifsFileInfo_put work with strict cache mode On strict cache mode when we close the last file handle of the inode we should set invalid_mapping flag on this inode to prevent data coherency problem when we open it again but it has been modified on the server. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_fs_sb.h | 1 + fs/cifs/file.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 7852cd677051..ac51cd2d33ae 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -40,6 +40,7 @@ #define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */ #define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */ #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ +#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ struct cifs_sb_info { struct rb_root tlink_tree; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index bd2a028af833..1b26c2717599 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -287,6 +287,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) struct inode *inode = cifs_file->dentry->d_inode; struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsLockInfo *li, *tmp; spin_lock(&cifs_file_list_lock); @@ -302,6 +303,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) if (list_empty(&cifsi->openFileList)) { cFYI(1, "closing last open instance for inode %p", cifs_file->dentry->d_inode); + + /* in strict cache mode we need invalidate mapping on the last + close because it may cause a error when we open this file + again and get at least level II oplock */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + CIFS_I(inode)->invalid_mapping = true; + cifs_set_oplock_level(cifsi, 0); } spin_unlock(&cifs_file_list_lock); -- cgit v1.2.3 From 8be7e6ba142423e6ad98fed293c96f196f685229 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sun, 12 Dec 2010 13:11:13 +0300 Subject: CIFS: Implement cifs_strict_fsync Invalidate inode mapping if we don't have at least Level II oplock in cifs_strict_fsync. Also remove filemap_write_and_wait call from cifs_fsync because it is previously called from vfs_fsync_range. Add file operations' structures for strict cache mode. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 38 ++++++++++++++++++++++++++++++++++++++ fs/cifs/cifsfs.h | 8 ++++++-- fs/cifs/file.c | 36 ++++++++++++++++++++++++++++-------- fs/cifs/inode.c | 8 ++++++-- 4 files changed, 78 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 99d777a03dd0..f6093e401c5a 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -733,6 +733,25 @@ const struct file_operations cifs_file_ops = { .setlease = cifs_setlease, }; +const struct file_operations cifs_file_strict_ops = { + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = cifs_file_aio_write, + .open = cifs_open, + .release = cifs_close, + .lock = cifs_lock, + .fsync = cifs_strict_fsync, + .flush = cifs_flush, + .mmap = cifs_file_mmap, + .splice_read = generic_file_splice_read, + .llseek = cifs_llseek, +#ifdef CONFIG_CIFS_POSIX + .unlocked_ioctl = cifs_ioctl, +#endif /* CONFIG_CIFS_POSIX */ + .setlease = cifs_setlease, +}; + const struct file_operations cifs_file_direct_ops = { /* no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ @@ -751,6 +770,7 @@ const struct file_operations cifs_file_direct_ops = { .llseek = cifs_llseek, .setlease = cifs_setlease, }; + const struct file_operations cifs_file_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, @@ -769,6 +789,24 @@ const struct file_operations cifs_file_nobrl_ops = { .setlease = cifs_setlease, }; +const struct file_operations cifs_file_strict_nobrl_ops = { + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = cifs_file_aio_write, + .open = cifs_open, + .release = cifs_close, + .fsync = cifs_strict_fsync, + .flush = cifs_flush, + .mmap = cifs_file_mmap, + .splice_read = generic_file_splice_read, + .llseek = cifs_llseek, +#ifdef CONFIG_CIFS_POSIX + .unlocked_ioctl = cifs_ioctl, +#endif /* CONFIG_CIFS_POSIX */ + .setlease = cifs_setlease, +}; + const struct file_operations cifs_file_direct_nobrl_ops = { /* no mmap, no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 4739a531cded..10c4303c282d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -61,6 +61,7 @@ extern int cifs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); extern int cifs_revalidate_file(struct file *filp); extern int cifs_revalidate_dentry(struct dentry *); +extern void cifs_invalidate_mapping(struct inode *inode); extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int cifs_setattr(struct dentry *, struct iattr *); @@ -72,8 +73,10 @@ extern const struct inode_operations cifs_dfs_referral_inode_operations; /* Functions related to files and directories */ extern const struct file_operations cifs_file_ops; extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ -extern const struct file_operations cifs_file_nobrl_ops; -extern const struct file_operations cifs_file_direct_nobrl_ops; /* no brlocks */ +extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */ +extern const struct file_operations cifs_file_nobrl_ops; /* no brlocks */ +extern const struct file_operations cifs_file_direct_nobrl_ops; +extern const struct file_operations cifs_file_strict_nobrl_ops; extern int cifs_open(struct inode *inode, struct file *file); extern int cifs_close(struct inode *inode, struct file *file); extern int cifs_closedir(struct inode *inode, struct file *file); @@ -83,6 +86,7 @@ extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, size_t write_size, loff_t *poffset); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, int); +extern int cifs_strict_fsync(struct file *, int); extern int cifs_flush(struct file *, fl_owner_t id); extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1b26c2717599..5790fab7349b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1528,27 +1528,47 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, return rc; } -int cifs_fsync(struct file *file, int datasync) +int cifs_strict_fsync(struct file *file, int datasync) { int xid; int rc = 0; struct cifsTconInfo *tcon; struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file->f_path.dentry->d_inode; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); xid = GetXid(); cFYI(1, "Sync file - name: %s datasync: 0x%x", file->f_path.dentry->d_name.name, datasync); - rc = filemap_write_and_wait(inode->i_mapping); - if (rc == 0) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + if (!CIFS_I(inode)->clientCanCacheRead) + cifs_invalidate_mapping(inode); - tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); - } + tcon = tlink_tcon(smbfile->tlink); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) + rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); + + FreeXid(xid); + return rc; +} + +int cifs_fsync(struct file *file, int datasync) +{ + int xid; + int rc = 0; + struct cifsTconInfo *tcon; + struct cifsFileInfo *smbfile = file->private_data; + struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + + xid = GetXid(); + + cFYI(1, "Sync file - name: %s datasync: 0x%x", + file->f_path.dentry->d_name.name, datasync); + + tcon = tlink_tcon(smbfile->tlink); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) + rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); FreeXid(xid); return rc; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 6c9ee8014ff0..8852470b4fbb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -44,13 +44,17 @@ static void cifs_set_ops(struct inode *inode) inode->i_fop = &cifs_file_direct_nobrl_ops; else inode->i_fop = &cifs_file_direct_ops; + } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + inode->i_fop = &cifs_file_strict_nobrl_ops; + else + inode->i_fop = &cifs_file_strict_ops; } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_nobrl_ops; else { /* not direct, send byte range locks */ inode->i_fop = &cifs_file_ops; } - /* check if server can support readpages */ if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) @@ -1679,7 +1683,7 @@ cifs_inode_needs_reval(struct inode *inode) /* * Zap the cache. Called when invalid_mapping flag is set. */ -static void +void cifs_invalidate_mapping(struct inode *inode) { int rc; -- cgit v1.2.3 From 7a6a19b17ab9103ec708c18befd28f2a3908d4c1 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 14 Dec 2010 11:29:51 +0300 Subject: CIFS: Implement cifs_file_strict_mmap (try #2) Invalidate inode mapping if we don't have at least Level II oplock. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 4 ++-- fs/cifs/cifsfs.h | 1 + fs/cifs/file.c | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f6093e401c5a..e24d966fb214 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -743,7 +743,7 @@ const struct file_operations cifs_file_strict_ops = { .lock = cifs_lock, .fsync = cifs_strict_fsync, .flush = cifs_flush, - .mmap = cifs_file_mmap, + .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, #ifdef CONFIG_CIFS_POSIX @@ -798,7 +798,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .release = cifs_close, .fsync = cifs_strict_fsync, .flush = cifs_flush, - .mmap = cifs_file_mmap, + .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, #ifdef CONFIG_CIFS_POSIX diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 10c4303c282d..710072e36912 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -89,6 +89,7 @@ extern int cifs_fsync(struct file *, int); extern int cifs_strict_fsync(struct file *, int); extern int cifs_flush(struct file *, fl_owner_t id); extern int cifs_file_mmap(struct file * , struct vm_area_struct *); +extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5790fab7349b..0b32377ef8b7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1769,6 +1769,21 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, return total_read; } +int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) +{ + int rc, xid; + struct inode *inode = file->f_path.dentry->d_inode; + + xid = GetXid(); + + if (!CIFS_I(inode)->clientCanCacheRead) + cifs_invalidate_mapping(inode); + + rc = generic_file_mmap(file, vma); + FreeXid(xid); + return rc; +} + int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) { int rc, xid; -- cgit v1.2.3 From a70307eeeb25b89f6b2baf3cf3f0cef83c96ba12 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 14 Dec 2010 11:50:41 +0300 Subject: CIFS: Implement cifs_strict_readv (try #4) Read from the cache if we have at least Level II oplock - otherwise read from the server. Add cifs_user_readv to let the client read into iovec buffers. Reviewed-by: Jeff Layton Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 4 +- fs/cifs/cifsfs.h | 4 +- fs/cifs/file.c | 116 +++++++++++++++++++++++++++++++++++++------------------ 3 files changed, 84 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e24d966fb214..a8323f1dc1c4 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -736,7 +736,7 @@ const struct file_operations cifs_file_ops = { const struct file_operations cifs_file_strict_ops = { .read = do_sync_read, .write = do_sync_write, - .aio_read = generic_file_aio_read, + .aio_read = cifs_strict_readv, .aio_write = cifs_file_aio_write, .open = cifs_open, .release = cifs_close, @@ -792,7 +792,7 @@ const struct file_operations cifs_file_nobrl_ops = { const struct file_operations cifs_file_strict_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, - .aio_read = generic_file_aio_read, + .aio_read = cifs_strict_readv, .aio_write = cifs_file_aio_write, .open = cifs_open, .release = cifs_close, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 710072e36912..f23206d46531 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -81,7 +81,9 @@ extern int cifs_open(struct inode *inode, struct file *file); extern int cifs_close(struct inode *inode, struct file *file); extern int cifs_closedir(struct inode *inode, struct file *file); extern ssize_t cifs_user_read(struct file *file, char __user *read_data, - size_t read_size, loff_t *poffset); + size_t read_size, loff_t *poffset); +extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, size_t write_size, loff_t *poffset); extern int cifs_lock(struct file *, int, struct file_lock *); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0b32377ef8b7..d7d65a70678e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1619,42 +1619,42 @@ int cifs_flush(struct file *file, fl_owner_t id) return rc; } -ssize_t cifs_user_read(struct file *file, char __user *read_data, - size_t read_size, loff_t *poffset) +static ssize_t +cifs_iovec_read(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *poffset) { - int rc = -EACCES; - unsigned int bytes_read = 0; - unsigned int total_read = 0; - unsigned int current_read_size; + int rc; + int xid; + unsigned int total_read, bytes_read = 0; + size_t len, cur_len; + int iov_offset = 0; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - int xid; struct cifsFileInfo *open_file; - char *smb_read_data; - char __user *current_offset; struct smb_com_read_rsp *pSMBr; + char *read_data; + + if (!nr_segs) + return 0; + + len = iov_length(iov, nr_segs); + if (!len) + return 0; xid = GetXid(); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - if (file->private_data == NULL) { - rc = -EBADF; - FreeXid(xid); - return rc; - } open_file = file->private_data; pTcon = tlink_tcon(open_file->tlink); if ((file->f_flags & O_ACCMODE) == O_WRONLY) cFYI(1, "attempting read on write only file instance"); - for (total_read = 0, current_offset = read_data; - read_size > total_read; - total_read += bytes_read, current_offset += bytes_read) { - current_read_size = min_t(const int, read_size - total_read, - cifs_sb->rsize); + for (total_read = 0; total_read < len; total_read += bytes_read) { + cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); rc = -EAGAIN; - smb_read_data = NULL; + read_data = NULL; + while (rc == -EAGAIN) { int buf_type = CIFS_NO_BUFFER; if (open_file->invalidHandle) { @@ -1662,27 +1662,25 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, if (rc != 0) break; } - rc = CIFSSMBRead(xid, pTcon, - open_file->netfid, - current_read_size, *poffset, - &bytes_read, &smb_read_data, - &buf_type); - pSMBr = (struct smb_com_read_rsp *)smb_read_data; - if (smb_read_data) { - if (copy_to_user(current_offset, - smb_read_data + - 4 /* RFC1001 length field */ + - le16_to_cpu(pSMBr->DataOffset), - bytes_read)) + rc = CIFSSMBRead(xid, pTcon, open_file->netfid, + cur_len, *poffset, &bytes_read, + &read_data, &buf_type); + pSMBr = (struct smb_com_read_rsp *)read_data; + if (read_data) { + char *data_offset = read_data + 4 + + le16_to_cpu(pSMBr->DataOffset); + if (memcpy_toiovecend(iov, data_offset, + iov_offset, bytes_read)) rc = -EFAULT; - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(smb_read_data); + cifs_small_buf_release(read_data); else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(smb_read_data); - smb_read_data = NULL; + cifs_buf_release(read_data); + read_data = NULL; + iov_offset += bytes_read; } } + if (rc || (bytes_read == 0)) { if (total_read) { break; @@ -1695,13 +1693,57 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, *poffset += bytes_read; } } + FreeXid(xid); return total_read; } +ssize_t cifs_user_read(struct file *file, char __user *read_data, + size_t read_size, loff_t *poffset) +{ + struct iovec iov; + iov.iov_base = read_data; + iov.iov_len = read_size; + + return cifs_iovec_read(file, &iov, 1, poffset); +} + +static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + ssize_t read; + + read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos); + if (read > 0) + iocb->ki_pos = pos; + + return read; +} + +ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode; + + inode = iocb->ki_filp->f_path.dentry->d_inode; + + if (CIFS_I(inode)->clientCanCacheRead) + return generic_file_aio_read(iocb, iov, nr_segs, pos); + + /* + * In strict cache mode we need to read from the server all the time + * if we don't have level II oplock because the server can delay mtime + * change - so we can't make a decision about inode invalidating. + * And we can also fail with pagereading if there are mandatory locks + * on pages affected by this read but not on the region from pos to + * pos+len-1. + */ + + return cifs_user_readv(iocb, iov, nr_segs, pos); +} static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, - loff_t *poffset) + loff_t *poffset) { int rc = -EACCES; unsigned int bytes_read = 0; -- cgit v1.2.3 From c3dccf48174e50668b7c63544ac8c60c07a45978 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:50 -0500 Subject: cifs: TCP_Server_Info diet Remove fields that are completely unused, and rearrange struct according to recommendations by "pahole". Before: /* size: 1112, cachelines: 18, members: 49 */ /* sum members: 1086, holes: 8, sum holes: 26 */ /* bit holes: 1, sum bit holes: 7 bits */ /* last cacheline: 24 bytes */ After: /* size: 1072, cachelines: 17, members: 42 */ /* sum members: 1065, holes: 3, sum holes: 7 */ /* last cacheline: 48 bytes */ ...savings of 40 bytes per struct on x86_64. 21 bytes by field removal, and 19 by reorganizing to eliminate holes. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 26 +++++++++----------------- fs/cifs/cifssmb.c | 2 -- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 571132c95231..36f097e1ee74 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -161,6 +161,7 @@ struct TCP_Server_Info { int srv_count; /* reference counter */ /* 15 character server name + 0x20 16th byte indicating type = srv */ char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; + enum statusEnum tcpStatus; /* what we think the status is */ char *hostname; /* hostname portion of UNC string */ struct socket *ssocket; struct sockaddr_storage dstaddr; @@ -168,25 +169,16 @@ struct TCP_Server_Info { wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; - void *Server_NlsInfo; /* BB - placeholder for future NLS info */ - unsigned short server_codepage; /* codepage for the server */ - enum protocolEnum protocolType; - char versionMajor; - char versionMinor; - bool svlocal:1; /* local server or remote */ bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ bool tcp_nodelay; atomic_t inFlight; /* number of requests on the wire to server */ -#ifdef CONFIG_CIFS_STATS2 - atomic_t inSend; /* requests trying to send */ - atomic_t num_waiters; /* blocked waiting to get in sendrecv */ -#endif - enum statusEnum tcpStatus; /* what we think the status is */ struct mutex srv_mutex; struct task_struct *tsk; char server_GUID[16]; char secMode; + bool session_estab; /* mark when very first sess is established */ + u16 dialect; /* dialect index that server chose */ enum securityEnum secType; unsigned int maxReq; /* Clients should submit no more */ /* than maxReq distinct unanswered SMBs to the server when using */ @@ -199,8 +191,6 @@ struct TCP_Server_Info { unsigned int max_vcs; /* maximum number of smb sessions, at least those that can be specified uniquely with vcnumbers */ - char sessid[4]; /* unique token id for this session */ - /* (returned on Negotiate */ int capabilities; /* allow selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ __u16 CurrentMid; /* multiplex id - rotating counter */ @@ -210,18 +200,20 @@ struct TCP_Server_Info { __u32 sequence_number; /* for signing, protected by srv_mutex */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ - u16 dialect; /* dialect index that server chose */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ /* extended security flavors that server supports */ + bool sec_ntlmssp; /* supports NTLMSSP */ + bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ - bool sec_kerberosu2u; /* supports U2U Kerberos */ - bool sec_ntlmssp; /* supports NTLMSSP */ - bool session_estab; /* mark when very first sess is established */ struct delayed_work echo; /* echo ping workqueue job */ #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif +#ifdef CONFIG_CIFS_STATS2 + atomic_t inSend; /* requests trying to send */ + atomic_t num_waiters; /* blocked waiting to get in sendrecv */ +#endif }; /* diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 37113450757b..5b1f6637f161 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -452,7 +452,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); - GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey); /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { @@ -566,7 +565,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); cFYI(DBG2, "Max buf = %d", ses->server->maxBuf); - GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); server->capabilities = le32_to_cpu(pSMBr->Capabilities); server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); server->timeAdj *= 60; -- cgit v1.2.3 From aae62fdb6b9a6605abdea7370c4a0e005e6c1cd7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:50 -0500 Subject: cifs: move time field in cifsInodeInfo ...and remove length qualifiers from bools. Before: /* size: 1176, cachelines: 19, members: 13 */ /* sum members: 1165, holes: 2, sum holes: 11 */ /* bit holes: 1, sum bit holes: 4 bits */ /* last cacheline: 24 bytes */ After: /* size: 1168, cachelines: 19, members: 13 */ /* last cacheline: 16 bytes */ ...savings of 8 bytes per inode. Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 36f097e1ee74..5bfb75346cb0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -439,11 +439,11 @@ struct cifsInodeInfo { /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ - unsigned long time; /* jiffies of last update/check of inode */ - bool clientCanCacheRead:1; /* read oplock */ - bool clientCanCacheAll:1; /* read and writebehind oplock */ - bool delete_pending:1; /* DELETE_ON_CLOSE is set */ - bool invalid_mapping:1; /* pagecache is invalid */ + bool clientCanCacheRead; /* read oplock */ + bool clientCanCacheAll; /* read and writebehind oplock */ + bool delete_pending; /* DELETE_ON_CLOSE is set */ + bool invalid_mapping; /* pagecache is invalid */ + unsigned long time; /* jiffies of last update of inode */ u64 server_eof; /* current file size on server */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ -- cgit v1.2.3 From 690c522fa5a62825af880775e3ef1e55755667b2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:51 -0500 Subject: cifs: use get/put_unaligned functions to access ByteCount It's possible that when we access the ByteCount that the alignment will be off. Most CPUs deal with that transparently, but there's usually some performance impact. Some CPUs raise an exception on unaligned accesses. Fix this by accessing the byte count using the get_unaligned and put_unaligned inlined functions. While we're at it, fix the types of some of the variables that end up getting returns from these functions. Acked-by: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 47 +++++++++++++++++++++++++++++++++++++++++++---- fs/cifs/cifssmb.c | 14 +++++--------- fs/cifs/connect.c | 10 +++++----- fs/cifs/netmisc.c | 4 ++-- fs/cifs/sess.c | 13 ++++++------- fs/cifs/transport.c | 9 ++++----- 6 files changed, 65 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index ea205b4fcad2..b5c8cc5d7a7f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -23,6 +23,7 @@ #define _CIFSPDU_H #include +#include #include "smbfsctl.h" #ifdef CONFIG_CIFS_WEAK_PW_HASH @@ -426,11 +427,49 @@ struct smb_hdr { __u16 Mid; __u8 WordCount; } __attribute__((packed)); -/* given a pointer to an smb_hdr retrieve the value of byte count */ -#define BCC(smb_var) (*(__u16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount))) -#define BCC_LE(smb_var) (*(__le16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount))) + +/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */ +#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \ + (2 * (smb_var)->WordCount)) + /* given a pointer to an smb_hdr retrieve the pointer to the byte area */ -#define pByteArea(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount) + 2) +#define pByteArea(smb_var) (BCC(smb_var) + 2) + +/* get the converted ByteCount for a SMB packet and return it */ +static inline __u16 +get_bcc(struct smb_hdr *hdr) +{ + __u16 *bc_ptr = (__u16 *)BCC(hdr); + + return get_unaligned(bc_ptr); +} + +/* get the unconverted ByteCount for a SMB packet and return it */ +static inline __u16 +get_bcc_le(struct smb_hdr *hdr) +{ + __le16 *bc_ptr = (__le16 *)BCC(hdr); + + return get_unaligned_le16(bc_ptr); +} + +/* set the ByteCount for a SMB packet in host-byte order */ +static inline void +put_bcc(__u16 count, struct smb_hdr *hdr) +{ + __u16 *bc_ptr = (__u16 *)BCC(hdr); + + put_unaligned(count, bc_ptr); +} + +/* set the ByteCount for a SMB packet in little-endian */ +static inline void +put_bcc_le(__u16 count, struct smb_hdr *hdr) +{ + __le16 *bc_ptr = (__le16 *)BCC(hdr); + + put_unaligned_le16(count, bc_ptr); +} /* * Computer Name Length (since Netbios name was length 16 with last byte 0x20) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5b1f6637f161..39cec0d9cd1b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -333,7 +333,6 @@ static int validate_t2(struct smb_t2_rsp *pSMB) { int rc = -EINVAL; int total_size; - char *pBCC; /* check for plausible wct, bcc and t2 data and parm sizes */ /* check for parm and data offset going beyond end of smb */ @@ -346,13 +345,9 @@ static int validate_t2(struct smb_t2_rsp *pSMB) if (total_size < 512) { total_size += le16_to_cpu(pSMB->t2_rsp.DataCount); - /* BCC le converted in SendReceive */ - pBCC = (pSMB->hdr.WordCount * 2) + - sizeof(struct smb_hdr) + - (char *)pSMB; - if ((total_size <= (*(u16 *)pBCC)) && - (total_size < - CIFSMaxBufSize+MAX_CIFS_HDR_SIZE)) { + if (total_size <= get_bcc(&pSMB->hdr) && + total_size < + CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { return 0; } } @@ -362,6 +357,7 @@ static int validate_t2(struct smb_t2_rsp *pSMB) sizeof(struct smb_t2_rsp) + 16); return rc; } + int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) { @@ -5609,7 +5605,7 @@ QAllEAsRetry: } /* make sure list_len doesn't go past end of SMB */ - end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr); + end_of_smb = (char *)pByteArea(&pSMBr->hdr) + get_bcc(&pSMBr->hdr); if ((char *)ea_response_data + list_len > end_of_smb) { cFYI(1, "EA list appears to go beyond SMB"); rc = -EIO; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8d4657596301..ca20e813275d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -318,9 +318,9 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); total_in_buf += total_in_buf2; pSMBt->t2_rsp.DataCount = cpu_to_le16(total_in_buf); - byte_count = le16_to_cpu(BCC_LE(pTargetSMB)); + byte_count = get_bcc_le(pTargetSMB); byte_count += total_in_buf2; - BCC_LE(pTargetSMB) = cpu_to_le16(byte_count); + put_bcc_le(byte_count, pTargetSMB); byte_count = pTargetSMB->smb_buf_length; byte_count += total_in_buf2; @@ -2937,8 +2937,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, TCONX_RSP *pSMBr; unsigned char *bcc_ptr; int rc = 0; - int length, bytes_left; - __u16 count; + int length; + __u16 bytes_left, count; if (ses == NULL) return -EIO; @@ -3032,7 +3032,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, tcon->need_reconnect = false; tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); - bytes_left = BCC(smb_buffer_response); + bytes_left = get_bcc(smb_buffer_response); length = strnlen(bcc_ptr, bytes_left - 2); if (smb_buffer->Flags2 & SMBFLG2_UNICODE) is_unicode = true; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 6783ce6cdc89..8d9189f64477 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -916,14 +916,14 @@ unsigned int smbCalcSize(struct smb_hdr *ptr) { return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + - 2 /* size of the bcc field */ + BCC(ptr)); + 2 /* size of the bcc field */ + get_bcc(ptr)); } unsigned int smbCalcSize_LE(struct smb_hdr *ptr) { return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + - 2 /* size of the bcc field */ + le16_to_cpu(BCC_LE(ptr))); + 2 /* size of the bcc field */ + get_bcc_le(ptr)); } /* The following are taken from fs/ntfs/util.c */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 1cffd82c4f13..1adc9625a344 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -277,7 +277,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, } static void -decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, +decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, const struct nls_table *nls_cp) { int len; @@ -323,7 +323,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, return; } -static int decode_ascii_ssetup(char **pbcc_area, int bleft, +static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, const struct nls_table *nls_cp) { @@ -575,12 +575,11 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, char *str_area; SESSION_SETUP_ANDX *pSMB; __u32 capabilities; - int count; + __u16 count; int resp_buf_type; struct kvec iov[3]; enum securityEnum type; - __u16 action; - int bytes_remaining; + __u16 action, bytes_remaining; struct key *spnego_key = NULL; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ u16 blob_len; @@ -876,7 +875,7 @@ ssetup_ntlmssp_authenticate: count = iov[1].iov_len + iov[2].iov_len; smb_buf->smb_buf_length += count; - BCC_LE(smb_buf) = cpu_to_le16(count); + put_bcc_le(count, smb_buf); rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, CIFS_LOG_ERROR); @@ -910,7 +909,7 @@ ssetup_ntlmssp_authenticate: cFYI(1, "UID = %d ", ses->Suid); /* response can have either 3 or 4 word count - Samba sends 3 */ /* and lanman response is 3 */ - bytes_remaining = BCC(smb_buf); + bytes_remaining = get_bcc(smb_buf); bcc_ptr = pByteArea(smb_buf); if (smb_buf->WordCount == 4) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c8e2808cd5e6..c1ccca1a933f 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -484,7 +484,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf, in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; in_buf->Command = SMB_COM_NT_CANCEL; in_buf->WordCount = 0; - BCC_LE(in_buf) = 0; + put_bcc_le(0, in_buf); mutex_lock(&server->srv_mutex); rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); @@ -632,8 +632,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, if (receive_len >= sizeof(struct smb_hdr) - 4 /* do not count RFC1001 header */ + (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ ) - BCC(midQ->resp_buf) = - le16_to_cpu(BCC_LE(midQ->resp_buf)); + put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf); if ((flags & CIFS_NO_RESP) == 0) midQ->resp_buf = NULL; /* mark it so buf will not be freed by @@ -776,7 +775,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, if (receive_len >= sizeof(struct smb_hdr) - 4 /* do not count RFC1001 header */ + (2 * out_buf->WordCount) + 2 /* bcc */ ) - BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); + put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf); } else { rc = -EIO; cERROR(1, "Bad MID state?"); @@ -977,7 +976,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, if (receive_len >= sizeof(struct smb_hdr) - 4 /* do not count RFC1001 header */ + (2 * out_buf->WordCount) + 2 /* bcc */ ) - BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); + put_bcc(get_bcc_le(out_buf), out_buf); out: delete_mid(midQ); -- cgit v1.2.3 From 12df83c9b901cfe8ca7a66fbe0effc6d873cbbb9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:51 -0500 Subject: cifs: clean up unaligned accesses in validate_t2 ...and clean up function to reduce indentation. Signed-off-by: Jeff Layton Acked-by: Pavel Shilovsky Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 39cec0d9cd1b..675041a6949c 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -331,31 +331,33 @@ smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, static int validate_t2(struct smb_t2_rsp *pSMB) { - int rc = -EINVAL; - int total_size; + unsigned int total_size; + + /* check for plausible wct */ + if (pSMB->hdr.WordCount < 10) + goto vt2_err; - /* check for plausible wct, bcc and t2 data and parm sizes */ /* check for parm and data offset going beyond end of smb */ - if (pSMB->hdr.WordCount >= 10) { - if ((le16_to_cpu(pSMB->t2_rsp.ParameterOffset) <= 1024) && - (le16_to_cpu(pSMB->t2_rsp.DataOffset) <= 1024)) { - /* check that bcc is at least as big as parms + data */ - /* check that bcc is less than negotiated smb buffer */ - total_size = le16_to_cpu(pSMB->t2_rsp.ParameterCount); - if (total_size < 512) { - total_size += - le16_to_cpu(pSMB->t2_rsp.DataCount); - if (total_size <= get_bcc(&pSMB->hdr) && - total_size < - CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { - return 0; - } - } - } - } + if (get_unaligned_le16(&pSMB->t2_rsp.ParameterOffset) > 1024 || + get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) + goto vt2_err; + + /* check that bcc is at least as big as parms + data */ + /* check that bcc is less than negotiated smb buffer */ + total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); + if (total_size >= 512) + goto vt2_err; + + total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); + if (total_size > get_bcc(&pSMB->hdr) || + total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) + goto vt2_err; + + return 0; +vt2_err: cifs_dump_mem("Invalid transact2 SMB: ", (char *)pSMB, sizeof(struct smb_t2_rsp) + 16); - return rc; + return -EINVAL; } int -- cgit v1.2.3 From 26ec254869c0158ea8db6de83b7644e2d93cac2a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:51 -0500 Subject: cifs: fix unaligned access in check2ndT2 and coalesce_t2 Signed-off-by: Jeff Layton Acked-by: Pavel Shilovsky Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/connect.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ca20e813275d..18d3c7724d6e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -232,9 +232,8 @@ cifs_reconnect(struct TCP_Server_Info *server) static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) { struct smb_t2_rsp *pSMBt; - int total_data_size; - int data_in_this_rsp; int remaining; + __u16 total_data_size, data_in_this_rsp; if (pSMB->Command != SMB_COM_TRANSACTION2) return 0; @@ -248,8 +247,8 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) pSMBt = (struct smb_t2_rsp *)pSMB; - total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); - data_in_this_rsp = le16_to_cpu(pSMBt->t2_rsp.DataCount); + total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); + data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); remaining = total_data_size - data_in_this_rsp; @@ -275,21 +274,18 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) { struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond; struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; - int total_data_size; - int total_in_buf; - int remaining; - int total_in_buf2; char *data_area_of_target; char *data_area_of_buf2; - __u16 byte_count; + int remaining; + __u16 byte_count, total_data_size, total_in_buf, total_in_buf2; - total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); + total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); - if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) { + if (total_data_size != + get_unaligned_le16(&pSMB2->t2_rsp.TotalDataCount)) cFYI(1, "total data size of primary and secondary t2 differ"); - } - total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount); + total_in_buf = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); remaining = total_data_size - total_in_buf; @@ -299,25 +295,25 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) if (remaining == 0) /* nothing to do, ignore */ return 0; - total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount); + total_in_buf2 = get_unaligned_le16(&pSMB2->t2_rsp.DataCount); if (remaining < total_in_buf2) { cFYI(1, "transact2 2nd response contains too much data"); } /* find end of first SMB data area */ data_area_of_target = (char *)&pSMBt->hdr.Protocol + - le16_to_cpu(pSMBt->t2_rsp.DataOffset); + get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); /* validate target area */ - data_area_of_buf2 = (char *) &pSMB2->hdr.Protocol + - le16_to_cpu(pSMB2->t2_rsp.DataOffset); + data_area_of_buf2 = (char *)&pSMB2->hdr.Protocol + + get_unaligned_le16(&pSMB2->t2_rsp.DataOffset); data_area_of_target += total_in_buf; /* copy second buffer into end of first buffer */ memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); total_in_buf += total_in_buf2; - pSMBt->t2_rsp.DataCount = cpu_to_le16(total_in_buf); + put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); byte_count = get_bcc_le(pTargetSMB); byte_count += total_in_buf2; put_bcc_le(byte_count, pTargetSMB); @@ -334,7 +330,6 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) return 0; /* we are done */ } else /* more responses to go */ return 1; - } static void -- cgit v1.2.3 From ba2dbf30df210b519bdd8d34ac2ecaaeeef34c44 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:51 -0500 Subject: cifs: clean up unaligned accesses in cifs_unicode.c Make sure we use get/put_unaligned routines when accessing wide character strings. Signed-off-by: Jeff Layton Acked-by: Pavel Shilovsky Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 51 +++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 430f510a1720..5f6e71857be6 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -44,10 +44,14 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, int charlen, outlen = 0; int maxwords = maxbytes / 2; char tmp[NLS_MAX_CHARSET_SIZE]; + __u16 ftmp; - for (i = 0; i < maxwords && from[i]; i++) { - charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, - NLS_MAX_CHARSET_SIZE); + for (i = 0; i < maxwords; i++) { + ftmp = get_unaligned_le16(&from[i]); + if (ftmp == 0) + break; + + charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); if (charlen > 0) outlen += charlen; else @@ -58,9 +62,9 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, } /* - * cifs_mapchar - convert a little-endian char to proper char in codepage + * cifs_mapchar - convert a host-endian char to proper char in codepage * @target - where converted character should be copied - * @src_char - 2 byte little-endian source character + * @src_char - 2 byte host-endian source character * @cp - codepage to which character should be converted * @mapchar - should character be mapped according to mapchars mount option? * @@ -69,7 +73,7 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). */ static int -cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp, +cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, bool mapchar) { int len = 1; @@ -82,7 +86,7 @@ cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp, * build_path_from_dentry are modified, as they use slash as * separator. */ - switch (le16_to_cpu(src_char)) { + switch (src_char) { case UNI_COLON: *target = ':'; break; @@ -109,8 +113,7 @@ out: return len; cp_convert: - len = cp->uni2char(le16_to_cpu(src_char), target, - NLS_MAX_CHARSET_SIZE); + len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); if (len <= 0) { *target = '?'; len = 1; @@ -149,6 +152,7 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, int nullsize = nls_nullsize(codepage); int fromwords = fromlen / 2; char tmp[NLS_MAX_CHARSET_SIZE]; + __u16 ftmp; /* * because the chars can be of varying widths, we need to take care @@ -158,19 +162,23 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, */ safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); - for (i = 0; i < fromwords && from[i]; i++) { + for (i = 0; i < fromwords; i++) { + ftmp = get_unaligned_le16(&from[i]); + if (ftmp == 0) + break; + /* * check to see if converting this character might make the * conversion bleed into the null terminator */ if (outlen >= safelen) { - charlen = cifs_mapchar(tmp, from[i], codepage, mapchar); + charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar); if ((outlen + charlen) > (tolen - nullsize)) break; } /* put converted char into 'to' buffer */ - charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar); + charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar); outlen += charlen; } @@ -193,24 +201,21 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, { int charlen; int i; - wchar_t *wchar_to = (wchar_t *)to; /* needed to quiet sparse */ + wchar_t wchar_to; /* needed to quiet sparse */ for (i = 0; len && *from; i++, from += charlen, len -= charlen) { - - /* works for 2.4.0 kernel or later */ - charlen = codepage->char2uni(from, len, &wchar_to[i]); + charlen = codepage->char2uni(from, len, &wchar_to); if (charlen < 1) { - cERROR(1, "strtoUCS: char2uni of %d returned %d", - (int)*from, charlen); + cERROR(1, "strtoUCS: char2uni of 0x%x returned %d", + *from, charlen); /* A question mark */ - to[i] = cpu_to_le16(0x003f); + wchar_to = 0x003f; charlen = 1; - } else - to[i] = cpu_to_le16(wchar_to[i]); - + } + put_unaligned_le16(wchar_to, &to[i]); } - to[i] = 0; + put_unaligned_le16(0, &to[i]); return i; } -- cgit v1.2.3 From 84cdf74e8096a10dd6acbb870dd404b92f07a756 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 13:36:51 -0500 Subject: cifs: fix unaligned accesses in cifsConvertToUCS Move cifsConvertToUCS to cifs_unicode.c where all of the other unicode related functions live. Have it store mapped characters in 'temp' and then use put_unaligned_le16 to copy it to the target buffer. Also fix the comments to match kernel coding style. Signed-off-by: Jeff Layton Acked-by: Pavel Shilovsky Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/misc.c | 71 ---------------------------------------------- 2 files changed, 76 insertions(+), 71 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 5f6e71857be6..fc0fd4fde306 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -257,3 +257,79 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, return dst; } +/* + * Convert 16 bit Unicode pathname to wire format from string in current code + * page. Conversion may involve remapping up the six characters that are + * only legal in POSIX-like OS (if they are present in the string). Path + * names are little endian 16 bit Unicode on the wire + */ +int +cifsConvertToUCS(__le16 *target, const char *source, int maxlen, + const struct nls_table *cp, int mapChars) +{ + int i, j, charlen; + int len_remaining = maxlen; + char src_char; + __u16 temp; + + if (!mapChars) + return cifs_strtoUCS(target, source, PATH_MAX, cp); + + for (i = 0, j = 0; i < maxlen; j++) { + src_char = source[i]; + switch (src_char) { + case 0: + put_unaligned_le16(0, &target[j]); + goto ctoUCS_out; + case ':': + temp = UNI_COLON; + break; + case '*': + temp = UNI_ASTERIK; + break; + case '?': + temp = UNI_QUESTION; + break; + case '<': + temp = UNI_LESSTHAN; + break; + case '>': + temp = UNI_GRTRTHAN; + break; + case '|': + temp = UNI_PIPE; + break; + /* + * FIXME: We can not handle remapping backslash (UNI_SLASH) + * until all the calls to build_path_from_dentry are modified, + * as they use backslash as separator. + */ + default: + charlen = cp->char2uni(source+i, len_remaining, + &temp); + /* + * if no match, use question mark, which at least in + * some cases serves as wild card + */ + if (charlen < 1) { + temp = 0x003f; + charlen = 1; + } + len_remaining -= charlen; + /* + * character may take more than one byte in the source + * string, but will take exactly two bytes in the + * target string + */ + i += charlen; + continue; + } + put_unaligned_le16(temp, &target[j]); + i++; /* move to next char in source string */ + len_remaining--; + } + +ctoUCS_out: + return i; +} + diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 09bfcf08a90f..a09e077ba925 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -637,77 +637,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length) return; } -/* Convert 16 bit Unicode pathname to wire format from string in current code - page. Conversion may involve remapping up the seven characters that are - only legal in POSIX-like OS (if they are present in the string). Path - names are little endian 16 bit Unicode on the wire */ -int -cifsConvertToUCS(__le16 *target, const char *source, int maxlen, - const struct nls_table *cp, int mapChars) -{ - int i, j, charlen; - int len_remaining = maxlen; - char src_char; - __u16 temp; - - if (!mapChars) - return cifs_strtoUCS(target, source, PATH_MAX, cp); - - for (i = 0, j = 0; i < maxlen; j++) { - src_char = source[i]; - switch (src_char) { - case 0: - target[j] = 0; - goto ctoUCS_out; - case ':': - target[j] = cpu_to_le16(UNI_COLON); - break; - case '*': - target[j] = cpu_to_le16(UNI_ASTERIK); - break; - case '?': - target[j] = cpu_to_le16(UNI_QUESTION); - break; - case '<': - target[j] = cpu_to_le16(UNI_LESSTHAN); - break; - case '>': - target[j] = cpu_to_le16(UNI_GRTRTHAN); - break; - case '|': - target[j] = cpu_to_le16(UNI_PIPE); - break; - /* BB We can not handle remapping slash until - all the calls to build_path_from_dentry - are modified, as they use slash as separator BB */ - /* case '\\': - target[j] = cpu_to_le16(UNI_SLASH); - break;*/ - default: - charlen = cp->char2uni(source+i, - len_remaining, &temp); - /* if no match, use question mark, which - at least in some cases servers as wild card */ - if (charlen < 1) { - target[j] = cpu_to_le16(0x003f); - charlen = 1; - } else - target[j] = cpu_to_le16(temp); - len_remaining -= charlen; - /* character may take more than one byte in the - the source string, but will take exactly two - bytes in the target string */ - i += charlen; - continue; - } - i++; /* move to next char in source string */ - len_remaining--; - } - -ctoUCS_out: - return i; -} - void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) { -- cgit v1.2.3 From 28e58ee8ce1f0e69c207f747b7b9054b071e328d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 20 Jan 2011 16:21:59 -0800 Subject: Fix broken "pipe: use event aware wakeups" optimization Commit e462c448fdc8 ("pipe: use event aware wakeups") optimized the pipe event wakeup calls to avoid wakeups if the events do not match the requested set. However, the optimization was buggy, in that it didn't actually use the correct sets for the events: when we make room for more data to be written, the pipe poll() routine will return both the POLLOUT _and_ POLLWRNORM bits. Similarly for read. And most critically, when a pipe is released, that will potentially result in POLLHUP|POLLERR (depending on whether it was the last reader or writer), not just the regular POLLIN|POLLOUT. This bug showed itself as a hung gnome-screensaver-dialog process, stuck forever (or at least until it was poked by a signal or by being traced) in a poll() system call. Cc: Davide Libenzi Cc: David S. Miller Cc: Eric Dumazet Cc: Jens Axboe Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 89e9e19b1b2e..da42f7db50de 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -441,7 +441,7 @@ redo: break; } if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); + wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } pipe_wait(pipe); @@ -450,7 +450,7 @@ redo: /* Signal writers asynchronously that there is more room. */ if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); + wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } if (ret > 0) @@ -612,7 +612,7 @@ redo2: break; } if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); do_wakeup = 0; } @@ -623,7 +623,7 @@ redo2: out: mutex_unlock(&inode->i_mutex); if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } if (ret > 0) @@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw) if (!pipe->readers && !pipe->writers) { free_pipe_info(inode); } else { - wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } -- cgit v1.2.3 From 6a108a14fa356ef607be308b68337939e56ea94e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 20 Jan 2011 14:44:16 -0800 Subject: kconfig: rename CONFIG_EMBEDDED to CONFIG_EXPERT The meaning of CONFIG_EMBEDDED has long since been obsoleted; the option is used to configure any non-standard kernel with a much larger scope than only small devices. This patch renames the option to CONFIG_EXPERT in init/Kconfig and fixes references to the option throughout the kernel. A new CONFIG_EMBEDDED option is added that automatically selects CONFIG_EXPERT when enabled and can be used in the future to isolate options that should only be considered for embedded systems (RISC architectures, SLOB, etc). Calling the option "EXPERT" more accurately represents its intention: only expert users who understand the impact of the configuration changes they are making should enable it. Reviewed-by: Ingo Molnar Acked-by: David Woodhouse Signed-off-by: David Rientjes Cc: Greg KH Cc: "David S. Miller" Cc: Jens Axboe Cc: Arnd Bergmann Cc: Robin Holt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/configs/ag5evm_defconfig | 2 +- arch/arm/configs/am200epdkit_defconfig | 2 +- arch/arm/configs/at572d940hfek_defconfig | 2 +- arch/arm/configs/badge4_defconfig | 2 +- arch/arm/configs/bcmring_defconfig | 2 +- arch/arm/configs/cm_x2xx_defconfig | 2 +- arch/arm/configs/colibri_pxa270_defconfig | 2 +- arch/arm/configs/collie_defconfig | 2 +- arch/arm/configs/corgi_defconfig | 2 +- arch/arm/configs/da8xx_omapl_defconfig | 2 +- arch/arm/configs/davinci_all_defconfig | 2 +- arch/arm/configs/dove_defconfig | 2 +- arch/arm/configs/ebsa110_defconfig | 2 +- arch/arm/configs/edb7211_defconfig | 2 +- arch/arm/configs/em_x270_defconfig | 2 +- arch/arm/configs/ep93xx_defconfig | 2 +- arch/arm/configs/eseries_pxa_defconfig | 2 +- arch/arm/configs/ezx_defconfig | 2 +- arch/arm/configs/footbridge_defconfig | 2 +- arch/arm/configs/fortunet_defconfig | 2 +- arch/arm/configs/h5000_defconfig | 2 +- arch/arm/configs/imote2_defconfig | 2 +- arch/arm/configs/ixp2000_defconfig | 2 +- arch/arm/configs/ixp23xx_defconfig | 2 +- arch/arm/configs/ixp4xx_defconfig | 2 +- arch/arm/configs/loki_defconfig | 2 +- arch/arm/configs/lpd7a400_defconfig | 2 +- arch/arm/configs/lpd7a404_defconfig | 2 +- arch/arm/configs/magician_defconfig | 2 +- arch/arm/configs/mv78xx0_defconfig | 2 +- arch/arm/configs/mx1_defconfig | 2 +- arch/arm/configs/mx21_defconfig | 2 +- arch/arm/configs/mx27_defconfig | 2 +- arch/arm/configs/mx3_defconfig | 2 +- arch/arm/configs/mx51_defconfig | 2 +- arch/arm/configs/nhk8815_defconfig | 2 +- arch/arm/configs/omap1_defconfig | 2 +- arch/arm/configs/omap2plus_defconfig | 2 +- arch/arm/configs/orion5x_defconfig | 2 +- arch/arm/configs/pcm027_defconfig | 2 +- arch/arm/configs/pcontrol_g20_defconfig | 2 +- arch/arm/configs/pleb_defconfig | 2 +- arch/arm/configs/pnx4008_defconfig | 2 +- arch/arm/configs/simpad_defconfig | 2 +- arch/arm/configs/spitz_defconfig | 2 +- arch/arm/configs/stmp378x_defconfig | 2 +- arch/arm/configs/stmp37xx_defconfig | 2 +- arch/arm/configs/tct_hammer_defconfig | 2 +- arch/arm/configs/trizeps4_defconfig | 2 +- arch/arm/configs/u300_defconfig | 2 +- arch/arm/configs/viper_defconfig | 2 +- arch/arm/configs/xcep_defconfig | 2 +- arch/avr32/Kconfig | 4 +- arch/blackfin/configs/BF518F-EZBRD_defconfig | 2 +- arch/blackfin/configs/BF526-EZBRD_defconfig | 2 +- arch/blackfin/configs/BF527-AD7160-EVAL_defconfig | 2 +- arch/blackfin/configs/BF527-EZKIT-V2_defconfig | 2 +- arch/blackfin/configs/BF527-EZKIT_defconfig | 2 +- arch/blackfin/configs/BF527-TLL6527M_defconfig | 2 +- arch/blackfin/configs/BF533-EZKIT_defconfig | 2 +- arch/blackfin/configs/BF533-STAMP_defconfig | 2 +- arch/blackfin/configs/BF537-STAMP_defconfig | 2 +- arch/blackfin/configs/BF538-EZKIT_defconfig | 2 +- arch/blackfin/configs/BF548-EZKIT_defconfig | 2 +- arch/blackfin/configs/BF561-ACVILON_defconfig | 2 +- arch/blackfin/configs/BF561-EZKIT-SMP_defconfig | 2 +- arch/blackfin/configs/BF561-EZKIT_defconfig | 2 +- arch/blackfin/configs/BlackStamp_defconfig | 2 +- arch/blackfin/configs/CM-BF527_defconfig | 2 +- arch/blackfin/configs/CM-BF533_defconfig | 2 +- arch/blackfin/configs/CM-BF537E_defconfig | 2 +- arch/blackfin/configs/CM-BF537U_defconfig | 2 +- arch/blackfin/configs/CM-BF548_defconfig | 2 +- arch/blackfin/configs/CM-BF561_defconfig | 2 +- arch/blackfin/configs/DNP5370_defconfig | 2 +- arch/blackfin/configs/H8606_defconfig | 2 +- arch/blackfin/configs/IP0X_defconfig | 2 +- arch/blackfin/configs/PNAV-10_defconfig | 2 +- arch/blackfin/configs/SRV1_defconfig | 2 +- arch/blackfin/configs/TCM-BF518_defconfig | 2 +- arch/blackfin/configs/TCM-BF537_defconfig | 2 +- arch/cris/configs/artpec_3_defconfig | 2 +- arch/cris/configs/etrax-100lx_v2_defconfig | 2 +- arch/cris/configs/etraxfs_defconfig | 2 +- arch/frv/defconfig | 2 +- arch/h8300/defconfig | 2 +- arch/m32r/configs/m32700ut.smp_defconfig | 2 +- arch/m32r/configs/m32700ut.up_defconfig | 2 +- arch/m32r/configs/mappi.nommu_defconfig | 2 +- arch/m32r/configs/mappi.smp_defconfig | 2 +- arch/m32r/configs/mappi.up_defconfig | 2 +- arch/m32r/configs/mappi2.opsp_defconfig | 2 +- arch/m32r/configs/mappi2.vdec2_defconfig | 2 +- arch/m32r/configs/mappi3.smp_defconfig | 2 +- arch/m32r/configs/oaks32r_defconfig | 2 +- arch/m32r/configs/opsput_defconfig | 2 +- arch/m32r/configs/usrv_defconfig | 2 +- arch/m68knommu/configs/m5208evb_defconfig | 2 +- arch/m68knommu/configs/m5249evb_defconfig | 2 +- arch/m68knommu/configs/m5272c3_defconfig | 2 +- arch/m68knommu/configs/m5275evb_defconfig | 2 +- arch/m68knommu/configs/m5307c3_defconfig | 2 +- arch/m68knommu/configs/m5407c3_defconfig | 2 +- arch/m68knommu/defconfig | 2 +- arch/microblaze/configs/mmu_defconfig | 2 +- arch/microblaze/configs/nommu_defconfig | 2 +- arch/mips/Kconfig.debug | 2 +- arch/mips/configs/ar7_defconfig | 2 +- arch/mips/configs/bcm47xx_defconfig | 2 +- arch/mips/configs/bcm63xx_defconfig | 2 +- arch/mips/configs/bigsur_defconfig | 2 +- arch/mips/configs/capcella_defconfig | 2 +- arch/mips/configs/cavium-octeon_defconfig | 2 +- arch/mips/configs/cobalt_defconfig | 2 +- arch/mips/configs/db1000_defconfig | 2 +- arch/mips/configs/db1100_defconfig | 2 +- arch/mips/configs/db1200_defconfig | 2 +- arch/mips/configs/db1500_defconfig | 2 +- arch/mips/configs/db1550_defconfig | 2 +- arch/mips/configs/decstation_defconfig | 2 +- arch/mips/configs/e55_defconfig | 2 +- arch/mips/configs/fuloong2e_defconfig | 2 +- arch/mips/configs/gpr_defconfig | 2 +- arch/mips/configs/ip22_defconfig | 2 +- arch/mips/configs/ip27_defconfig | 2 +- arch/mips/configs/ip28_defconfig | 2 +- arch/mips/configs/ip32_defconfig | 2 +- arch/mips/configs/jazz_defconfig | 2 +- arch/mips/configs/jmr3927_defconfig | 2 +- arch/mips/configs/lasat_defconfig | 2 +- arch/mips/configs/lemote2f_defconfig | 2 +- arch/mips/configs/malta_defconfig | 2 +- arch/mips/configs/markeins_defconfig | 2 +- arch/mips/configs/mipssim_defconfig | 2 +- arch/mips/configs/mpc30x_defconfig | 2 +- arch/mips/configs/msp71xx_defconfig | 2 +- arch/mips/configs/mtx1_defconfig | 2 +- arch/mips/configs/pb1100_defconfig | 2 +- arch/mips/configs/pb1200_defconfig | 2 +- arch/mips/configs/pb1500_defconfig | 2 +- arch/mips/configs/pb1550_defconfig | 2 +- arch/mips/configs/pnx8335-stb225_defconfig | 2 +- arch/mips/configs/pnx8550-jbs_defconfig | 2 +- arch/mips/configs/pnx8550-stb810_defconfig | 2 +- arch/mips/configs/powertv_defconfig | 2 +- arch/mips/configs/rb532_defconfig | 2 +- arch/mips/configs/rbtx49xx_defconfig | 2 +- arch/mips/configs/rm200_defconfig | 2 +- arch/mips/configs/sb1250-swarm_defconfig | 2 +- arch/mips/configs/tb0219_defconfig | 2 +- arch/mips/configs/tb0226_defconfig | 2 +- arch/mips/configs/tb0287_defconfig | 2 +- arch/mips/configs/workpad_defconfig | 2 +- arch/mips/configs/wrppmc_defconfig | 2 +- arch/mips/configs/yosemite_defconfig | 2 +- arch/mn10300/configs/asb2303_defconfig | 2 +- arch/mn10300/configs/asb2364_defconfig | 2 +- arch/parisc/configs/a500_defconfig | 2 +- arch/parisc/configs/c3000_defconfig | 2 +- arch/powerpc/configs/40x/acadia_defconfig | 2 +- arch/powerpc/configs/40x/ep405_defconfig | 2 +- arch/powerpc/configs/40x/hcu4_defconfig | 2 +- arch/powerpc/configs/40x/kilauea_defconfig | 2 +- arch/powerpc/configs/40x/makalu_defconfig | 2 +- arch/powerpc/configs/40x/walnut_defconfig | 2 +- arch/powerpc/configs/44x/arches_defconfig | 2 +- arch/powerpc/configs/44x/bamboo_defconfig | 2 +- arch/powerpc/configs/44x/bluestone_defconfig | 2 +- arch/powerpc/configs/44x/canyonlands_defconfig | 2 +- arch/powerpc/configs/44x/ebony_defconfig | 2 +- arch/powerpc/configs/44x/eiger_defconfig | 2 +- arch/powerpc/configs/44x/icon_defconfig | 2 +- arch/powerpc/configs/44x/iss476-smp_defconfig | 2 +- arch/powerpc/configs/44x/katmai_defconfig | 2 +- arch/powerpc/configs/44x/rainier_defconfig | 2 +- arch/powerpc/configs/44x/redwood_defconfig | 2 +- arch/powerpc/configs/44x/sam440ep_defconfig | 2 +- arch/powerpc/configs/44x/sequoia_defconfig | 2 +- arch/powerpc/configs/44x/taishan_defconfig | 2 +- arch/powerpc/configs/44x/warp_defconfig | 2 +- arch/powerpc/configs/52xx/cm5200_defconfig | 2 +- arch/powerpc/configs/52xx/lite5200b_defconfig | 2 +- arch/powerpc/configs/52xx/motionpro_defconfig | 2 +- arch/powerpc/configs/52xx/pcm030_defconfig | 2 +- arch/powerpc/configs/52xx/tqm5200_defconfig | 2 +- arch/powerpc/configs/83xx/asp8347_defconfig | 2 +- arch/powerpc/configs/83xx/kmeter1_defconfig | 2 +- arch/powerpc/configs/83xx/mpc8313_rdb_defconfig | 2 +- arch/powerpc/configs/83xx/mpc8315_rdb_defconfig | 2 +- arch/powerpc/configs/83xx/mpc832x_mds_defconfig | 2 +- arch/powerpc/configs/83xx/mpc832x_rdb_defconfig | 2 +- arch/powerpc/configs/83xx/mpc834x_itx_defconfig | 2 +- arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig | 2 +- arch/powerpc/configs/83xx/mpc834x_mds_defconfig | 2 +- arch/powerpc/configs/83xx/mpc836x_mds_defconfig | 2 +- arch/powerpc/configs/83xx/mpc836x_rdk_defconfig | 2 +- arch/powerpc/configs/83xx/mpc837x_mds_defconfig | 2 +- arch/powerpc/configs/83xx/mpc837x_rdb_defconfig | 2 +- arch/powerpc/configs/83xx/sbc834x_defconfig | 2 +- arch/powerpc/configs/85xx/ksi8560_defconfig | 2 +- arch/powerpc/configs/85xx/mpc8540_ads_defconfig | 2 +- arch/powerpc/configs/85xx/mpc8560_ads_defconfig | 2 +- arch/powerpc/configs/85xx/mpc85xx_cds_defconfig | 2 +- arch/powerpc/configs/85xx/sbc8548_defconfig | 2 +- arch/powerpc/configs/85xx/sbc8560_defconfig | 2 +- arch/powerpc/configs/85xx/socrates_defconfig | 2 +- arch/powerpc/configs/85xx/stx_gp3_defconfig | 2 +- arch/powerpc/configs/85xx/tqm8540_defconfig | 2 +- arch/powerpc/configs/85xx/tqm8541_defconfig | 2 +- arch/powerpc/configs/85xx/tqm8548_defconfig | 2 +- arch/powerpc/configs/85xx/tqm8555_defconfig | 2 +- arch/powerpc/configs/85xx/tqm8560_defconfig | 2 +- arch/powerpc/configs/85xx/xes_mpc85xx_defconfig | 2 +- arch/powerpc/configs/86xx/gef_ppc9a_defconfig | 2 +- arch/powerpc/configs/86xx/gef_sbc310_defconfig | 2 +- arch/powerpc/configs/86xx/gef_sbc610_defconfig | 2 +- arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig | 2 +- arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig | 2 +- arch/powerpc/configs/86xx/sbc8641d_defconfig | 2 +- arch/powerpc/configs/adder875_defconfig | 2 +- arch/powerpc/configs/e55xx_smp_defconfig | 2 +- arch/powerpc/configs/ep8248e_defconfig | 2 +- arch/powerpc/configs/ep88xc_defconfig | 2 +- arch/powerpc/configs/gamecube_defconfig | 2 +- arch/powerpc/configs/holly_defconfig | 2 +- arch/powerpc/configs/mgcoge_defconfig | 2 +- arch/powerpc/configs/mgsuvd_defconfig | 2 +- arch/powerpc/configs/mpc7448_hpc2_defconfig | 2 +- arch/powerpc/configs/mpc8272_ads_defconfig | 2 +- arch/powerpc/configs/mpc83xx_defconfig | 2 +- arch/powerpc/configs/mpc85xx_defconfig | 2 +- arch/powerpc/configs/mpc85xx_smp_defconfig | 2 +- arch/powerpc/configs/mpc866_ads_defconfig | 2 +- arch/powerpc/configs/mpc86xx_defconfig | 2 +- arch/powerpc/configs/mpc885_ads_defconfig | 2 +- arch/powerpc/configs/ppc40x_defconfig | 2 +- arch/powerpc/configs/ppc44x_defconfig | 2 +- arch/powerpc/configs/pq2fads_defconfig | 2 +- arch/powerpc/configs/ps3_defconfig | 2 +- arch/powerpc/configs/storcenter_defconfig | 2 +- arch/powerpc/configs/tqm8xx_defconfig | 2 +- arch/powerpc/configs/wii_defconfig | 2 +- arch/powerpc/platforms/iseries/Kconfig | 2 +- arch/powerpc/platforms/pseries/Kconfig | 6 +-- arch/score/configs/spct6600_defconfig | 2 +- arch/sh/Kconfig | 2 +- arch/tile/Kconfig | 2 +- arch/tile/Kconfig.debug | 2 +- arch/tile/configs/tile_defconfig | 2 +- arch/um/defconfig | 2 +- arch/x86/Kconfig | 20 +++---- arch/x86/Kconfig.cpu | 2 +- arch/x86/Kconfig.debug | 4 +- arch/xtensa/configs/common_defconfig | 2 +- arch/xtensa/configs/iss_defconfig | 2 +- arch/xtensa/configs/s6105_defconfig | 2 +- block/Kconfig | 2 +- drivers/acpi/Kconfig | 2 +- drivers/ata/Kconfig | 2 +- drivers/base/Kconfig | 2 +- drivers/char/Kconfig | 10 ++-- drivers/cpufreq/Kconfig | 2 +- drivers/firmware/Kconfig | 2 +- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/drm_fb_helper.c | 4 +- drivers/gpu/drm/nouveau/Kconfig | 2 +- drivers/gpu/vga/Kconfig | 2 +- drivers/hid/Kconfig | 64 +++++++++++------------ drivers/hid/usbhid/Kconfig | 2 +- drivers/ide/Kconfig | 2 +- drivers/infiniband/hw/mthca/Kconfig | 2 +- drivers/infiniband/ulp/ipoib/Kconfig | 2 +- drivers/input/Kconfig | 6 +-- drivers/input/keyboard/Kconfig | 4 +- drivers/input/mouse/Kconfig | 10 ++-- drivers/input/serio/Kconfig | 6 +-- drivers/input/touchscreen/Kconfig | 30 +++++------ drivers/media/common/tuners/Kconfig | 2 +- drivers/media/dvb/frontends/Kconfig | 2 +- drivers/media/video/Kconfig | 2 +- drivers/net/Kconfig | 2 +- drivers/pci/pcie/Kconfig | 2 +- drivers/pcmcia/Kconfig | 12 ++--- drivers/serial/Kconfig | 4 +- drivers/ssb/Kconfig | 2 +- drivers/usb/core/Kconfig | 6 +-- drivers/video/Kconfig | 2 +- drivers/video/console/Kconfig | 2 +- fs/Kconfig | 2 +- fs/proc/Kconfig | 6 +-- fs/sysfs/Kconfig | 2 +- init/Kconfig | 60 ++++++++++++--------- lib/Kconfig.debug | 6 +-- lib/xz/Kconfig | 12 ++--- net/mac80211/Kconfig | 6 +-- net/rfkill/Kconfig | 4 +- net/wireless/Kconfig | 2 +- usr/Kconfig | 18 +++---- 298 files changed, 431 insertions(+), 423 deletions(-) (limited to 'fs') diff --git a/arch/arm/configs/ag5evm_defconfig b/arch/arm/configs/ag5evm_defconfig index 2b9cf56db363..212ead354a6b 100644 --- a/arch/arm/configs/ag5evm_defconfig +++ b/arch/arm/configs/ag5evm_defconfig @@ -10,7 +10,7 @@ CONFIG_NAMESPACES=y # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig index 5536c488dd01..f0dea52e49c4 100644 --- a/arch/arm/configs/am200epdkit_defconfig +++ b/arch/arm/configs/am200epdkit_defconfig @@ -3,7 +3,7 @@ CONFIG_LOCALVERSION="gum" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/arm/configs/at572d940hfek_defconfig b/arch/arm/configs/at572d940hfek_defconfig index 695e32d4fb58..1b1158ae8f82 100644 --- a/arch/arm/configs/at572d940hfek_defconfig +++ b/arch/arm/configs/at572d940hfek_defconfig @@ -17,7 +17,7 @@ CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=m diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index 3a1ad15a779f..5b54abbeb0b3 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -1,6 +1,6 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODVERSIONS=y CONFIG_ARCH_SA1100=y diff --git a/arch/arm/configs/bcmring_defconfig b/arch/arm/configs/bcmring_defconfig index 75984cd1e233..795374d48f81 100644 --- a/arch/arm/configs/bcmring_defconfig +++ b/arch/arm/configs/bcmring_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set diff --git a/arch/arm/configs/cm_x2xx_defconfig b/arch/arm/configs/cm_x2xx_defconfig index dcfbcf3b6c3e..a93ff8da5bab 100644 --- a/arch/arm/configs/cm_x2xx_defconfig +++ b/arch/arm/configs/cm_x2xx_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/arm/configs/colibri_pxa270_defconfig b/arch/arm/configs/colibri_pxa270_defconfig index f52c64e36d8d..2ef2c5e8aaec 100644 --- a/arch/arm/configs/colibri_pxa270_defconfig +++ b/arch/arm/configs/colibri_pxa270_defconfig @@ -8,7 +8,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 310f9a6270be..6c56ad086c7c 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_BASE_FULL is not set # CONFIG_EPOLL is not set CONFIG_SLOB=y diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index 4a1fa81ed37d..e53c47563845 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -4,7 +4,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_MODULES=y diff --git a/arch/arm/configs/da8xx_omapl_defconfig b/arch/arm/configs/da8xx_omapl_defconfig index cdc40c4b8c48..88ccde058ba4 100644 --- a/arch/arm/configs/da8xx_omapl_defconfig +++ b/arch/arm/configs/da8xx_omapl_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 2519cc5a5f8f..889922ad229c 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index 9359e1bf32c1..54bf5eec8016 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/ebsa110_defconfig b/arch/arm/configs/ebsa110_defconfig index c3194186920c..14559dbb4c2c 100644 --- a/arch/arm/configs/ebsa110_defconfig +++ b/arch/arm/configs/ebsa110_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_ARCH_EBSA110=y CONFIG_PCCARD=m diff --git a/arch/arm/configs/edb7211_defconfig b/arch/arm/configs/edb7211_defconfig index 7b62be1561ea..d52ded350a12 100644 --- a/arch/arm/configs/edb7211_defconfig +++ b/arch/arm/configs/edb7211_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_ARCH_CLPS711X=y CONFIG_ARCH_EDB7211=y diff --git a/arch/arm/configs/em_x270_defconfig b/arch/arm/configs/em_x270_defconfig index d7db34f79702..60a21e01eb70 100644 --- a/arch/arm/configs/em_x270_defconfig +++ b/arch/arm/configs/em_x270_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 6d6689cdf398..8e97b2f7ceec 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -4,7 +4,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/eseries_pxa_defconfig b/arch/arm/configs/eseries_pxa_defconfig index 1691dea582fe..d68ac67c201c 100644 --- a/arch/arm/configs/eseries_pxa_defconfig +++ b/arch/arm/configs/eseries_pxa_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig index c4eeb6d1cbf0..227a477346ed 100644 --- a/arch/arm/configs/ezx_defconfig +++ b/arch/arm/configs/ezx_defconfig @@ -7,7 +7,7 @@ CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig index 4f925ead2617..038518ab39a8 100644 --- a/arch/arm/configs/footbridge_defconfig +++ b/arch/arm/configs/footbridge_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_MODULES=y CONFIG_ARCH_FOOTBRIDGE=y diff --git a/arch/arm/configs/fortunet_defconfig b/arch/arm/configs/fortunet_defconfig index e11c7eab8ed0..840fced7529f 100644 --- a/arch/arm/configs/fortunet_defconfig +++ b/arch/arm/configs/fortunet_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_ARCH_CLPS711X=y CONFIG_ARCH_FORTUNET=y diff --git a/arch/arm/configs/h5000_defconfig b/arch/arm/configs/h5000_defconfig index ac336f10000c..37903e3f0efc 100644 --- a/arch/arm/configs/h5000_defconfig +++ b/arch/arm/configs/h5000_defconfig @@ -4,7 +4,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig index ade55c8c408b..176ec22af034 100644 --- a/arch/arm/configs/imote2_defconfig +++ b/arch/arm/configs/imote2_defconfig @@ -6,7 +6,7 @@ CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/ixp2000_defconfig b/arch/arm/configs/ixp2000_defconfig index 908324684549..8405aded97a3 100644 --- a/arch/arm/configs/ixp2000_defconfig +++ b/arch/arm/configs/ixp2000_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/ixp23xx_defconfig b/arch/arm/configs/ixp23xx_defconfig index 7fc056a8569c..688717612e91 100644 --- a/arch/arm/configs/ixp23xx_defconfig +++ b/arch/arm/configs/ixp23xx_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 5c5023934001..063e2ab2c8f1 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arm/configs/loki_defconfig b/arch/arm/configs/loki_defconfig index e1eaff7f5536..1ba752b2dc6d 100644 --- a/arch/arm/configs/loki_defconfig +++ b/arch/arm/configs/loki_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/lpd7a400_defconfig b/arch/arm/configs/lpd7a400_defconfig index 20caaaba4a04..5a48f171204c 100644 --- a/arch/arm/configs/lpd7a400_defconfig +++ b/arch/arm/configs/lpd7a400_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set # CONFIG_IOSCHED_DEADLINE is not set diff --git a/arch/arm/configs/lpd7a404_defconfig b/arch/arm/configs/lpd7a404_defconfig index 1efcce97b4a7..22d0631de009 100644 --- a/arch/arm/configs/lpd7a404_defconfig +++ b/arch/arm/configs/lpd7a404_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set CONFIG_SLAB=y diff --git a/arch/arm/configs/magician_defconfig b/arch/arm/configs/magician_defconfig index af805e8fd03d..a88e64d4e9a5 100644 --- a/arch/arm/configs/magician_defconfig +++ b/arch/arm/configs/magician_defconfig @@ -4,7 +4,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index b0d082422d46..7305ebddb510 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y # CONFIG_SLUB_DEBUG is not set CONFIG_PROFILING=y diff --git a/arch/arm/configs/mx1_defconfig b/arch/arm/configs/mx1_defconfig index 2f38d9715437..b39b5ced8a10 100644 --- a/arch/arm/configs/mx1_defconfig +++ b/arch/arm/configs/mx1_defconfig @@ -4,7 +4,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/mx21_defconfig b/arch/arm/configs/mx21_defconfig index 6454e18e2abe..411f88dd4402 100644 --- a/arch/arm/configs/mx21_defconfig +++ b/arch/arm/configs/mx21_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/mx27_defconfig b/arch/arm/configs/mx27_defconfig index 813cfb366c18..9ad4c656c9bd 100644 --- a/arch/arm/configs/mx27_defconfig +++ b/arch/arm/configs/mx27_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y diff --git a/arch/arm/configs/mx3_defconfig b/arch/arm/configs/mx3_defconfig index e648ea3429be..7c4b30b34952 100644 --- a/arch/arm/configs/mx3_defconfig +++ b/arch/arm/configs/mx3_defconfig @@ -4,7 +4,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/mx51_defconfig b/arch/arm/configs/mx51_defconfig index 5c7a87260fab..9cba68cfa51a 100644 --- a/arch/arm/configs/mx51_defconfig +++ b/arch/arm/configs/mx51_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_RELAY=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_MODULES=y diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index 0e2dc26ebe66..37207d1bf44b 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -7,7 +7,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index a350cc6bfe6a..7b63462b349d 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -6,7 +6,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index ccedde1371c3..ae890caa17a7 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 439323b3b0ed..a288d7033950 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SLUB_DEBUG is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/arm/configs/pcm027_defconfig b/arch/arm/configs/pcm027_defconfig index 583a0610bd00..2f136c30a989 100644 --- a/arch/arm/configs/pcm027_defconfig +++ b/arch/arm/configs/pcm027_defconfig @@ -7,7 +7,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/pcontrol_g20_defconfig b/arch/arm/configs/pcontrol_g20_defconfig index b42ee62c4d77..c75c9fcede58 100644 --- a/arch/arm/configs/pcontrol_g20_defconfig +++ b/arch/arm/configs/pcontrol_g20_defconfig @@ -10,7 +10,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arm/configs/pleb_defconfig b/arch/arm/configs/pleb_defconfig index d1efbdc1e6dc..cb08cc561da5 100644 --- a/arch/arm/configs/pleb_defconfig +++ b/arch/arm/configs/pleb_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set # CONFIG_SHMEM is not set CONFIG_MODULES=y diff --git a/arch/arm/configs/pnx4008_defconfig b/arch/arm/configs/pnx4008_defconfig index bd481f04276f..35a31ccacc32 100644 --- a/arch/arm/configs/pnx4008_defconfig +++ b/arch/arm/configs/pnx4008_defconfig @@ -5,7 +5,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/simpad_defconfig b/arch/arm/configs/simpad_defconfig index af3b12e3b464..d3358155bf8a 100644 --- a/arch/arm/configs/simpad_defconfig +++ b/arch/arm/configs/simpad_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="oe1" CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index aebd4bb0ad01..70158273c6dd 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -4,7 +4,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_MODULES=y diff --git a/arch/arm/configs/stmp378x_defconfig b/arch/arm/configs/stmp378x_defconfig index 94a2d904bf94..1079c2b6eb3a 100644 --- a/arch/arm/configs/stmp378x_defconfig +++ b/arch/arm/configs/stmp378x_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/stmp37xx_defconfig b/arch/arm/configs/stmp37xx_defconfig index d8ee58cfa872..564a5cc44085 100644 --- a/arch/arm/configs/stmp37xx_defconfig +++ b/arch/arm/configs/stmp37xx_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig index e89ca19489c2..95c0f0d63db6 100644 --- a/arch/arm/configs/tct_hammer_defconfig +++ b/arch/arm/configs/tct_hammer_defconfig @@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_BUG is not set # CONFIG_ELF_CORE is not set diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig index 37f48342827c..3162173fa75a 100644 --- a/arch/arm/configs/trizeps4_defconfig +++ b/arch/arm/configs/trizeps4_defconfig @@ -7,7 +7,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig index c1c252cdca60..4a5a12681be2 100644 --- a/arch/arm/configs/u300_defconfig +++ b/arch/arm/configs/u300_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_MODULES=y diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig index 9d7bf5e0d0f5..8b0c717378fa 100644 --- a/arch/arm/configs/viper_defconfig +++ b/arch/arm/configs/viper_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=13 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set # CONFIG_SHMEM is not set CONFIG_SLAB=y diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig index 70d47dbae6db..5b5504143647 100644 --- a/arch/arm/configs/xcep_defconfig +++ b/arch/arm/configs/xcep_defconfig @@ -8,7 +8,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SHMEM is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 313b13073c54..cd2062fe0f61 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -1,8 +1,8 @@ config AVR32 def_bool y - # With EMBEDDED=n, we get lots of stuff automatically selected + # With EXPERT=n, we get lots of stuff automatically selected # that we usually don't need on AVR32. - select EMBEDDED + select EXPERT select HAVE_CLK select HAVE_OPROFILE select HAVE_KPROBES diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig index c0b988ee30df..db8d38a12a9a 100644 --- a/arch/blackfin/configs/BF518F-EZBRD_defconfig +++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig index 864af5b68874..3e50d7857c27 100644 --- a/arch/blackfin/configs/BF526-EZBRD_defconfig +++ b/arch/blackfin/configs/BF526-EZBRD_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig b/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig index 7b6a3370dbe2..362f59dd5228 100644 --- a/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig +++ b/arch/blackfin/configs/BF527-AD7160-EVAL_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set # CONFIG_AIO is not set CONFIG_SLAB=y diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig index 4faa6b46a352..023ff0df2692 100644 --- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig index 9d893eb68243..4e5a121b3c56 100644 --- a/arch/blackfin/configs/BF527-EZKIT_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF527-TLL6527M_defconfig b/arch/blackfin/configs/BF527-TLL6527M_defconfig index 97a2767c80f8..cd0636bb24a0 100644 --- a/arch/blackfin/configs/BF527-TLL6527M_defconfig +++ b/arch/blackfin/configs/BF527-TLL6527M_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig index f84774360c5b..9f8fc84e4ac9 100644 --- a/arch/blackfin/configs/BF533-EZKIT_defconfig +++ b/arch/blackfin/configs/BF533-EZKIT_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig index 0e7262c04cc2..ccc432b722a0 100644 --- a/arch/blackfin/configs/BF533-STAMP_defconfig +++ b/arch/blackfin/configs/BF533-STAMP_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig index 4d14a002e7bd..566695472a84 100644 --- a/arch/blackfin/configs/BF537-STAMP_defconfig +++ b/arch/blackfin/configs/BF537-STAMP_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig index fbee9d776f56..ac22124ccb6c 100644 --- a/arch/blackfin/configs/BF538-EZKIT_defconfig +++ b/arch/blackfin/configs/BF538-EZKIT_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig index 05dd11db2f7d..944404b6ff08 100644 --- a/arch/blackfin/configs/BF548-EZKIT_defconfig +++ b/arch/blackfin/configs/BF548-EZKIT_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF561-ACVILON_defconfig b/arch/blackfin/configs/BF561-ACVILON_defconfig index bcb14d1c5664..b7c8451f26ac 100644 --- a/arch/blackfin/configs/BF561-ACVILON_defconfig +++ b/arch/blackfin/configs/BF561-ACVILON_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig index 4cf451024fd8..7e67ba31e991 100644 --- a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig +++ b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig index 843aaa54a9e3..141e5933e1aa 100644 --- a/arch/blackfin/configs/BF561-EZKIT_defconfig +++ b/arch/blackfin/configs/BF561-EZKIT_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig index dae7adf3b2a2..97ebe09a7370 100644 --- a/arch/blackfin/configs/BlackStamp_defconfig +++ b/arch/blackfin/configs/BlackStamp_defconfig @@ -6,7 +6,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig index f3414244bfed..c2457543e58c 100644 --- a/arch/blackfin/configs/CM-BF527_defconfig +++ b/arch/blackfin/configs/CM-BF527_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/CM-BF533_defconfig b/arch/blackfin/configs/CM-BF533_defconfig index 8c7e08f173d4..baf1c1573e5e 100644 --- a/arch/blackfin/configs/CM-BF533_defconfig +++ b/arch/blackfin/configs/CM-BF533_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/CM-BF537E_defconfig b/arch/blackfin/configs/CM-BF537E_defconfig index bd3cb766d078..707cbf8a2590 100644 --- a/arch/blackfin/configs/CM-BF537E_defconfig +++ b/arch/blackfin/configs/CM-BF537E_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/CM-BF537U_defconfig b/arch/blackfin/configs/CM-BF537U_defconfig index 82224f37c04e..4596935eadac 100644 --- a/arch/blackfin/configs/CM-BF537U_defconfig +++ b/arch/blackfin/configs/CM-BF537U_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig index 433598c6e773..df267588efec 100644 --- a/arch/blackfin/configs/CM-BF548_defconfig +++ b/arch/blackfin/configs/CM-BF548_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/CM-BF561_defconfig b/arch/blackfin/configs/CM-BF561_defconfig index ded7d845cb39..6c7b21585a43 100644 --- a/arch/blackfin/configs/CM-BF561_defconfig +++ b/arch/blackfin/configs/CM-BF561_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/DNP5370_defconfig b/arch/blackfin/configs/DNP5370_defconfig index 0ebc7d9aa426..f50313657f3e 100644 --- a/arch/blackfin/configs/DNP5370_defconfig +++ b/arch/blackfin/configs/DNP5370_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLOB=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_CFQ is not set diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig index 700fb701c121..7450127b6455 100644 --- a/arch/blackfin/configs/H8606_defconfig +++ b/arch/blackfin/configs/H8606_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig index b40156d217e3..5e797cf72043 100644 --- a/arch/blackfin/configs/IP0X_defconfig +++ b/arch/blackfin/configs/IP0X_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig index be866d95ed76..a566a2fe6b9b 100644 --- a/arch/blackfin/configs/PNAV-10_defconfig +++ b/arch/blackfin/configs/PNAV-10_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig index b64bdf759b82..853809510ee9 100644 --- a/arch/blackfin/configs/SRV1_defconfig +++ b/arch/blackfin/configs/SRV1_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set diff --git a/arch/blackfin/configs/TCM-BF518_defconfig b/arch/blackfin/configs/TCM-BF518_defconfig index 1bccd9a50986..d496ae9a39b0 100644 --- a/arch/blackfin/configs/TCM-BF518_defconfig +++ b/arch/blackfin/configs/TCM-BF518_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_FUTEX is not set diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig index 00ce899e9e5d..65f642167a50 100644 --- a/arch/blackfin/configs/TCM-BF537_defconfig +++ b/arch/blackfin/configs/TCM-BF537_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set diff --git a/arch/cris/configs/artpec_3_defconfig b/arch/cris/configs/artpec_3_defconfig index 590f72c9455d..71854d41c5a0 100644 --- a/arch/cris/configs/artpec_3_defconfig +++ b/arch/cris/configs/artpec_3_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/cris/configs/etrax-100lx_v2_defconfig b/arch/cris/configs/etrax-100lx_v2_defconfig index 1b2853e39801..a85aabf92be5 100644 --- a/arch/cris/configs/etrax-100lx_v2_defconfig +++ b/arch/cris/configs/etrax-100lx_v2_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/cris/configs/etraxfs_defconfig b/arch/cris/configs/etraxfs_defconfig index f73d38cc9c66..87c7227fecb2 100644 --- a/arch/cris/configs/etraxfs_defconfig +++ b/arch/cris/configs/etraxfs_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/frv/defconfig b/arch/frv/defconfig index b8ebe9e8a493..b1b792610fdf 100644 --- a/arch/frv/defconfig +++ b/arch/frv/defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_MMU=y CONFIG_FRV_OUTOFLINE_ATOMIC_OPS=y diff --git a/arch/h8300/defconfig b/arch/h8300/defconfig index 342f77765f02..042425a02645 100644 --- a/arch/h8300/defconfig +++ b/arch/h8300/defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set diff --git a/arch/m32r/configs/m32700ut.smp_defconfig b/arch/m32r/configs/m32700ut.smp_defconfig index 816c3ecaa2aa..a3d727ed6a16 100644 --- a/arch/m32r/configs/m32700ut.smp_defconfig +++ b/arch/m32r/configs/m32700ut.smp_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/m32700ut.up_defconfig b/arch/m32r/configs/m32700ut.up_defconfig index 84785686640a..b8334163099d 100644 --- a/arch/m32r/configs/m32700ut.up_defconfig +++ b/arch/m32r/configs/m32700ut.up_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/mappi.nommu_defconfig b/arch/m32r/configs/mappi.nommu_defconfig index 354a964d084d..7c90ce2fc42b 100644 --- a/arch/m32r/configs/mappi.nommu_defconfig +++ b/arch/m32r/configs/mappi.nommu_defconfig @@ -3,7 +3,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/mappi.smp_defconfig b/arch/m32r/configs/mappi.smp_defconfig index 9022307bd073..367d07cebcd3 100644 --- a/arch/m32r/configs/mappi.smp_defconfig +++ b/arch/m32r/configs/mappi.smp_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/mappi.up_defconfig b/arch/m32r/configs/mappi.up_defconfig index 3726068721a5..cb11384386ce 100644 --- a/arch/m32r/configs/mappi.up_defconfig +++ b/arch/m32r/configs/mappi.up_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/mappi2.opsp_defconfig b/arch/m32r/configs/mappi2.opsp_defconfig index 6136fad048e4..3bff779259b4 100644 --- a/arch/m32r/configs/mappi2.opsp_defconfig +++ b/arch/m32r/configs/mappi2.opsp_defconfig @@ -4,7 +4,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/mappi2.vdec2_defconfig b/arch/m32r/configs/mappi2.vdec2_defconfig index dce1fc7d67ed..75246c9c1af8 100644 --- a/arch/m32r/configs/mappi2.vdec2_defconfig +++ b/arch/m32r/configs/mappi2.vdec2_defconfig @@ -4,7 +4,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/mappi3.smp_defconfig b/arch/m32r/configs/mappi3.smp_defconfig index b204e2ecd0f1..27cefd41ac1f 100644 --- a/arch/m32r/configs/mappi3.smp_defconfig +++ b/arch/m32r/configs/mappi3.smp_defconfig @@ -5,7 +5,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/oaks32r_defconfig b/arch/m32r/configs/oaks32r_defconfig index 5aa4ea9ebb10..5087a510ca4f 100644 --- a/arch/m32r/configs/oaks32r_defconfig +++ b/arch/m32r/configs/oaks32r_defconfig @@ -2,7 +2,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/opsput_defconfig b/arch/m32r/configs/opsput_defconfig index 8494c6a276e8..50c6f525db20 100644 --- a/arch/m32r/configs/opsput_defconfig +++ b/arch/m32r/configs/opsput_defconfig @@ -4,7 +4,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/m32r/configs/usrv_defconfig b/arch/m32r/configs/usrv_defconfig index 1df293bc2ab9..a3cfaaedab60 100644 --- a/arch/m32r/configs/usrv_defconfig +++ b/arch/m32r/configs/usrv_defconfig @@ -5,7 +5,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=15 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/m68knommu/configs/m5208evb_defconfig b/arch/m68knommu/configs/m5208evb_defconfig index 6ac2981a2cdf..2f5655c577af 100644 --- a/arch/m68knommu/configs/m5208evb_defconfig +++ b/arch/m68knommu/configs/m5208evb_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/m68knommu/configs/m5249evb_defconfig b/arch/m68knommu/configs/m5249evb_defconfig index 14934ff8d5c3..16df72bfbd45 100644 --- a/arch/m68knommu/configs/m5249evb_defconfig +++ b/arch/m68knommu/configs/m5249evb_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/m68knommu/configs/m5272c3_defconfig b/arch/m68knommu/configs/m5272c3_defconfig index 5985a3b593d8..4e6ea50c7f33 100644 --- a/arch/m68knommu/configs/m5272c3_defconfig +++ b/arch/m68knommu/configs/m5272c3_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/m68knommu/configs/m5275evb_defconfig b/arch/m68knommu/configs/m5275evb_defconfig index 5a7857efb45d..f3dd74115a34 100644 --- a/arch/m68knommu/configs/m5275evb_defconfig +++ b/arch/m68knommu/configs/m5275evb_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/m68knommu/configs/m5307c3_defconfig b/arch/m68knommu/configs/m5307c3_defconfig index e8102018c8d4..bce0a20c3737 100644 --- a/arch/m68knommu/configs/m5307c3_defconfig +++ b/arch/m68knommu/configs/m5307c3_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/m68knommu/configs/m5407c3_defconfig b/arch/m68knommu/configs/m5407c3_defconfig index 5c124a7ba2a7..618cc32691f2 100644 --- a/arch/m68knommu/configs/m5407c3_defconfig +++ b/arch/m68knommu/configs/m5407c3_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/m68knommu/defconfig b/arch/m68knommu/defconfig index 6ac2981a2cdf..2f5655c577af 100644 --- a/arch/m68knommu/defconfig +++ b/arch/m68knommu/defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index ab8fbe7ad90b..b3f5eecff2a7 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -7,7 +7,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="rootfs.cpio" CONFIG_INITRAMFS_COMPRESSION_GZIP=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_HOTPLUG is not set diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index ebc143c5368e..0249e4b7e1d3 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -6,7 +6,7 @@ CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_HOTPLUG is not set diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index f437cd1fafb8..5358f90b4dd2 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -7,7 +7,7 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" config EARLY_PRINTK - bool "Early printk" if EMBEDDED + bool "Early printk" if EXPERT depends on SYS_HAS_EARLY_PRINTK default y help diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig index c78c7e7e41df..6cd5a519ce5c 100644 --- a/arch/mips/configs/ar7_defconfig +++ b/arch/mips/configs/ar7_defconfig @@ -14,7 +14,7 @@ CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_LZMA=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_ELF_CORE is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index 927d58b2cd03..22fdf2f0cc23 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -21,7 +21,7 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_LZMA=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/bcm63xx_defconfig b/arch/mips/configs/bcm63xx_defconfig index b806a4e32896..919005139f5a 100644 --- a/arch/mips/configs/bcm63xx_defconfig +++ b/arch/mips/configs/bcm63xx_defconfig @@ -10,7 +10,7 @@ CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_TINY_RCU=y CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_PCSPKR_PLATFORM is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 9749bc8758db..1cdff6b6327d 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -26,7 +26,7 @@ CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/capcella_defconfig b/arch/mips/configs/capcella_defconfig index 502a8e9c084b..5135dc0b950a 100644 --- a/arch/mips/configs/capcella_defconfig +++ b/arch/mips/configs/capcella_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/cavium-octeon_defconfig b/arch/mips/configs/cavium-octeon_defconfig index 3567b6f07b37..75165dfa60c1 100644 --- a/arch/mips/configs/cavium-octeon_defconfig +++ b/arch/mips/configs/cavium-octeon_defconfig @@ -15,7 +15,7 @@ CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index 6c4f7e9d3383..5419adb219a8 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/mips/configs/db1000_defconfig b/arch/mips/configs/db1000_defconfig index dda158b2c8dc..4044c9e0fb73 100644 --- a/arch/mips/configs/db1000_defconfig +++ b/arch/mips/configs/db1000_defconfig @@ -11,7 +11,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/configs/db1100_defconfig b/arch/mips/configs/db1100_defconfig index 7e4fc76df538..c6b49938ee84 100644 --- a/arch/mips/configs/db1100_defconfig +++ b/arch/mips/configs/db1100_defconfig @@ -11,7 +11,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/db1200_defconfig b/arch/mips/configs/db1200_defconfig index 6fe205fa7b61..1f69249b839a 100644 --- a/arch/mips/configs/db1200_defconfig +++ b/arch/mips/configs/db1200_defconfig @@ -12,7 +12,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/db1500_defconfig b/arch/mips/configs/db1500_defconfig index a741c55448d0..b6e21c7cb6bd 100644 --- a/arch/mips/configs/db1500_defconfig +++ b/arch/mips/configs/db1500_defconfig @@ -10,7 +10,7 @@ CONFIG_LOCALVERSION="-db1500" CONFIG_KERNEL_LZMA=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/configs/db1550_defconfig b/arch/mips/configs/db1550_defconfig index cd32dd8c8008..798a553c9e80 100644 --- a/arch/mips/configs/db1550_defconfig +++ b/arch/mips/configs/db1550_defconfig @@ -11,7 +11,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index b15bfd1e69c8..87d0340837aa 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/e55_defconfig b/arch/mips/configs/e55_defconfig index 0b60c06a943d..0126e66d60cb 100644 --- a/arch/mips/configs/e55_defconfig +++ b/arch/mips/configs/e55_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 63944a14b816..e5b73de08fc5 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -17,7 +17,7 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_PID_NS=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_PCSPKR_PLATFORM is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 53edc134f274..48a40aefaf58 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -11,7 +11,7 @@ CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_MODULES=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 36de199f4c27..d1606569b001 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -17,7 +17,7 @@ CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set # CONFIG_PCSPKR_PLATFORM is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 4b16c48b0c36..0e36abcd39cc 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -15,7 +15,7 @@ CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig index 98f2c7736e87..4dbf6269b3f9 100644 --- a/arch/mips/configs/ip28_defconfig +++ b/arch/mips/configs/ip28_defconfig @@ -8,7 +8,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 5bea99b26fa8..7bbd52194fc3 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -10,7 +10,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_RELAY=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 6ae46bcdb20b..92a60aecad5c 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -10,7 +10,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig index bf24e9309b9c..db5705e18b36 100644 --- a/arch/mips/configs/jmr3927_defconfig +++ b/arch/mips/configs/jmr3927_defconfig @@ -4,7 +4,7 @@ CONFIG_TOSHIBA_JMR3927=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/lasat_defconfig b/arch/mips/configs/lasat_defconfig index 6447261c61d0..d9f3db29ab95 100644 --- a/arch/mips/configs/lasat_defconfig +++ b/arch/mips/configs/lasat_defconfig @@ -8,7 +8,7 @@ CONFIG_HZ_1000=y CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index f7033f3a5822..167c1d07b809 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -21,7 +21,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_MODULES=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 9d03b68aece8..7270f3183bda 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -15,7 +15,7 @@ CONFIG_UTS_NS=y CONFIG_IPC_NS=y CONFIG_PID_NS=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig index 86bf001babe9..9c9a123016c0 100644 --- a/arch/mips/configs/markeins_defconfig +++ b/arch/mips/configs/markeins_defconfig @@ -9,7 +9,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig index 4925f507dc21..b5ad7387bbb0 100644 --- a/arch/mips/configs/mipssim_defconfig +++ b/arch/mips/configs/mipssim_defconfig @@ -7,7 +7,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/mpc30x_defconfig b/arch/mips/configs/mpc30x_defconfig index efb779f8f6fe..c16de9812920 100644 --- a/arch/mips/configs/mpc30x_defconfig +++ b/arch/mips/configs/mpc30x_defconfig @@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/msp71xx_defconfig b/arch/mips/configs/msp71xx_defconfig index ab051458452b..d1142e9cd9a1 100644 --- a/arch/mips/configs/msp71xx_defconfig +++ b/arch/mips/configs/msp71xx_defconfig @@ -8,7 +8,7 @@ CONFIG_LOCALVERSION="-pmc" CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SHMEM is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 814699754e0d..a97a42c6b2c8 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -11,7 +11,7 @@ CONFIG_AUDIT=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=m diff --git a/arch/mips/configs/pb1100_defconfig b/arch/mips/configs/pb1100_defconfig index 1597aa1842fa..75eb1b1f316c 100644 --- a/arch/mips/configs/pb1100_defconfig +++ b/arch/mips/configs/pb1100_defconfig @@ -11,7 +11,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/pb1200_defconfig b/arch/mips/configs/pb1200_defconfig index 96f0d43cf08b..dcbe2704e5ed 100644 --- a/arch/mips/configs/pb1200_defconfig +++ b/arch/mips/configs/pb1200_defconfig @@ -12,7 +12,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/pb1500_defconfig b/arch/mips/configs/pb1500_defconfig index b4bfd4823458..fa00487146f8 100644 --- a/arch/mips/configs/pb1500_defconfig +++ b/arch/mips/configs/pb1500_defconfig @@ -11,7 +11,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/pb1550_defconfig b/arch/mips/configs/pb1550_defconfig index 5a660024d22a..e83d6497e8b4 100644 --- a/arch/mips/configs/pb1550_defconfig +++ b/arch/mips/configs/pb1550_defconfig @@ -11,7 +11,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/pnx8335-stb225_defconfig b/arch/mips/configs/pnx8335-stb225_defconfig index 39926a1a96b6..f2925769dfa3 100644 --- a/arch/mips/configs/pnx8335-stb225_defconfig +++ b/arch/mips/configs/pnx8335-stb225_defconfig @@ -11,7 +11,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig index 3376bc8616cc..1d1f2067f3e6 100644 --- a/arch/mips/configs/pnx8550-jbs_defconfig +++ b/arch/mips/configs/pnx8550-jbs_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/pnx8550-stb810_defconfig b/arch/mips/configs/pnx8550-stb810_defconfig index 6514f1bf0afb..15c66a571f99 100644 --- a/arch/mips/configs/pnx8550-stb810_defconfig +++ b/arch/mips/configs/pnx8550-stb810_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/powertv_defconfig b/arch/mips/configs/powertv_defconfig index f1f58e91dd80..3b0b6e8c8533 100644 --- a/arch/mips/configs/powertv_defconfig +++ b/arch/mips/configs/powertv_defconfig @@ -14,7 +14,7 @@ CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y # CONFIG_PCSPKR_PLATFORM is not set diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index d6457bc38c71..55902d9cd0f2 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -13,7 +13,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_ELF_CORE is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig index 29acfab31516..9cba856277ff 100644 --- a/arch/mips/configs/rbtx49xx_defconfig +++ b/arch/mips/configs/rbtx49xx_defconfig @@ -12,7 +12,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set # CONFIG_PCSPKR_PLATFORM is not set # CONFIG_EPOLL is not set diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 2b3e47653f60..2c0230e76d20 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -12,7 +12,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig index 64840d717750..5b0463ef9389 100644 --- a/arch/mips/configs/sb1250-swarm_defconfig +++ b/arch/mips/configs/sb1250-swarm_defconfig @@ -15,7 +15,7 @@ CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/tb0219_defconfig b/arch/mips/configs/tb0219_defconfig index d9be37fc9cb7..30036b4cbeb1 100644 --- a/arch/mips/configs/tb0219_defconfig +++ b/arch/mips/configs/tb0219_defconfig @@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig index 3d25dd08907b..81bfa1d4d8e3 100644 --- a/arch/mips/configs/tb0226_defconfig +++ b/arch/mips/configs/tb0226_defconfig @@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig index be697c9b23c6..c415c4f0e5c2 100644 --- a/arch/mips/configs/tb0287_defconfig +++ b/arch/mips/configs/tb0287_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_PCSPKR_PLATFORM is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig index 7ec9287254d8..ee4b2be43c44 100644 --- a/arch/mips/configs/workpad_defconfig +++ b/arch/mips/configs/workpad_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mips/configs/wrppmc_defconfig b/arch/mips/configs/wrppmc_defconfig index a231b73b1a40..44a451be359e 100644 --- a/arch/mips/configs/wrppmc_defconfig +++ b/arch/mips/configs/wrppmc_defconfig @@ -7,7 +7,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_EPOLL is not set CONFIG_SLAB=y diff --git a/arch/mips/configs/yosemite_defconfig b/arch/mips/configs/yosemite_defconfig index ab3a3dcec04d..f72d305a3f08 100644 --- a/arch/mips/configs/yosemite_defconfig +++ b/arch/mips/configs/yosemite_defconfig @@ -8,7 +8,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/mn10300/configs/asb2303_defconfig b/arch/mn10300/configs/asb2303_defconfig index 3f749b69ca71..1fd41ec1dfb5 100644 --- a/arch/mn10300/configs/asb2303_defconfig +++ b/arch/mn10300/configs/asb2303_defconfig @@ -4,7 +4,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_TINY_RCU=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mn10300/configs/asb2364_defconfig b/arch/mn10300/configs/asb2364_defconfig index 83ce2f27b12a..31d76261a3d5 100644 --- a/arch/mn10300/configs/asb2364_defconfig +++ b/arch/mn10300/configs/asb2364_defconfig @@ -15,7 +15,7 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_RELAY=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SLAB=y diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index f9305f30603a..b647b182dacc 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -8,7 +8,7 @@ CONFIG_LOG_BUF_SHIFT=16 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_PROFILING=y diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig index 628d3e022535..311ca367b622 100644 --- a/arch/parisc/configs/c3000_defconfig +++ b/arch/parisc/configs/c3000_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_PROFILING=y diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index 97fedceaa30b..4182c772340b 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig index 33b3c24f4edd..2dbb293163f5 100644 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ b/arch/powerpc/configs/40x/ep405_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/40x/hcu4_defconfig b/arch/powerpc/configs/40x/hcu4_defconfig index 4613079a0ab1..ebeb4accad65 100644 --- a/arch/powerpc/configs/40x/hcu4_defconfig +++ b/arch/powerpc/configs/40x/hcu4_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index 34b8c1a1e752..532ea9d93a15 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index 651be09136fa..3c142ac1b344 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig index ded455e18339..ff57d4828ffc 100644 --- a/arch/powerpc/configs/40x/walnut_defconfig +++ b/arch/powerpc/configs/40x/walnut_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 63746a041d6b..3ed16d5c909d 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index f5f2a4e3e21b..b1b7d2c5c059 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index ac65b48b8ccd..30a0a8e08fdd 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_PCI_QUIRKS is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index 17e4dd98eed7..a46942aac695 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig index fedd03fdf5d5..07d77e51f1ba 100644 --- a/arch/powerpc/configs/44x/ebony_defconfig +++ b/arch/powerpc/configs/44x/ebony_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index ebff7011282e..2ce7e9aff09e 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 865e93fb41fd..18730ff9de7c 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -6,7 +6,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index 8ece4c774415..92f863ac8443 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_PROFILING=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index 4ca9b4873c51..34c09144a699 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index e3b65d24207e..21c33faf61a2 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 64cd0f3421a9..01cc2b1a7f9a 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 01d03367917e..dfcffede16ad 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 89b2f9626137..47e399f2892f 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index e3386cf6f5b7..a6a002ed5681 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 9c13b9dffafa..6cf9d6614805 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -8,7 +8,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig index f234c4d0b15c..69b57daf402e 100644 --- a/arch/powerpc/configs/52xx/cm5200_defconfig +++ b/arch/powerpc/configs/52xx/cm5200_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index a4a795c80740..f3638ae0a627 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index 20d53a1aa7e4..6828eda02bdc 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 6bd58338bf1a..7f7e4a878602 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -8,7 +8,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SLAB=y diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index 3a1f70292d9d..959cd2cfc275 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig index eed42d8919e8..d2762d9dcb8e 100644 --- a/arch/powerpc/configs/83xx/asp8347_defconfig +++ b/arch/powerpc/configs/83xx/asp8347_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig index e43ecb27dfd7..7a7b731c5735 100644 --- a/arch/powerpc/configs/83xx/kmeter1_defconfig +++ b/arch/powerpc/configs/83xx/kmeter1_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index c2e6ab51d335..c683bce4c26e 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index 1d3b20065913..a721cd3d793f 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig index 91fe73bd5ad2..a5699a1f7d0a 100644 --- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index 6d300f205604..b4da1a7e6449 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index b236a67e01fe..291f8221d5a6 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index 001dead3cde9..f8b228aaa03a 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig index 9dccefca00c3..99660c062191 100644 --- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig index d4b165d7d294..10b5c4cd0e72 100644 --- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig index 89ba67274bda..45925d701d2a 100644 --- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig index 2ea6b405046a..f367985be6f7 100644 --- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index bffe3c775030..414eda381591 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig index fa5c9eefc9ad..6d6463fe06fc 100644 --- a/arch/powerpc/configs/83xx/sbc834x_defconfig +++ b/arch/powerpc/configs/83xx/sbc834x_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig index 385b1af37d75..8f7c1061891a 100644 --- a/arch/powerpc/configs/85xx/ksi8560_defconfig +++ b/arch/powerpc/configs/85xx/ksi8560_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_KSI8560=y CONFIG_CPM2=y diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig index 222b704c1f4b..55e0725500dc 100644 --- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MPC8540_ADS=y CONFIG_NO_HZ=y diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig index 619702de9477..d724095530a6 100644 --- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MPC8560_ADS=y CONFIG_BINFMT_MISC=y diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index 6bf56e83f957..4b44beaa21ae 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MPC85xx_CDS=y CONFIG_NO_HZ=y diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig index a9a17d055766..5b2b651dfb98 100644 --- a/arch/powerpc/configs/85xx/sbc8548_defconfig +++ b/arch/powerpc/configs/85xx/sbc8548_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_SBC8548=y diff --git a/arch/powerpc/configs/85xx/sbc8560_defconfig b/arch/powerpc/configs/85xx/sbc8560_defconfig index 820e32d8c42b..f7fdb0318e4c 100644 --- a/arch/powerpc/configs/85xx/sbc8560_defconfig +++ b/arch/powerpc/configs/85xx/sbc8560_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_SBC8560=y diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig index b6db3f47af99..77506b5d5a41 100644 --- a/arch/powerpc/configs/85xx/socrates_defconfig +++ b/arch/powerpc/configs/85xx/socrates_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index 333a41bd2a68..5d4db154bf59 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig index 33db352f847e..ddcb9f37fa1f 100644 --- a/arch/powerpc/configs/85xx/tqm8540_defconfig +++ b/arch/powerpc/configs/85xx/tqm8540_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig index f0c20dfbd4d3..981abd6d4b57 100644 --- a/arch/powerpc/configs/85xx/tqm8541_defconfig +++ b/arch/powerpc/configs/85xx/tqm8541_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index a883450dcdfa..37b3d7227cdd 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig index ff95f90dc171..3593b320c97c 100644 --- a/arch/powerpc/configs/85xx/tqm8555_defconfig +++ b/arch/powerpc/configs/85xx/tqm8555_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig index 8d6c90ea4783..de413acc34d6 100644 --- a/arch/powerpc/configs/85xx/tqm8560_defconfig +++ b/arch/powerpc/configs/85xx/tqm8560_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index f53efe4a0e0c..5ea3124518fd 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -11,7 +11,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig index 432ebc28d25c..4b2441244eab 100644 --- a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig +++ b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig @@ -11,7 +11,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/86xx/gef_sbc310_defconfig b/arch/powerpc/configs/86xx/gef_sbc310_defconfig index ce5e919d9b55..a360ba44b928 100644 --- a/arch/powerpc/configs/86xx/gef_sbc310_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc310_defconfig @@ -11,7 +11,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig index 589e71e6dc1c..be2829dd129f 100644 --- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig @@ -11,7 +11,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig index 321fb47096d9..036bfb2d18cd 100644 --- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig +++ b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_ELF_CORE is not set CONFIG_MODULES=y diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig index b5e46399374e..0c9c7ed7ec75 100644 --- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig +++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig @@ -10,7 +10,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/86xx/sbc8641d_defconfig b/arch/powerpc/configs/86xx/sbc8641d_defconfig index 71145c3a64db..0a92ca045641 100644 --- a/arch/powerpc/configs/86xx/sbc8641d_defconfig +++ b/arch/powerpc/configs/86xx/sbc8641d_defconfig @@ -11,7 +11,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index ca84c7fc24d5..69128740c14d 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig index 94d120ef99cf..06f95492afc7 100644 --- a/arch/powerpc/configs/e55xx_smp_defconfig +++ b/arch/powerpc/configs/e55xx_smp_defconfig @@ -12,7 +12,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 2677b08199e7..fceffb3cffbe 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -2,7 +2,7 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y # CONFIG_IOSCHED_CFQ is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index f9a3112e5442..219fd470ed22 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index fcf0a398cd66..e74d3a483705 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set CONFIG_PERF_COUNTERS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index b9b63a609525..94ebfee188db 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_PPC_CHRP is not set diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index c4ed255af18b..39518e91822f 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -3,7 +3,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y # CONFIG_IOSCHED_CFQ is not set diff --git a/arch/powerpc/configs/mgsuvd_defconfig b/arch/powerpc/configs/mgsuvd_defconfig index f276c7cf555b..2a490626015c 100644 --- a/arch/powerpc/configs/mgsuvd_defconfig +++ b/arch/powerpc/configs/mgsuvd_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig index 3b9470883de5..75f0bbf0f6e8 100644 --- a/arch/powerpc/configs/mpc7448_hpc2_defconfig +++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index c7d68ff1a736..6a22400f73c1 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -2,7 +2,7 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 5b1b10fd9740..5aac9a8bc53b 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -3,7 +3,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 3aeb5949cfef..99a19d1e9bf8 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -10,7 +10,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index d62c8016f4bc..c636f23f8c92 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -12,7 +12,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 668215cae890..5c258823e694 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 63b90d477889..55b54318fef6 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -10,7 +10,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index f9b83481b00e..9e146cdf63de 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -4,7 +4,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 93d7425ce6cd..bfd634b5ada7 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 2fa05f7be4cb..47133202a625 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -5,7 +5,7 @@ CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index a4353bef31c5..baad8db21b61 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -3,7 +3,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 49cffe003657..caba919f65d8 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -8,7 +8,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_PERF_EVENTS is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 4f0c10a62b9d..ebb2a66c99d3 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -1,7 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index d0a5b6763880..8616fde0896f 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index bb8ba75b7c68..175295fbf4f3 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -7,7 +7,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set CONFIG_PERF_COUNTERS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig index 47a20cfb4486..e5bc9f75d474 100644 --- a/arch/powerpc/platforms/iseries/Kconfig +++ b/arch/powerpc/platforms/iseries/Kconfig @@ -2,7 +2,7 @@ config PPC_ISERIES bool "IBM Legacy iSeries" depends on PPC64 && PPC_BOOK3S select PPC_INDIRECT_IO - select PPC_PCI_CHOICE if EMBEDDED + select PPC_PCI_CHOICE if EXPERT menu "iSeries device drivers" depends on PPC_ISERIES diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 5d1b743dbe7e..5b3da4b4ea79 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -10,7 +10,7 @@ config PPC_PSERIES select RTAS_ERROR_LOGGING select PPC_UDBG_16550 select PPC_NATIVE - select PPC_PCI_CHOICE if EMBEDDED + select PPC_PCI_CHOICE if EXPERT default y config PPC_SPLPAR @@ -24,9 +24,9 @@ config PPC_SPLPAR two or more partitions. config EEH - bool "PCI Extended Error Handling (EEH)" if EMBEDDED + bool "PCI Extended Error Handling (EEH)" if EXPERT depends on PPC_PSERIES && PCI - default y if !EMBEDDED + default y if !EXPERT config PSERIES_MSI bool diff --git a/arch/score/configs/spct6600_defconfig b/arch/score/configs/spct6600_defconfig index 9883c50e4636..df1edbf507a2 100644 --- a/arch/score/configs/spct6600_defconfig +++ b/arch/score/configs/spct6600_defconfig @@ -9,7 +9,7 @@ CONFIG_LOG_BUF_SHIFT=12 CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index fff252209f63..ae555569823b 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,6 +1,6 @@ config SUPERH def_bool y - select EMBEDDED + select EXPERT select CLKDEV_LOOKUP select HAVE_IDE if HAS_IOPORT select HAVE_MEMBLOCK diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index e11b5fcb70eb..4e8b82bca9e5 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -220,7 +220,7 @@ config FORCE_MAX_ZONEORDER choice depends on !TILEGX - prompt "Memory split" if EMBEDDED + prompt "Memory split" if EXPERT default VMSPLIT_3G ---help--- Select the desired split between kernel and user memory. diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug index a81f0fbf7e60..9bc161a02c71 100644 --- a/arch/tile/Kconfig.debug +++ b/arch/tile/Kconfig.debug @@ -3,7 +3,7 @@ menu "Kernel hacking" source "lib/Kconfig.debug" config EARLY_PRINTK - bool "Early printk" if EMBEDDED && DEBUG_KERNEL + bool "Early printk" if EXPERT && DEBUG_KERNEL default y help Write kernel log output directly via the hypervisor console. diff --git a/arch/tile/configs/tile_defconfig b/arch/tile/configs/tile_defconfig index 919c54afd981..0fe54445fda5 100644 --- a/arch/tile/configs/tile_defconfig +++ b/arch/tile/configs/tile_defconfig @@ -3,7 +3,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="usr/contents.txt" -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_MODULES=y diff --git a/arch/um/defconfig b/arch/um/defconfig index 564f3de65b4a..9f7634f08cf3 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -133,7 +133,7 @@ CONFIG_SYSFS_DEPRECATED=y # CONFIG_BLK_DEV_INITRD is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y -# CONFIG_EMBEDDED is not set +# CONFIG_EXPERT is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3ed5ad92b029..d5ed94d30aad 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -627,11 +627,11 @@ config APB_TIMER as it is off-chip. APB timers are always running regardless of CPU C states, they are used as per CPU clockevent device when possible. -# Mark as embedded because too many people got it wrong. +# Mark as expert because too many people got it wrong. # The code disables itself when not needed. config DMI default y - bool "Enable DMI scanning" if EMBEDDED + bool "Enable DMI scanning" if EXPERT ---help--- Enabled scanning of DMI to identify machine quirks. Say Y here unless you have verified that your setup is not @@ -639,7 +639,7 @@ config DMI BIOS code. config GART_IOMMU - bool "GART IOMMU support" if EMBEDDED + bool "GART IOMMU support" if EXPERT default y select SWIOTLB depends on X86_64 && PCI && AMD_NB @@ -889,7 +889,7 @@ config X86_THERMAL_VECTOR depends on X86_MCE_INTEL config VM86 - bool "Enable VM86 support" if EMBEDDED + bool "Enable VM86 support" if EXPERT default y depends on X86_32 ---help--- @@ -1073,7 +1073,7 @@ endchoice choice depends on EXPERIMENTAL - prompt "Memory split" if EMBEDDED + prompt "Memory split" if EXPERT default VMSPLIT_3G depends on X86_32 ---help--- @@ -1135,7 +1135,7 @@ config ARCH_DMA_ADDR_T_64BIT def_bool X86_64 || HIGHMEM64G config DIRECT_GBPAGES - bool "Enable 1GB pages for kernel pagetables" if EMBEDDED + bool "Enable 1GB pages for kernel pagetables" if EXPERT default y depends on X86_64 ---help--- @@ -1369,7 +1369,7 @@ config MATH_EMULATION config MTRR def_bool y - prompt "MTRR (Memory Type Range Register) support" if EMBEDDED + prompt "MTRR (Memory Type Range Register) support" if EXPERT ---help--- On Intel P6 family processors (Pentium Pro, Pentium II and later) the Memory Type Range Registers (MTRRs) may be used to control @@ -1435,7 +1435,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT config X86_PAT def_bool y - prompt "x86 PAT support" if EMBEDDED + prompt "x86 PAT support" if EXPERT depends on MTRR ---help--- Use PAT attributes to setup page level cache control. @@ -1539,7 +1539,7 @@ config KEXEC_JUMP code in physical address mode via KEXEC config PHYSICAL_START - hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) default "0x1000000" ---help--- This gives the physical address where the kernel is loaded. @@ -1934,7 +1934,7 @@ config PCI_MMCONFIG depends on X86_64 && PCI && ACPI config PCI_CNB20LE_QUIRK - bool "Read CNB20LE Host Bridge Windows" if EMBEDDED + bool "Read CNB20LE Host Bridge Windows" if EXPERT default n depends on PCI && EXPERIMENTAL help diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 15588a0ef466..283c5a6a03a6 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -424,7 +424,7 @@ config X86_DEBUGCTLMSR depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML menuconfig PROCESSOR_SELECT - bool "Supported processor vendors" if EMBEDDED + bool "Supported processor vendors" if EXPERT ---help--- This lets you choose what x86 vendor support code your kernel will include. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 45143bbcfe5e..615e18810f48 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -31,7 +31,7 @@ config X86_VERBOSE_BOOTUP see errors. Disable this if you want silent bootup. config EARLY_PRINTK - bool "Early printk" if EMBEDDED + bool "Early printk" if EXPERT default y ---help--- Write kernel log output directly into the VGA buffer or to a serial @@ -138,7 +138,7 @@ config DEBUG_NX_TEST config DOUBLEFAULT default y - bool "Enable doublefault exception handler" if EMBEDDED + bool "Enable doublefault exception handler" if EXPERT depends on X86_32 ---help--- This option allows trapping of rare doublefault exceptions that diff --git a/arch/xtensa/configs/common_defconfig b/arch/xtensa/configs/common_defconfig index 1d230ee081b4..b90038e40dd3 100644 --- a/arch/xtensa/configs/common_defconfig +++ b/arch/xtensa/configs/common_defconfig @@ -32,7 +32,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_HOTPLUG is not set CONFIG_KOBJECT_UEVENT=y # CONFIG_IKCONFIG is not set -# CONFIG_EMBEDDED is not set +# CONFIG_EXPERT is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 7368164843b9..0234cd198c54 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -55,7 +55,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index bb84fbc9921f..095cd8084164 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -55,7 +55,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y -CONFIG_EMBEDDED=y +CONFIG_EXPERT=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set diff --git a/block/Kconfig b/block/Kconfig index 6c9213ef15a1..60be1e0455da 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -2,7 +2,7 @@ # Block layer core configuration # menuconfig BLOCK - bool "Enable the block layer" if EMBEDDED + bool "Enable the block layer" if EXPERT default y help Provide block layer support for the kernel. diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 10c7ad59c0e1..2aa042a5da6d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -318,7 +318,7 @@ config ACPI_PCI_SLOT the module will be called pci_slot. config X86_PM_TIMER - bool "Power Management Timer Support" if EMBEDDED + bool "Power Management Timer Support" if EXPERT depends on X86 default y help diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index c6b298d4c136..c2328aed0836 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -783,7 +783,7 @@ config PATA_PCMCIA config PATA_PLATFORM tristate "Generic platform device PATA support" - depends on EMBEDDED || PPC || HAVE_PATA_PLATFORM + depends on EXPERT || PPC || HAVE_PATA_PLATFORM help This option enables support for generic directly connected ATA devices commonly found on embedded systems. diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index fd96345bc35c..d57e8d0fb823 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -70,7 +70,7 @@ config PREVENT_FIRMWARE_BUILD If unsure say Y here. config FW_LOADER - tristate "Userspace firmware loading support" if EMBEDDED + tristate "Userspace firmware loading support" if EXPERT default y ---help--- This option is provided for the case where no in-kernel-tree modules diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 0f175a866ef0..4237602fd4cd 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -5,7 +5,7 @@ menu "Character devices" config VT - bool "Virtual terminal" if EMBEDDED + bool "Virtual terminal" if EXPERT depends on !S390 select INPUT default y @@ -39,13 +39,13 @@ config VT config CONSOLE_TRANSLATIONS depends on VT default y - bool "Enable character translations in console" if EMBEDDED + bool "Enable character translations in console" if EXPERT ---help--- This enables support for font mapping and Unicode translation on virtual consoles. config VT_CONSOLE - bool "Support for console on virtual terminal" if EMBEDDED + bool "Support for console on virtual terminal" if EXPERT depends on VT default y ---help--- @@ -429,7 +429,7 @@ config SGI_MBCS source "drivers/serial/Kconfig" config UNIX98_PTYS - bool "Unix98 PTY support" if EMBEDDED + bool "Unix98 PTY support" if EXPERT default y ---help--- A pseudo terminal (PTY) is a software device consisting of two @@ -495,7 +495,7 @@ config LEGACY_PTY_COUNT config TTY_PRINTK bool "TTY driver to output user messages via printk" - depends on EMBEDDED + depends on EXPERT default n ---help--- If you say Y here, the support for writing user messages (i.e. diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index a8c8d9c19d74..ca8ee8093d6c 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -71,7 +71,7 @@ config CPU_FREQ_DEFAULT_GOV_PERFORMANCE config CPU_FREQ_DEFAULT_GOV_POWERSAVE bool "powersave" - depends on EMBEDDED + depends on EXPERT select CPU_FREQ_GOV_POWERSAVE help Use the CPUFreq governor 'powersave' as default. This sets diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index e8b6a13515bd..e710424b59ea 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -27,7 +27,7 @@ config EDD_OFF using the kernel parameter 'edd={on|skipmbr|off}'. config FIRMWARE_MEMMAP - bool "Add firmware-provided memory map to sysfs" if EMBEDDED + bool "Add firmware-provided memory map to sysfs" if EXPERT default X86 help Add the firmware-provided (unmodified) memory map to /sys/firmware/memmap. diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 64828a7db77b..bea966f8ac84 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -23,7 +23,7 @@ config DRM_KMS_HELPER tristate depends on DRM select FB - select FRAMEBUFFER_CONSOLE if !EMBEDDED + select FRAMEBUFFER_CONSOLE if !EXPERT help FB and CRTC helpers for KMS drivers. diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 5c4f9b9ecdc0..6977a1ce9d98 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1533,11 +1533,11 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) } EXPORT_SYMBOL(drm_fb_helper_hotplug_event); -/* The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EMBEDDED) +/* The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT) * but the module doesn't depend on any fb console symbols. At least * attempt to load fbcon to avoid leaving the system without a usable console. */ -#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EMBEDDED) +#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT) static int __init drm_fb_helper_modinit(void) { const char *name = "fbcon"; diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 21d6c29c2d21..de70959b9ed5 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -8,7 +8,7 @@ config DRM_NOUVEAU select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT select FB - select FRAMEBUFFER_CONSOLE if !EMBEDDED + select FRAMEBUFFER_CONSOLE if !EXPERT select FB_BACKLIGHT if DRM_NOUVEAU_BACKLIGHT select ACPI_VIDEO if ACPI && X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL && INPUT help diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig index 8d0e31a22027..96c83a9a76bb 100644 --- a/drivers/gpu/vga/Kconfig +++ b/drivers/gpu/vga/Kconfig @@ -1,5 +1,5 @@ config VGA_ARB - bool "VGA Arbitration" if EMBEDDED + bool "VGA Arbitration" if EXPERT default y depends on PCI help diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 24cca2f69dfc..2560f01c1a63 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -62,9 +62,9 @@ config HID_3M_PCT Support for 3M PCT touch screens. config HID_A4TECH - tristate "A4 tech mice" if EMBEDDED + tristate "A4 tech mice" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for A4 tech X5 and WOP-35 / Trust 450L mice. @@ -77,9 +77,9 @@ config HID_ACRUX_FF game controllers. config HID_APPLE - tristate "Apple {i,Power,Mac}Books" if EMBEDDED + tristate "Apple {i,Power,Mac}Books" if EXPERT depends on (USB_HID || BT_HIDP) - default !EMBEDDED + default !EXPERT ---help--- Support for some Apple devices which less or more break HID specification. @@ -88,9 +88,9 @@ config HID_APPLE MacBooks, MacBook Pros and Apple Aluminum. config HID_BELKIN - tristate "Belkin Flip KVM and Wireless keyboard" if EMBEDDED + tristate "Belkin Flip KVM and Wireless keyboard" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Belkin Flip KVM and Wireless keyboard. @@ -101,16 +101,16 @@ config HID_CANDO Support for Cando dual touch panel. config HID_CHERRY - tristate "Cherry Cymotion keyboard" if EMBEDDED + tristate "Cherry Cymotion keyboard" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" if EMBEDDED + tristate "Chicony Tactical pad" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Chicony Tactical pad. @@ -130,9 +130,9 @@ config HID_PRODIKEYS and some additional multimedia keys. config HID_CYPRESS - tristate "Cypress mouse and barcode readers" if EMBEDDED + tristate "Cypress mouse and barcode readers" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for cypress mouse and barcode readers. @@ -174,16 +174,16 @@ config HID_ELECOM Support for the ELECOM BM084 (bluetooth mouse). config HID_EZKEY - tristate "Ezkey BTC 8193 keyboard" if EMBEDDED + tristate "Ezkey BTC 8193 keyboard" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Ezkey BTC 8193 keyboard. config HID_KYE - tristate "Kye/Genius Ergo Mouse" if EMBEDDED + tristate "Kye/Genius Ergo Mouse" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Kye/Genius Ergo Mouse. @@ -212,16 +212,16 @@ config HID_TWINHAN Support for Twinhan IR remote control. config HID_KENSINGTON - tristate "Kensington Slimblade Trackball" if EMBEDDED + tristate "Kensington Slimblade Trackball" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Kensington Slimblade Trackball. config HID_LOGITECH - tristate "Logitech devices" if EMBEDDED + tristate "Logitech devices" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Logitech devices that are not fully compliant with HID standard. @@ -276,9 +276,9 @@ config HID_MAGICMOUSE Apple Wireless "Magic" Mouse. config HID_MICROSOFT - tristate "Microsoft non-fully HID-compliant devices" if EMBEDDED + tristate "Microsoft non-fully HID-compliant devices" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Microsoft devices that are not fully compliant with HID standard. @@ -289,9 +289,9 @@ config HID_MOSART Support for MosArt dual-touch panels. config HID_MONTEREY - tristate "Monterey Genius KB29E keyboard" if EMBEDDED + tristate "Monterey Genius KB29E keyboard" if EXPERT depends on USB_HID - default !EMBEDDED + default !EXPERT ---help--- Support for Monterey Genius KB29E. @@ -365,8 +365,8 @@ config HID_PICOLCD - IR config HID_PICOLCD_FB - bool "Framebuffer support" if EMBEDDED - default !EMBEDDED + bool "Framebuffer support" if EXPERT + default !EXPERT depends on HID_PICOLCD depends on HID_PICOLCD=FB || FB=y select FB_DEFERRED_IO @@ -379,8 +379,8 @@ config HID_PICOLCD_FB frambuffer device. config HID_PICOLCD_BACKLIGHT - bool "Backlight control" if EMBEDDED - default !EMBEDDED + bool "Backlight control" if EXPERT + default !EXPERT depends on HID_PICOLCD depends on HID_PICOLCD=BACKLIGHT_CLASS_DEVICE || BACKLIGHT_CLASS_DEVICE=y ---help--- @@ -388,16 +388,16 @@ config HID_PICOLCD_BACKLIGHT class. config HID_PICOLCD_LCD - bool "Contrast control" if EMBEDDED - default !EMBEDDED + bool "Contrast control" if EXPERT + default !EXPERT depends on HID_PICOLCD depends on HID_PICOLCD=LCD_CLASS_DEVICE || LCD_CLASS_DEVICE=y ---help--- Provide access to PicoLCD's LCD contrast via lcd class. config HID_PICOLCD_LEDS - bool "GPO via leds class" if EMBEDDED - default !EMBEDDED + bool "GPO via leds class" if EXPERT + default !EXPERT depends on HID_PICOLCD depends on HID_PICOLCD=LEDS_CLASS || LEDS_CLASS=y ---help--- diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig index 4edb3bef94a6..0f20fd17cf06 100644 --- a/drivers/hid/usbhid/Kconfig +++ b/drivers/hid/usbhid/Kconfig @@ -45,7 +45,7 @@ config USB_HIDDEV If unsure, say Y. menu "USB HID Boot Protocol drivers" - depends on USB!=n && USB_HID!=y && EMBEDDED + depends on USB!=n && USB_HID!=y && EXPERT config USB_KBD tristate "USB HIDBP Keyboard (simple Boot) support" diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 98ccfeb3f5aa..9827c5e686cb 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -134,7 +134,7 @@ config BLK_DEV_IDECD module will be called ide-cd. config BLK_DEV_IDECD_VERBOSE_ERRORS - bool "Verbose error logging for IDE/ATAPI CDROM driver" if EMBEDDED + bool "Verbose error logging for IDE/ATAPI CDROM driver" if EXPERT depends on BLK_DEV_IDECD default y help diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig index 03efc074967e..da314c3fec23 100644 --- a/drivers/infiniband/hw/mthca/Kconfig +++ b/drivers/infiniband/hw/mthca/Kconfig @@ -7,7 +7,7 @@ config INFINIBAND_MTHCA ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). config INFINIBAND_MTHCA_DEBUG - bool "Verbose debugging output" if EMBEDDED + bool "Verbose debugging output" if EXPERT depends on INFINIBAND_MTHCA default y ---help--- diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 55855eeabae7..cda8eac55fff 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -24,7 +24,7 @@ config INFINIBAND_IPOIB_CM unless you limit mtu for these destinations to 2044. config INFINIBAND_IPOIB_DEBUG - bool "IP-over-InfiniBand debugging" if EMBEDDED + bool "IP-over-InfiniBand debugging" if EXPERT depends on INFINIBAND_IPOIB default y ---help--- diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 07c2cd43109c..1903c0f5b925 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -6,7 +6,7 @@ menu "Input device support" depends on !S390 config INPUT - tristate "Generic input layer (needed for keyboard, mouse, ...)" if EMBEDDED + tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT default y help Say Y here if you have any input device (mouse, keyboard, tablet, @@ -67,7 +67,7 @@ config INPUT_SPARSEKMAP comment "Userland interfaces" config INPUT_MOUSEDEV - tristate "Mouse interface" if EMBEDDED + tristate "Mouse interface" if EXPERT default y help Say Y here if you want your mouse to be accessible as char devices @@ -150,7 +150,7 @@ config INPUT_EVBUG module will be called evbug. config INPUT_APMPOWER - tristate "Input Power Event -> APM Bridge" if EMBEDDED + tristate "Input Power Event -> APM Bridge" if EXPERT depends on INPUT && APM_EMULATION help Say Y here if you want suspend key events to trigger a user diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 7b3c0b8fa432..417507348bab 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -2,7 +2,7 @@ # Input core configuration # menuconfig INPUT_KEYBOARD - bool "Keyboards" if EMBEDDED || !X86 + bool "Keyboards" if EXPERT || !X86 default y help Say Y here, and a list of supported keyboards will be displayed. @@ -57,7 +57,7 @@ config KEYBOARD_ATARI module will be called atakbd. config KEYBOARD_ATKBD - tristate "AT keyboard" if EMBEDDED || !X86 + tristate "AT keyboard" if EXPERT || !X86 default y select SERIO select SERIO_LIBPS2 diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index bf5fd7f6a313..9c1e6ee83531 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -39,7 +39,7 @@ config MOUSE_PS2 module will be called psmouse. config MOUSE_PS2_ALPS - bool "ALPS PS/2 mouse protocol extension" if EMBEDDED + bool "ALPS PS/2 mouse protocol extension" if EXPERT default y depends on MOUSE_PS2 help @@ -49,7 +49,7 @@ config MOUSE_PS2_ALPS If unsure, say Y. config MOUSE_PS2_LOGIPS2PP - bool "Logitech PS/2++ mouse protocol extension" if EMBEDDED + bool "Logitech PS/2++ mouse protocol extension" if EXPERT default y depends on MOUSE_PS2 help @@ -59,7 +59,7 @@ config MOUSE_PS2_LOGIPS2PP If unsure, say Y. config MOUSE_PS2_SYNAPTICS - bool "Synaptics PS/2 mouse protocol extension" if EMBEDDED + bool "Synaptics PS/2 mouse protocol extension" if EXPERT default y depends on MOUSE_PS2 help @@ -69,7 +69,7 @@ config MOUSE_PS2_SYNAPTICS If unsure, say Y. config MOUSE_PS2_LIFEBOOK - bool "Fujitsu Lifebook PS/2 mouse protocol extension" if EMBEDDED + bool "Fujitsu Lifebook PS/2 mouse protocol extension" if EXPERT default y depends on MOUSE_PS2 && X86 && DMI help @@ -79,7 +79,7 @@ config MOUSE_PS2_LIFEBOOK If unsure, say Y. config MOUSE_PS2_TRACKPOINT - bool "IBM Trackpoint PS/2 mouse protocol extension" if EMBEDDED + bool "IBM Trackpoint PS/2 mouse protocol extension" if EXPERT default y depends on MOUSE_PS2 help diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 307eef77a172..55f2c2293ec6 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -2,7 +2,7 @@ # Input core configuration # config SERIO - tristate "Serial I/O support" if EMBEDDED || !X86 + tristate "Serial I/O support" if EXPERT || !X86 default y help Say Yes here if you have any input device that uses serial I/O to @@ -19,7 +19,7 @@ config SERIO if SERIO config SERIO_I8042 - tristate "i8042 PC Keyboard controller" if EMBEDDED || !X86 + tristate "i8042 PC Keyboard controller" if EXPERT || !X86 default y depends on !PARISC && (!ARM || ARCH_SHARK || FOOTBRIDGE_HOST) && \ (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN @@ -168,7 +168,7 @@ config SERIO_MACEPS2 module will be called maceps2. config SERIO_LIBPS2 - tristate "PS/2 driver library" if EMBEDDED + tristate "PS/2 driver library" if EXPERT depends on SERIO_I8042 || SERIO_I8042=n help Say Y here if you are using a driver for device connected diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 0c9f4b158ff0..61834ae282e1 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -540,62 +540,62 @@ config TOUCHSCREEN_MC13783 config TOUCHSCREEN_USB_EGALAX default y - bool "eGalax, eTurboTouch CT-410/510/700 device support" if EMBEDDED + bool "eGalax, eTurboTouch CT-410/510/700 device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_PANJIT default y - bool "PanJit device support" if EMBEDDED + bool "PanJit device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_3M default y - bool "3M/Microtouch EX II series device support" if EMBEDDED + bool "3M/Microtouch EX II series device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_ITM default y - bool "ITM device support" if EMBEDDED + bool "ITM device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_ETURBO default y - bool "eTurboTouch (non-eGalax compatible) device support" if EMBEDDED + bool "eTurboTouch (non-eGalax compatible) device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_GUNZE default y - bool "Gunze AHL61 device support" if EMBEDDED + bool "Gunze AHL61 device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_DMC_TSC10 default y - bool "DMC TSC-10/25 device support" if EMBEDDED + bool "DMC TSC-10/25 device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_IRTOUCH default y - bool "IRTOUCHSYSTEMS/UNITOP device support" if EMBEDDED + bool "IRTOUCHSYSTEMS/UNITOP device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_IDEALTEK default y - bool "IdealTEK URTC1000 device support" if EMBEDDED + bool "IdealTEK URTC1000 device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_GENERAL_TOUCH default y - bool "GeneralTouch Touchscreen device support" if EMBEDDED + bool "GeneralTouch Touchscreen device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_GOTOP default y - bool "GoTop Super_Q2/GogoPen/PenPower tablet device support" if EMBEDDED + bool "GoTop Super_Q2/GogoPen/PenPower tablet device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_JASTEC default y - bool "JASTEC/DigiTech DTR-02U USB touch controller device support" if EMBEDDED + bool "JASTEC/DigiTech DTR-02U USB touch controller device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_E2I @@ -605,17 +605,17 @@ config TOUCHSCREEN_USB_E2I config TOUCHSCREEN_USB_ZYTRONIC default y - bool "Zytronic controller" if EMBEDDED + bool "Zytronic controller" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_ETT_TC45USB default y - bool "ET&T USB series TC4UM/TC5UH touchscreen controller support" if EMBEDDED + bool "ET&T USB series TC4UM/TC5UH touchscreen controller support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_USB_NEXIO default y - bool "NEXIO/iNexio device support" if EMBEDDED + bool "NEXIO/iNexio device support" if EXPERT depends on TOUCHSCREEN_USB_COMPOSITE config TOUCHSCREEN_TOUCHIT213 diff --git a/drivers/media/common/tuners/Kconfig b/drivers/media/common/tuners/Kconfig index 78b089526e02..6fc79f15dcbc 100644 --- a/drivers/media/common/tuners/Kconfig +++ b/drivers/media/common/tuners/Kconfig @@ -34,7 +34,7 @@ config MEDIA_TUNER config MEDIA_TUNER_CUSTOMISE bool "Customize analog and hybrid tuner modules to build" depends on MEDIA_TUNER - default y if EMBEDDED + default y if EXPERT help This allows the user to deselect tuner drivers unnecessary for their hardware from the build. Use this option with care diff --git a/drivers/media/dvb/frontends/Kconfig b/drivers/media/dvb/frontends/Kconfig index ef3e43a03199..b8519ba511e5 100644 --- a/drivers/media/dvb/frontends/Kconfig +++ b/drivers/media/dvb/frontends/Kconfig @@ -1,7 +1,7 @@ config DVB_FE_CUSTOMISE bool "Customise the frontend modules to build" depends on DVB_CORE - default y if EMBEDDED + default y if EXPERT help This allows the user to select/deselect frontend drivers for their hardware from the build. diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index eb875af05e79..34e7abadceaa 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -78,7 +78,7 @@ config VIDEO_FIXED_MINOR_RANGES config VIDEO_HELPER_CHIPS_AUTO bool "Autoselect pertinent encoders/decoders and other helper chips" - default y if !EMBEDDED + default y if !EXPERT ---help--- Most video cards may require additional modules to encode or decode audio/video standards. This option will autoselect diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 16fe4f9b719b..03823327db25 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2864,7 +2864,7 @@ config MLX4_CORE default n config MLX4_DEBUG - bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED) + bool "Verbose debugging output" if (MLX4_CORE && EXPERT) depends on MLX4_CORE default y ---help--- diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index dda70981b7a6..dc29348264c6 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -31,7 +31,7 @@ source "drivers/pci/pcie/aer/Kconfig" # PCI Express ASPM # config PCIEASPM - bool "PCI Express ASPM control" if EMBEDDED + bool "PCI Express ASPM control" if EXPERT depends on PCI && PCIEPORTBUS default y help diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index de886f3dfd39..6e318ce41136 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -69,7 +69,7 @@ comment "PC-card bridges" config YENTA tristate "CardBus yenta-compatible bridge support" depends on PCI - select CARDBUS if !EMBEDDED + select CARDBUS if !EXPERT select PCCARD_NONSTATIC if PCMCIA != n ---help--- This option enables support for CardBus host bridges. Virtually @@ -84,27 +84,27 @@ config YENTA config YENTA_O2 default y - bool "Special initialization for O2Micro bridges" if EMBEDDED + bool "Special initialization for O2Micro bridges" if EXPERT depends on YENTA config YENTA_RICOH default y - bool "Special initialization for Ricoh bridges" if EMBEDDED + bool "Special initialization for Ricoh bridges" if EXPERT depends on YENTA config YENTA_TI default y - bool "Special initialization for TI and EnE bridges" if EMBEDDED + bool "Special initialization for TI and EnE bridges" if EXPERT depends on YENTA config YENTA_ENE_TUNE default y - bool "Auto-tune EnE bridges for CB cards" if EMBEDDED + bool "Auto-tune EnE bridges for CB cards" if EXPERT depends on YENTA_TI && CARDBUS config YENTA_TOSHIBA default y - bool "Special initialization for Toshiba ToPIC bridges" if EMBEDDED + bool "Special initialization for Toshiba ToPIC bridges" if EXPERT depends on YENTA config PD6729 diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index c1df7676a73d..b1682d7f1d8a 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -81,7 +81,7 @@ config SERIAL_8250_GSC default SERIAL_8250 config SERIAL_8250_PCI - tristate "8250/16550 PCI device support" if EMBEDDED + tristate "8250/16550 PCI device support" if EXPERT depends on SERIAL_8250 && PCI default SERIAL_8250 help @@ -90,7 +90,7 @@ config SERIAL_8250_PCI Saves about 9K. config SERIAL_8250_PNP - tristate "8250/16550 PNP device support" if EMBEDDED + tristate "8250/16550 PNP device support" if EXPERT depends on SERIAL_8250 && PNP default SERIAL_8250 help diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index 2d8cc455dbc7..42cdaa9a4d8a 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -82,7 +82,7 @@ config SSB_SDIOHOST config SSB_SILENT bool "No SSB kernel messages" - depends on SSB && EMBEDDED + depends on SSB && EXPERT help This option turns off all Sonics Silicon Backplane printks. Note that you won't be able to identify problems, once diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig index bcc24779ba0e..18d02e32a3d5 100644 --- a/drivers/usb/core/Kconfig +++ b/drivers/usb/core/Kconfig @@ -123,9 +123,9 @@ config USB_OTG config USB_OTG_WHITELIST bool "Rely on OTG Targeted Peripherals List" - depends on USB_OTG || EMBEDDED + depends on USB_OTG || EXPERT default y if USB_OTG - default n if EMBEDDED + default n if EXPERT help If you say Y here, the "otg_whitelist.h" file will be used as a product whitelist, so USB peripherals not listed there will be @@ -141,7 +141,7 @@ config USB_OTG_WHITELIST config USB_OTG_BLACKLIST_HUB bool "Disable external hubs" - depends on USB_OTG || EMBEDDED + depends on USB_OTG || EXPERT help If you say Y here, then Linux will refuse to enumerate external hubs. OTG hosts are allowed to reduce hardware diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index d916ac04abab..6bafb51bb437 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1227,7 +1227,7 @@ config FB_CARILLO_RANCH config FB_INTEL tristate "Intel 830M/845G/852GM/855GM/865G/915G/945G/945GM/965G/965GM support (EXPERIMENTAL)" - depends on EXPERIMENTAL && FB && PCI && X86 && AGP_INTEL && EMBEDDED + depends on EXPERIMENTAL && FB && PCI && X86 && AGP_INTEL && EXPERT select FB_MODE_HELPERS select FB_CFB_FILLRECT select FB_CFB_COPYAREA diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index 5a35f22372b9..2209e354f531 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -5,7 +5,7 @@ menu "Console display driver support" config VGA_CONSOLE - bool "VGA text console" if EMBEDDED || !X86 + bool "VGA text console" if EXPERT || !X86 depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) default y help diff --git a/fs/Kconfig b/fs/Kconfig index 9a7921ae4763..3db9caa57edc 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -50,7 +50,7 @@ config EXPORTFS tristate config FILE_LOCKING - bool "Enable POSIX file locking API" if EMBEDDED + bool "Enable POSIX file locking API" if EXPERT default y help This option enables standard file locking support, required diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 6a0068841d96..15af6222f8a4 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -1,5 +1,5 @@ config PROC_FS - bool "/proc file system support" if EMBEDDED + bool "/proc file system support" if EXPERT default y help This is a virtual file system providing information about the status @@ -40,7 +40,7 @@ config PROC_VMCORE Exports the dump image of crashed kernel in ELF format. config PROC_SYSCTL - bool "Sysctl support (/proc/sys)" if EMBEDDED + bool "Sysctl support (/proc/sys)" if EXPERT depends on PROC_FS select SYSCTL default y @@ -61,7 +61,7 @@ config PROC_SYSCTL config PROC_PAGE_MONITOR default y depends on PROC_FS && MMU - bool "Enable /proc page monitoring" if EMBEDDED + bool "Enable /proc page monitoring" if EXPERT help Various /proc files exist to monitor process memory utilization: /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index f4b67588b9d6..8c41feacbac5 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig @@ -1,5 +1,5 @@ config SYSFS - bool "sysfs file system support" if EMBEDDED + bool "sysfs file system support" if EXPERT default y help The sysfs filesystem is a virtual filesystem that the kernel uses to diff --git a/init/Kconfig b/init/Kconfig index 4e337906016e..be788c0957d4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -745,8 +745,8 @@ config DEBUG_BLK_CGROUP endif # CGROUPS menuconfig NAMESPACES - bool "Namespaces support" if EMBEDDED - default !EMBEDDED + bool "Namespaces support" if EXPERT + default !EXPERT help Provides the way to make tasks work with different objects using the same id. For example same IPC id may refer to different objects @@ -899,23 +899,31 @@ config SYSCTL config ANON_INODES bool -menuconfig EMBEDDED - bool "Configure standard kernel features (for small systems)" +menuconfig EXPERT + bool "Configure standard kernel features (expert users)" help This option allows certain base kernel options and settings to be disabled or tweaked. This is for specialized environments which can tolerate a "non-standard" kernel. Only use this if you really know what you are doing. +config EMBEDDED + bool "Embedded system" + select EXPERT + help + This option should be enabled if compiling the kernel for + an embedded system so certain expert options are available + for configuration. + config UID16 - bool "Enable 16-bit UID system calls" if EMBEDDED + bool "Enable 16-bit UID system calls" if EXPERT depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION) default y help This enables the legacy 16-bit UID syscall wrappers. config SYSCTL_SYSCALL - bool "Sysctl syscall support" if EMBEDDED + bool "Sysctl syscall support" if EXPERT depends on PROC_SYSCTL default y select SYSCTL @@ -932,7 +940,7 @@ config SYSCTL_SYSCALL If unsure say Y here. config KALLSYMS - bool "Load all symbols for debugging/ksymoops" if EMBEDDED + bool "Load all symbols for debugging/ksymoops" if EXPERT default y help Say Y here to let the kernel print out symbolic crash information and @@ -963,7 +971,7 @@ config KALLSYMS_EXTRA_PASS config HOTPLUG - bool "Support for hot-pluggable devices" if EMBEDDED + bool "Support for hot-pluggable devices" if EXPERT default y help This option is provided for the case where no hotplug or uevent @@ -973,7 +981,7 @@ config HOTPLUG config PRINTK default y - bool "Enable support for printk" if EMBEDDED + bool "Enable support for printk" if EXPERT help This option enables normal printk support. Removing it eliminates most of the message strings from the kernel image @@ -982,7 +990,7 @@ config PRINTK strongly discouraged. config BUG - bool "BUG() support" if EMBEDDED + bool "BUG() support" if EXPERT default y help Disabling this option eliminates support for BUG and WARN, reducing @@ -993,12 +1001,12 @@ config BUG config ELF_CORE default y - bool "Enable ELF core dumps" if EMBEDDED + bool "Enable ELF core dumps" if EXPERT help Enable support for generating core dumps. Disabling saves about 4k. config PCSPKR_PLATFORM - bool "Enable PC-Speaker support" if EMBEDDED + bool "Enable PC-Speaker support" if EXPERT depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES default y help @@ -1007,14 +1015,14 @@ config PCSPKR_PLATFORM config BASE_FULL default y - bool "Enable full-sized data structures for core" if EMBEDDED + bool "Enable full-sized data structures for core" if EXPERT help Disabling this option reduces the size of miscellaneous core kernel data structures. This saves memory on small machines, but may reduce performance. config FUTEX - bool "Enable futex support" if EMBEDDED + bool "Enable futex support" if EXPERT default y select RT_MUTEXES help @@ -1023,7 +1031,7 @@ config FUTEX run glibc-based applications correctly. config EPOLL - bool "Enable eventpoll support" if EMBEDDED + bool "Enable eventpoll support" if EXPERT default y select ANON_INODES help @@ -1031,7 +1039,7 @@ config EPOLL support for epoll family of system calls. config SIGNALFD - bool "Enable signalfd() system call" if EMBEDDED + bool "Enable signalfd() system call" if EXPERT select ANON_INODES default y help @@ -1041,7 +1049,7 @@ config SIGNALFD If unsure, say Y. config TIMERFD - bool "Enable timerfd() system call" if EMBEDDED + bool "Enable timerfd() system call" if EXPERT select ANON_INODES default y help @@ -1051,7 +1059,7 @@ config TIMERFD If unsure, say Y. config EVENTFD - bool "Enable eventfd() system call" if EMBEDDED + bool "Enable eventfd() system call" if EXPERT select ANON_INODES default y help @@ -1061,7 +1069,7 @@ config EVENTFD If unsure, say Y. config SHMEM - bool "Use full shmem filesystem" if EMBEDDED + bool "Use full shmem filesystem" if EXPERT default y depends on MMU help @@ -1072,7 +1080,7 @@ config SHMEM which may be appropriate on small systems without swap. config AIO - bool "Enable AIO support" if EMBEDDED + bool "Enable AIO support" if EXPERT default y help This option enables POSIX asynchronous I/O which may by used @@ -1149,16 +1157,16 @@ endmenu config VM_EVENT_COUNTERS default y - bool "Enable VM event counters for /proc/vmstat" if EMBEDDED + bool "Enable VM event counters for /proc/vmstat" if EXPERT help VM event counters are needed for event counts to be shown. This option allows the disabling of the VM event counters - on EMBEDDED systems. /proc/vmstat will only show page counts + on EXPERT systems. /proc/vmstat will only show page counts if VM event counters are disabled. config PCI_QUIRKS default y - bool "Enable PCI quirk workarounds" if EMBEDDED + bool "Enable PCI quirk workarounds" if EXPERT depends on PCI help This enables workarounds for various PCI chipset @@ -1167,7 +1175,7 @@ config PCI_QUIRKS config SLUB_DEBUG default y - bool "Enable SLUB debugging support" if EMBEDDED + bool "Enable SLUB debugging support" if EXPERT depends on SLUB && SYSFS help SLUB has extensive debug support features. Disabling these can @@ -1211,7 +1219,7 @@ config SLUB a slab allocator. config SLOB - depends on EMBEDDED + depends on EXPERT bool "SLOB (Simple Allocator)" help SLOB replaces the stock allocator with a drastically simpler @@ -1222,7 +1230,7 @@ endchoice config MMAP_ALLOW_UNINITIALIZED bool "Allow mmapped anonymous memory to be uninitialized" - depends on EMBEDDED && !MMU + depends on EXPERT && !MMU default n help Normally, and according to the Linux spec, anonymous memory obtained diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2d05adb98401..3967c2356e37 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -657,7 +657,7 @@ config DEBUG_HIGHMEM Disable for production systems. config DEBUG_BUGVERBOSE - bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED + bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 @@ -729,8 +729,8 @@ config DEBUG_WRITECOUNT If unsure, say N. config DEBUG_MEMORY_INIT - bool "Debug memory initialisation" if EMBEDDED - default !EMBEDDED + bool "Debug memory initialisation" if EXPERT + default !EXPERT help Enable this for additional checks during memory initialisation. The sanity checks verify aspects of the VM such as the memory model diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index e3b6e18fdac5..60a6088d0e5e 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -7,37 +7,37 @@ config XZ_DEC CRC32 is supported. See Documentation/xz.txt for more information. config XZ_DEC_X86 - bool "x86 BCJ filter decoder" if EMBEDDED + bool "x86 BCJ filter decoder" if EXPERT default y depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_POWERPC - bool "PowerPC BCJ filter decoder" if EMBEDDED + bool "PowerPC BCJ filter decoder" if EXPERT default y depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_IA64 - bool "IA-64 BCJ filter decoder" if EMBEDDED + bool "IA-64 BCJ filter decoder" if EXPERT default y depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_ARM - bool "ARM BCJ filter decoder" if EMBEDDED + bool "ARM BCJ filter decoder" if EXPERT default y depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB - bool "ARM-Thumb BCJ filter decoder" if EMBEDDED + bool "ARM-Thumb BCJ filter decoder" if EXPERT default y depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_SPARC - bool "SPARC BCJ filter decoder" if EMBEDDED + bool "SPARC BCJ filter decoder" if EXPERT default y depends on XZ_DEC select XZ_DEC_BCJ diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 9109262abd24..c766056d0488 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -20,7 +20,7 @@ config MAC80211_HAS_RC def_bool n config MAC80211_RC_PID - bool "PID controller based rate control algorithm" if EMBEDDED + bool "PID controller based rate control algorithm" if EXPERT select MAC80211_HAS_RC ---help--- This option enables a TX rate control algorithm for @@ -28,14 +28,14 @@ config MAC80211_RC_PID rate. config MAC80211_RC_MINSTREL - bool "Minstrel" if EMBEDDED + bool "Minstrel" if EXPERT select MAC80211_HAS_RC default y ---help--- This option enables the 'minstrel' TX rate control algorithm config MAC80211_RC_MINSTREL_HT - bool "Minstrel 802.11n support" if EMBEDDED + bool "Minstrel 802.11n support" if EXPERT depends on MAC80211_RC_MINSTREL default y ---help--- diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index eaf765876458..7fce6dfd2180 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -18,7 +18,7 @@ config RFKILL_LEDS default y config RFKILL_INPUT - bool "RF switch input support" if EMBEDDED + bool "RF switch input support" if EXPERT depends on RFKILL depends on INPUT = y || RFKILL = INPUT - default y if !EMBEDDED + default y if !EXPERT diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index d0ee29063e5d..1f1ef70f34f2 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,7 +95,7 @@ config CFG80211_DEBUGFS If unsure, say N. config CFG80211_INTERNAL_REGDB - bool "use statically compiled regulatory rules database" if EMBEDDED + bool "use statically compiled regulatory rules database" if EXPERT default n depends on CFG80211 ---help--- diff --git a/usr/Kconfig b/usr/Kconfig index 4780deac5974..65b845bd4e3e 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -46,7 +46,7 @@ config INITRAMFS_ROOT_GID If you are not sure, leave it set to "0". config RD_GZIP - bool "Support initial ramdisks compressed using gzip" if EMBEDDED + bool "Support initial ramdisks compressed using gzip" if EXPERT default y depends on BLK_DEV_INITRD select DECOMPRESS_GZIP @@ -55,8 +55,8 @@ config RD_GZIP If unsure, say Y. config RD_BZIP2 - bool "Support initial ramdisks compressed using bzip2" if EMBEDDED - default !EMBEDDED + bool "Support initial ramdisks compressed using bzip2" if EXPERT + default !EXPERT depends on BLK_DEV_INITRD select DECOMPRESS_BZIP2 help @@ -64,8 +64,8 @@ config RD_BZIP2 If unsure, say N. config RD_LZMA - bool "Support initial ramdisks compressed using LZMA" if EMBEDDED - default !EMBEDDED + bool "Support initial ramdisks compressed using LZMA" if EXPERT + default !EXPERT depends on BLK_DEV_INITRD select DECOMPRESS_LZMA help @@ -73,8 +73,8 @@ config RD_LZMA If unsure, say N. config RD_XZ - bool "Support initial ramdisks compressed using XZ" if EMBEDDED - default !EMBEDDED + bool "Support initial ramdisks compressed using XZ" if EXPERT + default !EXPERT depends on BLK_DEV_INITRD select DECOMPRESS_XZ help @@ -82,8 +82,8 @@ config RD_XZ If unsure, say N. config RD_LZO - bool "Support initial ramdisks compressed using LZO" if EMBEDDED - default !EMBEDDED + bool "Support initial ramdisks compressed using LZO" if EXPERT + default !EXPERT depends on BLK_DEV_INITRD select DECOMPRESS_LZO help -- cgit v1.2.3 From 20d9600cb407b0b55fef6ee814b60345c6f58264 Mon Sep 17 00:00:00 2001 From: David Dillow Date: Thu, 20 Jan 2011 14:44:22 -0800 Subject: fs/direct-io.c: don't try to allocate more than BIO_MAX_PAGES in a bio When using devices that support max_segments > BIO_MAX_PAGES (256), direct IO tries to allocate a bio with more pages than allowed, which leads to an oops in dio_bio_alloc(). Clamp the request to the supported maximum, and change dio_bio_alloc() to reflect that bio_alloc() will always return a bio when called with __GFP_WAIT and a valid number of vectors. [akpm@linux-foundation.org: remove redundant BUG_ON()] Signed-off-by: David Dillow Reviewed-by: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/direct-io.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 85882f6ba5f7..b044705eedd4 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -325,12 +325,16 @@ void dio_end_io(struct bio *bio, int error) } EXPORT_SYMBOL_GPL(dio_end_io); -static int +static void dio_bio_alloc(struct dio *dio, struct block_device *bdev, sector_t first_sector, int nr_vecs) { struct bio *bio; + /* + * bio_alloc() is guaranteed to return a bio when called with + * __GFP_WAIT and we request a valid number of vectors. + */ bio = bio_alloc(GFP_KERNEL, nr_vecs); bio->bi_bdev = bdev; @@ -342,7 +346,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, dio->bio = bio; dio->logical_offset_in_bio = dio->cur_page_fs_offset; - return 0; } /* @@ -583,8 +586,9 @@ static int dio_new_bio(struct dio *dio, sector_t start_sector) goto out; sector = start_sector << (dio->blkbits - 9); nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); + nr_pages = min(nr_pages, BIO_MAX_PAGES); BUG_ON(nr_pages <= 0); - ret = dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); + dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); dio->boundary = 0; out: return ret; -- cgit v1.2.3 From 99d86c8f1b7101d7c55dbf644b32bb1f0d7eb303 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Jan 2011 21:19:25 -0500 Subject: cifs: fix up CIFSSMBEcho for unaligned access Make sure that CIFSSMBEcho can handle unaligned fields. Also fix a minor bug that causes this warning: fs/cifs/cifssmb.c: In function 'CIFSSMBEcho': fs/cifs/cifssmb.c:740: warning: large integer implicitly truncated to unsigned type ...WordCount is u8, not __le16, so no need to convert it. This patch should apply cleanly on top of the rest of the patchset to clean up unaligned access. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 675041a6949c..3106f5e5c633 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -733,9 +733,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server) /* set up echo request */ smb->hdr.Tid = cpu_to_le16(0xffff); - smb->hdr.WordCount = cpu_to_le16(1); - smb->EchoCount = cpu_to_le16(1); - smb->ByteCount = cpu_to_le16(1); + smb->hdr.WordCount = 1; + put_unaligned_le16(1, &smb->EchoCount); + put_bcc_le(1, &smb->hdr); smb->Data[0] = 'a'; smb->hdr.smb_buf_length += 3; -- cgit v1.2.3 From 0ca7a5b9ac5d301845dd6382ff25a699b6263a81 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 21 Jan 2011 16:40:31 +0900 Subject: nilfs2: fix crash after one superblock became unavailable Fixes the following kernel oops in nilfs_setup_super() which could arise if one of two super-blocks is unavailable. > BUG: unable to handle kernel NULL pointer dereference at (null) > Pid: 3529, comm: mount.nilfs2 Not tainted 2.6.37 #1 / > EIP: 0060:[] EFLAGS: 00010202 CPU: 3 > EIP is at memcpy+0xc/0x1b > Call Trace: > [] ? nilfs_setup_super+0x6c/0xa5 [nilfs2] > [] ? nilfs_get_root_dentry+0x81/0xcb [nilfs2] > [] ? nilfs_mount+0x4f9/0x62c [nilfs2] > [] ? kstrdup+0x36/0x3f > [] ? nilfs_mount+0x0/0x62c [nilfs2] > [] ? vfs_kern_mount+0x4d/0x12c > [] ? get_fs_type+0x76/0x8f > [] ? do_kern_mount+0x33/0xbf > [] ? do_mount+0x2ed/0x714 > [] ? copy_mount_options+0x28/0xfc > [] ? sys_mount+0x72/0xaf > [] ? syscall_call+0x7/0xb Reported-by: Wakko Warner Signed-off-by: Ryusuke Konishi Tested-by: Wakko Warner Cc: stable [2.6.37, 2.6.36] LKML-Reference: <20110121024918.GA29598@animx.eu.org> --- fs/nilfs2/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0994f6a76c07..58fd707174e1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -704,7 +704,8 @@ skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); } -- cgit v1.2.3 From ff5fdb61493d95332945630fcae249f896098652 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 22 Jan 2011 20:16:06 -0800 Subject: fs: fix new dcache.c kernel-doc warnings Fix new fs/dcache.c kernel-doc warnings: Warning(fs/dcache.c:184): No description found for parameter 'dentry' Warning(fs/dcache.c:296): No description found for parameter 'parent' Warning(fs/dcache.c:1985): No description found for parameter 'dparent' Warning(fs/dcache.c:1985): Excess function parameter 'parent' description in 'd_validate' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: Nick Piggin Signed-off-by: Linus Torvalds --- fs/dcache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 9f493ee4dcba..2a6bd9a4ae97 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry) /** * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * @dentry: the target dentry * After this call, in-progress rcu-walk path lookup will fail. This * should be called after unhashing, and after changing d_inode (if * the dentry has not already been unhashed). @@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) /** * d_kill - kill dentry and return parent * @dentry: dentry to kill + * @parent: parent dentry * * The dentry must already be unhashed and removed from the LRU. * @@ -1973,7 +1975,7 @@ out: /** * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent - * @parent: The parent dentry (known to be valid) + * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. -- cgit v1.2.3 From 3f391c79b0686ce183668c6e2b7d02f3e716766c Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 22 Jan 2011 21:07:16 +0100 Subject: CIFS: Remove pointless variable assignment in cifs_dfs_do_automount() In fs/cifs/cifs_dfs_ref.c::cifs_dfs_do_automount() we have this code: ... mnt = ERR_PTR(-EINVAL); if (IS_ERR(tlink)) { mnt = ERR_CAST(tlink); goto free_full_path; } ses = tlink_tcon(tlink)->ses; rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cifs_put_tlink(tlink); mnt = ERR_PTR(-ENOENT); ... The assignment of 'mnt = ERR_PTR(-EINVAL);' is completely pointless. If we take the 'if (IS_ERR(tlink))' branch we'll set 'mnt' again and we'll also do so if we do not take the branch. There is no way we'll ever use 'mnt' with the assigned 'ERR_PTR(-EINVAL)' value, so we may as well just remove the pointless assignment. Signed-off-by: Jesper Juhl Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 7ed36536e754..f1c68629f277 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -297,7 +297,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); - mnt = ERR_PTR(-EINVAL); if (IS_ERR(tlink)) { mnt = ERR_CAST(tlink); goto free_full_path; -- cgit v1.2.3 From f1d0c998653f1eeec60ee6420e550135b62dbab4 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 22 Jan 2011 15:44:05 -0600 Subject: Make CIFS mount work in a container. Teach cifs about network namespaces, so mounting uses adresses/routing visible from the container rather than from init context. A container is a chroot on steroids that changes more than just the root filesystem the new processes see. One thing containers can isolate is "network namespaces", meaning each container can have its own set of ethernet interfaces, each with its own own IP address and routing to the outside world. And if you open a socket in _userspace_ from processes within such a container, this works fine. But sockets opened from within the kernel still use a single global networking context in a lot of places, meaning the new socket's address and routing are correct for PID 1 on the host, but are _not_ what userspace processes in the container get to use. So when you mount a network filesystem from within in a container, the mount code in the CIFS driver uses the host's networking context and not the container's networking context, so it gets the wrong address, uses the wrong routing, and may even try to go out an interface that the container can't even access... Bad stuff. This patch copies the mount process's network context into the CIFS structure that stores the rest of the server information for that mount point, and changes the socket open code to use the saved network context instead of the global network context. I.E. "when you attempt to use these addresses, do so relative to THIS set of network interfaces and routing rules, not the old global context from back before we supported containers". The big long HOWTO sets up a test environment on the assumption you've never used ocntainers before. It basically says: 1) configure and build a new kernel that has container support 2) build a new root filesystem that includes the userspace container control package (LXC) 3) package/run them under KVM (so you don't have to mess up your host system in order to play with containers). 4) set up some containers under the KVM system 5) set up contradictory routing in the KVM system and the container so that the host and the container see different things for the same address 6) try to mount a CIFS share from both contexts so you can both force it to work and force it to fail. For a long drawn out test reproduction sequence, see: http://landley.livejournal.com/47024.html http://landley.livejournal.com/47205.html http://landley.livejournal.com/47476.html Signed-off-by: Rob Landley Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 33 +++++++++++++++++++++++++++++++++ fs/cifs/connect.c | 12 ++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5bfb75346cb0..edd5b29b53c9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -166,6 +166,9 @@ struct TCP_Server_Info { struct socket *ssocket; struct sockaddr_storage dstaddr; struct sockaddr_storage srcaddr; /* locally bind to this IP */ +#ifdef CONFIG_NET_NS + struct net *net; +#endif wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; @@ -216,6 +219,36 @@ struct TCP_Server_Info { #endif }; +/* + * Macros to allow the TCP_Server_Info->net field and related code to drop out + * when CONFIG_NET_NS isn't set. + */ + +#ifdef CONFIG_NET_NS + +static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) +{ + return srv->net; +} + +static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) +{ + srv->net = net; +} + +#else + +static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) +{ + return &init_net; +} + +static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) +{ +} + +#endif + /* * Session structure. One of these for each uid session with a particular host */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 18d3c7724d6e..0cc3b81c2e84 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1568,6 +1568,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) + continue; + if (!match_address(server, addr, (struct sockaddr *)&vol->srcaddr)) continue; @@ -1598,6 +1601,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) return; } + put_net(cifs_net_ns(server)); + list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -1672,6 +1677,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) goto out_err; } + cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->hostname = extract_hostname(volume_info->UNC); if (IS_ERR(tcp_ses->hostname)) { rc = PTR_ERR(tcp_ses->hostname); @@ -1752,6 +1758,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info) out_err_crypto_release: cifs_crypto_shash_release(tcp_ses); + put_net(cifs_net_ns(tcp_ses)); + out_err: if (tcp_ses) { if (!IS_ERR(tcp_ses->hostname)) @@ -2263,8 +2271,8 @@ generic_ip_connect(struct TCP_Server_Info *server) } if (socket == NULL) { - rc = sock_create_kern(sfamily, SOCK_STREAM, - IPPROTO_TCP, &socket); + rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM, + IPPROTO_TCP, &socket, 1); if (rc < 0) { cERROR(1, "Error %d creating socket", rc); server->ssocket = NULL; -- cgit v1.2.3 From d66bbd441c08fe00ed2add1cf70cb243ebc2b27e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 21 Jan 2011 21:16:46 -0800 Subject: ceph: avoid picking MDS that is not active Ignore replication or auth frag data if it indicates an MDS that is not active. This can happen if the MDS shuts down and the client has stale data about the namespace distribution across the MDS cluster. If that's the case, fall back to directing the request based on the auth cap (which should always be accurate). Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 509339ceef72..a6949cc7c69a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (%d/%d)\n", inode, ceph_vinop(inode), - frag.frag, frag.mds, + frag.frag, mds, (int)r, frag.ndist); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } /* since this file/dir wasn't known to be @@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (auth)\n", inode, ceph_vinop(inode), frag.frag, mds); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } } } -- cgit v1.2.3 From 93c100c0b423266c0ee28497e90fdf27c05e6b8e Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 25 Jan 2011 19:28:43 +0000 Subject: [CIFS] Replace cifs md5 hashing functions with kernel crypto APIs Replace remaining use of md5 hash functions local to cifs module with kernel crypto APIs. Remove header and source file containing those local functions. Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/cifsencrypt.c | 1 - fs/cifs/link.c | 59 ++++++-- fs/cifs/md5.c | 366 -------------------------------------------------- fs/cifs/md5.h | 38 ------ fs/cifs/smbencrypt.c | 1 - 6 files changed, 51 insertions(+), 416 deletions(-) delete mode 100644 fs/cifs/md5.c delete mode 100644 fs/cifs/md5.h (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 43b19dd39191..e1322296cb69 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ - md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ + md4.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o cifs-$(CONFIG_CIFS_ACL) += cifsacl.o diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 66f3d50d0676..35bf329c90e1 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -24,7 +24,6 @@ #include "cifspdu.h" #include "cifsglob.h" #include "cifs_debug.h" -#include "md5.h" #include "cifs_unicode.h" #include "cifsproto.h" #include "ntlmssp.h" diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 306769de2fb5..d3444ea6ac71 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -28,7 +28,6 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" -#include "md5.h" #define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) #define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) @@ -46,6 +45,45 @@ md5_hash[8], md5_hash[9], md5_hash[10], md5_hash[11],\ md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] +static int +symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) +{ + int rc; + unsigned int size; + struct crypto_shash *md5; + struct sdesc *sdescmd5; + + md5 = crypto_alloc_shash("md5", 0, 0); + if (!md5 || IS_ERR(md5)) { + rc = PTR_ERR(md5); + cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc); + return rc; + } + size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); + sdescmd5 = kmalloc(size, GFP_KERNEL); + if (!sdescmd5) { + rc = -ENOMEM; + cERROR(1, "%s: Memory allocation failure\n", __func__); + goto symlink_hash_err; + } + sdescmd5->shash.tfm = md5; + sdescmd5->shash.flags = 0x0; + + rc = crypto_shash_init(&sdescmd5->shash); + if (rc) { + cERROR(1, "%s: Could not init md5 shash\n", __func__); + goto symlink_hash_err; + } + crypto_shash_update(&sdescmd5->shash, link_str, link_len); + rc = crypto_shash_final(&sdescmd5->shash, md5_hash); + +symlink_hash_err: + crypto_free_shash(md5); + kfree(sdescmd5); + + return rc; +} + static int CIFSParseMFSymlink(const u8 *buf, unsigned int buf_len, @@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf, unsigned int link_len; const char *md5_str1; const char *link_str; - struct MD5Context md5_ctx; u8 md5_hash[16]; char md5_str2[34]; @@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf, if (rc != 1) return -EINVAL; - cifs_MD5_init(&md5_ctx); - cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); - cifs_MD5_final(md5_hash, &md5_ctx); + rc = symlink_hash(link_len, link_str, md5_hash); + if (rc) { + cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); + return rc; + } snprintf(md5_str2, sizeof(md5_str2), CIFS_MF_SYMLINK_MD5_FORMAT, @@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf, static int CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) { + int rc; unsigned int link_len; unsigned int ofs; - struct MD5Context md5_ctx; u8 md5_hash[16]; if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) @@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -ENAMETOOLONG; - cifs_MD5_init(&md5_ctx); - cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); - cifs_MD5_final(md5_hash, &md5_ctx); + rc = symlink_hash(link_len, link_str, md5_hash); + if (rc) { + cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); + return rc; + } snprintf(buf, buf_len, CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c deleted file mode 100644 index 98b66a54c319..000000000000 --- a/fs/cifs/md5.c +++ /dev/null @@ -1,366 +0,0 @@ -/* - * This code implements the MD5 message-digest algorithm. - * The algorithm is due to Ron Rivest. This code was - * written by Colin Plumb in 1993, no copyright is claimed. - * This code is in the public domain; do with it what you wish. - * - * Equivalent code is available from RSA Data Security, Inc. - * This code has been tested against that, and is equivalent, - * except that you don't need to include two pages of legalese - * with every copy. - * - * To compute the message digest of a chunk of bytes, declare an - * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as - * needed on buffers full of bytes, and then call cifs_MD5_final, which - * will fill a supplied 16-byte array with the digest. - */ - -/* This code slightly modified to fit into Samba by - abartlet@samba.org Jun 2001 - and to fit the cifs vfs by - Steve French sfrench@us.ibm.com */ - -#include -#include "md5.h" - -static void MD5Transform(__u32 buf[4], __u32 const in[16]); - -/* - * Note: this code is harmless on little-endian machines. - */ -static void -byteReverse(unsigned char *buf, unsigned longs) -{ - __u32 t; - do { - t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 | - ((unsigned) buf[1] << 8 | buf[0]); - *(__u32 *) buf = t; - buf += 4; - } while (--longs); -} - -/* - * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious - * initialization constants. - */ -void -cifs_MD5_init(struct MD5Context *ctx) -{ - ctx->buf[0] = 0x67452301; - ctx->buf[1] = 0xefcdab89; - ctx->buf[2] = 0x98badcfe; - ctx->buf[3] = 0x10325476; - - ctx->bits[0] = 0; - ctx->bits[1] = 0; -} - -/* - * Update context to reflect the concatenation of another buffer full - * of bytes. - */ -void -cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) -{ - register __u32 t; - - /* Update bitcount */ - - t = ctx->bits[0]; - if ((ctx->bits[0] = t + ((__u32) len << 3)) < t) - ctx->bits[1]++; /* Carry from low to high */ - ctx->bits[1] += len >> 29; - - t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ - - /* Handle any leading odd-sized chunks */ - - if (t) { - unsigned char *p = (unsigned char *) ctx->in + t; - - t = 64 - t; - if (len < t) { - memmove(p, buf, len); - return; - } - memmove(p, buf, t); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (__u32 *) ctx->in); - buf += t; - len -= t; - } - /* Process data in 64-byte chunks */ - - while (len >= 64) { - memmove(ctx->in, buf, 64); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (__u32 *) ctx->in); - buf += 64; - len -= 64; - } - - /* Handle any remaining bytes of data. */ - - memmove(ctx->in, buf, len); -} - -/* - * Final wrapup - pad to 64-byte boundary with the bit pattern - * 1 0* (64-bit count of bits processed, MSB-first) - */ -void -cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx) -{ - unsigned int count; - unsigned char *p; - - /* Compute number of bytes mod 64 */ - count = (ctx->bits[0] >> 3) & 0x3F; - - /* Set the first char of padding to 0x80. This is safe since there is - always at least one byte free */ - p = ctx->in + count; - *p++ = 0x80; - - /* Bytes of padding needed to make 64 bytes */ - count = 64 - 1 - count; - - /* Pad out to 56 mod 64 */ - if (count < 8) { - /* Two lots of padding: Pad the first block to 64 bytes */ - memset(p, 0, count); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (__u32 *) ctx->in); - - /* Now fill the next block with 56 bytes */ - memset(ctx->in, 0, 56); - } else { - /* Pad block to 56 bytes */ - memset(p, 0, count - 8); - } - byteReverse(ctx->in, 14); - - /* Append length in bits and transform */ - ((__u32 *) ctx->in)[14] = ctx->bits[0]; - ((__u32 *) ctx->in)[15] = ctx->bits[1]; - - MD5Transform(ctx->buf, (__u32 *) ctx->in); - byteReverse((unsigned char *) ctx->buf, 4); - memmove(digest, ctx->buf, 16); - memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ -} - -/* The four core functions - F1 is optimized somewhat */ - -/* #define F1(x, y, z) (x & y | ~x & z) */ -#define F1(x, y, z) (z ^ (x & (y ^ z))) -#define F2(x, y, z) F1(z, x, y) -#define F3(x, y, z) (x ^ y ^ z) -#define F4(x, y, z) (y ^ (x | ~z)) - -/* This is the central step in the MD5 algorithm. */ -#define MD5STEP(f, w, x, y, z, data, s) \ - (w += f(x, y, z) + data, w = w<>(32-s), w += x) - -/* - * The core of the MD5 algorithm, this alters an existing MD5 hash to - * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks - * the data and converts bytes into longwords for this routine. - */ -static void -MD5Transform(__u32 buf[4], __u32 const in[16]) -{ - register __u32 a, b, c, d; - - a = buf[0]; - b = buf[1]; - c = buf[2]; - d = buf[3]; - - MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); - - MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); - - MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); - MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); - - MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); - - buf[0] += a; - buf[1] += b; - buf[2] += c; - buf[3] += d; -} - -#if 0 /* currently unused */ -/*********************************************************************** - the rfc 2104 version of hmac_md5 initialisation. -***********************************************************************/ -static void -hmac_md5_init_rfc2104(unsigned char *key, int key_len, - struct HMACMD5Context *ctx) -{ - int i; - - /* if key is longer than 64 bytes reset it to key=MD5(key) */ - if (key_len > 64) { - unsigned char tk[16]; - struct MD5Context tctx; - - cifs_MD5_init(&tctx); - cifs_MD5_update(&tctx, key, key_len); - cifs_MD5_final(tk, &tctx); - - key = tk; - key_len = 16; - } - - /* start out by storing key in pads */ - memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); - memset(ctx->k_opad, 0, sizeof(ctx->k_opad)); - memcpy(ctx->k_ipad, key, key_len); - memcpy(ctx->k_opad, key, key_len); - - /* XOR key with ipad and opad values */ - for (i = 0; i < 64; i++) { - ctx->k_ipad[i] ^= 0x36; - ctx->k_opad[i] ^= 0x5c; - } - - cifs_MD5_init(&ctx->ctx); - cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64); -} -#endif - -/*********************************************************************** - the microsoft version of hmac_md5 initialisation. -***********************************************************************/ -void -hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, - struct HMACMD5Context *ctx) -{ - int i; - - /* if key is longer than 64 bytes truncate it */ - if (key_len > 64) - key_len = 64; - - /* start out by storing key in pads */ - memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); - memset(ctx->k_opad, 0, sizeof(ctx->k_opad)); - memcpy(ctx->k_ipad, key, key_len); - memcpy(ctx->k_opad, key, key_len); - - /* XOR key with ipad and opad values */ - for (i = 0; i < 64; i++) { - ctx->k_ipad[i] ^= 0x36; - ctx->k_opad[i] ^= 0x5c; - } - - cifs_MD5_init(&ctx->ctx); - cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64); -} - -/*********************************************************************** - update hmac_md5 "inner" buffer -***********************************************************************/ -void -hmac_md5_update(const unsigned char *text, int text_len, - struct HMACMD5Context *ctx) -{ - cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */ -} - -/*********************************************************************** - finish off hmac_md5 "inner" buffer and generate outer one. -***********************************************************************/ -void -hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx) -{ - struct MD5Context ctx_o; - - cifs_MD5_final(digest, &ctx->ctx); - - cifs_MD5_init(&ctx_o); - cifs_MD5_update(&ctx_o, ctx->k_opad, 64); - cifs_MD5_update(&ctx_o, digest, 16); - cifs_MD5_final(digest, &ctx_o); -} - -/*********************************************************** - single function to calculate an HMAC MD5 digest from data. - use the microsoft hmacmd5 init method because the key is 16 bytes. -************************************************************/ -#if 0 /* currently unused */ -static void -hmac_md5(unsigned char key[16], unsigned char *data, int data_len, - unsigned char *digest) -{ - struct HMACMD5Context ctx; - hmac_md5_init_limK_to_64(key, 16, &ctx); - if (data_len != 0) - hmac_md5_update(data, data_len, &ctx); - - hmac_md5_final(digest, &ctx); -} -#endif diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h deleted file mode 100644 index 6fba8cb402fd..000000000000 --- a/fs/cifs/md5.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef MD5_H -#define MD5_H -#ifndef HEADER_MD5_H -/* Try to avoid clashes with OpenSSL */ -#define HEADER_MD5_H -#endif - -struct MD5Context { - __u32 buf[4]; - __u32 bits[2]; - unsigned char in[64]; -}; -#endif /* !MD5_H */ - -#ifndef _HMAC_MD5_H -struct HMACMD5Context { - struct MD5Context ctx; - unsigned char k_ipad[65]; - unsigned char k_opad[65]; -}; -#endif /* _HMAC_MD5_H */ - -void cifs_MD5_init(struct MD5Context *context); -void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf, - unsigned len); -void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context); - -/* The following definitions come from lib/hmacmd5.c */ - -/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len, - struct HMACMD5Context *ctx);*/ -void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, - struct HMACMD5Context *ctx); -void hmac_md5_update(const unsigned char *text, int text_len, - struct HMACMD5Context *ctx); -void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx); -/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len, - unsigned char *digest);*/ diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 192ea51af20f..30135005e4f3 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -32,7 +32,6 @@ #include "cifs_unicode.h" #include "cifspdu.h" #include "cifsglob.h" -#include "md5.h" #include "cifs_debug.h" #include "cifsencrypt.h" -- cgit v1.2.3 From 72432ffcf555decbbae47f1be338e1d2f210aa69 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 24 Jan 2011 14:16:35 -0500 Subject: CIFS: Implement cifs_strict_writev (try #4) If we don't have Exclusive oplock we write a data to the server. Also set invalidate_mapping flag on the inode if we wrote something to the server. Add cifs_iovec_write to let the client write iovec buffers through CIFSSMBWrite2. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 15 ++-- fs/cifs/cifsfs.h | 4 +- fs/cifs/cifsproto.h | 2 + fs/cifs/file.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 217 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a8323f1dc1c4..f2970136d17d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -600,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; ssize_t written; + int rc; written = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (!CIFS_I(inode)->clientCanCacheAll) - filemap_fdatawrite(inode->i_mapping); + + if (CIFS_I(inode)->clientCanCacheAll) + return written; + + rc = filemap_fdatawrite(inode->i_mapping); + if (rc) + cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode); + return written; } @@ -737,7 +744,7 @@ const struct file_operations cifs_file_strict_ops = { .read = do_sync_read, .write = do_sync_write, .aio_read = cifs_strict_readv, - .aio_write = cifs_file_aio_write, + .aio_write = cifs_strict_writev, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -793,7 +800,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, .aio_read = cifs_strict_readv, - .aio_write = cifs_file_aio_write, + .aio_write = cifs_strict_writev, .open = cifs_open, .release = cifs_close, .fsync = cifs_strict_fsync, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index f23206d46531..14789a97304e 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -85,7 +85,9 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data, extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, - size_t write_size, loff_t *poffset); + size_t write_size, loff_t *poffset); +extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, int); extern int cifs_strict_fsync(struct file *, int); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 982895fa7615..35c989f4924f 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -85,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); extern bool is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); +extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, + unsigned int bytes_written); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern unsigned int smbCalcSize(struct smb_hdr *ptr); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d7d65a70678e..0de17c1db608 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -848,7 +848,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) } /* update the file size (if needed) after a write */ -static void +void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written) { @@ -1619,6 +1619,206 @@ int cifs_flush(struct file *file, fl_owner_t id) return rc; } +static int +cifs_write_allocate_pages(struct page **pages, unsigned long num_pages) +{ + int rc = 0; + unsigned long i; + + for (i = 0; i < num_pages; i++) { + pages[i] = alloc_page(__GFP_HIGHMEM); + if (!pages[i]) { + /* + * save number of pages we have already allocated and + * return with ENOMEM error + */ + num_pages = i; + rc = -ENOMEM; + goto error; + } + } + + return rc; + +error: + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + return rc; +} + +static inline +size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) +{ + size_t num_pages; + size_t clen; + + clen = min_t(const size_t, len, wsize); + num_pages = clen / PAGE_CACHE_SIZE; + if (clen % PAGE_CACHE_SIZE) + num_pages++; + + if (cur_len) + *cur_len = clen; + + return num_pages; +} + +static ssize_t +cifs_iovec_write(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *poffset) +{ + size_t total_written = 0, written = 0; + unsigned long num_pages, npages; + size_t copied, len, cur_len, i; + struct kvec *to_send; + struct page **pages; + struct iov_iter it; + struct inode *inode; + struct cifsFileInfo *open_file; + struct cifsTconInfo *pTcon; + struct cifs_sb_info *cifs_sb; + int xid, rc; + + len = iov_length(iov, nr_segs); + if (!len) + return 0; + + rc = generic_write_checks(file, poffset, &len, 0); + if (rc) + return rc; + + cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + num_pages = get_numpages(cifs_sb->wsize, len, &cur_len); + + pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL); + if (!to_send) { + kfree(pages); + return -ENOMEM; + } + + rc = cifs_write_allocate_pages(pages, num_pages); + if (rc) { + kfree(pages); + kfree(to_send); + return rc; + } + + xid = GetXid(); + open_file = file->private_data; + pTcon = tlink_tcon(open_file->tlink); + inode = file->f_path.dentry->d_inode; + + iov_iter_init(&it, iov, nr_segs, len, 0); + npages = num_pages; + + do { + size_t save_len = cur_len; + for (i = 0; i < npages; i++) { + copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE); + copied = iov_iter_copy_from_user(pages[i], &it, 0, + copied); + cur_len -= copied; + iov_iter_advance(&it, copied); + to_send[i+1].iov_base = kmap(pages[i]); + to_send[i+1].iov_len = copied; + } + + cur_len = save_len - cur_len; + + do { + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, false); + if (rc != 0) + break; + } + rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, + cur_len, *poffset, &written, + to_send, npages, 0); + } while (rc == -EAGAIN); + + for (i = 0; i < npages; i++) + kunmap(pages[i]); + + if (written) { + len -= written; + total_written += written; + cifs_update_eof(CIFS_I(inode), *poffset, written); + *poffset += written; + } else if (rc < 0) { + if (!total_written) + total_written = rc; + break; + } + + /* get length and number of kvecs of the next write */ + npages = get_numpages(cifs_sb->wsize, len, &cur_len); + } while (len > 0); + + if (total_written > 0) { + spin_lock(&inode->i_lock); + if (*poffset > inode->i_size) + i_size_write(inode, *poffset); + spin_unlock(&inode->i_lock); + } + + cifs_stats_bytes_written(pTcon, total_written); + mark_inode_dirty_sync(inode); + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + kfree(to_send); + kfree(pages); + FreeXid(xid); + return total_written; +} + +static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + ssize_t written; + struct inode *inode; + + inode = iocb->ki_filp->f_path.dentry->d_inode; + + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. + */ + + written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); + if (written > 0) { + CIFS_I(inode)->invalid_mapping = true; + iocb->ki_pos = pos; + } + + return written; +} + +ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode; + + inode = iocb->ki_filp->f_path.dentry->d_inode; + + if (CIFS_I(inode)->clientCanCacheAll) + return generic_file_aio_write(iocb, iov, nr_segs, pos); + + /* + * In strict cache mode we need to write the data to the server exactly + * from the pos to pos+len-1 rather than flush all affected pages + * because it may cause a error with mandatory locks on these pages but + * not on the region from pos to ppos+len-1. + */ + + return cifs_user_writev(iocb, iov, nr_segs, pos); +} + static ssize_t cifs_iovec_read(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) -- cgit v1.2.3 From d39454ffe4a3c85428483b8a8a8e5e797b6363d5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 24 Jan 2011 14:16:35 -0500 Subject: CIFS: Add strictcache mount option Use for switching on strict cache mode. In this mode the client reads from the cache all the time it has Oplock Level II, otherwise - read from the server. As for write - the client stores a data in the cache in Exclusive Oplock case, otherwise - write directly to the server. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/README | 5 +++++ fs/cifs/connect.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/cifs/README b/fs/cifs/README index 46af99ab3614..fe1683590828 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -452,6 +452,11 @@ A partial list of the supported mount options follows: if oplock (caching token) is granted and held. Note that direct allows write operations larger than page size to be sent to the server. + strictcache Use for switching on strict cache mode. In this mode the + client read from the cache all the time it has Oplock Level II, + otherwise - read from the server. All written data are stored + in the cache, but if the client doesn't have Exclusive Oplock, + it writes the data to the server. acl Allow setfacl and getfacl to manage posix ACLs if server supports them. (default) noacl Do not allow setfacl and getfacl calls on this mount diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0cc3b81c2e84..47034af67b09 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -87,6 +87,7 @@ struct smb_vol { bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ bool server_ino:1; /* use inode numbers from server ie UniqueId */ bool direct_io:1; + bool strict_io:1; /* strict cache behavior */ bool remap:1; /* set to remap seven reserved chars in filenames */ bool posix_paths:1; /* unset to not ask for posix pathnames. */ bool no_linux_ext:1; @@ -1344,6 +1345,8 @@ cifs_parse_mount_options(char *options, const char *devname, vol->direct_io = 1; } else if (strnicmp(data, "forcedirectio", 13) == 0) { vol->direct_io = 1; + } else if (strnicmp(data, "strictcache", 11) == 0) { + vol->strict_io = 1; } else if (strnicmp(data, "noac", 4) == 0) { printk(KERN_WARNING "CIFS: Mount option noac not " "supported. Instead set " @@ -2584,6 +2587,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, if (pvolume_info->multiuser) cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_NO_PERM); + if (pvolume_info->strict_io) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; if (pvolume_info->direct_io) { cFYI(1, "mounting share using direct i/o"); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; -- cgit v1.2.3 From ad3d2eedf0ed3611f5f86b9e4d0d15cc76c63465 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 17 Jan 2011 18:41:50 +0000 Subject: NFS4: Avoid potential NULL pointer dereference in decode_and_add_ds(). On Mon, 17 Jan 2011, Mi Jinlong wrote: > > > Jesper Juhl: > > strrchr() can return NULL if nothing is found. If this happens we'll > > dereference a NULL pointer in > > fs/nfs/nfs4filelayoutdev.c::decode_and_add_ds(). > > > > I tried to find some other code that guarantees that this can never > > happen but I was unsuccessful. So, unless someone else can point to some > > code that ensures this can never be a problem, I believe this patch is > > needed. > > > > While I was changing this code I also noticed that all the dprintk() > > statements, except one, start with "%s:". The one missing the ":" I added > > it to. > > Maybe another one also should be changed at decode_and_add_ds() at line 243: > > 243 printk("%s Decoded address and port %s\n", __func__, buf); > Missed that one. Thanks. Signed-off-by: Jesper Juhl Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 51fe64ace55a..f5c9b125e8cc 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -214,7 +214,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) /* ipv6 length plus port is legal */ if (rlen > INET6_ADDRSTRLEN + 8) { - dprintk("%s Invalid address, length %d\n", __func__, + dprintk("%s: Invalid address, length %d\n", __func__, rlen); goto out_err; } @@ -225,6 +225,11 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) /* replace the port dots with dashes for the in4_pton() delimiter*/ for (i = 0; i < 2; i++) { char *res = strrchr(buf, '.'); + if (!res) { + dprintk("%s: Failed finding expected dots in port\n", + __func__); + goto out_free; + } *res = '-'; } @@ -240,7 +245,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) port = htons((tmp[0] << 8) | (tmp[1])); ds = nfs4_pnfs_ds_add(inode, ip_addr, port); - dprintk("%s Decoded address and port %s\n", __func__, buf); + dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); out_err: -- cgit v1.2.3 From 839f7ad6932d95f4d5ae7267b95c574714ff3d5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 15:54:57 +0000 Subject: NFS: Fix "kernel BUG at fs/aio.c:554!" Nick Piggin reports: > I'm getting use after frees in aio code in NFS > > [ 2703.396766] Call Trace: > [ 2703.396858] [] ? native_sched_clock+0x27/0x80 > [ 2703.396959] [] ? put_lock_stats+0xe/0x40 > [ 2703.397058] [] ? lock_release_holdtime+0xa8/0x140 > [ 2703.397159] [] lock_acquire+0x95/0x1b0 > [ 2703.397260] [] ? aio_put_req+0x2b/0x60 > [ 2703.397361] [] ? get_parent_ip+0x11/0x50 > [ 2703.397464] [] _raw_spin_lock_irq+0x41/0x80 > [ 2703.397564] [] ? aio_put_req+0x2b/0x60 > [ 2703.397662] [] aio_put_req+0x2b/0x60 > [ 2703.397761] [] do_io_submit+0x2be/0x7c0 > [ 2703.397895] [] sys_io_submit+0xb/0x10 > [ 2703.397995] [] system_call_fastpath+0x16/0x1b > > Adding some tracing, it is due to nfs completing the request then > returning something other than -EIOCBQUEUED, so aio.c > also completes the request. To address this, prevent the NFS direct I/O engine from completing async iocbs when the forward path returns an error without starting any I/O. This fix appears to survive ^C during both "xfstest no. 208" and "fsx -Z." It's likely this bug has existed for a very long while, as we are seeing very similar symptoms in OEL 5. Copying stable. Cc: Stable Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e6ace0d93c71..9943a75bb6d1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + /* + * If no bytes were started, return the error, and let the + * generic layer handle the completion. + */ + if (requested_bytes == 0) { + nfs_direct_req_release(dreq); + return result < 0 ? result : -EIO; + } + if (put_dreq(dreq)) nfs_direct_complete(dreq); - - if (requested_bytes != 0) - return 0; - - if (result < 0) - return result; - return -EIO; + return 0; } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, @@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + /* + * If no bytes were started, return the error, and let the + * generic layer handle the completion. + */ + if (requested_bytes == 0) { + nfs_direct_req_release(dreq); + return result < 0 ? result : -EIO; + } + if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); - - if (requested_bytes != 0) - return 0; - - if (result < 0) - return result; - return -EIO; + return 0; } static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, -- cgit v1.2.3 From ee5dc7732bd557bae6d10873a0aac606d2c551fb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 03:05:18 +0000 Subject: NFS: Fix "kernel BUG at fs/nfs/nfs3xdr.c:1338!" Milan Broz reports: > on today Linus' tree I get OOps if using nfs. > > server (2.6.36) exports dir: > /dir 172.16.1.0/24(rw,async,all_squash,no_subtree_check,anonuid=500,anongid=500) > > on client it is mounted in fstab > server:/dir /mnt/tst nfs rw,soft 0 0 > > and these commands OOpses it (simplified from a configure script): > > cd /dir > touch x > install x y > > [ 105.327701] ------------[ cut here ]------------ > [ 105.327979] kernel BUG at fs/nfs/nfs3xdr.c:1338! > [ 105.328075] invalid opcode: 0000 [#1] PREEMPT SMP > [ 105.328223] last sysfs file: /sys/devices/virtual/bdi/0:16/uevent > [ 105.328349] Modules linked in: usbcore dm_mod > [ 105.328553] > [ 105.328678] Pid: 3710, comm: install Not tainted 2.6.37+ #423 440BX Desktop Reference Platform/VMware Virtual Platform > [ 105.328853] EIP: 0060:[] EFLAGS: 00010282 CPU: 0 > [ 105.329152] EIP is at nfs3_xdr_enc_setacl3args+0x61/0x98 > [ 105.329249] EAX: ffffffea EBX: ce941d98 ECX: 00000000 EDX: 00000004 > [ 105.329340] ESI: ce941cd0 EDI: 000000a4 EBP: ce941cc0 ESP: ce941cb4 > [ 105.329431] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 > [ 105.329525] Process install (pid: 3710, ti=ce940000 task=ced36f20 task.ti=ce940000) > [ 105.336600] Stack: > [ 105.336693] ce941cd0 ce9dc000 00000000 ce941cf8 c12ecd02 c12f43e0 c116c00b cf754158 > [ 105.336982] ce9dc004 cf754284 ce9dc004 cf7ffee8 ceff9978 ce9dc000 cf7ffee8 ce9dc000 > [ 105.337182] ce9dc000 ce941d14 c12e698d cf75412c ce941d98 cf7ffee8 cf7fff20 00000000 > [ 105.337405] Call Trace: > [ 105.337695] [] rpcauth_wrap_req+0x75/0x7f > [ 105.337806] [] ? xdr_encode_opaque+0x12/0x15 > [ 105.337898] [] ? nfs3_xdr_enc_setacl3args+0x0/0x98 > [ 105.337988] [] call_transmit+0x17e/0x1e8 > [ 105.338072] [] __rpc_execute+0x6d/0x1a6 > [ 105.338155] [] rpc_execute+0x34/0x37 > [ 105.338235] [] rpc_run_task+0xb5/0xbd > [ 105.338316] [] rpc_call_sync+0x3d/0x58 > [ 105.338402] [] nfs3_proc_setacls+0x18e/0x24f > [ 105.338493] [] ? __kmalloc+0x148/0x1c4 > [ 105.338579] [] ? posix_acl_alloc+0x12/0x22 > [ 105.338665] [] nfs3_proc_setacl+0xa0/0xca > [ 105.338748] [] nfs3_setxattr+0x62/0x88 > [ 105.338834] [] ? sub_preempt_count+0x7c/0x89 > [ 105.338926] [] ? nfs3_setxattr+0x0/0x88 > [ 105.339026] [] __vfs_setxattr_noperm+0x26/0x95 > [ 105.339114] [] vfs_setxattr+0x5b/0x76 > [ 105.339211] [] setxattr+0x9d/0xc3 > [ 105.339298] [] ? handle_pte_fault+0x258/0x5cb > [ 105.339428] [] ? __free_pages+0x1a/0x23 > [ 105.339517] [] ? up_read+0x16/0x2c > [ 105.339599] [] ? fget+0x0/0xa3 > [ 105.339677] [] ? fget+0x0/0xa3 > [ 105.339760] [] ? get_parent_ip+0xb/0x31 > [ 105.339843] [] ? sub_preempt_count+0x7c/0x89 > [ 105.339931] [] sys_fsetxattr+0x51/0x79 > [ 105.340014] [] sysenter_do_call+0x12/0x32 > [ 105.340133] Code: 2e 76 18 00 58 31 d2 8b 7f 28 f6 43 04 01 74 03 8b 53 08 6a 00 8b 46 04 6a 01 8b 0b 52 89 fa e8 85 10 f8 ff 83 c4 0c 85 c0 79 04 <0f> 0b eb fe 31 c9 f6 43 04 04 74 03 8b 4b 0c 68 00 10 00 00 8d > [ 105.350321] EIP: [] nfs3_xdr_enc_setacl3args+0x61/0x98 SS:ESP 0068:ce941cb4 > [ 105.364385] ---[ end trace 01fcfe7f0f7f6e4a ]--- nfs3_xdr_enc_setacl3args() is not properly setting up the target buffer before nfsacl_encode() attempts to encode the ACL. Introduced by commit d9c407b1 "NFS: Introduce new-style XDR encoding functions for NFSv3." Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 01c5e8b1941d..183c6b123d0f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, encode_nfs_fh3(xdr, NFS_FH(args->inode)); encode_uint32(xdr, args->mask); + + base = req->rq_slen; if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); + else + xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); - base = req->rq_slen; error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? args->acl_access : NULL, 1, 0); -- cgit v1.2.3 From 731f3f482ad3b2c58a1af2d0a9a634a82803706a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 03:05:28 +0000 Subject: NFS: nfsacl_{encode,decode} should return signed integer Clean up. The nfsacl_encode() and nfsacl_decode() functions return negative errno values, and each call site verifies that the returned value is not negative. Change the synopsis of both of these functions to reflect this usage. Document the synopsis and return values. Reported-by: Trond Myklebust Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs_common/nfsacl.c | 32 ++++++++++++++++++++++++++------ include/linux/nfsacl.h | 4 ++-- 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index fc1c52571c03..a3e78bd18679 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -72,9 +72,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) return 0; } -unsigned int -nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, - struct posix_acl *acl, int encode_entries, int typeflag) +/** + * nfsacl_encode - Encode an NFSv3 ACL + * + * @buf: destination xdr_buf to contain XDR encoded ACL + * @base: byte offset in xdr_buf where XDR'd ACL begins + * @inode: inode of file whose ACL this is + * @acl: posix_acl to encode + * @encode_entries: whether to encode ACEs as well + * @typeflag: ACL type: NFS_ACL_DEFAULT or zero + * + * Returns size of encoded ACL in bytes or a negative errno value. + */ +int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag) { int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; struct nfsacl_encode_desc nfsacl_desc = { @@ -224,9 +235,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl) return 0; } -unsigned int -nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, - struct posix_acl **pacl) +/** + * nfsacl_decode - Decode an NFSv3 ACL + * + * @buf: xdr_buf containing XDR'd ACL data to decode + * @base: byte offset in xdr_buf where XDR'd ACL begins + * @aclcnt: count of ACEs in decoded posix_acl + * @pacl: buffer in which to place decoded posix_acl + * + * Returns the length of the decoded ACL in bytes, or a negative errno value. + */ +int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl) { struct nfsacl_decode_desc nfsacl_desc = { .desc = { diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index f321b578edeb..fabcb1e5c460 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -51,10 +51,10 @@ nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default) return w; } -extern unsigned int +extern int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, struct posix_acl *acl, int encode_entries, int typeflag); -extern unsigned int +extern int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, struct posix_acl **pacl); -- cgit v1.2.3 From f61f6da0d53842e849bab7f69e1431bd3de1136d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 03:05:38 +0000 Subject: NFS: Prevent memory allocation failure in nfsacl_encode() nfsacl_encode() allocates memory in certain cases. This of course is not guaranteed to work. Since commit 9f06c719 "SUNRPC: New xdr_streams XDR encoder API", the kernel's XDR encoders can't return a result indicating possibly a failure, so a memory allocation failure in nfsacl_encode() has become fatal (ie, the XDR code Oopses) in some cases. However, the allocated memory is a tiny fixed amount, on the order of 40-50 bytes. We can easily use a stack-allocated buffer for this, with only a wee bit of nose-holding. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 4 ++-- fs/nfs_common/nfsacl.c | 22 +++++++++++++++------- fs/posix_acl.c | 17 +++++++++++++---- include/linux/posix_acl.h | 1 + 4 files changed, 31 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9f88c5f4c7e2..274342771655 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, if (!nfs_server_capable(inode, NFS_CAP_ACLS)) goto out; - /* We are doing this here, because XDR marshalling can only - return -ENOMEM. */ + /* We are doing this here because XDR marshalling does not + * return any results, it BUGs. */ status = -ENOSPC; if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) goto out; diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index a3e78bd18679..84c27d69d421 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -42,6 +42,11 @@ struct nfsacl_encode_desc { gid_t gid; }; +struct nfsacl_simple_acl { + struct posix_acl acl; + struct posix_acl_entry ace[4]; +}; + static int xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) { @@ -99,17 +104,22 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, .uid = inode->i_uid, .gid = inode->i_gid, }; + struct nfsacl_simple_acl aclbuf; int err; - struct posix_acl *acl2 = NULL; if (entries > NFS_ACL_MAX_ENTRIES || xdr_encode_word(buf, base, entries)) return -EINVAL; if (encode_entries && acl && acl->a_count == 3) { - /* Fake up an ACL_MASK entry. */ - acl2 = posix_acl_alloc(4, GFP_KERNEL); - if (!acl2) - return -ENOMEM; + struct posix_acl *acl2 = &aclbuf.acl; + + /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is + * invoked in contexts where a memory allocation failure is + * fatal. Fortunately this fake ACL is small enough to + * construct on the stack. */ + memset(acl2, 0, sizeof(acl2)); + posix_acl_init(acl2, 4); + /* Insert entries in canonical order: other orders seem to confuse Solaris VxFS. */ acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ @@ -120,8 +130,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, nfsacl_desc.acl = acl2; } err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); - if (acl2) - posix_acl_release(acl2); if (!err) err = 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 39df95a0ec25..b1cf6bf4b41d 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -22,6 +22,7 @@ #include +EXPORT_SYMBOL(posix_acl_init); EXPORT_SYMBOL(posix_acl_alloc); EXPORT_SYMBOL(posix_acl_clone); EXPORT_SYMBOL(posix_acl_valid); @@ -31,6 +32,16 @@ EXPORT_SYMBOL(posix_acl_create_masq); EXPORT_SYMBOL(posix_acl_chmod_masq); EXPORT_SYMBOL(posix_acl_permission); +/* + * Init a fresh posix_acl + */ +void +posix_acl_init(struct posix_acl *acl, int count) +{ + atomic_set(&acl->a_refcount, 1); + acl->a_count = count; +} + /* * Allocate a new ACL with the specified number of entries. */ @@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags) const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); struct posix_acl *acl = kmalloc(size, flags); - if (acl) { - atomic_set(&acl->a_refcount, 1); - acl->a_count = count; - } + if (acl) + posix_acl_init(acl, count); return acl; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index d68283a898bb..54211c1cd926 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,6 +71,7 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ +extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t); extern int posix_acl_valid(const struct posix_acl *); -- cgit v1.2.3 From 80c30e8de4f81851b1f712bcc596e11d53bc76f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 24 Jan 2011 20:50:26 +0000 Subject: NLM: Fix "kernel BUG at fs/lockd/host.c:417!" or ".../host.c:283!" Nick Bowler reports: > We were just having some NFS server troubles, and my client machine > running 2.6.38-rc1+ (specifically, commit 2b1caf6ed7b888c95) crashed > hard (syslog output appended to this mail). > > I'm not sure what the exact timeline was or how to reproduce this, > but the server was rebooted during all this. Since I've never seen > this happen before, it is possibly a regression from previous kernel > releases. However, I recently updated my nfs-utils (on the client) to > version 1.2.3, so that might be related as well. [ BUG output redacted ] When done searching, the for_each_host loop in next_host_state() falls through and returns the final host on the host chain without bumping it's reference count. Since the host's ref count is only one at that point, releasing the host in nlm_host_rebooted() attempts to destroy the host prematurely, and therefore hits a BUG(). Likely, the original intent of the for_each_host behavior in next_host_state() was to handle the case when the host chain is empty. Searching the chain and finding no suitable host to return needs to be handled as well. Defensively restructure next_host_state() always to return NULL when the loop falls through. Introduced by commit b10e30f6 "lockd: reorganize nlm_host_rebooted". Cc: J. Bruce Fields Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 5f1bcb2f06f3..b7c99bfb3da6 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, struct nsm_handle *nsm, const struct nlm_reboot *info) { - struct nlm_host *host = NULL; + struct nlm_host *host; struct hlist_head *chain; struct hlist_node *pos; @@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, host->h_state++; nlm_get_host(host); - goto out; + mutex_unlock(&nlm_host_mutex); + return host; } } -out: + mutex_unlock(&nlm_host_mutex); - return host; + return NULL; } /** -- cgit v1.2.3 From 778be232a207e79088ba70d832ac25dfea6fbf1a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:01 +0000 Subject: NFS do not find client in NFSv4 pg_authenticate The information required to find the nfs_client cooresponding to the incoming back channel request is contained in the NFS layer. Perform minimal checking in the RPC layer pg_authenticate method, and push more detailed checking into the NFS layer where the nfs_client can be found. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 109 +++++++++++----------------------------- fs/nfs/callback.h | 4 +- fs/nfs/callback_proc.c | 10 +--- fs/nfs/callback_xdr.c | 5 +- fs/nfs/client.c | 15 ++---- fs/nfs/internal.h | 3 +- fs/nfs/nfs4state.c | 6 --- include/linux/sunrpc/bc_xprt.h | 13 ----- include/linux/sunrpc/svc_xprt.h | 1 - net/sunrpc/svcsock.c | 4 +- 10 files changed, 42 insertions(+), 128 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 199016528fcb..e3d294269058 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -134,33 +134,6 @@ out_err: } #if defined(CONFIG_NFS_V4_1) -/* - * * CB_SEQUENCE operations will fail until the callback sessionid is set. - * */ -int nfs4_set_callback_sessionid(struct nfs_client *clp) -{ - struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv; - struct nfs4_sessionid *bc_sid; - - if (!serv->sv_bc_xprt) - return -EINVAL; - - /* on success freed in xprt_free */ - bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL); - if (!bc_sid) - return -ENOMEM; - memcpy(bc_sid->data, &clp->cl_session->sess_id.data, - NFS4_MAX_SESSIONID_LEN); - spin_lock_bh(&serv->sv_cb_lock); - serv->sv_bc_xprt->xpt_bc_sid = bc_sid; - spin_unlock_bh(&serv->sv_cb_lock); - dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__, - ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1], - ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3], - serv->sv_bc_xprt); - return 0; -} - /* * The callback service for NFSv4.1 callbacks */ @@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct nfs_callback_data *cb_info) { } -int nfs4_set_callback_sessionid(struct nfs_client *clp) -{ - return 0; -} #endif /* CONFIG_NFS_V4_1 */ /* @@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion) mutex_unlock(&nfs_callback_mutex); } -static int check_gss_callback_principal(struct nfs_client *clp, - struct svc_rqst *rqstp) +/* Boolean check of RPC_AUTH_GSS principal */ +int +check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { struct rpc_clnt *r = clp->cl_rpcclient; char *p = svc_gss_principal(rqstp); + if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) + return 1; + /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ if (clp->cl_minorversion != 0) - return SVC_DROP; + return 0; /* * It might just be a normal user principal, in which case * userspace won't bother to tell us the name at all. */ if (p == NULL) - return SVC_DENIED; + return 0; /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) - return SVC_DENIED; + return 0; p += 4; if (strcmp(p, r->cl_server) != 0) - return SVC_DENIED; - return SVC_OK; + return 0; + return 1; } -/* pg_authenticate method helper */ -static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp) -{ - struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp); - int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0; - - dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc); - if (svc_is_backchannel(rqstp)) - /* Sessionid (usually) set after CB_NULL ping */ - return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid, - is_cb_compound); - else - /* No callback identifier in pg_authenticate */ - return nfs4_find_client_no_ident(svc_addr(rqstp)); -} - -/* pg_authenticate method for nfsv4 callback threads. */ +/* + * pg_authenticate method for nfsv4 callback threads. + * + * The authflavor has been negotiated, so an incorrect flavor is a server + * bug. Drop packets with incorrect authflavor. + * + * All other checking done after NFS decoding where the nfs_client can be + * found in nfs4_callback_compound + */ static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct nfs_client *clp; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - int ret = SVC_OK; - - /* Don't talk to strangers */ - clp = nfs_cb_find_client(rqstp); - if (clp == NULL) - return SVC_DROP; - - dprintk("%s: %s NFSv4 callback!\n", __func__, - svc_print_addr(rqstp, buf, sizeof(buf))); - switch (rqstp->rq_authop->flavour) { - case RPC_AUTH_NULL: - if (rqstp->rq_proc != CB_NULL) - ret = SVC_DENIED; - break; - case RPC_AUTH_UNIX: - break; - case RPC_AUTH_GSS: - ret = check_gss_callback_principal(clp, rqstp); - break; - default: - ret = SVC_DENIED; + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) + return SVC_DROP; + break; + case RPC_AUTH_GSS: + /* No RPC_AUTH_GSS support yet in NFSv4.1 */ + if (svc_is_backchannel(rqstp)) + return SVC_DROP; } - nfs_put_client(clp); - return ret; + return SVC_OK; } /* diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index d3b44f9bd747..46d93ce7311b 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -7,6 +7,7 @@ */ #ifndef __LINUX_FS_NFS_CALLBACK_H #define __LINUX_FS_NFS_CALLBACK_H +#include #define NFS4_CALLBACK 0x40000000 #define NFS4_CALLBACK_XDRSIZE 2048 @@ -37,7 +38,6 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; - struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */ }; struct cb_compound_hdr_arg { @@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall( extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); extern void nfs4_cb_take_slot(struct nfs_client *clp); #endif /* CONFIG_NFS_V4_1 */ - +extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res, struct cb_process_state *cps); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 4bb91cb2620d..829f406e91dd 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, { struct nfs_client *clp; int i; - __be32 status; + __be32 status = htonl(NFS4ERR_BADSESSION); cps->clp = NULL; - status = htonl(NFS4ERR_BADSESSION); - /* Incoming session must match the callback session */ - if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN)) - goto out; - - clp = nfs4_find_client_sessionid(args->csa_addr, - &args->csa_sessionid, 1); + clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 23112c263f81..14e0f9371d14 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r if (hdr_arg.minorversion == 0) { cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); - if (!cps.clp) + if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; - } else - cps.svc_sid = bc_xprt_sid(rqstp); + } hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 192f2f860265..bd3ca32879e7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident) * For CB_COMPOUND calls, find a client by IP address, protocol version, * minorversion, and sessionID * - * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service - * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL - * can arrive before the callback sessionid is set. For CB_NULL calls, - * find a client by IP address protocol version, and minorversion. - * * Returns NULL if no such client */ struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *addr, - struct nfs4_sessionid *sid, int is_cb_compound) + struct nfs4_sessionid *sid) { struct nfs_client *clp; @@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, if (!nfs4_has_session(clp)) continue; - /* Match sessionid unless cb_null call*/ - if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data, - sid->data, NFS4_MAX_SESSIONID_LEN) != 0)) + /* Match sessionid*/ + if (memcmp(clp->cl_session->sess_id.data, + sid->data, NFS4_MAX_SESSIONID_LEN) != 0) continue; atomic_inc(&clp->cl_count); @@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *addr, - struct nfs4_sessionid *sid, int is_cb_compound) + struct nfs4_sessionid *sid) { return NULL; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4644f04b4b46..cf9fdbdabc67 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); extern struct nfs_client *nfs4_find_client_ident(int); extern struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *, - int); +nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2336d532cf66..e6742b57a04c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_proc_create_session(clp); if (status != 0) goto out; - status = nfs4_set_callback_sessionid(clp); - if (status != 0) { - printk(KERN_WARNING "Sessionid not set. No callback service\n"); - nfs_callback_down(1); - status = 0; - } nfs41_setup_state_renewal(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index c50b458b8a3f..082884295f80 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -47,14 +47,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) return 1; return 0; } -static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp) -{ - if (svc_is_backchannel(rqstp)) - return (struct nfs4_sessionid *) - rqstp->rq_server->sv_bc_xprt->xpt_bc_sid; - return NULL; -} - #else /* CONFIG_NFS_V4_1 */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) @@ -67,11 +59,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) return 0; } -static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp) -{ - return NULL; -} - static inline void xprt_free_bc_request(struct rpc_rqst *req) { } diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 059877b4d85b..7ad9751a0d87 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -77,7 +77,6 @@ struct svc_xprt { size_t xpt_remotelen; /* length of address */ struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ - void *xpt_bc_sid; /* back channel session ID */ struct net *xpt_net; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7bd3bbba4710..d802e941d365 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1609,9 +1609,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, */ static void svc_bc_sock_free(struct svc_xprt *xprt) { - if (xprt) { - kfree(xprt->xpt_bc_sid); + if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); - } } #endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3 From 2c4cdf8f6d3cfb48036400952329555099c8c92c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:02 +0000 Subject: NFS fix cb_sequence error processing Always assign the cb_process_state nfs_client pointer so a processing error in cb_sequence after the nfs_client is found and referenced returns a non-NULL cb_process_state nfs_client and the matching nfs_put_client in nfs4_callback_compound dereferences the client. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 829f406e91dd..89587573fe50 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -408,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; nfs4_cb_take_slot(clp); - cps->clp = clp; /* put in nfs4_callback_compound */ out: + cps->clp = clp; /* put in nfs4_callback_compound */ for (i = 0; i < args->csa_nrclists; i++) kfree(args->csa_rclists[i].rcl_refcalls); kfree(args->csa_rclists); -- cgit v1.2.3 From b2a2897dc4a59684321de425652061c62a0569d0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:03 +0000 Subject: NFS improve pnfs_put_deviceid_cache debug print What we really want to know is the ref count. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bc4089769735..1b1bc1a0fb0a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp) { struct pnfs_deviceid_cache *local = clp->cl_devid_cache; - dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); + dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { int i; /* Verify cache is empty */ -- cgit v1.2.3 From 27dc1cd3ad9300f81e1219e5fc305d91d85353f8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jan 2011 15:28:21 -0500 Subject: NFS: nfs_wcc_update_inode() should set nfsi->attr_gencount If the call to nfs_wcc_update_inode() results in an attribute update, we need to ensure that the inode's attr_gencount gets bumped too, otherwise we are not protected against races with other GETATTR calls. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d8512423ba72..1cc600e77bb4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -881,9 +881,10 @@ out: return ret; } -static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long ret = 0; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -891,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->change_attr = fattr->change_attr; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + ret |= NFS_INO_INVALID_ATTR; } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + ret |= NFS_INO_INVALID_ATTR; + } if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) - i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + && nfsi->npages == 0) { + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + ret |= NFS_INO_INVALID_ATTR; + } + return ret; } /** @@ -1223,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ - nfs_wcc_update_inode(inode, fattr); + invalid |= nfs_wcc_update_inode(inode, fattr); /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { -- cgit v1.2.3 From 3689456b4bd36027022b3215eb2acba51cd0e6b5 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 25 Jan 2011 15:07:34 -0800 Subject: squashfs: fix use of uninitialised variable in zlib & xz decompressors Fix potential use of uninitialised variable caused by recent decompressor code optimisations. In zlib_uncompress (zlib_wrapper.c) we have int zlib_err, zlib_init = 0; ... do { ... if (avail == 0) { offset = 0; put_bh(bh[k++]); continue; } ... zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH); ... } while (zlib_err == Z_OK); If continue is executed (avail == 0) then the while condition will be evaluated testing zlib_err, which is uninitialised first time around the loop. Fix this by getting rid of the 'if (avail == 0)' condition test, this edge condition should not be being handled in the decompressor code, and instead handle it generically in the caller code. Similarly for xz_wrapper.c. Incidentally, on most architectures (bar Mips and Parisc), no uninitialised variable warning is generated by gcc, this is because the while condition test on continue is optimised out and not performed (when executing continue zlib_err has not been changed since entering the loop, and logically if the while condition was true previously, then it's still true). Signed-off-by: Phillip Lougher Reported-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/block.c | 8 ++++++++ fs/squashfs/xz_wrapper.c | 6 ------ fs/squashfs/zlib_wrapper.c | 6 ------ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2fb2882f0fa7..8ab48bc2fa7d 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb, *length = (unsigned char) bh->b_data[*offset] | (unsigned char) bh->b_data[*offset + 1] << 8; *offset += 2; + + if (*offset == msblk->devblksize) { + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *offset = 0; + } } return bh; diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 856756ca5ee4..c4eb40018256 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!buffer_uptodate(bh[k])) goto release_mutex; - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 818a5e063faf..4661ae2b1cec 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!buffer_uptodate(bh[k])) goto release_mutex; - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; -- cgit v1.2.3 From ac751efa6a0d70f2c9daef5c7e3a92270f5c2dff Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Tue, 25 Jan 2011 15:07:35 -0800 Subject: console: rename acquire/release_console_sem() to console_lock/unlock() The -rt patches change the console_semaphore to console_mutex. As a result, a quite large chunk of the patches changes all acquire/release_console_sem() to acquire/release_console_mutex() This commit makes things use more neutral function names which dont make implications about the underlying lock. The only real change is the return value of console_trylock which is inverted from try_acquire_console_sem() This patch also paves the way to switching console_sem from a semaphore to a mutex. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: make console_trylock return 1 on success, per Geert] Signed-off-by: Torben Hohn Cc: Thomas Gleixner Cc: Greg KH Cc: Ingo Molnar Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-omap2/pm24xx.c | 4 +- arch/arm/mach-omap2/pm34xx.c | 4 +- arch/arm/mach-omap2/serial.c | 4 +- arch/parisc/kernel/pdc_cons.c | 4 +- drivers/char/bfin_jtag_comm.c | 8 +-- drivers/gpu/drm/nouveau/nouveau_drv.c | 8 +-- drivers/gpu/drm/radeon/radeon_device.c | 10 +-- drivers/staging/msm/msm_fb.c | 8 +-- drivers/staging/olpc_dcon/olpc_dcon.c | 10 +-- drivers/staging/sm7xx/smtcfb.c | 8 +-- drivers/tty/serial/sb1250-duart.c | 2 +- drivers/tty/tty_io.c | 4 +- drivers/tty/vt/selection.c | 4 +- drivers/tty/vt/vc_screen.c | 16 ++--- drivers/tty/vt/vt.c | 124 ++++++++++++++++----------------- drivers/tty/vt/vt_ioctl.c | 60 ++++++++-------- drivers/video/arkfb.c | 12 ++-- drivers/video/aty/aty128fb.c | 12 ++-- drivers/video/aty/atyfb_base.c | 10 +-- drivers/video/aty/radeon_pm.c | 10 +-- drivers/video/chipsfb.c | 8 +-- drivers/video/console/fbcon.c | 42 +++++------ drivers/video/da8xx-fb.c | 8 +-- drivers/video/fbmem.c | 12 ++-- drivers/video/fbsysfs.c | 20 +++--- drivers/video/geode/gxfb_core.c | 8 +-- drivers/video/geode/lxfb_core.c | 8 +-- drivers/video/i810/i810_main.c | 8 +-- drivers/video/jz4740_fb.c | 8 +-- drivers/video/mx3fb.c | 8 +-- drivers/video/nvidia/nvidia.c | 8 +-- drivers/video/ps3fb.c | 16 ++--- drivers/video/s3fb.c | 16 ++--- drivers/video/savage/savagefb_driver.c | 8 +-- drivers/video/sh_mobile_hdmi.c | 8 +-- drivers/video/sh_mobile_lcdcfb.c | 4 +- drivers/video/sm501fb.c | 8 +-- drivers/video/tmiofb.c | 10 +-- drivers/video/via/viafbdev.c | 8 +-- drivers/video/vt8623fb.c | 12 ++-- drivers/video/xen-fbfront.c | 4 +- fs/proc/consoles.c | 4 +- include/linux/console.h | 6 +- kernel/printk.c | 100 ++++++++++++++------------ 44 files changed, 336 insertions(+), 328 deletions(-) (limited to 'fs') diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index 9e5dc8ed51e9..97feb3ab6a69 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -134,7 +134,7 @@ static void omap2_enter_full_retention(void) /* Block console output in case it is on one of the OMAP UARTs */ if (!is_suspending()) - if (try_acquire_console_sem()) + if (!console_trylock()) goto no_sleep; omap_uart_prepare_idle(0); @@ -151,7 +151,7 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(0); if (!is_suspending()) - release_console_sem(); + console_unlock(); no_sleep: if (omap2_pm_debug) { diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 8cbbeade4b8a..a4aa1920a75c 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -398,7 +398,7 @@ void omap_sram_idle(void) if (!is_suspending()) if (per_next_state < PWRDM_POWER_ON || core_next_state < PWRDM_POWER_ON) - if (try_acquire_console_sem()) + if (!console_trylock()) goto console_still_active; /* PER */ @@ -481,7 +481,7 @@ void omap_sram_idle(void) } if (!is_suspending()) - release_console_sem(); + console_unlock(); console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index 302da7403a10..32e91a9c8b6b 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -812,7 +812,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata) oh->dev_attr = uart; - acquire_console_sem(); /* in case the earlycon is on the UART */ + console_lock(); /* in case the earlycon is on the UART */ /* * Because of early UART probing, UART did not get idled @@ -838,7 +838,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; - release_console_sem(); + console_unlock(); if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 11bdd68e5762..fc770be465ff 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -169,11 +169,11 @@ static int __init pdc_console_tty_driver_init(void) struct console *tmp; - acquire_console_sem(); + console_lock(); for_each_console(tmp) if (tmp == &pdc_cons) break; - release_console_sem(); + console_unlock(); if (!tmp) { printk(KERN_INFO "PDC console driver not registered anymore, not creating %s\n", pdc_cons.name); diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c index e397df3ad98e..16402445f2b2 100644 --- a/drivers/char/bfin_jtag_comm.c +++ b/drivers/char/bfin_jtag_comm.c @@ -183,16 +183,16 @@ bfin_jc_circ_write(const unsigned char *buf, int count) } #ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE -# define acquire_console_sem() -# define release_console_sem() +# define console_lock() +# define console_unlock() #endif static int bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count) { int i; - acquire_console_sem(); + console_lock(); i = bfin_jc_circ_write(buf, count); - release_console_sem(); + console_unlock(); wake_up_process(bfin_jc_kthread); return i; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 13bb672a16f4..f658a04eecf9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -234,9 +234,9 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) pci_set_power_state(pdev, PCI_D3hot); } - acquire_console_sem(); + console_lock(); nouveau_fbcon_set_suspend(dev, 1); - release_console_sem(); + console_unlock(); nouveau_fbcon_restore_accel(dev); return 0; @@ -359,9 +359,9 @@ nouveau_pci_resume(struct pci_dev *pdev) nv_crtc->lut.depth = 0; } - acquire_console_sem(); + console_lock(); nouveau_fbcon_set_suspend(dev, 0); - release_console_sem(); + console_unlock(); nouveau_fbcon_zfill_all(dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 26091d602b84..0d478932b1a9 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -891,9 +891,9 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) pci_disable_device(dev->pdev); pci_set_power_state(dev->pdev, PCI_D3hot); } - acquire_console_sem(); + console_lock(); radeon_fbdev_set_suspend(rdev, 1); - release_console_sem(); + console_unlock(); return 0; } @@ -905,11 +905,11 @@ int radeon_resume_kms(struct drm_device *dev) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - acquire_console_sem(); + console_lock(); pci_set_power_state(dev->pdev, PCI_D0); pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) { - release_console_sem(); + console_unlock(); return -1; } pci_set_master(dev->pdev); @@ -920,7 +920,7 @@ int radeon_resume_kms(struct drm_device *dev) radeon_restore_bios_scratch_regs(rdev); radeon_fbdev_set_suspend(rdev, 0); - release_console_sem(); + console_unlock(); /* reset hpd state */ radeon_hpd_init(rdev); diff --git a/drivers/staging/msm/msm_fb.c b/drivers/staging/msm/msm_fb.c index 23fa049b51f2..a2f29d464051 100644 --- a/drivers/staging/msm/msm_fb.c +++ b/drivers/staging/msm/msm_fb.c @@ -347,7 +347,7 @@ static int msm_fb_suspend(struct platform_device *pdev, pm_message_t state) if ((!mfd) || (mfd->key != MFD_KEY)) return 0; - acquire_console_sem(); + console_lock(); fb_set_suspend(mfd->fbi, 1); ret = msm_fb_suspend_sub(mfd); @@ -358,7 +358,7 @@ static int msm_fb_suspend(struct platform_device *pdev, pm_message_t state) pdev->dev.power.power_state = state; } - release_console_sem(); + console_unlock(); return ret; } #else @@ -431,11 +431,11 @@ static int msm_fb_resume(struct platform_device *pdev) if ((!mfd) || (mfd->key != MFD_KEY)) return 0; - acquire_console_sem(); + console_lock(); ret = msm_fb_resume_sub(mfd); pdev->dev.power.power_state = PMSG_ON; fb_set_suspend(mfd->fbi, 1); - release_console_sem(); + console_unlock(); return ret; } diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index 9f26dc9408bb..56a283d1a74d 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -373,17 +373,17 @@ static void dcon_source_switch(struct work_struct *work) * * For now, we just hope.. */ - acquire_console_sem(); + console_lock(); ignore_fb_events = 1; if (fb_blank(fbinfo, FB_BLANK_UNBLANK)) { ignore_fb_events = 0; - release_console_sem(); + console_unlock(); printk(KERN_ERR "olpc-dcon: Failed to enter CPU mode\n"); dcon_pending = DCON_SOURCE_DCON; return; } ignore_fb_events = 0; - release_console_sem(); + console_unlock(); /* And turn off the DCON */ pdata->set_dconload(1); @@ -435,12 +435,12 @@ static void dcon_source_switch(struct work_struct *work) } } - acquire_console_sem(); + console_lock(); ignore_fb_events = 1; if (fb_blank(fbinfo, FB_BLANK_POWERDOWN)) printk(KERN_ERR "olpc-dcon: couldn't blank fb!\n"); ignore_fb_events = 0; - release_console_sem(); + console_unlock(); printk(KERN_INFO "olpc-dcon: The DCON has control\n"); break; diff --git a/drivers/staging/sm7xx/smtcfb.c b/drivers/staging/sm7xx/smtcfb.c index 0bc113c44d39..d007e4a12c14 100644 --- a/drivers/staging/sm7xx/smtcfb.c +++ b/drivers/staging/sm7xx/smtcfb.c @@ -1044,9 +1044,9 @@ static int __maybe_unused smtcfb_suspend(struct pci_dev *pdev, pm_message_t msg) /* when doing suspend, call fb apis and pci apis */ if (msg.event == PM_EVENT_SUSPEND) { - acquire_console_sem(); + console_lock(); fb_set_suspend(&sfb->fb, 1); - release_console_sem(); + console_unlock(); retv = pci_save_state(pdev); pci_disable_device(pdev); retv = pci_choose_state(pdev, msg); @@ -1105,9 +1105,9 @@ static int __maybe_unused smtcfb_resume(struct pci_dev *pdev) smtcfb_setmode(sfb); - acquire_console_sem(); + console_lock(); fb_set_suspend(&sfb->fb, 0); - release_console_sem(); + console_unlock(); return 0; } diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index a2f2b3254499..602d9845c52f 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -829,7 +829,7 @@ static void __init sbd_probe_duarts(void) #ifdef CONFIG_SERIAL_SB1250_DUART_CONSOLE /* * Serial console stuff. Very basic, polling driver for doing serial - * console output. The console_sem is held by the caller, so we + * console output. The console_lock is held by the caller, so we * shouldn't be interrupted for more console activity. */ static void sbd_console_putchar(struct uart_port *uport, int ch) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 464d09d97873..6158eae0f64a 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3256,7 +3256,7 @@ static ssize_t show_cons_active(struct device *dev, struct console *c; ssize_t count = 0; - acquire_console_sem(); + console_lock(); for (c = console_drivers; c; c = c->next) { if (!c->device) continue; @@ -3271,7 +3271,7 @@ static ssize_t show_cons_active(struct device *dev, while (i--) count += sprintf(buf + count, "%s%d%c", cs[i]->name, cs[i]->index, i ? ' ':'\n'); - release_console_sem(); + console_unlock(); return count; } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index ebae344ce910..c956ed6c83a3 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -316,9 +316,9 @@ int paste_selection(struct tty_struct *tty) /* always called with BTM from vt_ioctl */ WARN_ON(!tty_locked()); - acquire_console_sem(); + console_lock(); poke_blanked_console(); - release_console_sem(); + console_unlock(); ld = tty_ldisc_ref(tty); if (!ld) { diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index eab3a1ff99e4..a672ed192d33 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -202,7 +202,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) /* Select the proper current console and verify * sanity of the situation under the console lock. */ - acquire_console_sem(); + console_lock(); attr = (currcons & 128); currcons = (currcons & 127); @@ -336,9 +336,9 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) * the pagefault handling code may want to call printk(). */ - release_console_sem(); + console_unlock(); ret = copy_to_user(buf, con_buf_start, orig_count); - acquire_console_sem(); + console_lock(); if (ret) { read += (orig_count - ret); @@ -354,7 +354,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (read) ret = read; unlock_out: - release_console_sem(); + console_unlock(); mutex_unlock(&con_buf_mtx); return ret; } @@ -379,7 +379,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* Select the proper current console and verify * sanity of the situation under the console lock. */ - acquire_console_sem(); + console_lock(); attr = (currcons & 128); currcons = (currcons & 127); @@ -414,9 +414,9 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* Temporarily drop the console lock so that we can read * in the write data from userspace safely. */ - release_console_sem(); + console_unlock(); ret = copy_from_user(con_buf, buf, this_round); - acquire_console_sem(); + console_lock(); if (ret) { this_round -= ret; @@ -542,7 +542,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) vcs_scr_updated(vc); unlock_out: - release_console_sem(); + console_unlock(); mutex_unlock(&con_buf_mtx); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 76407eca9ab0..b230bd3f056f 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1003,9 +1003,9 @@ static int vt_resize(struct tty_struct *tty, struct winsize *ws) struct vc_data *vc = tty->driver_data; int ret; - acquire_console_sem(); + console_lock(); ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row); - release_console_sem(); + console_unlock(); return ret; } @@ -1271,7 +1271,7 @@ static void default_attr(struct vc_data *vc) vc->vc_color = vc->vc_def_color; } -/* console_sem is held */ +/* console_lock is held */ static void csi_m(struct vc_data *vc) { int i; @@ -1415,7 +1415,7 @@ int mouse_reporting(void) return vc_cons[fg_console].d->vc_report_mouse; } -/* console_sem is held */ +/* console_lock is held */ static void set_mode(struct vc_data *vc, int on_off) { int i; @@ -1485,7 +1485,7 @@ static void set_mode(struct vc_data *vc, int on_off) } } -/* console_sem is held */ +/* console_lock is held */ static void setterm_command(struct vc_data *vc) { switch(vc->vc_par[0]) { @@ -1545,7 +1545,7 @@ static void setterm_command(struct vc_data *vc) } } -/* console_sem is held */ +/* console_lock is held */ static void csi_at(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_cols - vc->vc_x) @@ -1555,7 +1555,7 @@ static void csi_at(struct vc_data *vc, unsigned int nr) insert_char(vc, nr); } -/* console_sem is held */ +/* console_lock is held */ static void csi_L(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_rows - vc->vc_y) @@ -1566,7 +1566,7 @@ static void csi_L(struct vc_data *vc, unsigned int nr) vc->vc_need_wrap = 0; } -/* console_sem is held */ +/* console_lock is held */ static void csi_P(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_cols - vc->vc_x) @@ -1576,7 +1576,7 @@ static void csi_P(struct vc_data *vc, unsigned int nr) delete_char(vc, nr); } -/* console_sem is held */ +/* console_lock is held */ static void csi_M(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_rows - vc->vc_y) @@ -1587,7 +1587,7 @@ static void csi_M(struct vc_data *vc, unsigned int nr) vc->vc_need_wrap = 0; } -/* console_sem is held (except via vc_init->reset_terminal */ +/* console_lock is held (except via vc_init->reset_terminal */ static void save_cur(struct vc_data *vc) { vc->vc_saved_x = vc->vc_x; @@ -1603,7 +1603,7 @@ static void save_cur(struct vc_data *vc) vc->vc_saved_G1 = vc->vc_G1_charset; } -/* console_sem is held */ +/* console_lock is held */ static void restore_cur(struct vc_data *vc) { gotoxy(vc, vc->vc_saved_x, vc->vc_saved_y); @@ -1625,7 +1625,7 @@ enum { ESnormal, ESesc, ESsquare, ESgetpars, ESgotpars, ESfunckey, EShash, ESsetG0, ESsetG1, ESpercent, ESignore, ESnonstd, ESpalette }; -/* console_sem is held (except via vc_init()) */ +/* console_lock is held (except via vc_init()) */ static void reset_terminal(struct vc_data *vc, int do_clear) { vc->vc_top = 0; @@ -1685,7 +1685,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) csi_J(vc, 2); } -/* console_sem is held */ +/* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) { /* @@ -2119,7 +2119,7 @@ static int is_double_width(uint32_t ucs) return bisearch(ucs, double_width, ARRAY_SIZE(double_width) - 1); } -/* acquires console_sem */ +/* acquires console_lock */ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count) { #ifdef VT_BUF_VRAM_ONLY @@ -2147,11 +2147,11 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co might_sleep(); - acquire_console_sem(); + console_lock(); vc = tty->driver_data; if (vc == NULL) { printk(KERN_ERR "vt: argh, driver_data is NULL !\n"); - release_console_sem(); + console_unlock(); return 0; } @@ -2159,7 +2159,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co if (!vc_cons_allocated(currcons)) { /* could this happen? */ printk_once("con_write: tty %d not allocated\n", currcons+1); - release_console_sem(); + console_unlock(); return 0; } @@ -2375,7 +2375,7 @@ rescan_last_byte: } FLUSH console_conditional_schedule(); - release_console_sem(); + console_unlock(); notify_update(vc); return n; #undef FLUSH @@ -2388,11 +2388,11 @@ rescan_last_byte: * us to do the switches asynchronously (needed when we want * to switch due to a keyboard interrupt). Synchronization * with other console code and prevention of re-entrancy is - * ensured with console_sem. + * ensured with console_lock. */ static void console_callback(struct work_struct *ignored) { - acquire_console_sem(); + console_lock(); if (want_console >= 0) { if (want_console != fg_console && @@ -2422,7 +2422,7 @@ static void console_callback(struct work_struct *ignored) } notify_update(vc_cons[fg_console].d); - release_console_sem(); + console_unlock(); } int set_console(int nr) @@ -2603,7 +2603,7 @@ static struct console vt_console_driver = { */ /* - * Generally a bit racy with respect to console_sem(). + * Generally a bit racy with respect to console_lock();. * * There are some functions which don't need it. * @@ -2629,17 +2629,17 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - acquire_console_sem(); + console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - release_console_sem(); + console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); break; case TIOCL_UNBLANKSCREEN: - acquire_console_sem(); + console_lock(); unblank_screen(); - release_console_sem(); + console_unlock(); break; case TIOCL_SELLOADLUT: ret = sel_loadlut(p); @@ -2688,10 +2688,10 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) } break; case TIOCL_BLANKSCREEN: /* until explicitly unblanked, not only poked */ - acquire_console_sem(); + console_lock(); ignore_poke = 1; do_blank_screen(0); - release_console_sem(); + console_unlock(); break; case TIOCL_BLANKEDSCREEN: ret = console_blanked; @@ -2790,11 +2790,11 @@ static void con_flush_chars(struct tty_struct *tty) return; /* if we race with con_close(), vt may be null */ - acquire_console_sem(); + console_lock(); vc = tty->driver_data; if (vc) set_cursor(vc); - release_console_sem(); + console_unlock(); } /* @@ -2805,7 +2805,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) unsigned int currcons = tty->index; int ret = 0; - acquire_console_sem(); + console_lock(); if (tty->driver_data == NULL) { ret = vc_allocate(currcons); if (ret == 0) { @@ -2813,7 +2813,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) /* Still being freed */ if (vc->port.tty) { - release_console_sem(); + console_unlock(); return -ERESTARTSYS; } tty->driver_data = vc; @@ -2827,11 +2827,11 @@ static int con_open(struct tty_struct *tty, struct file *filp) tty->termios->c_iflag |= IUTF8; else tty->termios->c_iflag &= ~IUTF8; - release_console_sem(); + console_unlock(); return ret; } } - release_console_sem(); + console_unlock(); return ret; } @@ -2844,9 +2844,9 @@ static void con_shutdown(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; BUG_ON(vc == NULL); - acquire_console_sem(); + console_lock(); vc->port.tty = NULL; - release_console_sem(); + console_unlock(); tty_shutdown(tty); } @@ -2893,13 +2893,13 @@ static int __init con_init(void) struct vc_data *vc; unsigned int currcons = 0, i; - acquire_console_sem(); + console_lock(); if (conswitchp) display_desc = conswitchp->con_startup(); if (!display_desc) { fg_console = 0; - release_console_sem(); + console_unlock(); return 0; } @@ -2946,7 +2946,7 @@ static int __init con_init(void) printable = 1; printk("\n"); - release_console_sem(); + console_unlock(); #ifdef CONFIG_VT_CONSOLE register_console(&vt_console_driver); @@ -3037,7 +3037,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); /* check if driver is registered */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { @@ -3122,7 +3122,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, retval = 0; err: - release_console_sem(); + console_unlock(); module_put(owner); return retval; }; @@ -3171,7 +3171,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); /* check if driver is registered and if it is unbindable */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { @@ -3185,7 +3185,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) } if (retval) { - release_console_sem(); + console_unlock(); goto err; } @@ -3204,12 +3204,12 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) } if (retval) { - release_console_sem(); + console_unlock(); goto err; } if (!con_is_bound(csw)) { - release_console_sem(); + console_unlock(); goto err; } @@ -3238,7 +3238,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) if (!con_is_bound(csw)) con_driver->flag &= ~CON_DRIVER_FLAG_INIT; - release_console_sem(); + console_unlock(); /* ignore return value, binding should not fail */ bind_con_driver(defcsw, first, last, deflt); err: @@ -3538,7 +3538,7 @@ int register_con_driver(const struct consw *csw, int first, int last) if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = ®istered_con_driver[i]; @@ -3592,7 +3592,7 @@ int register_con_driver(const struct consw *csw, int first, int last) } err: - release_console_sem(); + console_unlock(); module_put(owner); return retval; } @@ -3613,7 +3613,7 @@ int unregister_con_driver(const struct consw *csw) { int i, retval = -ENODEV; - acquire_console_sem(); + console_lock(); /* cannot unregister a bound driver */ if (con_is_bound(csw)) @@ -3639,7 +3639,7 @@ int unregister_con_driver(const struct consw *csw) } } err: - release_console_sem(); + console_unlock(); return retval; } EXPORT_SYMBOL(unregister_con_driver); @@ -3934,9 +3934,9 @@ int con_set_cmap(unsigned char __user *arg) { int rc; - acquire_console_sem(); + console_lock(); rc = set_get_cmap (arg,1); - release_console_sem(); + console_unlock(); return rc; } @@ -3945,9 +3945,9 @@ int con_get_cmap(unsigned char __user *arg) { int rc; - acquire_console_sem(); + console_lock(); rc = set_get_cmap (arg,0); - release_console_sem(); + console_unlock(); return rc; } @@ -3994,12 +3994,12 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) } else font.data = NULL; - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_get) rc = vc->vc_sw->con_font_get(vc, &font); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); if (rc) goto out; @@ -4076,12 +4076,12 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) font.data = memdup_user(op->data, size); if (IS_ERR(font.data)) return PTR_ERR(font.data); - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_set) rc = vc->vc_sw->con_font_set(vc, &font, op->flags); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); kfree(font.data); return rc; } @@ -4103,12 +4103,12 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) else name[MAX_FONT_NAME - 1] = 0; - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_default) rc = vc->vc_sw->con_font_default(vc, &font, s); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); if (!rc) { op->width = font.width; op->height = font.height; @@ -4124,7 +4124,7 @@ static int con_font_copy(struct vc_data *vc, struct console_font_op *op) if (vc->vc_mode != KD_TEXT) return -EINVAL; - acquire_console_sem(); + console_lock(); if (!vc->vc_sw->con_font_copy) rc = -ENOSYS; else if (con < 0 || !vc_cons_allocated(con)) @@ -4133,7 +4133,7 @@ static int con_font_copy(struct vc_data *vc, struct console_font_op *op) rc = 0; else rc = vc->vc_sw->con_font_copy(vc, con); - release_console_sem(); + console_unlock(); return rc; } diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 6b68a0fb4611..1235ebda6e1c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -649,12 +649,12 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, /* * explicitly blank/unblank the screen if switching modes */ - acquire_console_sem(); + console_lock(); if (arg == KD_TEXT) do_unblank_screen(1); else do_blank_screen(1); - release_console_sem(); + console_unlock(); break; case KDGETMODE: @@ -893,7 +893,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -EINVAL; goto out; } - acquire_console_sem(); + console_lock(); vc->vt_mode = tmp; /* the frsig is ignored, so we set it to 0 */ vc->vt_mode.frsig = 0; @@ -901,7 +901,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc->vt_pid = get_pid(task_pid(current)); /* no switch is required -- saw@shade.msu.ru */ vc->vt_newvt = -1; - release_console_sem(); + console_unlock(); break; } @@ -910,9 +910,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, struct vt_mode tmp; int rc; - acquire_console_sem(); + console_lock(); memcpy(&tmp, &vc->vt_mode, sizeof(struct vt_mode)); - release_console_sem(); + console_unlock(); rc = copy_to_user(up, &tmp, sizeof(struct vt_mode)); if (rc) @@ -965,9 +965,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -ENXIO; else { arg--; - acquire_console_sem(); + console_lock(); ret = vc_allocate(arg); - release_console_sem(); + console_unlock(); if (ret) break; set_console(arg); @@ -990,7 +990,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -ENXIO; else { vsa.console--; - acquire_console_sem(); + console_lock(); ret = vc_allocate(vsa.console); if (ret == 0) { struct vc_data *nvc; @@ -1003,7 +1003,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, put_pid(nvc->vt_pid); nvc->vt_pid = get_pid(task_pid(current)); } - release_console_sem(); + console_unlock(); if (ret) break; /* Commence switch and lock */ @@ -1044,7 +1044,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, /* * Switching-from response */ - acquire_console_sem(); + console_lock(); if (vc->vt_newvt >= 0) { if (arg == 0) /* @@ -1063,7 +1063,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc->vt_newvt = -1; ret = vc_allocate(newvt); if (ret) { - release_console_sem(); + console_unlock(); break; } /* @@ -1083,7 +1083,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, if (arg != VT_ACKACQ) ret = -EINVAL; } - release_console_sem(); + console_unlock(); break; /* @@ -1096,20 +1096,20 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, } if (arg == 0) { /* deallocate all unused consoles, but leave 0 */ - acquire_console_sem(); + console_lock(); for (i=1; iv_cols)) ret = -EFAULT; else { - acquire_console_sem(); + console_lock(); for (i = 0; i < MAX_NR_CONSOLES; i++) { vc = vc_cons[i].d; @@ -1135,7 +1135,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc_resize(vc_cons[i].d, cc, ll); } } - release_console_sem(); + console_unlock(); } break; } @@ -1187,14 +1187,14 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons[i].d) continue; - acquire_console_sem(); + console_lock(); if (vlin) vc_cons[i].d->vc_scan_lines = vlin; if (clin) vc_cons[i].d->vc_font.height = clin; vc_cons[i].d->vc_resize_user = 1; vc_resize(vc_cons[i].d, cc, ll); - release_console_sem(); + console_unlock(); } break; } @@ -1367,7 +1367,7 @@ void vc_SAK(struct work_struct *work) struct vc_data *vc; struct tty_struct *tty; - acquire_console_sem(); + console_lock(); vc = vc_con->d; if (vc) { tty = vc->port.tty; @@ -1379,7 +1379,7 @@ void vc_SAK(struct work_struct *work) __do_SAK(tty); reset_vc(vc); } - release_console_sem(); + console_unlock(); } #ifdef CONFIG_COMPAT @@ -1737,10 +1737,10 @@ int vt_move_to_console(unsigned int vt, int alloc) { int prev; - acquire_console_sem(); + console_lock(); /* Graphics mode - up to X */ if (disable_vt_switch) { - release_console_sem(); + console_unlock(); return 0; } prev = fg_console; @@ -1748,7 +1748,7 @@ int vt_move_to_console(unsigned int vt, int alloc) if (alloc && vc_allocate(vt)) { /* we can't have a free VC for now. Too bad, * we don't want to mess the screen for now. */ - release_console_sem(); + console_unlock(); return -ENOSPC; } @@ -1758,10 +1758,10 @@ int vt_move_to_console(unsigned int vt, int alloc) * Let the calling function know so it can decide * what to do. */ - release_console_sem(); + console_unlock(); return -EIO; } - release_console_sem(); + console_unlock(); tty_lock(); if (vt_waitactive(vt + 1)) { pr_debug("Suspend: Can't switch VCs."); @@ -1781,8 +1781,8 @@ int vt_move_to_console(unsigned int vt, int alloc) */ void pm_set_vt_switch(int do_switch) { - acquire_console_sem(); + console_lock(); disable_vt_switch = !do_switch; - release_console_sem(); + console_unlock(); } EXPORT_SYMBOL(pm_set_vt_switch); diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index d583bea608fd..391ac939f011 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -23,7 +23,7 @@ #include #include #include -#include /* Why should fb driver call console functions? because acquire_console_sem() */ +#include /* Why should fb driver call console functions? because console_lock() */ #include