diff options
author | David S. Miller <davem@davemloft.net> | 2015-05-13 14:31:43 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-13 14:31:43 -0400 |
commit | b04096ff33a977c01c8780ca3ee129dbd641bad4 (patch) | |
tree | 8652f27f158984e5aa4c00ddf1a4885a32435f28 /fs | |
parent | 7f460d30c8e130382de1443fdbc4d040a9e062ec (diff) | |
parent | 110bc76729d448fdbcb5cdb63b83d9fd65ce5e26 (diff) | |
download | lwn-b04096ff33a977c01c8780ca3ee129dbd641bad4.tar.gz lwn-b04096ff33a977c01c8780ca3ee129dbd641bad4.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Four minor merge conflicts:
1) qca_spi.c renamed the local variable used for the SPI device
from spi_device to spi, meanwhile the spi_set_drvdata() call
got moved further up in the probe function.
2) Two changes were both adding new members to codel params
structure, and thus we had overlapping changes to the
initializer function.
3) 'net' was making a fix to sk_release_kernel() which is
completely removed in 'net-next'.
4) In net_namespace.c, the rtnl_net_fill() call for GET operations
had the command value fixed, meanwhile 'net-next' adjusted the
argument signature a bit.
This also matches example merge resolutions posted by Stephen
Rothwell over the past two days.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 2 | ||||
-rw-r--r-- | fs/configfs/mount.c | 2 | ||||
-rw-r--r-- | fs/efivarfs/super.c | 2 | ||||
-rw-r--r-- | fs/ext4/Kconfig | 9 | ||||
-rw-r--r-- | fs/ext4/crypto_fname.c | 280 | ||||
-rw-r--r-- | fs/ext4/crypto_key.c | 1 | ||||
-rw-r--r-- | fs/ext4/crypto_policy.c | 14 | ||||
-rw-r--r-- | fs/ext4/dir.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 16 | ||||
-rw-r--r-- | fs/ext4/ext4_crypto.h | 11 | ||||
-rw-r--r-- | fs/ext4/extents.c | 15 | ||||
-rw-r--r-- | fs/ext4/extents_status.c | 8 | ||||
-rw-r--r-- | fs/ext4/inode.c | 2 | ||||
-rw-r--r-- | fs/ext4/namei.c | 72 | ||||
-rw-r--r-- | fs/ext4/resize.c | 7 | ||||
-rw-r--r-- | fs/ext4/symlink.c | 2 | ||||
-rw-r--r-- | fs/f2fs/data.c | 7 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 1 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 8 | ||||
-rw-r--r-- | fs/f2fs/super.c | 1 | ||||
-rw-r--r-- | fs/namei.c | 22 | ||||
-rw-r--r-- | fs/namespace.c | 6 | ||||
-rw-r--r-- | fs/nfsd/blocklayout.c | 11 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 119 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 147 | ||||
-rw-r--r-- | fs/nfsd/state.h | 19 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 1 | ||||
-rw-r--r-- | fs/nilfs2/btree.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 13 | ||||
-rw-r--r-- | fs/splice.c | 12 |
30 files changed, 483 insertions, 331 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 41c510b7cc11..5e020d76fd07 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & - ~(GFP_NOFS & ~__GFP_HIGHMEM)); + ~(__GFP_FS | __GFP_HIGHMEM)); return inode; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index da94e41bdbf6..537356742091 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -173,5 +173,5 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("0.0.2"); MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); -module_init(configfs_init); +core_initcall(configfs_init); module_exit(configfs_exit); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 59fedbcf8798..86a2121828c3 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, int len, i; int err = -ENOMEM; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return err; diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 18228c201f7f..024f2284d3f6 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -64,8 +64,8 @@ config EXT4_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. -config EXT4_FS_ENCRYPTION - bool "Ext4 Encryption" +config EXT4_ENCRYPTION + tristate "Ext4 Encryption" depends on EXT4_FS select CRYPTO_AES select CRYPTO_CBC @@ -81,6 +81,11 @@ config EXT4_FS_ENCRYPTION efficient since it avoids caching the encrypted and decrypted pages in the page cache. +config EXT4_FS_ENCRYPTION + bool + default y + depends on EXT4_ENCRYPTION + config EXT4_DEBUG bool "EXT4 debugging support" depends on EXT4_FS diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index ca2f5948c1ac..fded02f72299 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -66,6 +66,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, int res = 0; char iv[EXT4_CRYPTO_BLOCK_SIZE]; struct scatterlist sg[1]; + int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK); char *workbuf; if (iname->len <= 0 || iname->len > ctx->lim) @@ -73,6 +74,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ? EXT4_CRYPTO_BLOCK_SIZE : iname->len; + ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding); ciphertext_len = (ciphertext_len > ctx->lim) ? ctx->lim : ciphertext_len; @@ -101,7 +103,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, /* Create encryption request */ sg_init_table(sg, 1); sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0); - ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv); + ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { BUG_ON(req->base.data != &ecr); @@ -198,106 +200,57 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx, return oname->len; } +static const char *lookup_table = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + /** * ext4_fname_encode_digest() - * * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. * The encoded string is roughly 4/3 times the size of the input string. */ -int ext4_fname_encode_digest(char *dst, char *src, u32 len) +static int digest_encode(const char *src, int len, char *dst) { - static const char *lookup_table = - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+"; - u32 current_chunk, num_chunks, i; - char tmp_buf[3]; - u32 c0, c1, c2, c3; - - current_chunk = 0; - num_chunks = len/3; - for (i = 0; i < num_chunks; i++) { - c0 = src[3*i] & 0x3f; - c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f; - c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f; - c3 = (src[3*i+2]>>2) & 0x3f; - dst[4*i] = lookup_table[c0]; - dst[4*i+1] = lookup_table[c1]; - dst[4*i+2] = lookup_table[c2]; - dst[4*i+3] = lookup_table[c3]; - } - if (i*3 < len) { - memset(tmp_buf, 0, 3); - memcpy(tmp_buf, &src[3*i], len-3*i); - c0 = tmp_buf[0] & 0x3f; - c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f; - c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f; - c3 = (tmp_buf[2]>>2) & 0x3f; - dst[4*i] = lookup_table[c0]; - dst[4*i+1] = lookup_table[c1]; - dst[4*i+2] = lookup_table[c2]; - dst[4*i+3] = lookup_table[c3]; + int i = 0, bits = 0, ac = 0; + char *cp = dst; + + while (i < len) { + ac += (((unsigned char) src[i]) << bits); + bits += 8; + do { + *cp++ = lookup_table[ac & 0x3f]; + ac >>= 6; + bits -= 6; + } while (bits >= 6); i++; } - return (i * 4); + if (bits) + *cp++ = lookup_table[ac & 0x3f]; + return cp - dst; } -/** - * ext4_fname_hash() - - * - * This function computes the hash of the input filename, and sets the output - * buffer to the *encoded* digest. It returns the length of the digest as its - * return value. Errors are returned as negative numbers. We trust the caller - * to allocate sufficient memory to oname string. - */ -static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx, - const struct ext4_str *iname, - struct ext4_str *oname) +static int digest_decode(const char *src, int len, char *dst) { - struct scatterlist sg; - struct hash_desc desc = { - .tfm = (struct crypto_hash *)ctx->htfm, - .flags = CRYPTO_TFM_REQ_MAY_SLEEP - }; - int res = 0; - - if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { - res = ext4_fname_encode_digest(oname->name, iname->name, - iname->len); - oname->len = res; - return res; - } - - sg_init_one(&sg, iname->name, iname->len); - res = crypto_hash_init(&desc); - if (res) { - printk(KERN_ERR - "%s: Error initializing crypto hash; res = [%d]\n", - __func__, res); - goto out; - } - res = crypto_hash_update(&desc, &sg, iname->len); - if (res) { - printk(KERN_ERR - "%s: Error updating crypto hash; res = [%d]\n", - __func__, res); - goto out; - } - res = crypto_hash_final(&desc, - &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]); - if (res) { - printk(KERN_ERR - "%s: Error finalizing crypto hash; res = [%d]\n", - __func__, res); - goto out; + int i = 0, bits = 0, ac = 0; + const char *p; + char *cp = dst; + + while (i < len) { + p = strchr(lookup_table, src[i]); + if (p == NULL || src[i] == 0) + return -2; + ac += (p - lookup_table) << bits; + bits += 6; + if (bits >= 8) { + *cp++ = ac & 0xff; + ac >>= 8; + bits -= 8; + } + i++; } - /* Encode the digest as a printable string--this will increase the - * size of the digest */ - oname->name[0] = 'I'; - res = ext4_fname_encode_digest(oname->name+1, - &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE], - EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1; - oname->len = res; -out: - return res; + if (ac) + return -1; + return cp - dst; } /** @@ -405,6 +358,7 @@ struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx( if (IS_ERR(ctx)) return ctx; + ctx->flags = ei->i_crypt_policy_flags; if (ctx->has_valid_key) { if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) { printk_once(KERN_WARNING @@ -517,6 +471,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, u32 namelen) { u32 ciphertext_len; + int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK); if (ctx == NULL) return -EIO; @@ -524,6 +479,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, return -EACCES; ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ? EXT4_CRYPTO_BLOCK_SIZE : namelen; + ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding); ciphertext_len = (ciphertext_len > ctx->lim) ? ctx->lim : ciphertext_len; return (int) ciphertext_len; @@ -539,10 +495,13 @@ int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, u32 ilen, struct ext4_str *crypto_str) { unsigned int olen; + int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK); if (!ctx) return -EIO; - olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE); + if (padding < EXT4_CRYPTO_BLOCK_SIZE) + padding = EXT4_CRYPTO_BLOCK_SIZE; + olen = ext4_fname_crypto_round_up(ilen, padding); crypto_str->len = olen; if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2; @@ -571,9 +530,13 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str) * ext4_fname_disk_to_usr() - converts a filename from disk space to user space */ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, - const struct ext4_str *iname, - struct ext4_str *oname) + struct dx_hash_info *hinfo, + const struct ext4_str *iname, + struct ext4_str *oname) { + char buf[24]; + int ret; + if (ctx == NULL) return -EIO; if (iname->len < 3) { @@ -587,18 +550,33 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, } if (ctx->has_valid_key) return ext4_fname_decrypt(ctx, iname, oname); - else - return ext4_fname_hash(ctx, iname, oname); + + if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { + ret = digest_encode(iname->name, iname->len, oname->name); + oname->len = ret; + return ret; + } + if (hinfo) { + memcpy(buf, &hinfo->hash, 4); + memcpy(buf+4, &hinfo->minor_hash, 4); + } else + memset(buf, 0, 8); + memcpy(buf + 8, iname->name + iname->len - 16, 16); + oname->name[0] = '_'; + ret = digest_encode(buf, 24, oname->name+1); + oname->len = ret + 1; + return ret + 1; } int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, + struct dx_hash_info *hinfo, const struct ext4_dir_entry_2 *de, struct ext4_str *oname) { struct ext4_str iname = {.name = (unsigned char *) de->name, .len = de->name_len }; - return _ext4_fname_disk_to_usr(ctx, &iname, oname); + return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname); } @@ -640,10 +618,11 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, const struct qstr *iname, struct dx_hash_info *hinfo) { - struct ext4_str tmp, tmp2; + struct ext4_str tmp; int ret = 0; + char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1]; - if (!ctx || !ctx->has_valid_key || + if (!ctx || ((iname->name[0] == '.') && ((iname->len == 1) || ((iname->name[1] == '.') && (iname->len == 2))))) { @@ -651,59 +630,90 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, return 0; } + if (!ctx->has_valid_key && iname->name[0] == '_') { + if (iname->len != 33) + return -ENOENT; + ret = digest_decode(iname->name+1, iname->len, buf); + if (ret != 24) + return -ENOENT; + memcpy(&hinfo->hash, buf, 4); + memcpy(&hinfo->minor_hash, buf + 4, 4); + return 0; + } + + if (!ctx->has_valid_key && iname->name[0] != '_') { + if (iname->len > 43) + return -ENOENT; + ret = digest_decode(iname->name, iname->len, buf); + ext4fs_dirhash(buf, ret, hinfo); + return 0; + } + /* First encrypt the plaintext name */ ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp); if (ret < 0) return ret; ret = ext4_fname_encrypt(ctx, iname, &tmp); - if (ret < 0) - goto out; - - tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1; - tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL); - if (tmp2.name == NULL) { - ret = -ENOMEM; - goto out; + if (ret >= 0) { + ext4fs_dirhash(tmp.name, tmp.len, hinfo); + ret = 0; } - ret = ext4_fname_hash(ctx, &tmp, &tmp2); - if (ret > 0) - ext4fs_dirhash(tmp2.name, tmp2.len, hinfo); - ext4_fname_crypto_free_buffer(&tmp2); -out: ext4_fname_crypto_free_buffer(&tmp); return ret; } -/** - * ext4_fname_disk_to_htree() - converts a filename from disk space to htree-access string - */ -int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx, - const struct ext4_dir_entry_2 *de, - struct dx_hash_info *hinfo) +int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr, + int len, const char * const name, + struct ext4_dir_entry_2 *de) { - struct ext4_str iname = {.name = (unsigned char *) de->name, - .len = de->name_len}; - struct ext4_str tmp; - int ret; + int ret = -ENOENT; + int bigname = (*name == '_'); - if (!ctx || - ((iname.name[0] == '.') && - ((iname.len == 1) || - ((iname.name[1] == '.') && (iname.len == 2))))) { - ext4fs_dirhash(iname.name, iname.len, hinfo); - return 0; + if (ctx->has_valid_key) { + if (cstr->name == NULL) { + struct qstr istr; + + ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr); + if (ret < 0) + goto errout; + istr.name = name; + istr.len = len; + ret = ext4_fname_encrypt(ctx, &istr, cstr); + if (ret < 0) + goto errout; + } + } else { + if (cstr->name == NULL) { + cstr->name = kmalloc(32, GFP_KERNEL); + if (cstr->name == NULL) + return -ENOMEM; + if ((bigname && (len != 33)) || + (!bigname && (len > 43))) + goto errout; + ret = digest_decode(name+bigname, len-bigname, + cstr->name); + if (ret < 0) { + ret = -ENOENT; + goto errout; + } + cstr->len = ret; + } + if (bigname) { + if (de->name_len < 16) + return 0; + ret = memcmp(de->name + de->name_len - 16, + cstr->name + 8, 16); + return (ret == 0) ? 1 : 0; + } } - - tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1; - tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL); - if (tmp.name == NULL) - return -ENOMEM; - - ret = ext4_fname_hash(ctx, &iname, &tmp); - if (ret > 0) - ext4fs_dirhash(tmp.name, tmp.len, hinfo); - ext4_fname_crypto_free_buffer(&tmp); + if (de->name_len != cstr->len) + return 0; + ret = memcmp(de->name, cstr->name, cstr->len); + return (ret == 0) ? 1 : 0; +errout: + kfree(cstr->name); + cstr->name = NULL; return ret; } diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index c8392af8abbb..52170d0b7c40 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -110,6 +110,7 @@ int ext4_generate_encryption_key(struct inode *inode) } res = 0; + ei->i_crypt_policy_flags = ctx.flags; if (S_ISREG(inode->i_mode)) crypt_key->mode = ctx.contents_encryption_mode; else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index 30eaf9e9864a..a6d6291aea16 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -37,6 +37,8 @@ static int ext4_is_encryption_context_consistent_with_policy( return 0; return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, EXT4_KEY_DESCRIPTOR_SIZE) == 0 && + (ctx.flags == + policy->flags) && (ctx.contents_encryption_mode == policy->contents_encryption_mode) && (ctx.filenames_encryption_mode == @@ -56,25 +58,25 @@ static int ext4_create_encryption_context_from_policy( printk(KERN_WARNING "%s: Invalid contents encryption mode %d\n", __func__, policy->contents_encryption_mode); - res = -EINVAL; - goto out; + return -EINVAL; } if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { printk(KERN_WARNING "%s: Invalid filenames encryption mode %d\n", __func__, policy->filenames_encryption_mode); - res = -EINVAL; - goto out; + return -EINVAL; } + if (policy->flags & ~EXT4_POLICY_FLAGS_VALID) + return -EINVAL; ctx.contents_encryption_mode = policy->contents_encryption_mode; ctx.filenames_encryption_mode = policy->filenames_encryption_mode; + ctx.flags = policy->flags; BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE); get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, sizeof(ctx), 0); -out: if (!res) ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); return res; @@ -115,6 +117,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy) policy->version = 0; policy->contents_encryption_mode = ctx.contents_encryption_mode; policy->filenames_encryption_mode = ctx.filenames_encryption_mode; + policy->flags = ctx.flags; memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, EXT4_KEY_DESCRIPTOR_SIZE); return 0; @@ -176,6 +179,7 @@ int ext4_inherit_context(struct inode *parent, struct inode *child) EXT4_ENCRYPTION_MODE_AES_256_XTS; ctx.filenames_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_CTS; + ctx.flags = 0; memset(ctx.master_key_descriptor, 0x42, EXT4_KEY_DESCRIPTOR_SIZE); res = 0; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 61db51a5ce4c..5665d82d2332 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -249,7 +249,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } else { /* Directory is encrypted */ err = ext4_fname_disk_to_usr(enc_ctx, - de, &fname_crypto_str); + NULL, de, &fname_crypto_str); if (err < 0) goto errout; if (!dir_emit(ctx, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ef267adce19a..009a0590b20f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -911,6 +911,7 @@ struct ext4_inode_info { /* on-disk additional length */ __u16 i_extra_isize; + char i_crypt_policy_flags; /* Indicate the inline data space. */ u16 i_inline_off; @@ -1066,12 +1067,6 @@ extern void ext4_set_bits(void *bm, int cur, int len); /* Metadata checksum algorithm codes */ #define EXT4_CRC32C_CHKSUM 1 -/* Encryption algorithms */ -#define EXT4_ENCRYPTION_MODE_INVALID 0 -#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1 -#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 -#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 - /* * Structure of the super block */ @@ -2093,9 +2088,11 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize); int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, u32 ilen, struct ext4_str *crypto_str); int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, + struct dx_hash_info *hinfo, const struct ext4_str *iname, struct ext4_str *oname); int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, + struct dx_hash_info *hinfo, const struct ext4_dir_entry_2 *de, struct ext4_str *oname); int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, @@ -2104,11 +2101,12 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, const struct qstr *iname, struct dx_hash_info *hinfo); -int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx, - const struct ext4_dir_entry_2 *de, - struct dx_hash_info *hinfo); int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, u32 namelen); +int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr, + int len, const char * const name, + struct ext4_dir_entry_2 *de); + #ifdef CONFIG_EXT4_FS_ENCRYPTION void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx); diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index c2ba35a914b6..d75159c101ce 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -20,12 +20,20 @@ struct ext4_encryption_policy { char version; char contents_encryption_mode; char filenames_encryption_mode; + char flags; char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; } __attribute__((__packed__)); #define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1 #define EXT4_KEY_DERIVATION_NONCE_SIZE 16 +#define EXT4_POLICY_FLAGS_PAD_4 0x00 +#define EXT4_POLICY_FLAGS_PAD_8 0x01 +#define EXT4_POLICY_FLAGS_PAD_16 0x02 +#define EXT4_POLICY_FLAGS_PAD_32 0x03 +#define EXT4_POLICY_FLAGS_PAD_MASK 0x03 +#define EXT4_POLICY_FLAGS_VALID 0x03 + /** * Encryption context for inode * @@ -41,7 +49,7 @@ struct ext4_encryption_context { char format; char contents_encryption_mode; char filenames_encryption_mode; - char reserved; + char flags; char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE]; } __attribute__((__packed__)); @@ -120,6 +128,7 @@ struct ext4_fname_crypto_ctx { struct crypto_hash *htfm; struct page *workpage; struct ext4_encryption_key key; + unsigned flags : 8; unsigned has_valid_key : 1; unsigned ctfm_key_is_ready : 1; }; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 973816bfe4a9..d74e08029643 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4927,13 +4927,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret) return ret; - /* - * currently supporting (pre)allocate mode for extent-based - * files _only_ - */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return -EOPNOTSUPP; - if (mode & FALLOC_FL_COLLAPSE_RANGE) return ext4_collapse_range(inode, offset, len); @@ -4955,6 +4948,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) mutex_lock(&inode->i_mutex); + /* + * We only support preallocation for extent-based files only + */ + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + ret = -EOPNOTSUPP; + goto out; + } + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index d33d5a6852b9..26724aeece73 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -703,6 +703,14 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, BUG_ON(end < lblk); + if ((status & EXTENT_STATUS_DELAYED) && + (status & EXTENT_STATUS_WRITTEN)) { + ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as " + " delayed and written which can potentially " + " cause data loss.\n", lblk, len); + WARN_ON(1); + } + newes.es_lblk = lblk; newes.es_len = len; ext4_es_store_pblock_status(&newes, pblk, status); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbd0654a2675..55b187c3bac1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -531,6 +531,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && + !(status & EXTENT_STATUS_WRITTEN) && ext4_find_delalloc_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; @@ -635,6 +636,7 @@ found: status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && + !(status & EXTENT_STATUS_WRITTEN) && ext4_find_delalloc_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7223b0b4bc38..814f3beb4369 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -640,7 +640,7 @@ static struct stats dx_show_leaf(struct inode *dir, ext4_put_fname_crypto_ctx(&ctx); ctx = NULL; } - res = ext4_fname_disk_to_usr(ctx, de, + res = ext4_fname_disk_to_usr(ctx, NULL, de, &fname_crypto_str); if (res < 0) { printk(KERN_WARNING "Error " @@ -653,15 +653,8 @@ static struct stats dx_show_leaf(struct inode *dir, name = fname_crypto_str.name; len = fname_crypto_str.len; } - res = ext4_fname_disk_to_hash(ctx, de, - &h); - if (res < 0) { - printk(KERN_WARNING "Error " - "converting filename " - "from disk to htree" - "\n"); - h.hash = 0xDEADBEEF; - } + ext4fs_dirhash(de->name, de->name_len, + &h); printk("%*.s:(E)%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); @@ -1008,15 +1001,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, /* silently ignore the rest of the block */ break; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION - err = ext4_fname_disk_to_hash(ctx, de, hinfo); - if (err < 0) { - count = err; - goto errout; - } -#else ext4fs_dirhash(de->name, de->name_len, hinfo); -#endif if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && (hinfo->minor_hash < start_minor_hash))) @@ -1032,7 +1017,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, &tmp_str); } else { /* Directory is encrypted */ - err = ext4_fname_disk_to_usr(ctx, de, + err = ext4_fname_disk_to_usr(ctx, hinfo, de, &fname_crypto_str); if (err < 0) { count = err; @@ -1193,26 +1178,10 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, int count = 0; char *base = (char *) de; struct dx_hash_info h = *hinfo; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - struct ext4_fname_crypto_ctx *ctx = NULL; - int err; - - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); -#endif while ((char *) de < base + blocksize) { if (de->name_len && de->inode) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION - err = ext4_fname_disk_to_hash(ctx, de, &h); - if (err < 0) { - ext4_put_fname_crypto_ctx(&ctx); - return err; - } -#else ext4fs_dirhash(de->name, de->name_len, &h); -#endif map_tail--; map_tail->hash = h.hash; map_tail->offs = ((char *) de - base)>>2; @@ -1223,9 +1192,6 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, /* XXX: do we need to check rec_len == 0 case? -Chris */ de = ext4_next_entry(de, blocksize); } -#ifdef CONFIG_EXT4_FS_ENCRYPTION - ext4_put_fname_crypto_ctx(&ctx); -#endif return count; } @@ -1287,16 +1253,8 @@ static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx, return 0; #ifdef CONFIG_EXT4_FS_ENCRYPTION - if (ctx) { - /* Directory is encrypted */ - res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str); - if (res < 0) - return res; - if (len != res) - return 0; - res = memcmp(name, fname_crypto_str->name, len); - return (res == 0) ? 1 : 0; - } + if (ctx) + return ext4_fname_match(ctx, fname_crypto_str, len, name, de); #endif if (len != de->name_len) return 0; @@ -1324,16 +1282,6 @@ int search_dir(struct buffer_head *bh, char *search_buf, int buf_size, if (IS_ERR(ctx)) return -1; - if (ctx != NULL) { - /* Allocate buffer to hold maximum name length */ - res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, - &fname_crypto_str); - if (res < 0) { - ext4_put_fname_crypto_ctx(&ctx); - return -1; - } - } - de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; while ((char *) de < dlimit) { @@ -1872,14 +1820,6 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, return res; } reclen = EXT4_DIR_REC_LEN(res); - - /* Allocate buffer to hold maximum name length */ - res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, - &fname_crypto_str); - if (res < 0) { - ext4_put_fname_crypto_ctx(&ctx); - return -1; - } } de = (struct ext4_dir_entry_2 *)buf; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 8a8ec6293b19..cf0c472047e3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1432,12 +1432,15 @@ static int ext4_flex_group_add(struct super_block *sb, goto exit; /* * We will always be modifying at least the superblock and GDT - * block. If we are adding a group past the last current GDT block, + * blocks. If we are adding a group past the last current GDT block, * we will also modify the inode and the dindirect block. If we * are adding a group with superblock/GDT backups we will also * modify each of the reserved GDT dindirect blocks. */ - credit = flex_gd->count * 4 + reserved_gdb; + credit = 3; /* sb, resize inode, resize inode dindirect */ + /* GDT blocks */ + credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb)); + credit += reserved_gdb; /* Reserved GDT dindirect blocks */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit); if (IS_ERR(handle)) { err = PTR_ERR(handle); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 19f78f20975e..187b78920314 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -74,7 +74,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) goto errout; } pstr.name = paddr; - res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr); + res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr); if (res < 0) goto errout; /* Null-terminate the name */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b91b0e10678e..1e1aae669fa8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + bool locked = false; int ret; long diff; @@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d8921cf2ba9a..8de34ab6d5b1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -625,6 +625,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7e3794edae42..658e8079aaf9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -298,16 +298,14 @@ fail: static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct page *page; + struct page *page = page_follow_link_light(dentry, nd); - page = page_follow_link_light(dentry, nd); - if (IS_ERR(page)) + if (IS_ERR_OR_NULL(page)) return page; /* this is broken symlink case */ if (*nd_get_link(nd) == 0) { - kunmap(page); - page_cache_release(page); + page_put_link(dentry, nd, page); return ERR_PTR(-ENOENT); } return page; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 160b88346b24..b2dd1b01f076 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1035,6 +1035,7 @@ try_onemore: sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); clear_sbi_flag(sbi, SBI_POR_DOING); diff --git a/fs/namei.c b/fs/namei.c index 4a8d998b7274..fe30d3be43a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1415,6 +1415,7 @@ static int lookup_fast(struct nameidata *nd, */ if (nd->flags & LOOKUP_RCU) { unsigned seq; + bool negative; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (!dentry) goto unlazy; @@ -1424,8 +1425,11 @@ static int lookup_fast(struct nameidata *nd, * the dentry name information from lookup. */ *inode = dentry->d_inode; + negative = d_is_negative(dentry); if (read_seqcount_retry(&dentry->d_seq, seq)) return -ECHILD; + if (negative) + return -ENOENT; /* * This sequence count validates that the parent had no @@ -1472,6 +1476,10 @@ unlazy: goto need_lookup; } + if (unlikely(d_is_negative(dentry))) { + dput(dentry); + return -ENOENT; + } path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd->flags); @@ -1583,10 +1591,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path, goto out_err; inode = path->dentry->d_inode; + err = -ENOENT; + if (d_is_negative(path->dentry)) + goto out_path_put; } - err = -ENOENT; - if (d_is_negative(path->dentry)) - goto out_path_put; if (should_follow_link(path->dentry, follow)) { if (nd->flags & LOOKUP_RCU) { @@ -3036,14 +3044,13 @@ retry_lookup: BUG_ON(nd->flags & LOOKUP_RCU); inode = path->dentry->d_inode; -finish_lookup: - /* we _can_ be in RCU mode here */ error = -ENOENT; if (d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } - +finish_lookup: + /* we _can_ be in RCU mode here */ if (should_follow_link(path->dentry, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(nd->path.mnt != path->mnt || @@ -3226,7 +3233,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); - goto out; + goto out2; } error = path_init(dfd, pathname, flags, nd); @@ -3256,6 +3263,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, } out: path_cleanup(nd); +out2: if (!(opened & FILE_OPENED)) { BUG_ON(!error); put_filp(file); diff --git a/fs/namespace.c b/fs/namespace.c index 1f4f9dac6e5a..1b9e11167bae 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3179,6 +3179,12 @@ bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_sb->s_type != type) continue; + /* This mount is not fully visible if it's root directory + * is not the root directory of the filesystem. + */ + if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) + continue; + /* This mount is not fully visible if there are any child mounts * that cover anything except for empty directories. */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 03d647bf195d..cdefaa331a07 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, } const struct nfsd4_layout_ops bl_layout_ops = { + /* + * Pretend that we send notification to the client. This is a blatant + * lie to force recent Linux clients to cache our device IDs. + * We rarely ever change the device ID, so the harm of leaking deviceids + * for a while isn't too bad. Unfortunately RFC5661 is a complete mess + * in this regard, but I filed errata 4119 for this a while ago, and + * hopefully the Linux client will eventually start caching deviceids + * without this again. + */ + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, .proc_layoutget = nfsd4_block_proc_layoutget, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 58277859a467..5694cfb7a47b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status) } static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, - enum nfsstat4 *status) + int *status) { __be32 *p; u32 op; @@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, op = be32_to_cpup(p++); if (unlikely(op != expected)) goto out_unexpected; - *status = be32_to_cpup(p); + *status = nfs_cb_stat_to_errno(be32_to_cpup(p)); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -446,22 +446,16 @@ out_overflow: static int decode_cb_sequence4res(struct xdr_stream *xdr, struct nfsd4_callback *cb) { - enum nfsstat4 nfserr; int status; if (cb->cb_minorversion == 0) return 0; - status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr); - if (unlikely(status)) - goto out; - if (unlikely(nfserr != NFS4_OK)) - goto out_default; - status = decode_cb_sequence4resok(xdr, cb); -out: - return status; -out_default: - return nfs_cb_stat_to_errno(nfserr); + status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status); + if (unlikely(status || cb->cb_status)) + return status; + + return decode_cb_sequence4resok(xdr, cb); } /* @@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, struct nfsd4_callback *cb) { struct nfs4_cb_compound_hdr hdr; - enum nfsstat4 nfserr; int status; status = decode_cb_compound4res(xdr, &hdr); if (unlikely(status)) - goto out; + return status; if (cb != NULL) { status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status)) - goto out; + if (unlikely(status || cb->cb_status)) + return status; } - status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr); - if (unlikely(status)) - goto out; - if (unlikely(nfserr != NFS4_OK)) - status = nfs_cb_stat_to_errno(nfserr); -out: - return status; + return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } #ifdef CONFIG_NFSD_PNFS @@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, struct nfsd4_callback *cb) { struct nfs4_cb_compound_hdr hdr; - enum nfsstat4 nfserr; int status; status = decode_cb_compound4res(xdr, &hdr); if (unlikely(status)) - goto out; + return status; + if (cb) { status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status)) - goto out; + if (unlikely(status || cb->cb_status)) + return status; } - status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); - if (unlikely(status)) - goto out; - if (unlikely(nfserr != NFS4_OK)) - status = nfs_cb_stat_to_errno(nfserr); -out: - return status; + return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status); } #endif /* CONFIG_NFSD_PNFS */ @@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) if (!nfsd41_cb_get_slot(clp, task)) return; } - spin_lock(&clp->cl_lock); - if (list_empty(&cb->cb_per_client)) { - /* This is the first call, not a restart */ - cb->cb_done = false; - list_add(&cb->cb_per_client, &clp->cl_callbacks); - } - spin_unlock(&clp->cl_lock); rpc_call_start(task); } @@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) if (clp->cl_minorversion) { /* No need for lock, access serialized in nfsd4_cb_prepare */ - ++clp->cl_cb_session->se_cb_seq_nr; + if (!task->tk_status) + ++clp->cl_cb_session->se_cb_seq_nr; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, clp->cl_cb_session->se_cb_seq_nr); } - if (clp->cl_cb_client != task->tk_client) { - /* We're shutting down or changing cl_cb_client; leave - * it to nfsd4_process_cb_update to restart the call if - * necessary. */ + /* + * If the backchannel connection was shut down while this + * task was queued, we need to resubmit it after setting up + * a new backchannel connection. + * + * Note that if we lost our callback connection permanently + * the submission code will error out, so we don't need to + * handle that case here. + */ + if (task->tk_flags & RPC_TASK_KILLED) { + task->tk_status = 0; + cb->cb_need_restart = true; return; } - if (cb->cb_done) - return; + if (cb->cb_status) { + WARN_ON_ONCE(task->tk_status); + task->tk_status = cb->cb_status; + } switch (cb->cb_ops->done(cb, task)) { case 0: @@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) default: BUG(); } - cb->cb_done = true; } static void nfsd4_cb_release(void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_client *clp = cb->cb_clp; - - if (cb->cb_done) { - spin_lock(&clp->cl_lock); - list_del(&cb->cb_per_client); - spin_unlock(&clp->cl_lock); + if (cb->cb_need_restart) + nfsd4_run_cb(cb); + else cb->cb_ops->release(cb); - } + } static const struct rpc_call_ops nfsd4_cb_ops = { @@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) nfsd4_mark_cb_down(clp, err); return; } - /* Yay, the callback channel's back! Restart any callbacks: */ - list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) - queue_work(callback_wq, &cb->cb_work); } static void @@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work) struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - if (cb->cb_ops && cb->cb_ops->prepare) - cb->cb_ops->prepare(cb); + if (cb->cb_need_restart) { + cb->cb_need_restart = false; + } else { + if (cb->cb_ops && cb->cb_ops->prepare) + cb->cb_ops->prepare(cb); + } if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) nfsd4_process_cb_update(cb); @@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work) cb->cb_ops->release(cb); return; } + + /* + * Don't send probe messages for 4.1 or later. + */ + if (!cb->cb_ops && clp->cl_minorversion) { + clp->cl_cb_state = NFSD4_CB_UP; + return; + } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); @@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); - INIT_LIST_HEAD(&cb->cb_per_client); - cb->cb_done = true; + cb->cb_status = 0; + cb->cb_need_restart = false; } void nfsd4_run_cb(struct nfsd4_callback *cb) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 38f2d7abe3a7..039f9c8a95e8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; +static struct kmem_cache *odstate_slab; static void free_session(struct nfsd4_session *); @@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi) if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del_rcu(&fi->fi_hash); spin_unlock(&state_lock); + WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); } @@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) __nfs4_file_put_access(fp, O_RDONLY); } +/* + * Allocate a new open/delegation state counter. This is needed for + * pNFS for proper return on close semantics. + * + * Note that we only allocate it for pNFS-enabled exports, otherwise + * all pointers to struct nfs4_clnt_odstate are always NULL. + */ +static struct nfs4_clnt_odstate * +alloc_clnt_odstate(struct nfs4_client *clp) +{ + struct nfs4_clnt_odstate *co; + + co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); + if (co) { + co->co_client = clp; + atomic_set(&co->co_odcount, 1); + } + return co; +} + +static void +hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co) +{ + struct nfs4_file *fp = co->co_file; + + lockdep_assert_held(&fp->fi_lock); + list_add(&co->co_perfile, &fp->fi_clnt_odstate); +} + +static inline void +get_clnt_odstate(struct nfs4_clnt_odstate *co) +{ + if (co) + atomic_inc(&co->co_odcount); +} + +static void +put_clnt_odstate(struct nfs4_clnt_odstate *co) +{ + struct nfs4_file *fp; + + if (!co) + return; + + fp = co->co_file; + if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { + list_del(&co->co_perfile); + spin_unlock(&fp->fi_lock); + + nfsd4_return_all_file_layouts(co->co_client, fp); + kmem_cache_free(odstate_slab, co); + } +} + +static struct nfs4_clnt_odstate * +find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new) +{ + struct nfs4_clnt_odstate *co; + struct nfs4_client *cl; + + if (!new) + return NULL; + + cl = new->co_client; + + spin_lock(&fp->fi_lock); + list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { + if (co->co_client == cl) { + get_clnt_odstate(co); + goto out; + } + } + co = new; + co->co_file = fp; + hash_clnt_odstate_locked(new); +out: + spin_unlock(&fp->fi_lock); + return co; +} + struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) { @@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) +alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, + struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; long n; @@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); + dp->dl_clnt_odstate = odstate; + get_clnt_odstate(odstate); dp->dl_type = NFS4_OPEN_DELEGATE_READ; dp->dl_retries = 1; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, @@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); unhash_delegation_locked(dp); spin_unlock(&state_lock); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } @@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); if (clp->cl_minorversion == 0) @@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) { struct nfs4_ol_stateid *stp = openlockstateid(stid); + put_clnt_odstate(stp->st_clnt_odstate); release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); @@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); INIT_LIST_HEAD(&clp->cl_revoked); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&clp->cl_lo_states); @@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp) while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } @@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); + INIT_LIST_HEAD(&fp->fi_clnt_odstate); fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_deleg_file = NULL; fp->fi_had_conflict = false; @@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, void nfsd4_free_slabs(void) { + kmem_cache_destroy(odstate_slab); kmem_cache_destroy(openowner_slab); kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(file_slab); @@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void) sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) goto out_free_stateid_slab; + odstate_slab = kmem_cache_create("nfsd4_odstate", + sizeof(struct nfs4_clnt_odstate), 0, 0, NULL); + if (odstate_slab == NULL) + goto out_free_deleg_slab; return 0; +out_free_deleg_slab: + kmem_cache_destroy(deleg_slab); out_free_stateid_slab: kmem_cache_destroy(stateid_slab); out_free_file_slab: @@ -3581,6 +3677,14 @@ alloc_stateid: open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; + + if (nfsd4_has_session(cstate) && + (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) { + open->op_odstate = alloc_clnt_odstate(clp); + if (!open->op_odstate) + return nfserr_jukebox; + } + return nfs_ok; } @@ -3869,7 +3973,7 @@ out_fput: static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - struct nfs4_file *fp) + struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) { int status; struct nfs4_delegation *dp; @@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); - dp = alloc_init_deleg(clp, fh); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -3903,6 +4007,7 @@ out_unlock: spin_unlock(&state_lock); out: if (status) { + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_stid(&dp->dl_stid); return ERR_PTR(status); } @@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); + dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); if (IS_ERR(dp)) goto out_no_deleg; @@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf release_open_stateid(stp); goto out; } + + stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp, + open->op_odstate); + if (stp->st_clnt_odstate == open->op_odstate) + open->op_odstate = NULL; } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, kmem_cache_free(file_slab, open->op_file); if (open->op_stp) nfs4_put_stid(&open->op_stp->st_stid); + if (open->op_odstate) + kmem_cache_free(odstate_slab, open->op_odstate); } __be32 @@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s return nfserr_old_stateid; } +static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) +{ + if (ols->st_stateowner->so_is_open_owner && + !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; + return nfs_ok; +} + static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; - struct nfs4_ol_stateid *ols; __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) @@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - ols = openlockstateid(s); - if (ols->st_stateowner->so_is_open_owner - && !(openowner(ols->st_stateowner)->oo_flags - & NFS4_OO_CONFIRMED)) - status = nfserr_bad_stateid; - else - status = nfs_ok; + status = nfsd4_check_openowner_confirmed(openlockstateid(s)); break; default: printk("unknown stateid type %x\n", s->sc_type); @@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, status = nfs4_check_fh(current_fh, stp); if (status) goto out; - if (stp->st_stateowner->so_is_open_owner - && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + status = nfsd4_check_openowner_confirmed(stp); + if (status) goto out; status = nfs4_check_openmode(stp, flags); if (status) @@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - nfsd4_return_all_file_layouts(stp->st_stateowner->so_client, - stp->st_stid.sc_file); - nfsd4_close_open_stateid(stp); /* put reference from nfs4_preprocess_seqid_op */ @@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net) list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4f3bfeb11766..dbc4f85a5008 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -63,12 +63,12 @@ typedef struct { struct nfsd4_callback { struct nfs4_client *cb_clp; - struct list_head cb_per_client; u32 cb_minorversion; struct rpc_message cb_msg; struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; - bool cb_done; + int cb_status; + bool cb_need_restart; }; struct nfsd4_callback_ops { @@ -126,6 +126,7 @@ struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ + struct nfs4_clnt_odstate *dl_clnt_odstate; u32 dl_type; time_t dl_time; /* For recall: */ @@ -332,7 +333,6 @@ struct nfs4_client { int cl_cb_state; struct nfsd4_callback cl_cb_null; struct nfsd4_session *cl_cb_session; - struct list_head cl_callbacks; /* list of in-progress callbacks */ /* for all client information that callback code might need: */ spinlock_t cl_lock; @@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) } /* + * Per-client state indicating no. of opens and outstanding delegations + * on a file from a particular client.'od' stands for 'open & delegation' + */ +struct nfs4_clnt_odstate { + struct nfs4_client *co_client; + struct nfs4_file *co_file; + struct list_head co_perfile; + atomic_t co_odcount; +}; + +/* * nfs4_file: a file opened by some number of (open) nfs4_stateowners. * * These objects are global. nfsd keeps one instance of a nfs4_file per @@ -485,6 +496,7 @@ struct nfs4_file { struct list_head fi_delegations; struct rcu_head fi_rcu; }; + struct list_head fi_clnt_odstate; /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ struct file * fi_fds[3]; /* @@ -526,6 +538,7 @@ struct nfs4_ol_stateid { struct list_head st_perstateowner; struct list_head st_locks; struct nfs4_stateowner * st_stateowner; + struct nfs4_clnt_odstate * st_clnt_odstate; unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid * st_openstp; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f982ae84f0cd..2f8c092be2b3 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -247,6 +247,7 @@ struct nfsd4_open { struct nfs4_openowner *op_openowner; /* used during processing */ struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ + struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; }; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 059f37137f9a..919fd5bb14a8 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || - level > NILFS_BTREE_LEVEL_MAX || + level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a6944b25fd5b..fdf4b41d0609 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -757,6 +757,19 @@ lookup: if (tmpres) { spin_unlock(&dlm->spinlock); spin_lock(&tmpres->spinlock); + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&tmpres->hash_node)) { + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; + } + /* Wait on the thread that is mastering the resource */ if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { __dlm_wait_on_lockres(tmpres); diff --git a/fs/splice.c b/fs/splice.c index 476024bb6546..bfe62ae40f40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, long ret, bytes; umode_t i_mode; size_t len; - int i, flags; + int i, flags, more; /* * We require the input being a regular file, as we don't want to @@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * Don't block on output, we have to drain the direct pipe. */ sd->flags &= ~SPLICE_F_NONBLOCK; + more = sd->flags & SPLICE_F_MORE; while (len) { size_t read_len; @@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, sd->total_len = read_len; /* + * If more data is pending, set SPLICE_F_MORE + * If this is the last data and SPLICE_F_MORE was not set + * initially, clears it. + */ + if (read_len < len) + sd->flags |= SPLICE_F_MORE; + else if (!more) + sd->flags &= ~SPLICE_F_MORE; + /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: |