diff options
Diffstat (limited to 'fs')
46 files changed, 507 insertions, 342 deletions
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index c264839b2fd0..24fd163c6323 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -71,7 +71,7 @@ static bool afs_get_io_locks(struct afs_operation *op) swap(vnode, vnode2); if (mutex_lock_interruptible(&vnode->io_lock) < 0) { - op->error = -EINTR; + op->error = -ERESTARTSYS; op->flags |= AFS_OPERATION_STOP; _leave(" = f [I 0]"); return false; @@ -80,7 +80,7 @@ static bool afs_get_io_locks(struct afs_operation *op) if (vnode2) { if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) { - op->error = -EINTR; + op->error = -ERESTARTSYS; op->flags |= AFS_OPERATION_STOP; mutex_unlock(&vnode->io_lock); op->flags &= ~AFS_OPERATION_LOCK_0; diff --git a/fs/afs/write.c b/fs/afs/write.c index 7437806332d9..a121c247d95a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -449,6 +449,7 @@ static int afs_store_data(struct address_space *mapping, op->store.first_offset = offset; op->store.last_to = to; op->mtime = vnode->vfs_inode.i_mtime; + op->flags |= AFS_OPERATION_UNINTR; op->ops = &afs_store_data_operation; try_next_key: diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index b04c528b19d3..74c886f7c51c 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi, mutex_lock(&sbi->pipe_mutex); while (bytes) { - wr = __kernel_write(file, data, bytes, &file->f_pos); + wr = kernel_write(file, data, bytes, &file->f_pos); if (wr <= 0) break; data += wr; diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index d888e71e66b6..ea10f7bc99ab 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1461,6 +1461,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, if (ret < 0 && ret != -ENOENT) { ulist_free(tmp); ulist_free(*roots); + *roots = NULL; return ret; } node = ulist_next(tmp, &uiter); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3a7648bff42c..82ab6e5a386d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1196,7 +1196,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); - /* Fallthrough */ + fallthrough; case MOD_LOG_KEY_REMOVE_WHILE_MOVING: case MOD_LOG_KEY_REMOVE: btrfs_set_node_key(eb, &tm->key, tm->slot); diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 5615320fa659..741c7e19c32f 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -619,6 +619,7 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, bg_list) { list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); } spin_unlock(&fs_info->unused_bgs_lock); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7c6f0bbb54a5..b1a148058773 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2593,10 +2593,12 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) !extent_buffer_uptodate(tree_root->node)) { handle_error = true; - if (IS_ERR(tree_root->node)) + if (IS_ERR(tree_root->node)) { ret = PTR_ERR(tree_root->node); - else if (!extent_buffer_uptodate(tree_root->node)) + tree_root->node = NULL; + } else if (!extent_buffer_uptodate(tree_root->node)) { ret = -EUCLEAN; + } btrfs_warn(fs_info, "failed to read tree root"); continue; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68c96057ad2d..60278e52c37a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1999,7 +1999,8 @@ static int __process_pages_contig(struct address_space *mapping, if (!PageDirty(pages[i]) || pages[i]->mapping != mapping) { unlock_page(pages[i]); - put_page(pages[i]); + for (; i < ret; i++) + put_page(pages[i]); err = -EAGAIN; goto out; } @@ -5058,25 +5059,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; - /* the ref bit is tricky. We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page + /* + * The TREE_REF bit is first set when the extent_buffer is added + * to the radix tree. It is also reset, if unset, when a new reference + * is created by find_extent_buffer. * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. + * It is only cleared in two cases: freeing the last non-tree + * reference to the extent_buffer when its STALE bit is set or + * calling releasepage when the tree reference is the only reference. * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. + * In both cases, care is taken to ensure that the extent_buffer's + * pages are not under io. However, releasepage can be concurrently + * called with creating new references, which is prone to race + * conditions between the calls to check_buffer_tree_ref in those + * codepaths and clearing TREE_REF in try_release_extent_buffer. * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. + * The actual lifetime of the extent_buffer in the radix tree is + * adequately protected by the refcount, but the TREE_REF bit and + * its corresponding reference are not. To protect against this + * class of races, we call check_buffer_tree_ref from the codepaths + * which trigger io after they set eb->io_pages. Note that once io is + * initiated, TREE_REF can no longer be cleared, so that is the + * moment at which any such race is best fixed. */ refs = atomic_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) @@ -5527,6 +5531,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); + /* + * It is possible for releasepage to clear the TREE_REF bit before we + * set io_pages. See check_buffer_tree_ref for a more detailed comment. + */ + check_buffer_tree_ref(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2520605afc25..b0d2c976587e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3509,6 +3509,7 @@ const struct file_operations btrfs_file_operations = { .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, + .splice_write = iter_file_splice_write, .mmap = btrfs_file_mmap, .open = btrfs_file_open, .release = btrfs_release_file, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 18d384f4af54..6862cd7e21a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1690,12 +1690,8 @@ out_check: ret = fallback_to_cow(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written); - if (ret) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); + if (ret) goto error; - } cow_start = (u64)-1; } @@ -1711,9 +1707,6 @@ out_check: ram_bytes, BTRFS_COMPRESS_NONE, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); ret = PTR_ERR(em); goto error; } @@ -8130,20 +8123,17 @@ again: /* * Qgroup reserved space handler * Page here will be either - * 1) Already written to disk - * In this case, its reserved space is released from data rsv map - * and will be freed by delayed_ref handler finally. - * So even we call qgroup_free_data(), it won't decrease reserved - * space. - * 2) Not written to disk - * This means the reserved space should be freed here. However, - * if a truncate invalidates the page (by clearing PageDirty) - * and the page is accounted for while allocating extent - * in btrfs_check_data_free_space() we let delayed_ref to - * free the entire extent. + * 1) Already written to disk or ordered extent already submitted + * Then its QGROUP_RESERVED bit in io_tree is already cleaned. + * Qgroup will be handled by its qgroup_record then. + * btrfs_qgroup_free_data() call will do nothing here. + * + * 2) Not written to disk yet + * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED + * bit of its io_tree, and free the qgroup reserved data space. + * Since the IO will never happen for this page. */ - if (PageDirty(page)) - btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); + btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 7887317033c9..af92525dbb16 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -509,7 +509,7 @@ static int process_leaf(struct btrfs_root *root, switch (key.type) { case BTRFS_EXTENT_ITEM_KEY: *num_bytes = key.offset; - /* fall through */ + fallthrough; case BTRFS_METADATA_ITEM_KEY: *bytenr = key.objectid; ret = process_extent_item(fs_info, path, &key, i, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 41ee88633769..c7bd3fdd7792 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -879,8 +879,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, return false; } global_rsv->reserved -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; - list_del_init(&ticket->list); wake_up(&ticket->wait); space_info->tickets_id++; if (global_rsv->reserved < global_rsv->size) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bc73fd670702..c3826ae883f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -523,7 +523,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_compress_force: case Opt_compress_force_type: compress_force = true; - /* Fallthrough */ + fallthrough; case Opt_compress: case Opt_compress_type: saved_compress_type = btrfs_test_opt(info, @@ -622,7 +622,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_opt(info->mount_opt, NOSSD); btrfs_clear_and_info(info, SSD, "not using ssd optimizations"); - /* Fallthrough */ + fallthrough; case Opt_nossd_spread: btrfs_clear_and_info(info, SSD_SPREAD, "not using spread ssd allocation scheme"); @@ -793,7 +793,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_recovery: btrfs_warn(info, "'recovery' is deprecated, use 'usebackuproot' instead"); - /* fall through */ + fallthrough; case Opt_usebackuproot: btrfs_info(info, "trying to use backup root at mount time"); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0d6e785bcb98..f403fb1e6d37 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7052,6 +7052,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) mutex_lock(&fs_info->chunk_mutex); /* + * It is possible for mount and umount to race in such a way that + * we execute this code path, but open_fs_devices failed to clear + * total_rw_bytes. We certainly want it cleared before reading the + * device items, so clear it here. + */ + fs_info->fs_devices->total_rw_bytes = 0; + + /* * Read all device items, and then all the chunk items. All * device items are found before any chunk item (their object id * is smaller than the lowest possible object id for a chunk diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f067b5934c46..75af2334b2e3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -408,7 +408,7 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) return BTRFS_MAP_WRITE; default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case REQ_OP_READ: return BTRFS_MAP_READ; } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index e7726f5f1241..3080cda9e824 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -937,7 +937,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) } data = kmap(page); - ret = __kernel_write(file, data, len, &pos); + ret = kernel_write(file, data, len, &pos); kunmap(page); fput(file); if (ret != len) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c7a311d28d3d..99b3180c613a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -156,5 +156,5 @@ extern int cifs_truncate_page(struct address_space *mapping, loff_t from); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.27" +#define CIFS_VERSION "2.28" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9b0f8f33f832..be46fab4c96d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1149,20 +1149,20 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) /* * Set the byte-range lock (posix style). Returns: - * 1) 0, if we set the lock and don't need to request to the server; - * 2) 1, if we need to request to the server; - * 3) <0, if the error occurs while setting the lock. + * 1) <0, if the error occurs while setting the lock; + * 2) 0, if we set the lock and don't need to request to the server; + * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; + * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. */ static int cifs_posix_lock_set(struct file *file, struct file_lock *flock) { struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); - int rc = 1; + int rc = FILE_LOCK_DEFERRED + 1; if ((flock->fl_flags & FL_POSIX) == 0) return rc; -try_again: cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); @@ -1171,13 +1171,6 @@ try_again: rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); - if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, - list_empty(&flock->fl_blocked_member)); - if (!rc) - goto try_again; - locks_delete_block(flock); - } return rc; } @@ -1652,7 +1645,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, int posix_lock_type; rc = cifs_posix_lock_set(file, flock); - if (!rc || rc < 0) + if (rc <= FILE_LOCK_DEFERRED) return rc; if (type & server->vals->shared_lock_type) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 49c3ea8aa845..ce95801e9b66 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2044,7 +2044,6 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; - bool new_target = d_really_is_negative(target_dentry); if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -2121,13 +2120,8 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, */ unlink_target: - /* - * If the target dentry was created during the rename, try - * unlinking it if it's not negative - */ - if (new_target && - d_really_is_positive(target_dentry) && - (rc == -EACCES || rc == -EEXIST)) { + /* Try unlinking the target dentry if it's not negative */ + if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) { if (d_is_dir(target_dentry)) tmprc = cifs_rmdir(target_dir, target_dentry); else diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 4a73e63c4d43..dcde44ff6cf9 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; struct cifs_tcon *tcon; + struct tcon_link *tlink; struct cifs_sb_info *cifs_sb; __u64 ExtAttrBits = 0; __u64 caps; @@ -307,13 +308,19 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) break; } cifs_sb = CIFS_SB(inode->i_sb); - tcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); if (tcon && tcon->ses->server->ops->notify) { rc = tcon->ses->server->ops->notify(xid, filep, (void __user *)arg); cifs_dbg(FYI, "ioctl notify rc %d\n", rc); } else rc = -EOPNOTSUPP; + cifs_put_tlink(tlink); break; default: cifs_dbg(FYI, "unsupported ioctl\n"); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 6a39451973f8..157992864ce7 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -354,9 +354,13 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr) ((struct smb2_ioctl_rsp *)shdr)->OutputCount); break; case SMB2_CHANGE_NOTIFY: + *off = le16_to_cpu( + ((struct smb2_change_notify_rsp *)shdr)->OutputBufferOffset); + *len = le32_to_cpu( + ((struct smb2_change_notify_rsp *)shdr)->OutputBufferLength); + break; default: - /* BB FIXME for unimplemented cases above */ - cifs_dbg(VFS, "no length check for command\n"); + cifs_dbg(VFS, "no length check for command %d\n", le16_to_cpu(shdr->Command)); break; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d9fdafa5eb60..32f90dc82c84 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2148,7 +2148,7 @@ smb3_notify(const unsigned int xid, struct file *pfile, tcon = cifs_sb_master_tcon(cifs_sb); oparms.tcon = tcon; - oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 12c66f5d92dd..28bb5689333a 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -201,6 +201,9 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; + if (!efivar_supports_writes()) + sb->s_flags |= SB_RDONLY; + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true); if (!inode) return -ENOMEM; @@ -252,9 +255,6 @@ static struct file_system_type efivarfs_type = { static __init int efivarfs_init(void) { - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return -ENODEV; - if (!efivars_kobject()) return -ENODEV; diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 91ece649285d..119abf0d8dd6 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -1112,7 +1112,7 @@ found: ret = exfat_get_next_cluster(sb, &clu.dir); } - if (ret || clu.dir != EXFAT_EOF_CLUSTER) { + if (ret || clu.dir == EXFAT_EOF_CLUSTER) { /* just initialized hint_stat */ hint_stat->clu = p_dir->dir; hint_stat->eidx = 0; diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 7579cd3bbadb..75c7bdbeba6d 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -371,7 +371,7 @@ static inline bool exfat_is_last_sector_in_cluster(struct exfat_sb_info *sbi, static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi, unsigned int clus) { - return ((clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) + + return ((sector_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) + sbi->data_start_sector; } diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 3b7fea465fd4..a6a063830edc 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -176,7 +176,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) ep2->dentry.stream.size = 0; } else { ep2->dentry.stream.valid_size = cpu_to_le64(new_size); - ep2->dentry.stream.size = ep->dentry.stream.valid_size; + ep2->dentry.stream.size = ep2->dentry.stream.valid_size; } if (new_size == 0) { diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 57b5a7a4d1f7..a3c927501e67 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -495,7 +495,7 @@ static int exfat_utf8_to_utf16(struct super_block *sb, struct exfat_uni_name *p_uniname, int *p_lossy) { int i, unilen, lossy = NLS_NAME_NO_LOSSY; - unsigned short upname[MAX_NAME_LENGTH + 1]; + __le16 upname[MAX_NAME_LENGTH + 1]; unsigned short *uniname = p_uniname->name; WARN_ON(!len); @@ -519,7 +519,7 @@ static int exfat_utf8_to_utf16(struct super_block *sb, exfat_wstrchr(bad_uni_chars, *uniname)) lossy |= NLS_NAME_LOSSY; - upname[i] = exfat_toupper(sb, *uniname); + upname[i] = cpu_to_le16(exfat_toupper(sb, *uniname)); uniname++; } @@ -597,7 +597,7 @@ static int exfat_nls_to_ucs2(struct super_block *sb, struct exfat_uni_name *p_uniname, int *p_lossy) { int i = 0, unilen = 0, lossy = NLS_NAME_NO_LOSSY; - unsigned short upname[MAX_NAME_LENGTH + 1]; + __le16 upname[MAX_NAME_LENGTH + 1]; unsigned short *uniname = p_uniname->name; struct nls_table *nls = EXFAT_SB(sb)->nls_io; @@ -611,7 +611,7 @@ static int exfat_nls_to_ucs2(struct super_block *sb, exfat_wstrchr(bad_uni_chars, *uniname)) lossy |= NLS_NAME_LOSSY; - upname[unilen] = exfat_toupper(sb, *uniname); + upname[unilen] = cpu_to_le16(exfat_toupper(sb, *uniname)); uniname++; unilen++; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e573b0cd2737..83d917f7e542 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -18,6 +18,7 @@ #include <linux/swap.h> #include <linux/falloc.h> #include <linux/uio.h> +#include <linux/fs.h> static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, struct fuse_page_desc **desc) @@ -1586,7 +1587,6 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct backing_dev_info *bdi = inode_to_bdi(inode); int i; - rb_erase(&wpa->writepages_entry, &fi->writepages); for (i = 0; i < ap->num_pages; i++) { dec_wb_stat(&bdi->wb, WB_WRITEBACK); dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); @@ -1637,6 +1637,7 @@ __acquires(fi->lock) out_free: fi->writectr--; + rb_erase(&wpa->writepages_entry, &fi->writepages); fuse_writepage_finish(fc, wpa); spin_unlock(&fi->lock); @@ -1674,7 +1675,8 @@ __acquires(fi->lock) } } -static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) +static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root, + struct fuse_writepage_args *wpa) { pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT; pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1; @@ -1697,11 +1699,17 @@ static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) else if (idx_to < curr_index) p = &(*p)->rb_left; else - return (void) WARN_ON(true); + return curr; } rb_link_node(&wpa->writepages_entry, parent, p); rb_insert_color(&wpa->writepages_entry, root); + return NULL; +} + +static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) +{ + WARN_ON(fuse_insert_writeback(root, wpa)); } static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, @@ -1714,6 +1722,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, mapping_set_error(inode->i_mapping, error); spin_lock(&fi->lock); + rb_erase(&wpa->writepages_entry, &fi->writepages); while (wpa->next) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &wpa->ia.write.in; @@ -1952,14 +1961,14 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) } /* - * First recheck under fi->lock if the offending offset is still under - * writeback. If yes, then iterate auxiliary write requests, to see if there's + * Check under fi->lock if the page is under writeback, and insert it onto the + * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's * one already added for a page at this offset. If there's none, then insert * this new request onto the auxiliary list, otherwise reuse the existing one by - * copying the new page contents over to the old temporary page. + * swapping the new temp page with the old one. */ -static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, - struct page *page) +static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa, + struct page *page) { struct fuse_inode *fi = get_fuse_inode(new_wpa->inode); struct fuse_writepage_args *tmp; @@ -1967,17 +1976,15 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, struct fuse_args_pages *new_ap = &new_wpa->ia.ap; WARN_ON(new_ap->num_pages != 0); + new_ap->num_pages = 1; spin_lock(&fi->lock); - rb_erase(&new_wpa->writepages_entry, &fi->writepages); - old_wpa = fuse_find_writeback(fi, page->index, page->index); + old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa); if (!old_wpa) { - tree_insert(&fi->writepages, new_wpa); spin_unlock(&fi->lock); - return false; + return true; } - new_ap->num_pages = 1; for (tmp = old_wpa->next; tmp; tmp = tmp->next) { pgoff_t curr_index; @@ -2006,7 +2013,41 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, fuse_writepage_free(new_wpa); } - return true; + return false; +} + +static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, + struct fuse_args_pages *ap, + struct fuse_fill_wb_data *data) +{ + WARN_ON(!ap->num_pages); + + /* + * Being under writeback is unlikely but possible. For example direct + * read to an mmaped fuse file will set the page dirty twice; once when + * the pages are faulted with get_user_pages(), and then after the read + * completed. + */ + if (fuse_page_is_writeback(data->inode, page->index)) + return true; + + /* Reached max pages */ + if (ap->num_pages == fc->max_pages) + return true; + + /* Reached max write bytes */ + if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write) + return true; + + /* Discontinuity */ + if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index) + return true; + + /* Need to grow the pages array? If so, did the expansion fail? */ + if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data)) + return true; + + return false; } static int fuse_writepages_fill(struct page *page, @@ -2019,7 +2060,6 @@ static int fuse_writepages_fill(struct page *page, struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct page *tmp_page; - bool is_writeback; int err; if (!data->ff) { @@ -2029,25 +2069,9 @@ static int fuse_writepages_fill(struct page *page, goto out_unlock; } - /* - * Being under writeback is unlikely but possible. For example direct - * read to an mmaped fuse file will set the page dirty twice; once when - * the pages are faulted with get_user_pages(), and then after the read - * completed. - */ - is_writeback = fuse_page_is_writeback(inode, page->index); - - if (wpa && ap->num_pages && - (is_writeback || ap->num_pages == fc->max_pages || - (ap->num_pages + 1) * PAGE_SIZE > fc->max_write || - data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) { + if (wpa && fuse_writepage_need_send(fc, page, ap, data)) { fuse_writepages_send(data); data->wpa = NULL; - } else if (wpa && ap->num_pages == data->max_pages) { - if (!fuse_pages_realloc(data)) { - fuse_writepages_send(data); - data->wpa = NULL; - } } err = -ENOMEM; @@ -2085,12 +2109,6 @@ static int fuse_writepages_fill(struct page *page, ap->args.end = fuse_writepage_end; ap->num_pages = 0; wpa->inode = inode; - - spin_lock(&fi->lock); - tree_insert(&fi->writepages, wpa); - spin_unlock(&fi->lock); - - data->wpa = wpa; } set_page_writeback(page); @@ -2098,26 +2116,25 @@ static int fuse_writepages_fill(struct page *page, ap->pages[ap->num_pages] = tmp_page; ap->descs[ap->num_pages].offset = 0; ap->descs[ap->num_pages].length = PAGE_SIZE; + data->orig_pages[ap->num_pages] = page; inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); err = 0; - if (is_writeback && fuse_writepage_in_flight(wpa, page)) { + if (data->wpa) { + /* + * Protected by fi->lock against concurrent access by + * fuse_page_is_writeback(). + */ + spin_lock(&fi->lock); + ap->num_pages++; + spin_unlock(&fi->lock); + } else if (fuse_writepage_add(wpa, page)) { + data->wpa = wpa; + } else { end_page_writeback(page); - data->wpa = NULL; - goto out_unlock; } - data->orig_pages[ap->num_pages] = page; - - /* - * Protected by fi->lock against concurrent access by - * fuse_page_is_writeback(). - */ - spin_lock(&fi->lock); - ap->num_pages++; - spin_unlock(&fi->lock); - out_unlock: unlock_page(page); @@ -2149,10 +2166,8 @@ static int fuse_writepages(struct address_space *mapping, err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); if (data.wpa) { - /* Ignore errors if we can write at least one page */ WARN_ON(!data.wpa->ia.ap.num_pages); fuse_writepages_send(&data); - err = 0; } if (data.ff) fuse_file_put(data.ff, false, false); @@ -2761,7 +2776,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, struct iovec *iov = iov_page; iov->iov_base = (void __user *)arg; - iov->iov_len = _IOC_SIZE(cmd); + + switch (cmd) { + case FS_IOC_GETFLAGS: + case FS_IOC_SETFLAGS: + iov->iov_len = sizeof(int); + break; + default: + iov->iov_len = _IOC_SIZE(cmd); + break; + } if (_IOC_DIR(cmd) & _IOC_WRITE) { in_iov = iov; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5b4aebf5821f..bba747520e9b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -121,10 +121,12 @@ static void fuse_evict_inode(struct inode *inode) } } -static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) +static int fuse_reconfigure(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; + sync_filesystem(sb); - if (*flags & SB_MANDLOCK) + if (fc->sb_flags & SB_MANDLOCK) return -EINVAL; return 0; @@ -475,6 +477,17 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fuse_fs_context *ctx = fc->fs_private; int opt; + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + /* + * Ignore options coming from mount(MS_REMOUNT) for backward + * compatibility. + */ + if (fc->oldapi) + return 0; + + return invalfc(fc, "No changes allowed in reconfigure"); + } + opt = fs_parse(fc, fuse_fs_parameters, param, &result); if (opt < 0) return opt; @@ -817,7 +830,6 @@ static const struct super_operations fuse_super_operations = { .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, .drop_inode = generic_delete_inode, - .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, @@ -1296,6 +1308,7 @@ static int fuse_get_tree(struct fs_context *fc) static const struct fs_context_operations fuse_context_ops = { .free = fuse_free_fc, .parse_param = fuse_parse_param, + .reconfigure = fuse_reconfigure, .get_tree = fuse_get_tree, }; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 72c9560f4467..68cd700a2719 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -468,21 +468,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) } -/** - * __gfs2_readpage - readpage - * @file: The file to read a page for - * @page: The page to read - * - * This is the core of gfs2's readpage. It's used by the internal file - * reading code as in that case we already hold the glock. Also it's - * called by gfs2_readpage() once the required lock has been granted. - */ - static int __gfs2_readpage(void *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - int error; if (i_blocksize(page->mapping->host) == PAGE_SIZE && @@ -505,36 +494,11 @@ static int __gfs2_readpage(void *file, struct page *page) * gfs2_readpage - read a page of a file * @file: The file to read * @page: The page of the file - * - * This deals with the locking required. We have to unlock and - * relock the page in order to get the locking in the right - * order. */ static int gfs2_readpage(struct file *file, struct page *page) { - struct address_space *mapping = page->mapping; - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_holder gh; - int error; - - unlock_page(page); - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - error = gfs2_glock_nq(&gh); - if (unlikely(error)) - goto out; - error = AOP_TRUNCATED_PAGE; - lock_page(page); - if (page->mapping == mapping && !PageUptodate(page)) - error = __gfs2_readpage(file, page); - else - unlock_page(page); - gfs2_glock_dq(&gh); -out: - gfs2_holder_uninit(&gh); - if (error && error != AOP_TRUNCATED_PAGE) - lock_page(page); - return error; + return __gfs2_readpage(file, page); } /** @@ -598,16 +562,9 @@ static void gfs2_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - if (gfs2_glock_nq(&gh)) - goto out_uninit; if (!gfs2_is_stuffed(ip)) mpage_readahead(rac, gfs2_block_map); - gfs2_glock_dq(&gh); -out_uninit: - gfs2_holder_uninit(&gh); } /** diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fe305e4bfd37..bebde537ac8c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -558,8 +558,29 @@ out_uninit: return block_page_mkwrite_return(ret); } +static vm_fault_t gfs2_fault(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + vm_fault_t ret; + int err; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + err = gfs2_glock_nq(&gh); + if (err) { + ret = block_page_mkwrite_return(err); + goto out_uninit; + } + ret = filemap_fault(vmf); + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + static const struct vm_operations_struct gfs2_vm_ops = { - .fault = filemap_fault, + .fault = gfs2_fault, .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, }; @@ -824,6 +845,9 @@ out_uninit: static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct gfs2_inode *ip; + struct gfs2_holder gh; + size_t written = 0; ssize_t ret; if (iocb->ki_flags & IOCB_DIRECT) { @@ -832,7 +856,31 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; iocb->ki_flags &= ~IOCB_DIRECT; } - return generic_file_read_iter(iocb, to); + iocb->ki_flags |= IOCB_NOIO; + ret = generic_file_read_iter(iocb, to); + iocb->ki_flags &= ~IOCB_NOIO; + if (ret >= 0) { + if (!iov_iter_count(to)) + return ret; + written = ret; + } else { + if (ret != -EAGAIN) + return ret; + if (iocb->ki_flags & IOCB_NOWAIT) + return ret; + } + ip = GFS2_I(iocb->ki_filp->f_mapping->host); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + ret = generic_file_read_iter(iocb, to); + if (ret > 0) + written += ret; + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return written ? written : ret; } /** diff --git a/fs/io_uring.c b/fs/io_uring.c index d37d7ea5ebe5..32b0064f806e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -605,6 +605,7 @@ enum { struct async_poll { struct io_poll_iocb poll; + struct io_poll_iocb *double_poll; struct io_wq_work work; }; @@ -1096,6 +1097,8 @@ static inline void io_prep_async_work(struct io_kiocb *req, { const struct io_op_def *def = &io_op_defs[req->opcode]; + io_req_init_async(req); + if (req->flags & REQ_F_ISREG) { if (def->hash_reg_file) io_wq_hash_work(&req->work, file_inode(req->file)); @@ -1104,7 +1107,6 @@ static inline void io_prep_async_work(struct io_kiocb *req, req->work.flags |= IO_WQ_WORK_UNBOUND; } - io_req_init_async(req); io_req_work_grab_env(req, def); *link = io_prep_linked_timeout(req); @@ -1274,6 +1276,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) if (cqe) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1311,6 +1314,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->sq_check_overflow); set_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; } req->flags |= REQ_F_OVERFLOW; refcount_inc(&req->refs); @@ -3551,6 +3555,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->flags & REQ_F_NEED_CLEANUP) return 0; + io->msg.msg.msg_name = &io->msg.addr; io->msg.iov = io->msg.fast_iov; ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags, &io->msg.iov); @@ -3732,6 +3737,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io) { + io->msg.msg.msg_name = &io->msg.addr; io->msg.iov = io->msg.fast_iov; #ifdef CONFIG_COMPAT @@ -3840,10 +3846,16 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock) ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg, kmsg->uaddr, flags); - if (force_nonblock && ret == -EAGAIN) - return io_setup_async_msg(req, kmsg); + if (force_nonblock && ret == -EAGAIN) { + ret = io_setup_async_msg(req, kmsg); + if (ret != -EAGAIN) + kfree(kbuf); + return ret; + } if (ret == -ERESTARTSYS) ret = -EINTR; + if (kbuf) + kfree(kbuf); } if (kmsg && kmsg->iov != kmsg->fast_iov) @@ -4148,9 +4160,9 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) return false; } -static void io_poll_remove_double(struct io_kiocb *req) +static void io_poll_remove_double(struct io_kiocb *req, void *data) { - struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io; + struct io_poll_iocb *poll = data; lockdep_assert_held(&req->ctx->completion_lock); @@ -4170,7 +4182,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error) { struct io_ring_ctx *ctx = req->ctx; - io_poll_remove_double(req); + io_poll_remove_double(req, req->io); req->poll.done = true; io_cqring_fill_event(req, error ? error : mangle_poll(mask)); io_commit_cqring(ctx); @@ -4213,21 +4225,21 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { struct io_kiocb *req = wait->private; - struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io; + struct io_poll_iocb *poll = req->apoll->double_poll; __poll_t mask = key_to_poll(key); /* for instances that support it check for an event match first: */ if (mask && !(mask & poll->events)) return 0; - if (req->poll.head) { + if (poll && poll->head) { bool done; - spin_lock(&req->poll.head->lock); - done = list_empty(&req->poll.wait.entry); + spin_lock(&poll->head->lock); + done = list_empty(&poll->wait.entry); if (!done) - list_del_init(&req->poll.wait.entry); - spin_unlock(&req->poll.head->lock); + list_del_init(&poll->wait.entry); + spin_unlock(&poll->head->lock); if (!done) __io_async_wake(req, poll, mask, io_poll_task_func); } @@ -4247,7 +4259,8 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, } static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, - struct wait_queue_head *head) + struct wait_queue_head *head, + struct io_poll_iocb **poll_ptr) { struct io_kiocb *req = pt->req; @@ -4258,7 +4271,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, */ if (unlikely(poll->head)) { /* already have a 2nd entry, fail a third attempt */ - if (req->io) { + if (*poll_ptr) { pt->error = -EINVAL; return; } @@ -4270,7 +4283,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake); refcount_inc(&req->refs); poll->wait.private = req; - req->io = (void *) poll; + *poll_ptr = poll; } pt->error = 0; @@ -4282,8 +4295,9 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, struct poll_table_struct *p) { struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); + struct async_poll *apoll = pt->req->apoll; - __io_queue_proc(&pt->req->apoll->poll, pt, head); + __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); } static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) @@ -4333,11 +4347,13 @@ static void io_async_task_func(struct callback_head *cb) } } + io_poll_remove_double(req, apoll->double_poll); spin_unlock_irq(&ctx->completion_lock); /* restore ->work in case we need to retry again */ if (req->flags & REQ_F_WORK_INITIALIZED) memcpy(&req->work, &apoll->work, sizeof(req->work)); + kfree(apoll->double_poll); kfree(apoll); if (!canceled) { @@ -4425,7 +4441,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req) struct async_poll *apoll; struct io_poll_table ipt; __poll_t mask, ret; - bool had_io; if (!req->file || !file_can_poll(req->file)) return false; @@ -4437,11 +4452,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req) apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (unlikely(!apoll)) return false; + apoll->double_poll = NULL; req->flags |= REQ_F_POLLED; if (req->flags & REQ_F_WORK_INITIALIZED) memcpy(&apoll->work, &req->work, sizeof(req->work)); - had_io = req->io != NULL; io_get_req_task(req); req->apoll = apoll; @@ -4459,13 +4474,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); if (ret) { - ipt.error = 0; - /* only remove double add if we did it here */ - if (!had_io) - io_poll_remove_double(req); + io_poll_remove_double(req, apoll->double_poll); spin_unlock_irq(&ctx->completion_lock); if (req->flags & REQ_F_WORK_INITIALIZED) memcpy(&req->work, &apoll->work, sizeof(req->work)); + kfree(apoll->double_poll); kfree(apoll); return false; } @@ -4496,11 +4509,13 @@ static bool io_poll_remove_one(struct io_kiocb *req) bool do_complete; if (req->opcode == IORING_OP_POLL_ADD) { - io_poll_remove_double(req); + io_poll_remove_double(req, req->io); do_complete = __io_poll_remove_one(req, &req->poll); } else { struct async_poll *apoll = req->apoll; + io_poll_remove_double(req, apoll->double_poll); + /* non-poll requests have submit ref still */ do_complete = __io_poll_remove_one(req, &apoll->poll); if (do_complete) { @@ -4513,6 +4528,7 @@ static bool io_poll_remove_one(struct io_kiocb *req) if (req->flags & REQ_F_WORK_INITIALIZED) memcpy(&req->work, &apoll->work, sizeof(req->work)); + kfree(apoll->double_poll); kfree(apoll); } } @@ -4613,7 +4629,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, { struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); - __io_queue_proc(&pt->req->poll, pt, head); + __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->io); } static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -4721,7 +4737,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req, { if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len) + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->ioprio || sqe->buf_index || sqe->len) return -EINVAL; req->timeout.addr = READ_ONCE(sqe->addr); @@ -4899,8 +4917,9 @@ static int io_async_cancel_prep(struct io_kiocb *req, { if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (sqe->flags || sqe->ioprio || sqe->off || sqe->len || - sqe->cancel_flags) + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags) return -EINVAL; req->cancel.addr = READ_ONCE(sqe->addr); @@ -4918,7 +4937,9 @@ static int io_async_cancel(struct io_kiocb *req) static int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (sqe->flags || sqe->ioprio || sqe->rw_flags) + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->ioprio || sqe->rw_flags) return -EINVAL; req->files_update.offset = READ_ONCE(sqe->off); @@ -5709,6 +5730,7 @@ fail_req: * Never try inline submit of IOSQE_ASYNC is set, go straight * to async execution. */ + io_req_init_async(req); req->work.flags |= IO_WQ_WORK_CONCURRENT; io_queue_async_work(req); } else { @@ -6080,9 +6102,9 @@ static int io_sq_thread(void *data) } /* Tell userspace we may need a wakeup call */ + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; - /* make sure to read SQ tail after writing flags */ - smp_mb(); + spin_unlock_irq(&ctx->completion_lock); to_submit = io_sqring_entries(ctx); if (!to_submit || ret == -EBUSY) { @@ -6100,13 +6122,17 @@ static int io_sq_thread(void *data) schedule(); finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); ret = 0; continue; } finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); } mutex_lock(&ctx->uring_lock); @@ -6693,6 +6719,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, for (i = 0; i < nr_tables; i++) kfree(ctx->file_data->table[i].files); + percpu_ref_exit(&ctx->file_data->refs); kfree(ctx->file_data->table); kfree(ctx->file_data); ctx->file_data = NULL; @@ -6845,8 +6872,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, } table->files[index] = file; err = io_sqe_file_register(ctx, file, i); - if (err) + if (err) { + fput(file); break; + } } nr_args--; done++; @@ -7342,9 +7371,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_mem_free(ctx->sq_sqes); percpu_ref_exit(&ctx->refs); - if (ctx->account_mem) - io_unaccount_mem(ctx->user, - ring_pages(ctx->sq_entries, ctx->cq_entries)); free_uid(ctx->user); put_cred(ctx->creds); kfree(ctx->cancel_hash); @@ -7429,6 +7455,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) if (ctx->rings) io_cqring_overflow_flush(ctx, true); idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); + + /* + * Do this upfront, so we won't have a grace period where the ring + * is closed but resources aren't reaped yet. This can cause + * spurious failure in setting up a new ring. + */ + if (ctx->account_mem) + io_unaccount_mem(ctx->user, + ring_pages(ctx->sq_entries, ctx->cq_entries)); + INIT_WORK(&ctx->exit_work, io_ring_exit_work); queue_work(system_wq, &ctx->exit_work); } @@ -7488,6 +7524,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, if (list_empty(&ctx->cq_overflow_list)) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irq(&ctx->completion_lock); diff --git a/fs/namespace.c b/fs/namespace.c index f30ed401cc6d..4a0f600a3328 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2603,6 +2603,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (IS_ERR(fc)) return PTR_ERR(fc); + fc->oldapi = true; err = parse_monolithic_mount_data(fc, data); if (!err) { down_write(&sb->s_umount); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3d113cf8908a..1b79dd5cf661 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -267,6 +267,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) { struct inode *inode = dreq->inode; + inode_dio_end(inode); + if (dreq->iocb) { long res = (long) dreq->error; if (dreq->count != 0) { @@ -278,10 +280,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) complete(&dreq->completion); - igrab(inode); nfs_direct_req_release(dreq); - inode_dio_end(inode); - iput(inode); } static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) @@ -411,10 +410,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - igrab(inode); - nfs_direct_req_release(dreq); inode_dio_end(inode); - iput(inode); + nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; } @@ -867,10 +864,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - igrab(inode); - nfs_direct_req_release(dreq); inode_dio_end(inode); - iput(inode); + nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ccd6c1637b27..f96367a2463e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -83,7 +83,6 @@ nfs_file_release(struct inode *inode, struct file *filp) dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); - inode_dio_wait(inode); nfs_file_clear_open_context(filp); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e32717fd1169..8963062da57e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -414,7 +414,7 @@ static int nfs4_delay_interruptible(long *timeout) { might_sleep(); - freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout)); + freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout)); if (!signal_pending(current)) return 0; return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS; @@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, slot->seq_nr_last_acked = seqnr; } +static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred, + struct nfs4_slot *slot) +{ + struct rpc_task *task = _nfs41_proc_sequence(client, cred, slot, true); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task *task, goto out; session = slot->table->session; + clp = session->clp; trace_nfs4_sequence_done(session, res); @@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task *task, nfs4_slot_sequence_acked(slot, slot->seq_nr); /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; - clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags, @@ -852,10 +860,18 @@ static int nfs41_sequence_process(struct rpc_task *task, /* * Were one or more calls using this slot interrupted? * If the server never received the request, then our - * transmitted slot sequence number may be too high. + * transmitted slot sequence number may be too high. However, + * if the server did receive the request then it might + * accidentally give us a reply with a mismatched operation. + * We can sort this out by sending a lone sequence operation + * to the server on the same slot. */ if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { slot->seq_nr--; + if (task->tk_msg.rpc_proc != &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE]) { + nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot); + res->sr_slot = NULL; + } goto retry_nowait; } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cce2510b2cca..c9056316a0b3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -507,6 +507,17 @@ find_any_file(struct nfs4_file *f) return ret; } +static struct nfsd_file *find_deleg_file(struct nfs4_file *f) +{ + struct nfsd_file *ret = NULL; + + spin_lock(&f->fi_lock); + if (f->fi_deleg_file) + ret = nfsd_file_get(f->fi_deleg_file); + spin_unlock(&f->fi_lock); + return ret; +} + static atomic_long_t num_delegations; unsigned long max_delegations; @@ -2444,6 +2455,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) oo = ols->st_stateowner; nf = st->sc_file; file = find_any_file(nf); + if (!file) + return 0; seq_printf(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); @@ -2481,6 +2494,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) oo = ols->st_stateowner; nf = st->sc_file; file = find_any_file(nf); + if (!file) + return 0; seq_printf(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); @@ -2513,7 +2528,9 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) ds = delegstateid(st); nf = st->sc_file; - file = nf->fi_deleg_file; + file = find_deleg_file(nf); + if (!file) + return 0; seq_printf(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); @@ -2529,6 +2546,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) seq_printf(s, ", "); nfs4_show_fname(s, file); seq_printf(s, " }\n"); + nfsd_file_put(file); return 0; } diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 79dd052c7dbf..5e0cde85bd6b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -895,7 +895,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return err; } -int ovl_copy_up_flags(struct dentry *dentry, int flags) +static int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; const struct cred *old_cred = ovl_override_creds(dentry->d_sb); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 8f4286450f92..0e696f72cf65 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -476,7 +476,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, if (IS_ERR_OR_NULL(this)) return this; - if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { + if (ovl_dentry_real_at(this, layer->idx) != real) { dput(this); this = ERR_PTR(-EIO); } diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 01820e654a21..0d940e29d62b 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -33,13 +33,16 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) return 'm'; } +/* No atime modificaton nor notify on underlying */ +#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY) + static struct file *ovl_open_realfile(const struct file *file, struct inode *realinode) { struct inode *inode = file_inode(file); struct file *realfile; const struct cred *old_cred; - int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY; + int flags = file->f_flags | OVL_OPEN_FLAGS; int acc_mode = ACC_MODE(flags); int err; @@ -72,8 +75,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags) struct inode *inode = file_inode(file); int err; - /* No atime modificaton on underlying */ - flags |= O_NOATIME | FMODE_NONOTIFY; + flags |= OVL_OPEN_FLAGS; /* If some flag changed that cannot be changed then something's amiss */ if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK)) @@ -126,7 +128,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real, } /* Did the flags change since open? */ - if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME)) + if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS)) return ovl_change_flags(real->file, file->f_flags); return 0; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 3566282a9199..f7d4358db637 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -389,7 +389,7 @@ invalid: } static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, - struct ovl_path **stackp, unsigned int *ctrp) + struct ovl_path **stackp) { struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN); int err; @@ -406,10 +406,6 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, return err; } - if (WARN_ON(*ctrp)) - return -EIO; - - *ctrp = 1; return 0; } @@ -861,8 +857,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out; } if (upperdentry && !d.is_dir) { - unsigned int origin_ctr = 0; - /* * Lookup copy up origin by decoding origin file handle. * We may get a disconnected dentry, which is fine, @@ -873,8 +867,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(ofs, upperdentry, &origin_path, - &origin_ctr); + err = ovl_check_origin(ofs, upperdentry, &origin_path); if (err) goto out_put_upper; @@ -1073,6 +1066,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperredirect = NULL; goto out_free_oe; } + err = ovl_check_metacopy_xattr(upperdentry); + if (err < 0) + goto out_free_oe; + uppermetacopy = err; } if (upperdentry || ctr) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index b725c7f15ff4..29bc1ec699e7 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -483,7 +483,6 @@ void ovl_aio_request_cache_destroy(void); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_with_data(struct dentry *dentry); -int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_maybe_copy_up(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 91476bc422f9..4b38141c2985 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -580,12 +580,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) } } - /* Workdir is useless in non-upper mount */ - if (!config->upperdir && config->workdir) { - pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", - config->workdir); - kfree(config->workdir); - config->workdir = NULL; + /* Workdir/index are useless in non-upper mount */ + if (!config->upperdir) { + if (config->workdir) { + pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + config->workdir); + kfree(config->workdir); + config->workdir = NULL; + } + if (config->index && index_opt) { + pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n"); + index_opt = false; + } + config->index = false; } err = ovl_parse_redirect_mode(config, config->redirect_mode); @@ -622,11 +629,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) /* Resolve nfs_export -> index dependency */ if (config->nfs_export && !config->index) { - if (nfs_export_opt && index_opt) { + if (!config->upperdir && config->redirect_follow) { + pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + config->nfs_export = false; + } else if (nfs_export_opt && index_opt) { pr_err("conflicting options: nfs_export=on,index=off\n"); return -EINVAL; - } - if (index_opt) { + } else if (index_opt) { /* * There was an explicit index=off that resulted * in this conflict. @@ -1352,8 +1361,15 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, goto out; } + /* index dir will act also as workdir */ + iput(ofs->workdir_trap); + ofs->workdir_trap = NULL; + dput(ofs->workdir); + ofs->workdir = NULL; ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (ofs->indexdir) { + ofs->workdir = dget(ofs->indexdir); + err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, "indexdir"); if (err) @@ -1396,6 +1412,18 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs)) return true; + /* + * We allow using single lower with null uuid for index and nfs_export + * for example to support those features with single lower squashfs. + * To avoid regressions in setups of overlay with re-formatted lower + * squashfs, do not allow decoding origin with lower null uuid unless + * user opted-in to one of the new features that require following the + * lower inode of non-dir upper. + */ + if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino && + uuid_is_null(uuid)) + return false; + for (i = 0; i < ofs->numfs; i++) { /* * We use uuid to associate an overlay lower file handle with a @@ -1493,14 +1521,23 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, if (err < 0) goto out; + /* + * Check if lower root conflicts with this overlay layers before + * checking if it is in-use as upperdir/workdir of "another" + * mount, because we do not bother to check in ovl_is_inuse() if + * the upperdir/workdir is in fact in-use by our + * upperdir/workdir. + */ err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); if (err) goto out; if (ovl_is_inuse(stack[i].dentry)) { err = ovl_report_in_use(ofs, "lowerdir"); - if (err) + if (err) { + iput(trap); goto out; + } } mnt = clone_private_mount(&stack[i]); @@ -1575,10 +1612,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, if (!ofs->config.upperdir && numlower == 1) { pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); return ERR_PTR(-EINVAL); - } else if (!ofs->config.upperdir && ofs->config.nfs_export && - ofs->config.redirect_follow) { - pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); - ofs->config.nfs_export = false; } stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL); @@ -1842,21 +1875,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ovl_upper_mnt(ofs)) sb->s_flags |= SB_RDONLY; - if (!(ovl_force_readonly(ofs)) && ofs->config.index) { - /* index dir will act also as workdir */ - dput(ofs->workdir); - ofs->workdir = NULL; - iput(ofs->workdir_trap); - ofs->workdir_trap = NULL; - + if (!ovl_force_readonly(ofs) && ofs->config.index) { err = ovl_get_indexdir(sb, ofs, oe, &upperpath); if (err) goto out_free_oe; /* Force r/o mount with no index dir */ - if (ofs->indexdir) - ofs->workdir = dget(ofs->indexdir); - else + if (!ofs->indexdir) sb->s_flags |= SB_RDONLY; } diff --git a/fs/read_write.c b/fs/read_write.c index bbfa9b12b15e..4fb797822567 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -419,28 +419,42 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo return ret; } -ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, - loff_t *pos) +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { + mm_segment_t old_fs = get_fs(); + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) + return -EINVAL; + if (!(file->f_mode & FMODE_CAN_READ)) + return -EINVAL; + + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + set_fs(KERNEL_DS); if (file->f_op->read) - return file->f_op->read(file, buf, count, pos); + ret = file->f_op->read(file, (void __user *)buf, count, pos); else if (file->f_op->read_iter) - return new_sync_read(file, buf, count, pos); + ret = new_sync_read(file, (void __user *)buf, count, pos); else - return -EINVAL; + ret = -EINVAL; + set_fs(old_fs); + if (ret > 0) { + fsnotify_access(file); + add_rchar(current, ret); + } + inc_syscr(current); + return ret; } ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - ssize_t result; + ssize_t ret; - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The cast to a user pointer is valid due to the set_fs() */ - result = vfs_read(file, (void __user *)buf, count, pos); - set_fs(old_fs); - return result; + ret = rw_verify_area(READ, file, pos, count); + if (ret) + return ret; + return __kernel_read(file, buf, count, pos); } EXPORT_SYMBOL(kernel_read); @@ -456,17 +470,22 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) return -EFAULT; ret = rw_verify_area(READ, file, pos, count); - if (!ret) { - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - ret = __vfs_read(file, buf, count, pos); - if (ret > 0) { - fsnotify_access(file); - add_rchar(current, ret); - } - inc_syscr(current); - } + if (ret) + return ret; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + if (file->f_op->read) + ret = file->f_op->read(file, buf, count, pos); + else if (file->f_op->read_iter) + ret = new_sync_read(file, buf, count, pos); + else + ret = -EINVAL; + if (ret > 0) { + fsnotify_access(file); + add_rchar(current, ret); + } + inc_syscr(current); return ret; } @@ -488,23 +507,15 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t return ret; } -static ssize_t __vfs_write(struct file *file, const char __user *p, - size_t count, loff_t *pos) -{ - if (file->f_op->write) - return file->f_op->write(file, p, count, pos); - else if (file->f_op->write_iter) - return new_sync_write(file, p, count, pos); - else - return -EINVAL; -} - +/* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; const char __user *p; ssize_t ret; + if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) + return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; @@ -513,7 +524,12 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t p = (__force const char __user *)buf; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; - ret = __vfs_write(file, p, count, pos); + if (file->f_op->write) + ret = file->f_op->write(file, p, count, pos); + else if (file->f_op->write_iter) + ret = new_sync_write(file, p, count, pos); + else + ret = -EINVAL; set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); @@ -522,21 +538,20 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t inc_syscw(current); return ret; } -EXPORT_SYMBOL(__kernel_write); ssize_t kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - ssize_t res; + ssize_t ret; - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_write(file, (__force const char __user *)buf, count, pos); - set_fs(old_fs); + ret = rw_verify_area(WRITE, file, pos, count); + if (ret) + return ret; - return res; + file_start_write(file); + ret = __kernel_write(file, buf, count, pos); + file_end_write(file); + return ret; } EXPORT_SYMBOL(kernel_write); @@ -552,19 +567,23 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ return -EFAULT; ret = rw_verify_area(WRITE, file, pos, count); - if (!ret) { - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - file_start_write(file); - ret = __vfs_write(file, buf, count, pos); - if (ret > 0) { - fsnotify_modify(file); - add_wchar(current, ret); - } - inc_syscw(current); - file_end_write(file); + if (ret) + return ret; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + file_start_write(file); + if (file->f_op->write) + ret = file->f_op->write(file, buf, count, pos); + else if (file->f_op->write_iter) + ret = new_sync_write(file, buf, count, pos); + else + ret = -EINVAL; + if (ret > 0) { + fsnotify_modify(file); + add_wchar(current, ret); } - + inc_syscw(current); + file_end_write(file); return ret; } diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 64f61330564a..76bb1c846845 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -175,7 +175,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, /* Extract the length of the metadata block */ data = page_address(bvec->bv_page) + bvec->bv_offset; length = data[offset]; - if (offset <= bvec->bv_len - 1) { + if (offset < bvec->bv_len - 1) { length |= data[offset + 1] << 8; } else { if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) { diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 07bc42d62673..abfb17f88f9a 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -607,14 +607,14 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) int nr_pages; ssize_t ret; - nr_pages = iov_iter_npages(from, BIO_MAX_PAGES); - if (!nr_pages) - return 0; - max = queue_max_zone_append_sectors(bdev_get_queue(bdev)); max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); iov_iter_truncate(from, max); + nr_pages = iov_iter_npages(from, BIO_MAX_PAGES); + if (!nr_pages) + return 0; + bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set); if (!bio) return -ENOMEM; @@ -1119,7 +1119,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, char *file_name; struct dentry *dir; unsigned int n = 0; - int ret = -ENOMEM; + int ret; /* If the group is empty, there is nothing to do */ if (!zd->nr_zones[type]) @@ -1135,8 +1135,10 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, zgroup_name = "seq"; dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); - if (!dir) + if (!dir) { + ret = -ENOMEM; goto free; + } /* * The first zone contains the super block: skip it. @@ -1174,8 +1176,10 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, * Use the file number within its group as file name. */ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); - if (!zonefs_create_inode(dir, file_name, zone, type)) + if (!zonefs_create_inode(dir, file_name, zone, type)) { + ret = -ENOMEM; goto free; + } n++; } |