summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorAndrew Morton <akpm@linux-foundation.org>2023-12-06 17:03:50 -0800
committerAndrew Morton <akpm@linux-foundation.org>2023-12-06 17:03:50 -0800
commit0c92218f4e7d4b4a7245d32bea042fa6f9cc39d7 (patch)
tree44d6ba1879278a5a1b58178e9b0f799427e52e0e /fs
parentb2f557a21bc8fffdcd65794eda8a854e024999f3 (diff)
parent33cc938e65a98f1d29d0a18403dbbee050dcad9a (diff)
downloadlwn-0c92218f4e7d4b4a7245d32bea042fa6f9cc39d7.tar.gz
lwn-0c92218f4e7d4b4a7245d32bea042fa6f9cc39d7.zip
Merge branch 'master' into mm-hotfixes-stable
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/Kconfig12
-rw-r--r--fs/bcachefs/alloc_foreground.c30
-rw-r--r--fs/bcachefs/bcachefs.h4
-rw-r--r--fs/bcachefs/bcachefs_format.h8
-rw-r--r--fs/bcachefs/btree_gc.c9
-rw-r--r--fs/bcachefs/btree_io.c7
-rw-r--r--fs/bcachefs/btree_iter.c6
-rw-r--r--fs/bcachefs/btree_journal_iter.c18
-rw-r--r--fs/bcachefs/btree_journal_iter.h10
-rw-r--r--fs/bcachefs/btree_update_interior.c14
-rw-r--r--fs/bcachefs/buckets.c10
-rw-r--r--fs/bcachefs/compress.c16
-rw-r--r--fs/bcachefs/data_update.c92
-rw-r--r--fs/bcachefs/data_update.h9
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/extents.c30
-rw-r--r--fs/bcachefs/fs-io-direct.c8
-rw-r--r--fs/bcachefs/fs.c3
-rw-r--r--fs/bcachefs/io_read.c2
-rw-r--r--fs/bcachefs/io_write.c14
-rw-r--r--fs/bcachefs/io_write.h3
-rw-r--r--fs/bcachefs/journal.c2
-rw-r--r--fs/bcachefs/journal.h4
-rw-r--r--fs/bcachefs/journal_io.c29
-rw-r--r--fs/bcachefs/journal_io.h2
-rw-r--r--fs/bcachefs/move.c126
-rw-r--r--fs/bcachefs/move.h19
-rw-r--r--fs/bcachefs/movinggc.c2
-rw-r--r--fs/bcachefs/recovery.c11
-rw-r--r--fs/bcachefs/replicas.c69
-rw-r--r--fs/bcachefs/replicas.h2
-rw-r--r--fs/bcachefs/snapshot.c2
-rw-r--r--fs/bcachefs/super-io.c5
-rw-r--r--fs/bcachefs/super.c34
-rw-r--r--fs/bcachefs/super_types.h1
-rw-r--r--fs/bcachefs/trace.h6
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent_io.c11
-rw-r--r--fs/btrfs/ioctl.c1
-rw-r--r--fs/btrfs/ref-verify.c2
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/tree-checker.c39
-rw-r--r--fs/btrfs/volumes.c9
-rw-r--r--fs/debugfs/file.c100
-rw-r--r--fs/debugfs/inode.c71
-rw-r--r--fs/debugfs/internal.h21
-rw-r--r--fs/ext2/file.c1
-rw-r--r--fs/smb/client/cifsglob.h14
-rw-r--r--fs/smb/client/cifspdu.h28
-rw-r--r--fs/smb/client/cifsproto.h14
-rw-r--r--fs/smb/client/cifssmb.c199
-rw-r--r--fs/smb/client/inode.c78
-rw-r--r--fs/smb/client/readdir.c6
-rw-r--r--fs/smb/client/sess.c2
-rw-r--r--fs/smb/client/smb1ops.c153
-rw-r--r--fs/smb/client/smb2inode.c2
-rw-r--r--fs/smb/client/smb2ops.c240
-rw-r--r--fs/smb/client/smb2pdu.c8
-rw-r--r--fs/smb/client/smb2pdu.h16
-rw-r--r--fs/smb/common/smb2pdu.h17
-rw-r--r--fs/smb/server/ksmbd_work.c10
-rw-r--r--fs/smb/server/oplock.c3
-rw-r--r--fs/smb/server/smb2pdu.c162
-rw-r--r--fs/smb/server/smbacl.c7
-rw-r--r--fs/smb/server/smbacl.h2
-rw-r--r--fs/smb/server/vfs.c70
-rw-r--r--fs/smb/server/vfs.h10
-rw-r--r--fs/smb/server/vfs_cache.c33
-rw-r--r--fs/smb/server/vfs_cache.h6
-rw-r--r--fs/tracefs/event_inode.c65
-rw-r--r--fs/tracefs/inode.c13
-rw-r--r--fs/xfs/xfs_dquot.c5
-rw-r--r--fs/xfs/xfs_dquot_item_recover.c21
75 files changed, 1219 insertions, 852 deletions
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index c08c2c7d6fbb..fddc7be58022 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -33,6 +33,18 @@ config BCACHEFS_QUOTA
depends on BCACHEFS_FS
select QUOTACTL
+config BCACHEFS_ERASURE_CODING
+ bool "bcachefs erasure coding (RAID5/6) support (EXPERIMENTAL)"
+ depends on BCACHEFS_FS
+ select QUOTACTL
+ help
+ This enables the "erasure_code" filesysystem and inode option, which
+ organizes data into reed-solomon stripes instead of ordinary
+ replication.
+
+ WARNING: this feature is still undergoing on disk format changes, and
+ should only be enabled for testing purposes.
+
config BCACHEFS_POSIX_ACL
bool "bcachefs POSIX ACL support"
depends on BCACHEFS_FS
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index b85c7765272f..1ba0eeb7552a 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1297,6 +1297,30 @@ out:
return wp;
}
+static noinline void
+deallocate_extra_replicas(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct open_buckets *ptrs_no_use,
+ unsigned extra_replicas)
+{
+ struct open_buckets ptrs2 = { 0 };
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, ptrs, ob, i) {
+ unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+
+ if (d && d <= extra_replicas) {
+ extra_replicas -= d;
+ ob_push(c, ptrs_no_use, ob);
+ } else {
+ ob_push(c, &ptrs2, ob);
+ }
+ }
+
+ *ptrs = ptrs2;
+}
+
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
@@ -1321,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
int ret;
int i;
+ if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+ erasure_code = false;
+
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
BUG_ON(!nr_replicas || !nr_replicas_required);
@@ -1382,6 +1409,9 @@ alloc_done:
if (ret)
goto err;
+ if (nr_effective > nr_replicas)
+ deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
+
/* Free buckets we didn't use: */
open_bucket_for_each(c, &wp->ptrs, ob, i)
open_bucket_free_unused(c, ob);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 403aa3389fcc..dfa22f9d9a1d 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -638,6 +638,8 @@ struct journal_keys {
size_t gap;
size_t nr;
size_t size;
+ atomic_t ref;
+ bool initial_ref_held;
};
struct btree_trans_buf {
@@ -929,7 +931,7 @@ struct bch_fs {
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
mempool_t decompress_workspace;
- ZSTD_parameters zstd_params;
+ size_t zstd_workspace_size;
struct crypto_shash *sha256;
struct crypto_sync_skcipher *chacha20;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0a750953ff92..1ab1f08d763b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -151,7 +151,11 @@ struct bpos {
#else
#error edit for your odd byteorder.
#endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
#define KEY_INODE_MAX ((__u64)~0ULL)
#define KEY_OFFSET_MAX ((__u64)~0ULL)
@@ -1528,7 +1532,7 @@ struct bch_sb_field_disk_groups {
x(move_extent_write, 36) \
x(move_extent_finish, 37) \
x(move_extent_fail, 38) \
- x(move_extent_alloc_mem_fail, 39) \
+ x(move_extent_start_fail, 39) \
x(copygc, 40) \
x(copygc_wait, 41) \
x(gc_gens_end, 42) \
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0b5d09c8475d..30ab78a24517 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
rcu_assign_pointer(ca->buckets_gc, buckets);
}
- for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
+ ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = gc_bucket(ca, k.k->p.offset);
@@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
g->stripe = a->stripe;
g->stripe_redundancy = a->stripe_redundancy;
}
- }
- bch2_trans_iter_exit(trans, &iter);
+
+ 0;
+ }));
err:
bch2_trans_put(trans);
if (ret)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 37d896edb06e..57c20390e10e 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *
return offset;
}
-static void btree_node_read_all_replicas_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
{
- struct btree_node_read_all *ra =
- container_of(cl, struct btree_node_read_all, cl);
+ closure_type(ra, struct btree_node_read_all, cl);
struct bch_fs *c = ra->c;
struct btree *b = ra->b;
struct printbuf buf = PRINTBUF;
@@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
if (sync) {
closure_sync(&ra->cl);
- btree_node_read_all_replicas_done(&ra->cl);
+ btree_node_read_all_replicas_done(&ra->cl.work);
} else {
continue_at(&ra->cl, btree_node_read_all_replicas_done,
c->io_complete_wq);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 6fa90bcd7016..8e0fe65f6101 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2981,7 +2981,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->fn_idx = fn_idx;
trans->locking_wait.task = current;
trans->journal_replay_not_finished =
- !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
+ unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
+ atomic_inc_not_zero(&c->journal_keys.ref);
closure_init_stack(&trans->ref);
s = btree_trans_stats(trans);
@@ -3098,6 +3099,9 @@ void bch2_trans_put(struct btree_trans *trans)
kfree(trans->fs_usage_deltas);
}
+ if (unlikely(trans->journal_replay_not_finished))
+ bch2_journal_keys_put(c);
+
if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
mempool_free(trans->mem, &c->btree_trans_mem_pool);
else
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 58a981bcf3aa..ec52f50d249d 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -80,6 +80,8 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
struct journal_keys *keys = &c->journal_keys;
unsigned iters = 0;
struct journal_key *k;
+
+ BUG_ON(*idx > keys->nr);
search:
if (!*idx)
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
@@ -189,10 +191,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
/* Since @keys was full, there was no gap: */
memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
kvfree(keys->d);
- *keys = new_keys;
+ keys->d = new_keys.d;
+ keys->nr = new_keys.nr;
+ keys->size = new_keys.size;
/* And now the gap is at the end: */
- keys->gap = keys->nr;
+ keys->gap = keys->nr;
}
journal_iters_move_gap(c, keys->gap, idx);
@@ -415,10 +419,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
cmp_int(l->journal_offset, r->journal_offset);
}
-void bch2_journal_keys_free(struct journal_keys *keys)
+void bch2_journal_keys_put(struct bch_fs *c)
{
+ struct journal_keys *keys = &c->journal_keys;
struct journal_key *i;
+ BUG_ON(atomic_read(&keys->ref) <= 0);
+
+ if (!atomic_dec_and_test(&keys->ref))
+ return;
+
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
keys->gap = keys->nr;
@@ -429,6 +439,8 @@ void bch2_journal_keys_free(struct journal_keys *keys)
kvfree(keys->d);
keys->d = NULL;
keys->nr = keys->gap = keys->size = 0;
+
+ bch2_journal_entries_free(c);
}
static void __journal_keys_sort(struct journal_keys *keys)
diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h
index 5d64e7e22f26..8ca4c100b2e3 100644
--- a/fs/bcachefs/btree_journal_iter.h
+++ b/fs/bcachefs/btree_journal_iter.h
@@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct bch_fs *,
struct btree *);
-void bch2_journal_keys_free(struct journal_keys *);
+void bch2_journal_keys_put(struct bch_fs *);
+
+static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
+{
+ if (c->journal_keys.initial_ref_held)
+ bch2_journal_keys_put(c);
+ c->journal_keys.initial_ref_held = false;
+}
+
void bch2_journal_entries_free(struct bch_fs *);
int bch2_journal_keys_sort(struct bch_fs *);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 76f27bc9fa24..6697417273aa 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -778,9 +778,9 @@ static void btree_interior_update_work(struct work_struct *work)
}
}
-static void btree_update_set_nodes_written(struct closure *cl)
+static CLOSURE_CALLBACK(btree_update_set_nodes_written)
{
- struct btree_update *as = container_of(cl, struct btree_update, cl);
+ closure_type(as, struct btree_update, cl);
struct bch_fs *c = as->c;
mutex_lock(&c->btree_interior_update_lock);
@@ -1071,8 +1071,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
break;
}
+ /*
+ * Always check for space for two keys, even if we won't have to
+ * split at prior level - it might have been a merge instead:
+ */
if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
- BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+ BKEY_BTREE_PTR_U64s_MAX * 2))
break;
split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
@@ -2266,6 +2270,10 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
BUG_ON(!btree_node_hashed(b));
+ struct bch_extent_ptr *ptr;
+ bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
+ !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
+
ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
commit_flags, skip_triggers);
out:
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 58d8c6ffd955..5a91d3189fcf 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -854,8 +854,12 @@ static int __mark_pointer(struct btree_trans *trans,
return ret;
*dst_sectors += sectors;
- *bucket_data_type = *dirty_sectors || *cached_sectors
- ? ptr_data_type : 0;
+
+ if (!*dirty_sectors && !*cached_sectors)
+ *bucket_data_type = 0;
+ else if (*bucket_data_type != BCH_DATA_stripe)
+ *bucket_data_type = ptr_data_type;
+
return 0;
}
@@ -2091,8 +2095,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bucket_gens->first_bucket = ca->mi.first_bucket;
bucket_gens->nbuckets = nbuckets;
- bch2_copygc_stop(c);
-
if (resize) {
down_write(&c->gc_lock);
down_write(&ca->bucket_lock);
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index a8b148ec2a2b..51af8ea230ed 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
*/
unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
- ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
- zstd_cctx_workspace_bound(&params.cParams));
+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
/*
* ZSTD requires that when we decompress we pass in the exact
@@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
size_t len = zstd_compress_cctx(ctx,
dst + 4, dst_len - 4 - 7,
src, src_len,
- &c->zstd_params);
+ &params);
if (zstd_is_error(len))
return 0;
@@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
size_t decompress_workspace_size = 0;
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
c->opts.encoded_extent_max);
+
+ /*
+ * ZSTD is lying: if we allocate the size of the workspace it says it
+ * requires, it returns memory allocation errors
+ */
+ c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);
+
struct {
unsigned feature;
enum bch_compression_type type;
@@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
- zstd_cctx_workspace_bound(&params.cParams),
+ c->zstd_workspace_size,
zstd_dctx_workspace_bound() },
}, *i;
bool have_compressed = false;
- c->zstd_params = params;
-
for (i = compression_types;
i < compression_types + ARRAY_SIZE(compression_types);
i++)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 5ed66202c226..71aa5e59787b 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -356,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update)
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
}
-void bch2_update_unwritten_extent(struct btree_trans *trans,
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
struct data_update *update)
{
struct bch_fs *c = update->op.c;
@@ -436,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct data_update_opts data_opts)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *n;
+ int ret;
+
+ n = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ while (data_opts.kill_ptrs) {
+ unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+ struct bch_extent_ptr *ptr;
+
+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+ data_opts.kill_ptrs ^= 1U << drop;
+ }
+
+ /*
+ * If the new extent no longer has any pointers, bch2_extent_normalize()
+ * will do the appropriate thing with it (turning it into a
+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
+ */
+ bch2_extent_normalize(c, bkey_i_to_s(n));
+
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&n->k))
+ n->k.size = 0;
+
+ return bch2_trans_relock(trans) ?:
+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+}
+
int bch2_data_update_init(struct btree_trans *trans,
+ struct btree_iter *iter,
struct moving_context *ctxt,
struct data_update *m,
struct write_point_specifier wp,
@@ -452,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans,
const struct bch_extent_ptr *ptr;
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
unsigned ptrs_locked = 0;
- int ret;
+ int ret = 0;
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -478,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans,
bkey_for_each_ptr(ptrs, ptr)
percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
+ unsigned durability_have = 0, durability_removing = 0;
+
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
bool locked;
@@ -489,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans,
reserve_sectors += k.k->size;
m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
- } else if (!p.ptr.cached) {
+ durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+ } else if (!p.ptr.cached &&
+ !((1U << i) & m->data_opts.kill_ptrs)) {
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+ durability_have += bch2_extent_ptr_durability(c, &p);
}
/*
@@ -529,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans,
i++;
}
+ /*
+ * If current extent durability is less than io_opts.data_replicas,
+ * we're not trying to rereplicate the extent up to data_replicas here -
+ * unless extra_replicas was specified
+ *
+ * Increasing replication is an explicit operation triggered by
+ * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+ */
+ if (durability_have >= io_opts.data_replicas) {
+ m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+ m->data_opts.rewrite_ptrs = 0;
+ /* if iter == NULL, it's just a promote */
+ if (iter)
+ ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+ goto done;
+ }
+
+ m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->data_opts.extra_replicas;
+ m->op.nr_replicas_required = m->op.nr_replicas;
+
+ BUG_ON(!m->op.nr_replicas);
+
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
m->data_opts.extra_replicas
@@ -538,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans,
goto err;
}
- m->op.nr_replicas += m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
-
- BUG_ON(!m->op.nr_replicas);
+ if (bkey_extent_is_unwritten(k)) {
+ bch2_update_unwritten_extent(trans, m);
+ goto done;
+ }
- /* Special handling required: */
- if (bkey_extent_is_unwritten(k))
- return -BCH_ERR_unwritten_extent_update;
return 0;
err:
i = 0;
@@ -560,6 +629,9 @@ err:
bch2_bkey_buf_exit(&m->k, c);
bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
return ret;
+done:
+ bch2_data_update_exit(m);
+ return ret ?: -BCH_ERR_data_update_done;
}
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 9dc17b9d8379..991095bbd469 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,
struct bch_extent_crc_unpacked);
+int bch2_extent_drop_ptrs(struct btree_trans *,
+ struct btree_iter *,
+ struct bkey_s_c,
+ struct data_update_opts);
+
void bch2_data_update_exit(struct data_update *);
-void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+ struct moving_context *,
struct data_update *,
struct write_point_specifier,
struct bch_io_opts, struct data_update_opts,
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 68a1a96bb7ca..ae7910bf2228 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -162,7 +162,7 @@
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
x(0, restart_recovery) \
- x(0, unwritten_extent_update) \
+ x(0, data_update_done) \
x(EINVAL, device_state_not_allowed) \
x(EINVAL, member_info_missing) \
x(EINVAL, mismatched_block_size) \
@@ -210,6 +210,7 @@
x(BCH_ERR_invalid_sb, invalid_sb_members) \
x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
x(BCH_ERR_invalid_sb, invalid_sb_replicas) \
+ x(BCH_ERR_invalid_sb, invalid_replicas_entry) \
x(BCH_ERR_invalid_sb, invalid_sb_journal) \
x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \
x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index a864de231b69..f6c92df55270 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
return replicas;
}
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
{
- struct bch_dev *ca;
-
if (p->ptr.cached)
return 0;
- ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
- return ca->mi.durability +
- (p->has_ec
- ? p->ec.redundancy
- : 0);
+ return p->has_ec
+ ? p->ec.redundancy + 1
+ : ca->mi.durability;
}
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
{
- struct bch_dev *ca;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
- if (p->ptr.cached)
- return 0;
+ return __extent_ptr_durability(ca, p);
+}
- ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
if (ca->mi.state == BCH_MEMBER_STATE_failed)
return 0;
- return ca->mi.durability +
- (p->has_ec
- ? p->ec.redundancy
- : 0);
+ return __extent_ptr_durability(ca, p);
}
unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 5b42a76c4796..9a479e4de6b3 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty)
}
}
-static void bch2_dio_read_complete(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_read_complete)
{
- struct dio_read *dio = container_of(cl, struct dio_read, cl);
+ closure_type(dio, struct dio_read, cl);
dio->req->ki_complete(dio->req, dio->ret);
bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
@@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
return 0;
}
-static void bch2_dio_write_flush_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_write_flush_done)
{
- struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
+ closure_type(dio, struct dio_write, op.cl);
struct bch_fs *c = dio->op.c;
closure_debug_destroy(cl);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 8ef817304e4a..4d51be813509 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
if (!first)
seq_putc(seq, ':');
first = false;
- seq_puts(seq, "/dev/");
- seq_puts(seq, ca->name);
+ seq_puts(seq, ca->disk_sb.sb_name);
}
return 0;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index a56ed553dc15..36763865facd 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
bio = &op->write.op.wbio.bio;
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
- ret = bch2_data_update_init(trans, NULL, &op->write,
+ ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
writepoint_hashed((unsigned long) current),
opts,
(struct data_update_opts) {
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index d704a8f829c8..8ede46b1e354 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running)
__wp_update_state(wp, state);
}
-static void bch2_write_index(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_write_index)
{
- struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+ closure_type(op, struct bch_write_op, cl);
struct write_point *wp = op->wp;
struct workqueue_struct *wq = index_update_wq(op);
unsigned long flags;
@@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op)
bch2_nocow_write_convert_unwritten(op);
}
-static void bch2_nocow_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_nocow_write_done)
{
- struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+ closure_type(op, struct bch_write_op, cl);
__bch2_nocow_write_done(op);
bch2_write_done(cl);
@@ -1363,7 +1363,7 @@ err:
op->insert_keys.top = op->insert_keys.keys;
} else if (op->flags & BCH_WRITE_SYNC) {
closure_sync(&op->cl);
- bch2_nocow_write_done(&op->cl);
+ bch2_nocow_write_done(&op->cl.work);
} else {
/*
* XXX
@@ -1566,9 +1566,9 @@ err:
* If op->discard is true, instead of inserting the data it invalidates the
* region of the cache represented by op->bio and op->inode.
*/
-void bch2_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_write)
{
- struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+ closure_type(op, struct bch_write_op, cl);
struct bio *bio = &op->wbio.bio;
struct bch_fs *c = op->c;
unsigned data_len;
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index 9323167229ee..6c276a48f95d 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
op->devs_need_flush = NULL;
}
-void bch2_write(struct closure *);
-
+CLOSURE_CALLBACK(bch2_write);
void bch2_write_point_do_index_updates(struct work_struct *);
static inline struct bch_write_bio *wbio_init(struct bio *bio)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 23a9b7845d11..489b34046e78 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -321,6 +321,8 @@ static int journal_entry_open(struct journal *j)
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
+ BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
+
BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
bkey_extent_init(&buf->key);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index c85d01cf4948..4c513fca5ef2 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -136,9 +136,7 @@ static inline u64 journal_last_seq(struct journal *j)
static inline u64 journal_cur_seq(struct journal *j)
{
- EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
-
- return j->pin.back - 1;
+ return atomic64_read(&j->seq);
}
static inline u64 journal_last_unwritten_seq(struct journal *j)
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 786a09285509..0f17fc5f8d68 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -547,6 +547,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+ struct printbuf err = PRINTBUF;
int ret = 0;
if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -555,10 +556,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
journal_entry_data_usage_bad_size,
"invalid journal entry usage: bad size")) {
journal_entry_null_range(entry, vstruct_next(entry));
- return ret;
+ goto out;
}
+ if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+ c, version, jset, entry,
+ journal_entry_data_usage_bad_size,
+ "invalid journal entry usage: %s", err.buf)) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ goto out;
+ }
+out:
fsck_err:
+ printbuf_exit(&err);
return ret;
}
@@ -1025,10 +1035,9 @@ next_block:
return 0;
}
-static void bch2_journal_read_device(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_journal_read_device)
{
- struct journal_device *ja =
- container_of(cl, struct journal_device, read);
+ closure_type(ja, struct journal_device, read);
struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
struct bch_fs *c = ca->fs;
struct journal_list *jlist =
@@ -1520,9 +1529,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK);
}
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
{
- struct journal *j = container_of(cl, struct journal, io);
+ closure_type(j, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_last_unwritten_buf(j);
struct bch_replicas_padded replicas;
@@ -1638,9 +1647,9 @@ static void journal_write_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref);
}
-static void do_journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(do_journal_write)
{
- struct journal *j = container_of(cl, struct journal, io);
+ closure_type(j, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
struct journal_buf *w = journal_last_unwritten_buf(j);
@@ -1850,9 +1859,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
return 0;
}
-void bch2_journal_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_journal_write)
{
- struct journal *j = container_of(cl, struct journal, io);
+ closure_type(j, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
struct journal_buf *w = journal_last_unwritten_buf(j);
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index a88d097b13f1..c035e7c108e1 100644
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
-void bch2_journal_write(struct closure *);
+CLOSURE_CALLBACK(bch2_journal_write);
#endif /* _BCACHEFS_JOURNAL_IO_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index ab749bf2fcbc..54830ee0ed88 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -49,17 +49,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
}
}
-static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
-{
- if (trace_move_extent_alloc_mem_fail_enabled()) {
- struct printbuf buf = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf, c, k);
- trace_move_extent_alloc_mem_fail(c, buf.buf);
- printbuf_exit(&buf);
- }
-}
-
struct moving_io {
struct list_head read_list;
struct list_head io_list;
@@ -163,12 +152,18 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
atomic_read(&ctxt->write_sectors) != sectors_pending);
}
+static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
+{
+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+ bch2_trans_unlock_long(ctxt->trans);
+ closure_sync(&ctxt->cl);
+}
+
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->trans->c;
- move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
- closure_sync(&ctxt->cl);
+ bch2_moving_ctxt_flush_all(ctxt);
EBUG_ON(atomic_read(&ctxt->write_sectors));
EBUG_ON(atomic_read(&ctxt->write_ios));
@@ -223,49 +218,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
scnprintf(stats->name, sizeof(stats->name), "%s", name);
}
-static int bch2_extent_drop_ptrs(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_s_c k,
- struct data_update_opts data_opts)
-{
- struct bch_fs *c = trans->c;
- struct bkey_i *n;
- int ret;
-
- n = bch2_bkey_make_mut_noupdate(trans, k);
- ret = PTR_ERR_OR_ZERO(n);
- if (ret)
- return ret;
-
- while (data_opts.kill_ptrs) {
- unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
- struct bch_extent_ptr *ptr;
-
- bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
- data_opts.kill_ptrs ^= 1U << drop;
- }
-
- /*
- * If the new extent no longer has any pointers, bch2_extent_normalize()
- * will do the appropriate thing with it (turning it into a
- * KEY_TYPE_error key, or just a discard if it was a cached extent)
- */
- bch2_extent_normalize(c, bkey_i_to_s(n));
-
- /*
- * Since we're not inserting through an extent iterator
- * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
- * we aren't using the extent overwrite path to delete, we're
- * just using the normal key deletion path:
- */
- if (bkey_deleted(&n->k))
- n->k.size = 0;
-
- return bch2_trans_relock(trans) ?:
- bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
- bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
-}
-
int bch2_move_extent(struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct btree_iter *iter,
@@ -335,19 +287,11 @@ int bch2_move_extent(struct moving_context *ctxt,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
- ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+ ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
io_opts, data_opts, iter->btree_id, k);
- if (ret && ret != -BCH_ERR_unwritten_extent_update)
+ if (ret)
goto err_free_pages;
- if (ret == -BCH_ERR_unwritten_extent_update) {
- bch2_update_unwritten_extent(trans, &io->write);
- move_free(io);
- return 0;
- }
-
- BUG_ON(ret);
-
io->write.op.end_io = move_write_done;
if (ctxt->rate)
@@ -391,8 +335,23 @@ err_free_pages:
err_free:
kfree(io);
err:
- this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
- trace_move_extent_alloc_mem_fail2(c, k);
+ if (ret == -BCH_ERR_data_update_done)
+ return 0;
+
+ if (bch2_err_matches(ret, EROFS) ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+
+ this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]);
+ if (trace_move_extent_start_fail_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, ": ");
+ prt_str(&buf, bch2_err_str(ret));
+ trace_move_extent_start_fail(c, buf.buf);
+ printbuf_exit(&buf);
+ }
return ret;
}
@@ -482,37 +441,30 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans,
int bch2_move_ratelimit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->trans->c;
+ bool is_kthread = current->flags & PF_KTHREAD;
u64 delay;
- if (ctxt->wait_on_copygc && !c->copygc_running) {
- bch2_trans_unlock_long(ctxt->trans);
+ if (ctxt->wait_on_copygc && c->copygc_running) {
+ bch2_moving_ctxt_flush_all(ctxt);
wait_event_killable(c->copygc_running_wq,
!c->copygc_running ||
- kthread_should_stop());
+ (is_kthread && kthread_should_stop()));
}
do {
delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
-
- if (delay) {
- if (delay > HZ / 10)
- bch2_trans_unlock_long(ctxt->trans);
- else
- bch2_trans_unlock(ctxt->trans);
- set_current_state(TASK_INTERRUPTIBLE);
- }
-
- if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
- __set_current_state(TASK_RUNNING);
+ if (is_kthread && kthread_should_stop())
return 1;
- }
if (delay)
- schedule_timeout(delay);
+ move_ctxt_wait_event_timeout(ctxt,
+ freezing(current) ||
+ (is_kthread && kthread_should_stop()),
+ delay);
if (unlikely(freezing(current))) {
- move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+ bch2_moving_ctxt_flush_all(ctxt);
try_to_freeze();
}
} while (delay);
@@ -683,6 +635,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
+ bool is_kthread = current->flags & PF_KTHREAD;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_iter iter;
struct bkey_buf sk;
@@ -728,6 +681,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
}
while (!(ret = bch2_move_ratelimit(ctxt))) {
+ if (is_kthread && kthread_should_stop())
+ break;
+
bch2_trans_begin(trans);
ret = bch2_get_next_backpointer(trans, bucket, gen,
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index 07cf9d42643b..0906aa2d1de2 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -38,6 +38,25 @@ struct moving_context {
wait_queue_head_t wait;
};
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \
+({ \
+ int _ret = 0; \
+ while (true) { \
+ bool cond_finished = false; \
+ bch2_moving_ctxt_do_pending_writes(_ctxt); \
+ \
+ if (_cond) \
+ break; \
+ bch2_trans_unlock_long((_ctxt)->trans); \
+ _ret = __wait_event_timeout((_ctxt)->wait, \
+ bch2_moving_ctxt_next_pending_write(_ctxt) || \
+ (cond_finished = (_cond)), _timeout); \
+ if (_ret || ( cond_finished)) \
+ break; \
+ } \
+ _ret; \
+})
+
#define move_ctxt_wait_event(_ctxt, _cond) \
do { \
bool cond_finished = false; \
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 0a0576326c5b..a84e79f79e5e 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -207,7 +207,7 @@ static int bch2_copygc(struct moving_context *ctxt,
goto err;
darray_for_each(buckets, i) {
- if (unlikely(freezing(current)))
+ if (kthread_should_stop() || freezing(current))
break;
f = move_bucket_in_flight_add(buckets_in_flight, *i);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 9c30500ce920..770ced1c6285 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -167,6 +167,8 @@ static int bch2_journal_replay(struct bch_fs *c)
goto err;
}
+ BUG_ON(!atomic_read(&keys->ref));
+
for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];
@@ -188,6 +190,9 @@ static int bch2_journal_replay(struct bch_fs *c)
}
}
+ if (!c->opts.keep_journal)
+ bch2_journal_keys_put_initial(c);
+
replay_now_at(j, j->replay_journal_seq_end);
j->replay_journal_seq = 0;
@@ -909,10 +914,8 @@ out:
bch2_flush_fsck_errs(c);
if (!c->opts.keep_journal &&
- test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) {
- bch2_journal_keys_free(&c->journal_keys);
- bch2_journal_entries_free(c);
- }
+ test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ bch2_journal_keys_put_initial(c);
kfree(clean);
if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 1c3ae13bfced..2008fe8bf706 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
prt_printf(out, "]");
}
+int bch2_replicas_entry_validate(struct bch_replicas_entry *r,
+ struct bch_sb *sb,
+ struct printbuf *err)
+{
+ if (!r->nr_devs) {
+ prt_printf(err, "no devices in entry ");
+ goto bad;
+ }
+
+ if (r->nr_required > 1 &&
+ r->nr_required >= r->nr_devs) {
+ prt_printf(err, "bad nr_required in entry ");
+ goto bad;
+ }
+
+ for (unsigned i = 0; i < r->nr_devs; i++)
+ if (!bch2_dev_exists(sb, r->devs[i])) {
+ prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+ goto bad;
+ }
+
+ return 0;
+bad:
+ bch2_replicas_entry_to_text(err, r);
+ return -BCH_ERR_invalid_replicas_entry;
+}
+
void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r)
{
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
}
static struct bch_replicas_cpu
-cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+cpu_replicas_add_entry(struct bch_fs *c,
+ struct bch_replicas_cpu *old,
struct bch_replicas_entry *new_entry)
{
unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
replicas_entry_bytes(new_entry)),
};
+ for (i = 0; i < new_entry->nr_devs; i++)
+ BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
BUG_ON(!new_entry->data_type);
verify_replicas_entry(new_entry);
@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
if (c->replicas_gc.entries &&
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
- new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+ new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries) {
ret = -BCH_ERR_ENOMEM_cpu_replicas;
goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
}
if (!__replicas_has_entry(&c->replicas, new_entry)) {
- new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+ new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries) {
ret = -BCH_ERR_ENOMEM_cpu_replicas;
goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
if (idx < 0) {
struct bch_replicas_cpu n;
- n = cpu_replicas_add_entry(&c->replicas, r);
+ n = cpu_replicas_add_entry(c, &c->replicas, r);
if (!n.entries)
return -BCH_ERR_ENOMEM_cpu_replicas;
@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_sb *sb,
struct printbuf *err)
{
- unsigned i, j;
+ unsigned i;
sort_cmp_size(cpu_r->entries,
cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_replicas_entry *e =
cpu_replicas_entry(cpu_r, i);
- if (e->data_type >= BCH_DATA_NR) {
- prt_printf(err, "invalid data type in entry ");
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
-
- if (!e->nr_devs) {
- prt_printf(err, "no devices in entry ");
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
-
- if (e->nr_required > 1 &&
- e->nr_required >= e->nr_devs) {
- prt_printf(err, "bad nr_required in entry ");
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
-
- for (j = 0; j < e->nr_devs; j++)
- if (!bch2_dev_exists(sb, e->devs[j])) {
- prt_printf(err, "invalid device %u in entry ", e->devs[j]);
- bch2_replicas_entry_to_text(err, e);
- return -BCH_ERR_invalid_sb_replicas;
- }
+ int ret = bch2_replicas_entry_validate(e, sb, err);
+ if (ret)
+ return ret;
if (i + 1 < cpu_r->nr) {
struct bch_replicas_entry *n =
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index 4887675a86f0..f70a642775d1 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -9,6 +9,8 @@
void bch2_replicas_entry_sort(struct bch_replicas_entry *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry *,
+ struct bch_sb *, struct printbuf *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
static inline struct bch_replicas_entry *
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index e9af77b384c7..5dac038f0851 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
parent_id, id))
goto err;
- parent->v.children[i] = le32_to_cpu(child_id);
+ parent->v.children[i] = cpu_to_le32(child_id);
normalize_snapshot_child_pointers(&parent->v);
}
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index f4cad903f4d6..f3e12f7979d5 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->holder);
kfree(sb->holder);
+ kfree(sb->sb_name);
kfree(sb->sb);
memset(sb, 0, sizeof(*sb));
@@ -675,6 +676,10 @@ retry:
if (!sb->holder)
return -ENOMEM;
+ sb->sb_name = kstrdup(path, GFP_KERNEL);
+ if (!sb->sb_name)
+ return -ENOMEM;
+
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
sb->mode |= BLK_OPEN_BUFFERED;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 24672bb31cbe..f63474c5c5a2 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -423,6 +423,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ set_bit(BCH_FS_RW, &c->flags);
+ set_bit(BCH_FS_WAS_RW, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+ percpu_ref_reinit(&c->writes);
+#else
+ for (i = 0; i < BCH_WRITE_REF_NR; i++) {
+ BUG_ON(atomic_long_read(&c->writes[i]));
+ atomic_long_inc(&c->writes[i]);
+ }
+#endif
+
ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
@@ -439,24 +451,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
goto err;
}
-#ifndef BCH_WRITE_REF_DEBUG
- percpu_ref_reinit(&c->writes);
-#else
- for (i = 0; i < BCH_WRITE_REF_NR; i++) {
- BUG_ON(atomic_long_read(&c->writes[i]));
- atomic_long_inc(&c->writes[i]);
- }
-#endif
- set_bit(BCH_FS_RW, &c->flags);
- set_bit(BCH_FS_WAS_RW, &c->flags);
-
bch2_do_discards(c);
bch2_do_invalidates(c);
bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c);
return 0;
err:
- __bch2_fs_read_only(c);
+ if (test_bit(BCH_FS_RW, &c->flags))
+ bch2_fs_read_only(c);
+ else
+ __bch2_fs_read_only(c);
return ret;
}
@@ -504,8 +508,8 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
- bch2_journal_keys_free(&c->journal_keys);
- bch2_journal_entries_free(c);
+ bch2_journal_keys_put_initial(c);
+ BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
free_percpu(c->online_reserved);
@@ -702,6 +706,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
+ atomic_set(&c->journal_keys.ref, 1);
+ c->journal_keys.initial_ref_held = true;
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index 7dda4985b99f..9c1fd4ca2b10 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -5,6 +5,7 @@
struct bch_sb_handle {
struct bch_sb *sb;
struct block_device *bdev;
+ char *sb_name;
struct bio *bio;
void *holder;
size_t buffer_size;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 7857671159b4..fd49b63562c3 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -754,9 +754,9 @@ TRACE_EVENT(move_extent_fail,
TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
);
-DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
- TP_PROTO(struct bch_fs *c, const char *k),
- TP_ARGS(c, k)
+DEFINE_EVENT(bkey, move_extent_start_fail,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
);
TRACE_EVENT(move_data,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 401ea09ae4b8..bbcc3df77646 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3213,6 +3213,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}
+ btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid);
/*
* Verify the type first, if that or the checksum value are
* corrupted, we'll find out
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 03cef28d9e37..e6230a6ffa98 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -674,8 +674,8 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
* the array will be skipped
*
* Return: 0 if all pages were able to be allocated;
- * -ENOMEM otherwise, and the caller is responsible for freeing all
- * non-null page pointers in the array.
+ * -ENOMEM otherwise, the partially allocated pages would be freed and
+ * the array slots zeroed
*/
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
{
@@ -694,8 +694,13 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
* though alloc_pages_bulk_array() falls back to alloc_page()
* if it could not bulk-allocate. So we must be out of memory.
*/
- if (allocated == last)
+ if (allocated == last) {
+ for (int i = 0; i < allocated; i++) {
+ __free_page(page_array[i]);
+ page_array[i] = NULL;
+ }
return -ENOMEM;
+ }
memalloc_retry_wait(GFP_NOFS);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index dfe257e1845b..4e50b62db2a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4356,6 +4356,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
arg->clone_sources = compat_ptr(args32.clone_sources);
arg->parent_root = args32.parent_root;
arg->flags = args32.flags;
+ arg->version = args32.version;
memcpy(arg->reserved, args32.reserved,
sizeof(args32.reserved));
#else
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 1f62976bee82..6486f0d7e993 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -794,6 +794,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
dump_ref_action(fs_info, ra);
kfree(ref);
kfree(ra);
+ kfree(re);
goto out_unlock;
} else if (be->num_refs == 0) {
btrfs_err(fs_info,
@@ -803,6 +804,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
dump_ref_action(fs_info, ra);
kfree(ref);
kfree(ra);
+ kfree(re);
goto out_unlock;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 3b929f0e8f04..4e36550618e5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -8158,7 +8158,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
}
sctx->send_filp = fget(arg->send_fd);
- if (!sctx->send_filp) {
+ if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f638dc339693..ef256b944c72 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -80,7 +80,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data);
static void btrfs_put_super(struct super_block *sb)
{
- close_ctree(btrfs_sb(sb));
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+
+ btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid);
+ close_ctree(fs_info);
}
enum {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6e63816dddcb..bfc0eb5e3b7c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1774,7 +1774,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_create_qgroup(trans, objectid);
- if (ret) {
+ if (ret && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a416cbea75d1..50fdc69fdddf 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -31,6 +31,7 @@
#include "inode-item.h"
#include "dir-item.h"
#include "raid-stripe-tree.h"
+#include "extent-tree.h"
/*
* Error message should follow the following format:
@@ -1276,6 +1277,8 @@ static int check_extent_item(struct extent_buffer *leaf,
unsigned long ptr; /* Current pointer inside inline refs */
unsigned long end; /* Extent item end */
const u32 item_size = btrfs_item_size(leaf, slot);
+ u8 last_type = 0;
+ u64 last_seq = U64_MAX;
u64 flags;
u64 generation;
u64 total_refs; /* Total refs in btrfs_extent_item */
@@ -1322,6 +1325,18 @@ static int check_extent_item(struct extent_buffer *leaf,
* 2.2) Ref type specific data
* Either using btrfs_extent_inline_ref::offset, or specific
* data structure.
+ *
+ * All above inline items should follow the order:
+ *
+ * - All btrfs_extent_inline_ref::type should be in an ascending
+ * order
+ *
+ * - Within the same type, the items should follow a descending
+ * order by their sequence number. The sequence number is
+ * determined by:
+ * * btrfs_extent_inline_ref::offset for all types other than
+ * EXTENT_DATA_REF
+ * * hash_extent_data_ref() for EXTENT_DATA_REF
*/
if (unlikely(item_size < sizeof(*ei))) {
extent_err(leaf, slot,
@@ -1403,6 +1418,7 @@ static int check_extent_item(struct extent_buffer *leaf,
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
+ u64 seq;
u64 dref_offset;
u64 inline_offset;
u8 inline_type;
@@ -1416,6 +1432,7 @@ static int check_extent_item(struct extent_buffer *leaf,
iref = (struct btrfs_extent_inline_ref *)ptr;
inline_type = btrfs_extent_inline_ref_type(leaf, iref);
inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+ seq = inline_offset;
if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
@@ -1446,6 +1463,10 @@ static int check_extent_item(struct extent_buffer *leaf,
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
+ seq = hash_extent_data_ref(
+ btrfs_extent_data_ref_root(leaf, dref),
+ btrfs_extent_data_ref_objectid(leaf, dref),
+ btrfs_extent_data_ref_offset(leaf, dref));
if (unlikely(!IS_ALIGNED(dref_offset,
fs_info->sectorsize))) {
extent_err(leaf, slot,
@@ -1475,6 +1496,24 @@ static int check_extent_item(struct extent_buffer *leaf,
inline_type);
return -EUCLEAN;
}
+ if (inline_type < last_type) {
+ extent_err(leaf, slot,
+ "inline ref out-of-order: has type %u, prev type %u",
+ inline_type, last_type);
+ return -EUCLEAN;
+ }
+ /* Type changed, allow the sequence starts from U64_MAX again. */
+ if (inline_type > last_type)
+ last_seq = U64_MAX;
+ if (seq > last_seq) {
+ extent_err(leaf, slot,
+"inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx",
+ inline_type, inline_offset, seq,
+ last_type, last_seq);
+ return -EUCLEAN;
+ }
+ last_type = inline_type;
+ last_seq = seq;
ptr += btrfs_extent_inline_ref_size(inline_type);
}
/* No padding is allowed */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c6f16625af51..f627674b37db 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3006,15 +3006,16 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
read_unlock(&em_tree->lock);
if (!em) {
- btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+ btrfs_crit(fs_info,
+ "unable to find chunk map for logical %llu length %llu",
logical, length);
return ERR_PTR(-EINVAL);
}
- if (em->start > logical || em->start + em->len < logical) {
+ if (em->start > logical || em->start + em->len <= logical) {
btrfs_crit(fs_info,
- "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
- logical, length, em->start, em->start + em->len);
+ "found a bad chunk map, wanted %llu-%llu, found %llu-%llu",
+ logical, logical + length, em->start, em->start + em->len);
free_extent_map(em);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index c45e8c2d62e1..a5ade8c16375 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -84,6 +84,14 @@ int debugfs_file_get(struct dentry *dentry)
struct debugfs_fsdata *fsd;
void *d_fsd;
+ /*
+ * This could only happen if some debugfs user erroneously calls
+ * debugfs_file_get() on a dentry that isn't even a file, let
+ * them know about it.
+ */
+ if (WARN_ON(!d_is_reg(dentry)))
+ return -EINVAL;
+
d_fsd = READ_ONCE(dentry->d_fsdata);
if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
fsd = d_fsd;
@@ -100,6 +108,14 @@ int debugfs_file_get(struct dentry *dentry)
kfree(fsd);
fsd = READ_ONCE(dentry->d_fsdata);
}
+#ifdef CONFIG_LOCKDEP
+ fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry);
+ lockdep_register_key(&fsd->key);
+ lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs",
+ &fsd->key, 0);
+#endif
+ INIT_LIST_HEAD(&fsd->cancellations);
+ mutex_init(&fsd->cancellations_mtx);
}
/*
@@ -116,6 +132,8 @@ int debugfs_file_get(struct dentry *dentry)
if (!refcount_inc_not_zero(&fsd->active_users))
return -EIO;
+ lock_map_acquire_read(&fsd->lockdep_map);
+
return 0;
}
EXPORT_SYMBOL_GPL(debugfs_file_get);
@@ -133,11 +151,93 @@ void debugfs_file_put(struct dentry *dentry)
{
struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
+ lock_map_release(&fsd->lockdep_map);
+
if (refcount_dec_and_test(&fsd->active_users))
complete(&fsd->active_users_drained);
}
EXPORT_SYMBOL_GPL(debugfs_file_put);
+/**
+ * debugfs_enter_cancellation - enter a debugfs cancellation
+ * @file: the file being accessed
+ * @cancellation: the cancellation object, the cancel callback
+ * inside of it must be initialized
+ *
+ * When a debugfs file is removed it needs to wait for all active
+ * operations to complete. However, the operation itself may need
+ * to wait for hardware or completion of some asynchronous process
+ * or similar. As such, it may need to be cancelled to avoid long
+ * waits or even deadlocks.
+ *
+ * This function can be used inside a debugfs handler that may
+ * need to be cancelled. As soon as this function is called, the
+ * cancellation's 'cancel' callback may be called, at which point
+ * the caller should proceed to call debugfs_leave_cancellation()
+ * and leave the debugfs handler function as soon as possible.
+ * Note that the 'cancel' callback is only ever called in the
+ * context of some kind of debugfs_remove().
+ *
+ * This function must be paired with debugfs_leave_cancellation().
+ */
+void debugfs_enter_cancellation(struct file *file,
+ struct debugfs_cancellation *cancellation)
+{
+ struct debugfs_fsdata *fsd;
+ struct dentry *dentry = F_DENTRY(file);
+
+ INIT_LIST_HEAD(&cancellation->list);
+
+ if (WARN_ON(!d_is_reg(dentry)))
+ return;
+
+ if (WARN_ON(!cancellation->cancel))
+ return;
+
+ fsd = READ_ONCE(dentry->d_fsdata);
+ if (WARN_ON(!fsd ||
+ ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)))
+ return;
+
+ mutex_lock(&fsd->cancellations_mtx);
+ list_add(&cancellation->list, &fsd->cancellations);
+ mutex_unlock(&fsd->cancellations_mtx);
+
+ /* if we're already removing wake it up to cancel */
+ if (d_unlinked(dentry))
+ complete(&fsd->active_users_drained);
+}
+EXPORT_SYMBOL_GPL(debugfs_enter_cancellation);
+
+/**
+ * debugfs_leave_cancellation - leave cancellation section
+ * @file: the file being accessed
+ * @cancellation: the cancellation previously registered with
+ * debugfs_enter_cancellation()
+ *
+ * See the documentation of debugfs_enter_cancellation().
+ */
+void debugfs_leave_cancellation(struct file *file,
+ struct debugfs_cancellation *cancellation)
+{
+ struct debugfs_fsdata *fsd;
+ struct dentry *dentry = F_DENTRY(file);
+
+ if (WARN_ON(!d_is_reg(dentry)))
+ return;
+
+ fsd = READ_ONCE(dentry->d_fsdata);
+ if (WARN_ON(!fsd ||
+ ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)))
+ return;
+
+ mutex_lock(&fsd->cancellations_mtx);
+ if (!list_empty(&cancellation->list))
+ list_del(&cancellation->list);
+ mutex_unlock(&fsd->cancellations_mtx);
+}
+EXPORT_SYMBOL_GPL(debugfs_leave_cancellation);
+
/*
* Only permit access to world-readable files when the kernel is locked down.
* We also need to exclude any file that has ways to write or alter it as root
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 5d41765e0c77..e4e7fe1bd9fb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -236,17 +236,29 @@ static const struct super_operations debugfs_super_operations = {
static void debugfs_release_dentry(struct dentry *dentry)
{
- void *fsd = dentry->d_fsdata;
+ struct debugfs_fsdata *fsd = dentry->d_fsdata;
- if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))
- kfree(dentry->d_fsdata);
+ if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
+ return;
+
+ /* check it wasn't a dir (no fsdata) or automount (no real_fops) */
+ if (fsd && fsd->real_fops) {
+#ifdef CONFIG_LOCKDEP
+ lockdep_unregister_key(&fsd->key);
+ kfree(fsd->lock_name);
+#endif
+ WARN_ON(!list_empty(&fsd->cancellations));
+ mutex_destroy(&fsd->cancellations_mtx);
+ }
+
+ kfree(fsd);
}
static struct vfsmount *debugfs_automount(struct path *path)
{
- debugfs_automount_t f;
- f = (debugfs_automount_t)path->dentry->d_fsdata;
- return f(path->dentry, d_inode(path->dentry)->i_private);
+ struct debugfs_fsdata *fsd = path->dentry->d_fsdata;
+
+ return fsd->automount(path->dentry, d_inode(path->dentry)->i_private);
}
static const struct dentry_operations debugfs_dops = {
@@ -634,13 +646,23 @@ struct dentry *debugfs_create_automount(const char *name,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
+ struct debugfs_fsdata *fsd;
struct inode *inode;
if (IS_ERR(dentry))
return dentry;
+ fsd = kzalloc(sizeof(*fsd), GFP_KERNEL);
+ if (!fsd) {
+ failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ fsd->automount = f;
+
if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
failed_creating(dentry);
+ kfree(fsd);
return ERR_PTR(-EPERM);
}
@@ -648,13 +670,14 @@ struct dentry *debugfs_create_automount(const char *name,
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create automount '%s'\n",
name);
+ kfree(fsd);
return failed_creating(dentry);
}
make_empty_dir_inode(inode);
inode->i_flags |= S_AUTOMOUNT;
inode->i_private = data;
- dentry->d_fsdata = (void *)f;
+ dentry->d_fsdata = fsd;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
d_instantiate(dentry, inode);
@@ -731,8 +754,40 @@ static void __debugfs_file_removed(struct dentry *dentry)
fsd = READ_ONCE(dentry->d_fsdata);
if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
return;
- if (!refcount_dec_and_test(&fsd->active_users))
+
+ lock_map_acquire(&fsd->lockdep_map);
+ lock_map_release(&fsd->lockdep_map);
+
+ /* if we hit zero, just wait for all to finish */
+ if (!refcount_dec_and_test(&fsd->active_users)) {
+ wait_for_completion(&fsd->active_users_drained);
+ return;
+ }
+
+ /* if we didn't hit zero, try to cancel any we can */
+ while (refcount_read(&fsd->active_users)) {
+ struct debugfs_cancellation *c;
+
+ /*
+ * Lock the cancellations. Note that the cancellations
+ * structs are meant to be on the stack, so we need to
+ * ensure we either use them here or don't touch them,
+ * and debugfs_leave_cancellation() will wait for this
+ * to be finished processing before exiting one. It may
+ * of course win and remove the cancellation, but then
+ * chances are we never even got into this bit, we only
+ * do if the refcount isn't zero already.
+ */
+ mutex_lock(&fsd->cancellations_mtx);
+ while ((c = list_first_entry_or_null(&fsd->cancellations,
+ typeof(*c), list))) {
+ list_del_init(&c->list);
+ c->cancel(dentry, c->cancel_data);
+ }
+ mutex_unlock(&fsd->cancellations_mtx);
+
wait_for_completion(&fsd->active_users_drained);
+ }
}
static void remove_one(struct dentry *victim)
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index 92af8ae31313..0c4c68cf161f 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -7,6 +7,8 @@
#ifndef _DEBUGFS_INTERNAL_H_
#define _DEBUGFS_INTERNAL_H_
+#include <linux/lockdep.h>
+#include <linux/list.h>
struct file_operations;
@@ -17,8 +19,23 @@ extern const struct file_operations debugfs_full_proxy_file_operations;
struct debugfs_fsdata {
const struct file_operations *real_fops;
- refcount_t active_users;
- struct completion active_users_drained;
+ union {
+ /* automount_fn is used when real_fops is NULL */
+ debugfs_automount_t automount;
+ struct {
+ refcount_t active_users;
+ struct completion active_users_drained;
+#ifdef CONFIG_LOCKDEP
+ struct lockdep_map lockdep_map;
+ struct lock_class_key key;
+ char *lock_name;
+#endif
+
+ /* protect cancellations */
+ struct mutex cancellations_mtx;
+ struct list_head cancellations;
+ };
+ };
};
/*
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 1039e5bf90af..4ddc36f4dbd4 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out_unlock;
}
- iocb->ki_pos += status;
ret += status;
endbyte = pos + status - 1;
ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 6ffbd81bd109..7558167f603c 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -191,7 +191,13 @@ struct cifs_open_info_data {
bool reparse_point;
bool symlink;
};
- __u32 reparse_tag;
+ struct {
+ __u32 tag;
+ union {
+ struct reparse_data_buffer *buf;
+ struct reparse_posix_data *posix;
+ };
+ } reparse;
char *symlink_target;
union {
struct smb2_file_all_info fi;
@@ -395,8 +401,7 @@ struct smb_version_operations {
struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const char *full_path,
- char **target_path,
- struct kvec *rsp_iov);
+ char **target_path);
/* open a file for non-posix mounts */
int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
void *buf);
@@ -551,6 +556,9 @@ struct smb_version_operations {
bool (*is_status_io_timeout)(char *buf);
/* Check for STATUS_NETWORK_NAME_DELETED */
bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
+ int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb,
+ struct kvec *rsp_iov,
+ struct cifs_open_info_data *data);
};
struct smb_version_values {
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index a75220db5c1e..c0513fbb8a59 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -882,11 +882,13 @@ typedef struct smb_com_open_rsp {
__u8 OplockLevel;
__u16 Fid;
__le32 CreateAction;
- __le64 CreationTime;
- __le64 LastAccessTime;
- __le64 LastWriteTime;
- __le64 ChangeTime;
- __le32 FileAttributes;
+ struct_group(common_attributes,
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le32 FileAttributes;
+ );
__le64 AllocationSize;
__le64 EndOfFile;
__le16 FileType;
@@ -1356,7 +1358,7 @@ typedef struct smb_com_transaction_ioctl_rsp {
__le32 DataDisplacement;
__u8 SetupCount; /* 1 */
__le16 ReturnedDataLen;
- __u16 ByteCount;
+ __le16 ByteCount;
} __attribute__((packed)) TRANSACT_IOCTL_RSP;
#define CIFS_ACL_OWNER 1
@@ -1509,7 +1511,7 @@ struct reparse_posix_data {
__le16 ReparseDataLength;
__u16 Reserved;
__le64 InodeType; /* LNK, FIFO, CHR etc. */
- char PathBuffer[];
+ __u8 DataBuffer[];
} __attribute__((packed));
struct cifs_quota_data {
@@ -2264,11 +2266,13 @@ typedef struct {
/* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */
/******************************************************************************/
typedef struct { /* data block encoding of response to level 263 QPathInfo */
- __le64 CreationTime;
- __le64 LastAccessTime;
- __le64 LastWriteTime;
- __le64 ChangeTime;
- __le32 Attributes;
+ struct_group(common_attributes,
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le32 Attributes;
+ );
__u32 Pad1;
__le64 AllocationSize;
__le64 EndOfFile; /* size ie offset to first free byte in file */
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index d87e2c26cce2..46feaa0880bd 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -210,7 +210,7 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path,
const struct cifs_fid *fid);
bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr,
- u32 tag);
+ struct cifs_open_info_data *data);
extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path,
struct super_block *sb, unsigned int xid);
extern int cifs_get_inode_info_unix(struct inode **pinode,
@@ -458,6 +458,12 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
struct cifs_tcon *tcon,
const unsigned char *searchName, char **syminfo,
const struct nls_table *nls_codepage, int remap);
+extern int cifs_query_reparse_point(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *full_path,
+ u32 *tag, struct kvec *rsp,
+ int *rsp_buftype);
extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
__u16 fid, char **symlinkinfo,
const struct nls_table *nls_codepage);
@@ -659,6 +665,12 @@ void cifs_put_tcp_super(struct super_block *sb);
int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix);
char *extract_hostname(const char *unc);
char *extract_sharename(const char *unc);
+int parse_reparse_point(struct reparse_data_buffer *buf,
+ u32 plen, struct cifs_sb_info *cifs_sb,
+ bool unicode, struct cifs_open_info_data *data);
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev);
#ifdef CONFIG_CIFS_DFS_UPCALL
static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 25503f1a4fd2..9ee348e6d106 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1244,8 +1244,10 @@ openRetry:
*oplock |= CIFS_CREATE_ACTION;
if (buf) {
- /* copy from CreationTime to Attributes */
- memcpy((char *)buf, (char *)&rsp->CreationTime, 36);
+ /* copy commonly used attributes */
+ memcpy(&buf->common_attributes,
+ &rsp->common_attributes,
+ sizeof(buf->common_attributes));
/* the file_info buf is endian converted by caller */
buf->AllocationSize = rsp->AllocationSize;
buf->EndOfFile = rsp->EndOfFile;
@@ -2690,136 +2692,97 @@ querySymLinkRetry:
return rc;
}
-/*
- * Recent Windows versions now create symlinks more frequently
- * and they use the "reparse point" mechanism below. We can of course
- * do symlinks nicely to Samba and other servers which support the
- * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
- * "MF" symlinks optionally, but for recent Windows we really need to
- * reenable the code below and fix the cifs_symlink callers to handle this.
- * In the interim this code has been moved to its own config option so
- * it is not compiled in by default until callers fixed up and more tested.
- */
-int
-CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
- __u16 fid, char **symlinkinfo,
- const struct nls_table *nls_codepage)
+int cifs_query_reparse_point(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *full_path,
+ u32 *tag, struct kvec *rsp,
+ int *rsp_buftype)
{
- int rc = 0;
- int bytes_returned;
- struct smb_com_transaction_ioctl_req *pSMB;
- struct smb_com_transaction_ioctl_rsp *pSMBr;
- bool is_unicode;
- unsigned int sub_len;
- char *sub_start;
- struct reparse_symlink_data *reparse_buf;
- struct reparse_posix_data *posix_buf;
+ struct cifs_open_parms oparms;
+ TRANSACT_IOCTL_REQ *io_req = NULL;
+ TRANSACT_IOCTL_RSP *io_rsp = NULL;
+ struct cifs_fid fid;
__u32 data_offset, data_count;
- char *end_of_smb;
+ __u8 *start, *end;
+ int io_rsp_len;
+ int oplock = 0;
+ int rc;
- cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid);
- rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
- (void **) &pSMBr);
+ cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path);
+
+ if (cap_unix(tcon->ses))
+ return -EOPNOTSUPP;
+
+ oparms = (struct cifs_open_parms) {
+ .tcon = tcon,
+ .cifs_sb = cifs_sb,
+ .desired_access = FILE_READ_ATTRIBUTES,
+ .create_options = cifs_create_options(cifs_sb,
+ OPEN_REPARSE_POINT),
+ .disposition = FILE_OPEN,
+ .path = full_path,
+ .fid = &fid,
+ };
+
+ rc = CIFS_open(xid, &oparms, &oplock, NULL);
if (rc)
return rc;
- pSMB->TotalParameterCount = 0 ;
- pSMB->TotalDataCount = 0;
- pSMB->MaxParameterCount = cpu_to_le32(2);
- /* BB find exact data count max from sess structure BB */
- pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
- pSMB->MaxSetupCount = 4;
- pSMB->Reserved = 0;
- pSMB->ParameterOffset = 0;
- pSMB->DataCount = 0;
- pSMB->DataOffset = 0;
- pSMB->SetupCount = 4;
- pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
- pSMB->ParameterCount = pSMB->TotalParameterCount;
- pSMB->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT);
- pSMB->IsFsctl = 1; /* FSCTL */
- pSMB->IsRootFlag = 0;
- pSMB->Fid = fid; /* file handle always le */
- pSMB->ByteCount = 0;
+ rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon,
+ (void **)&io_req, (void **)&io_rsp);
+ if (rc)
+ goto error;
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
- if (rc) {
- cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc);
- goto qreparse_out;
- }
+ io_req->TotalParameterCount = 0;
+ io_req->TotalDataCount = 0;
+ io_req->MaxParameterCount = cpu_to_le32(2);
+ /* BB find exact data count max from sess structure BB */
+ io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
+ io_req->MaxSetupCount = 4;
+ io_req->Reserved = 0;
+ io_req->ParameterOffset = 0;
+ io_req->DataCount = 0;
+ io_req->DataOffset = 0;
+ io_req->SetupCount = 4;
+ io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
+ io_req->ParameterCount = io_req->TotalParameterCount;
+ io_req->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT);
+ io_req->IsFsctl = 1;
+ io_req->IsRootFlag = 0;
+ io_req->Fid = fid.netfid;
+ io_req->ByteCount = 0;
+
+ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req,
+ (struct smb_hdr *)io_rsp, &io_rsp_len, 0);
+ if (rc)
+ goto error;
- data_offset = le32_to_cpu(pSMBr->DataOffset);
- data_count = le32_to_cpu(pSMBr->DataCount);
- if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
- /* BB also check enough total bytes returned */
- rc = -EIO; /* bad smb */
- goto qreparse_out;
- }
- if (!data_count || (data_count > 2048)) {
+ data_offset = le32_to_cpu(io_rsp->DataOffset);
+ data_count = le32_to_cpu(io_rsp->DataCount);
+ if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 ||
+ !data_count || data_count > 2048) {
rc = -EIO;
- cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
- goto qreparse_out;
- }
- end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
- reparse_buf = (struct reparse_symlink_data *)
- ((char *)&pSMBr->hdr.Protocol + data_offset);
- if ((char *)reparse_buf >= end_of_smb) {
- rc = -EIO;
- goto qreparse_out;
- }
- if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) {
- cifs_dbg(FYI, "NFS style reparse tag\n");
- posix_buf = (struct reparse_posix_data *)reparse_buf;
-
- if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) {
- cifs_dbg(FYI, "unsupported file type 0x%llx\n",
- le64_to_cpu(posix_buf->InodeType));
- rc = -EOPNOTSUPP;
- goto qreparse_out;
- }
- is_unicode = true;
- sub_len = le16_to_cpu(reparse_buf->ReparseDataLength);
- if (posix_buf->PathBuffer + sub_len > end_of_smb) {
- cifs_dbg(FYI, "reparse buf beyond SMB\n");
- rc = -EIO;
- goto qreparse_out;
- }
- *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer,
- sub_len, is_unicode, nls_codepage);
- goto qreparse_out;
- } else if (reparse_buf->ReparseTag !=
- cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) {
- rc = -EOPNOTSUPP;
- goto qreparse_out;
+ goto error;
}
- /* Reparse tag is NTFS symlink */
- sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) +
- reparse_buf->PathBuffer;
- sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength);
- if (sub_start + sub_len > end_of_smb) {
- cifs_dbg(FYI, "reparse buf beyond SMB\n");
+ end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount;
+ start = (__u8 *)&io_rsp->hdr.Protocol + data_offset;
+ if (start >= end) {
rc = -EIO;
- goto qreparse_out;
+ goto error;
}
- if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
- is_unicode = true;
- else
- is_unicode = false;
-
- /* BB FIXME investigate remapping reserved chars here */
- *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode,
- nls_codepage);
- if (!*symlinkinfo)
- rc = -ENOMEM;
-qreparse_out:
- cifs_buf_release(pSMB);
- /*
- * Note: On -EAGAIN error only caller can retry on handle based calls
- * since file handle passed in no longer valid.
- */
+ *tag = le32_to_cpu(((struct reparse_data_buffer *)start)->ReparseTag);
+ rsp->iov_base = io_rsp;
+ rsp->iov_len = io_rsp_len;
+ *rsp_buftype = CIFS_LARGE_BUFFER;
+ CIFSSMBClose(xid, tcon, fid.netfid);
+ return 0;
+
+error:
+ cifs_buf_release(io_req);
+ CIFSSMBClose(xid, tcon, fid.netfid);
return rc;
}
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 86fbd3f847d6..09c5c0f5c96e 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -459,8 +459,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path,
return -EOPNOTSUPP;
rc = server->ops->query_symlink(xid, tcon,
cifs_sb, full_path,
- &fattr->cf_symlink_target,
- NULL);
+ &fattr->cf_symlink_target);
cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc);
}
return rc;
@@ -722,10 +721,51 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr,
fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
}
+static inline dev_t nfs_mkdev(struct reparse_posix_data *buf)
+{
+ u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer);
+
+ return MKDEV(v >> 32, v & 0xffffffff);
+}
+
bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr,
- u32 tag)
+ struct cifs_open_info_data *data)
{
+ struct reparse_posix_data *buf = data->reparse.posix;
+ u32 tag = data->reparse.tag;
+
+ if (tag == IO_REPARSE_TAG_NFS && buf) {
+ switch (le64_to_cpu(buf->InodeType)) {
+ case NFS_SPECFILE_CHR:
+ fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
+ fattr->cf_dtype = DT_CHR;
+ fattr->cf_rdev = nfs_mkdev(buf);
+ break;
+ case NFS_SPECFILE_BLK:
+ fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
+ fattr->cf_dtype = DT_BLK;
+ fattr->cf_rdev = nfs_mkdev(buf);
+ break;
+ case NFS_SPECFILE_FIFO:
+ fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
+ fattr->cf_dtype = DT_FIFO;
+ break;
+ case NFS_SPECFILE_SOCK:
+ fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
+ fattr->cf_dtype = DT_SOCK;
+ break;
+ case NFS_SPECFILE_LNK:
+ fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode;
+ fattr->cf_dtype = DT_LNK;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+ return true;
+ }
+
switch (tag) {
case IO_REPARSE_TAG_LX_SYMLINK:
fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
@@ -750,7 +790,7 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
case 0: /* SMB1 symlink */
case IO_REPARSE_TAG_SYMLINK:
case IO_REPARSE_TAG_NFS:
- fattr->cf_mode = S_IFLNK;
+ fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode;
fattr->cf_dtype = DT_LNK;
break;
default:
@@ -791,7 +831,7 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr,
fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
if (cifs_open_data_reparse(data) &&
- cifs_reparse_point_to_fattr(cifs_sb, fattr, data->reparse_tag))
+ cifs_reparse_point_to_fattr(cifs_sb, fattr, data))
goto out_reparse;
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
@@ -825,6 +865,8 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr,
out_reparse:
if (S_ISLNK(fattr->cf_mode)) {
+ if (likely(data->symlink_target))
+ fattr->cf_eof = strnlen(data->symlink_target, PATH_MAX);
fattr->cf_symlink_target = data->symlink_target;
data->symlink_target = NULL;
}
@@ -856,7 +898,7 @@ cifs_get_file_info(struct file *filp)
data.adjust_tz = false;
if (data.symlink_target) {
data.symlink = true;
- data.reparse_tag = IO_REPARSE_TAG_SYMLINK;
+ data.reparse.tag = IO_REPARSE_TAG_SYMLINK;
}
cifs_open_info_to_fattr(&fattr, &data, inode->i_sb);
break;
@@ -1025,7 +1067,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct kvec rsp_iov, *iov = NULL;
int rsp_buftype = CIFS_NO_BUFFER;
- u32 tag = data->reparse_tag;
+ u32 tag = data->reparse.tag;
int rc = 0;
if (!tag && server->ops->query_reparse_point) {
@@ -1035,22 +1077,28 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
if (!rc)
iov = &rsp_iov;
}
- switch ((data->reparse_tag = tag)) {
+
+ rc = -EOPNOTSUPP;
+ switch ((data->reparse.tag = tag)) {
case 0: /* SMB1 symlink */
- iov = NULL;
- fallthrough;
- case IO_REPARSE_TAG_NFS:
- case IO_REPARSE_TAG_SYMLINK:
- if (!data->symlink_target && server->ops->query_symlink) {
+ if (server->ops->query_symlink) {
rc = server->ops->query_symlink(xid, tcon,
cifs_sb, full_path,
- &data->symlink_target,
- iov);
+ &data->symlink_target);
}
break;
case IO_REPARSE_TAG_MOUNT_POINT:
cifs_create_junction_fattr(fattr, sb);
+ rc = 0;
goto out;
+ default:
+ if (data->symlink_target) {
+ rc = 0;
+ } else if (server->ops->parse_reparse_point) {
+ rc = server->ops->parse_reparse_point(cifs_sb,
+ iov, data);
+ }
+ break;
}
cifs_open_info_to_fattr(fattr, data, sb);
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 47fc22de8d20..d30ea2005eb3 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -153,6 +153,10 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
+ struct cifs_open_info_data data = {
+ .reparse = { .tag = fattr->cf_cifstag, },
+ };
+
fattr->cf_uid = cifs_sb->ctx->linux_uid;
fattr->cf_gid = cifs_sb->ctx->linux_gid;
@@ -165,7 +169,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
* reasonably map some of them to directories vs. files vs. symlinks
*/
if ((fattr->cf_cifsattrs & ATTR_REPARSE) &&
- cifs_reparse_point_to_fattr(cifs_sb, fattr, fattr->cf_cifstag))
+ cifs_reparse_point_to_fattr(cifs_sb, fattr, &data))
goto out_reparse;
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 8b2d7c1ca428..816e01c5589b 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -332,10 +332,10 @@ cifs_disable_secondary_channels(struct cifs_ses *ses)
if (iface) {
spin_lock(&ses->iface_lock);
- kref_put(&iface->refcount, release_iface);
iface->num_channels--;
if (iface->weight_fulfilled)
iface->weight_fulfilled--;
+ kref_put(&iface->refcount, release_iface);
spin_unlock(&ses->iface_lock);
}
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 9bf8735cdd1e..a9eaba8083b0 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -976,64 +976,37 @@ static int cifs_query_symlink(const unsigned int xid,
struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const char *full_path,
- char **target_path,
- struct kvec *rsp_iov)
+ char **target_path)
{
int rc;
- int oplock = 0;
- bool is_reparse_point = !!rsp_iov;
- struct cifs_fid fid;
- struct cifs_open_parms oparms;
- cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+ cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path);
- if (is_reparse_point) {
- cifs_dbg(VFS, "reparse points not handled for SMB1 symlinks\n");
+ if (!cap_unix(tcon->ses))
return -EOPNOTSUPP;
- }
-
- /* Check for unix extensions */
- if (cap_unix(tcon->ses)) {
- rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
- cifs_sb->local_nls,
- cifs_remap(cifs_sb));
- if (rc == -EREMOTE)
- rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
- target_path,
- cifs_sb->local_nls);
-
- goto out;
- }
-
- oparms = (struct cifs_open_parms) {
- .tcon = tcon,
- .cifs_sb = cifs_sb,
- .desired_access = FILE_READ_ATTRIBUTES,
- .create_options = cifs_create_options(cifs_sb,
- OPEN_REPARSE_POINT),
- .disposition = FILE_OPEN,
- .path = full_path,
- .fid = &fid,
- };
-
- rc = CIFS_open(xid, &oparms, &oplock, NULL);
- if (rc)
- goto out;
-
- rc = CIFSSMBQuerySymLink(xid, tcon, fid.netfid, target_path,
- cifs_sb->local_nls);
- if (rc)
- goto out_close;
- convert_delimiter(*target_path, '/');
-out_close:
- CIFSSMBClose(xid, tcon, fid.netfid);
-out:
- if (!rc)
- cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+ rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
+ cifs_sb->local_nls, cifs_remap(cifs_sb));
+ if (rc == -EREMOTE)
+ rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
+ target_path, cifs_sb->local_nls);
return rc;
}
+static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb,
+ struct kvec *rsp_iov,
+ struct cifs_open_info_data *data)
+{
+ struct reparse_data_buffer *buf;
+ TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base;
+ bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE);
+ u32 plen = le16_to_cpu(io->ByteCount);
+
+ buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol +
+ le32_to_cpu(io->DataOffset));
+ return parse_reparse_point(buf, plen, cifs_sb, unicode, data);
+}
+
static bool
cifs_is_read_op(__u32 oplock)
{
@@ -1068,15 +1041,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct inode *newinode = NULL;
- int rc = -EPERM;
- struct cifs_open_info_data buf = {};
- struct cifs_io_parms io_parms;
- __u32 oplock = 0;
- struct cifs_fid fid;
- struct cifs_open_parms oparms;
- unsigned int bytes_written;
- struct win_dev *pdev;
- struct kvec iov[2];
+ int rc;
if (tcon->unix_ext) {
/*
@@ -1110,74 +1075,18 @@ cifs_make_node(unsigned int xid, struct inode *inode,
d_instantiate(dentry, newinode);
return rc;
}
-
/*
- * SMB1 SFU emulation: should work with all servers, but only
- * support block and char device (no socket & fifo)
+ * Check if mounted with mount parm 'sfu' mount parm.
+ * SFU emulation should work with all servers, but only
+ * supports block and char device (no socket & fifo),
+ * and was used by default in earlier versions of Windows
*/
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
- return rc;
-
- if (!S_ISCHR(mode) && !S_ISBLK(mode))
- return rc;
-
- cifs_dbg(FYI, "sfu compat create special file\n");
-
- oparms = (struct cifs_open_parms) {
- .tcon = tcon,
- .cifs_sb = cifs_sb,
- .desired_access = GENERIC_WRITE,
- .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
- CREATE_OPTION_SPECIAL),
- .disposition = FILE_CREATE,
- .path = full_path,
- .fid = &fid,
- };
-
- if (tcon->ses->server->oplocks)
- oplock = REQ_OPLOCK;
- else
- oplock = 0;
- rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
- if (rc)
- return rc;
-
- /*
- * BB Do not bother to decode buf since no local inode yet to put
- * timestamps in, but we can reuse it safely.
- */
-
- pdev = (struct win_dev *)&buf.fi;
- io_parms.pid = current->tgid;
- io_parms.tcon = tcon;
- io_parms.offset = 0;
- io_parms.length = sizeof(struct win_dev);
- iov[1].iov_base = &buf.fi;
- iov[1].iov_len = sizeof(struct win_dev);
- if (S_ISCHR(mode)) {
- memcpy(pdev->type, "IntxCHR", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
- &bytes_written, iov, 1);
- } else if (S_ISBLK(mode)) {
- memcpy(pdev->type, "IntxBLK", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
- &bytes_written, iov, 1);
- }
- tcon->ses->server->ops->close(xid, tcon, &fid);
- d_drop(dentry);
-
- /* FIXME: add code here to set EAs */
-
- cifs_free_open_info(&buf);
- return rc;
+ return -EPERM;
+ return cifs_sfu_make_node(xid, inode, dentry, tcon,
+ full_path, mode, dev);
}
-
-
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -1214,6 +1123,7 @@ struct smb_version_operations smb1_operations = {
.is_path_accessible = cifs_is_path_accessible,
.can_echo = cifs_can_echo,
.query_path_info = cifs_query_path_info,
+ .query_reparse_point = cifs_query_reparse_point,
.query_file_info = cifs_query_file_info,
.get_srv_inum = cifs_get_srv_inum,
.set_path_size = CIFSSMBSetEOF,
@@ -1229,6 +1139,7 @@ struct smb_version_operations smb1_operations = {
.rename = CIFSSMBRename,
.create_hardlink = CIFSCreateHardLink,
.query_symlink = cifs_query_symlink,
+ .parse_reparse_point = cifs_parse_reparse_point,
.open = cifs_open_file,
.set_fid = cifs_set_fid,
.close = cifs_close_file,
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 0b89f7008ac0..c94940af5d4b 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -555,7 +555,7 @@ static int parse_create_response(struct cifs_open_info_data *data,
break;
}
data->reparse_point = reparse_point;
- data->reparse_tag = tag;
+ data->reparse.tag = tag;
return rc;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index a959ed2c9b22..45931115f475 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2866,115 +2866,119 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
return rc;
}
-static int
-parse_reparse_posix(struct reparse_posix_data *symlink_buf,
- u32 plen, char **target_path,
- struct cifs_sb_info *cifs_sb)
+/* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
+static int parse_reparse_posix(struct reparse_posix_data *buf,
+ struct cifs_sb_info *cifs_sb,
+ struct cifs_open_info_data *data)
{
unsigned int len;
-
- /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
- len = le16_to_cpu(symlink_buf->ReparseDataLength);
-
- if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) {
- cifs_dbg(VFS, "%lld not a supported symlink type\n",
- le64_to_cpu(symlink_buf->InodeType));
+ u64 type;
+
+ switch ((type = le64_to_cpu(buf->InodeType))) {
+ case NFS_SPECFILE_LNK:
+ len = le16_to_cpu(buf->ReparseDataLength);
+ data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer,
+ len, true,
+ cifs_sb->local_nls);
+ if (!data->symlink_target)
+ return -ENOMEM;
+ convert_delimiter(data->symlink_target, '/');
+ cifs_dbg(FYI, "%s: target path: %s\n",
+ __func__, data->symlink_target);
+ break;
+ case NFS_SPECFILE_CHR:
+ case NFS_SPECFILE_BLK:
+ case NFS_SPECFILE_FIFO:
+ case NFS_SPECFILE_SOCK:
+ break;
+ default:
+ cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n",
+ __func__, type);
return -EOPNOTSUPP;
}
-
- *target_path = cifs_strndup_from_utf16(
- symlink_buf->PathBuffer,
- len, true, cifs_sb->local_nls);
- if (!(*target_path))
- return -ENOMEM;
-
- convert_delimiter(*target_path, '/');
- cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
-
return 0;
}
-static int
-parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
- u32 plen, char **target_path,
- struct cifs_sb_info *cifs_sb)
+static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym,
+ u32 plen, bool unicode,
+ struct cifs_sb_info *cifs_sb,
+ struct cifs_open_info_data *data)
{
- unsigned int sub_len;
- unsigned int sub_offset;
+ unsigned int len;
+ unsigned int offs;
/* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */
- sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
- sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
- if (sub_offset + 20 > plen ||
- sub_offset + sub_len + 20 > plen) {
+ offs = le16_to_cpu(sym->SubstituteNameOffset);
+ len = le16_to_cpu(sym->SubstituteNameLength);
+ if (offs + 20 > plen || offs + len + 20 > plen) {
cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
return -EIO;
}
- *target_path = cifs_strndup_from_utf16(
- symlink_buf->PathBuffer + sub_offset,
- sub_len, true, cifs_sb->local_nls);
- if (!(*target_path))
+ data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs,
+ len, unicode,
+ cifs_sb->local_nls);
+ if (!data->symlink_target)
return -ENOMEM;
- convert_delimiter(*target_path, '/');
- cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+ convert_delimiter(data->symlink_target, '/');
+ cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target);
return 0;
}
-static int
-parse_reparse_point(struct reparse_data_buffer *buf,
- u32 plen, char **target_path,
- struct cifs_sb_info *cifs_sb)
+int parse_reparse_point(struct reparse_data_buffer *buf,
+ u32 plen, struct cifs_sb_info *cifs_sb,
+ bool unicode, struct cifs_open_info_data *data)
{
- if (plen < sizeof(struct reparse_data_buffer)) {
- cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n",
- plen);
+ if (plen < sizeof(*buf)) {
+ cifs_dbg(VFS, "%s: reparse buffer is too small. Must be at least 8 bytes but was %d\n",
+ __func__, plen);
return -EIO;
}
- if (plen < le16_to_cpu(buf->ReparseDataLength) +
- sizeof(struct reparse_data_buffer)) {
- cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n",
- plen);
+ if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) {
+ cifs_dbg(VFS, "%s: invalid reparse buf length: %d\n",
+ __func__, plen);
return -EIO;
}
+ data->reparse.buf = buf;
+
/* See MS-FSCC 2.1.2 */
switch (le32_to_cpu(buf->ReparseTag)) {
case IO_REPARSE_TAG_NFS:
- return parse_reparse_posix(
- (struct reparse_posix_data *)buf,
- plen, target_path, cifs_sb);
+ return parse_reparse_posix((struct reparse_posix_data *)buf,
+ cifs_sb, data);
case IO_REPARSE_TAG_SYMLINK:
return parse_reparse_symlink(
(struct reparse_symlink_data_buffer *)buf,
- plen, target_path, cifs_sb);
+ plen, unicode, cifs_sb, data);
+ case IO_REPARSE_TAG_LX_SYMLINK:
+ case IO_REPARSE_TAG_AF_UNIX:
+ case IO_REPARSE_TAG_LX_FIFO:
+ case IO_REPARSE_TAG_LX_CHR:
+ case IO_REPARSE_TAG_LX_BLK:
+ return 0;
default:
- cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n",
- le32_to_cpu(buf->ReparseTag));
+ cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n",
+ __func__, le32_to_cpu(buf->ReparseTag));
return -EOPNOTSUPP;
}
}
-static int smb2_query_symlink(const unsigned int xid,
- struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb,
- const char *full_path,
- char **target_path,
- struct kvec *rsp_iov)
+static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
+ struct kvec *rsp_iov,
+ struct cifs_open_info_data *data)
{
struct reparse_data_buffer *buf;
struct smb2_ioctl_rsp *io = rsp_iov->iov_base;
u32 plen = le32_to_cpu(io->OutputCount);
- cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
-
buf = (struct reparse_data_buffer *)((u8 *)io +
le32_to_cpu(io->OutputOffset));
- return parse_reparse_point(buf, plen, target_path, cifs_sb);
+ return parse_reparse_point(buf, plen, cifs_sb, true, data);
}
static int smb2_query_reparse_point(const unsigned int xid,
@@ -3307,6 +3311,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
+ unsigned long long new_size;
long rc;
unsigned int xid;
__le64 eof;
@@ -3337,10 +3342,15 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
/*
* do we also need to change the size of the file?
*/
- if (keep_size == false && i_size_read(inode) < offset + len) {
- eof = cpu_to_le64(offset + len);
+ new_size = offset + len;
+ if (keep_size == false && (unsigned long long)i_size_read(inode) < new_size) {
+ eof = cpu_to_le64(new_size);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof);
+ if (rc >= 0) {
+ truncate_setsize(inode, new_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), new_size);
+ }
}
zero_range_exit:
@@ -3735,6 +3745,9 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
if (rc < 0)
goto out_2;
+ truncate_setsize(inode, old_eof + len);
+ fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode));
+
rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
if (rc < 0)
goto out_2;
@@ -5064,41 +5077,24 @@ smb2_next_header(char *buf)
return le32_to_cpu(hdr->NextCommand);
}
-static int
-smb2_make_node(unsigned int xid, struct inode *inode,
- struct dentry *dentry, struct cifs_tcon *tcon,
- const char *full_path, umode_t mode, dev_t dev)
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- int rc = -EPERM;
struct cifs_open_info_data buf = {};
- struct cifs_io_parms io_parms = {0};
- __u32 oplock = 0;
- struct cifs_fid fid;
+ struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ struct cifs_io_parms io_parms = {};
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_fid fid;
unsigned int bytes_written;
struct win_dev *pdev;
struct kvec iov[2];
-
- /*
- * Check if mounted with mount parm 'sfu' mount parm.
- * SFU emulation should work with all servers, but only
- * supports block and char device (no socket & fifo),
- * and was used by default in earlier versions of Windows
- */
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
- return rc;
-
- /*
- * TODO: Add ability to create instead via reparse point. Windows (e.g.
- * their current NFS server) uses this approach to expose special files
- * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions
- */
+ __u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
+ int rc;
if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
- return rc;
-
- cifs_dbg(FYI, "sfu compat create special file\n");
+ return -EPERM;
oparms = (struct cifs_open_parms) {
.tcon = tcon,
@@ -5111,11 +5107,7 @@ smb2_make_node(unsigned int xid, struct inode *inode,
.fid = &fid,
};
- if (tcon->ses->server->oplocks)
- oplock = REQ_OPLOCK;
- else
- oplock = 0;
- rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
+ rc = server->ops->open(xid, &oparms, &oplock, &buf);
if (rc)
return rc;
@@ -5123,42 +5115,56 @@ smb2_make_node(unsigned int xid, struct inode *inode,
* BB Do not bother to decode buf since no local inode yet to put
* timestamps in, but we can reuse it safely.
*/
-
pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
- io_parms.offset = 0;
- io_parms.length = sizeof(struct win_dev);
- iov[1].iov_base = &buf.fi;
- iov[1].iov_len = sizeof(struct win_dev);
+ io_parms.length = sizeof(*pdev);
+ iov[1].iov_base = pdev;
+ iov[1].iov_len = sizeof(*pdev);
if (S_ISCHR(mode)) {
memcpy(pdev->type, "IntxCHR", 8);
pdev->major = cpu_to_le64(MAJOR(dev));
pdev->minor = cpu_to_le64(MINOR(dev));
- rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
- &bytes_written, iov, 1);
} else if (S_ISBLK(mode)) {
memcpy(pdev->type, "IntxBLK", 8);
pdev->major = cpu_to_le64(MAJOR(dev));
pdev->minor = cpu_to_le64(MINOR(dev));
- rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
- &bytes_written, iov, 1);
} else if (S_ISFIFO(mode)) {
memcpy(pdev->type, "LnxFIFO", 8);
- pdev->major = 0;
- pdev->minor = 0;
- rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
- &bytes_written, iov, 1);
}
- tcon->ses->server->ops->close(xid, tcon, &fid);
- d_drop(dentry);
+ rc = server->ops->sync_write(xid, &fid, &io_parms,
+ &bytes_written, iov, 1);
+ server->ops->close(xid, tcon, &fid);
+ d_drop(dentry);
/* FIXME: add code here to set EAs */
-
cifs_free_open_info(&buf);
return rc;
}
+static int smb2_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ /*
+ * Check if mounted with mount parm 'sfu' mount parm.
+ * SFU emulation should work with all servers, but only
+ * supports block and char device (no socket & fifo),
+ * and was used by default in earlier versions of Windows
+ */
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
+ return -EPERM;
+ /*
+ * TODO: Add ability to create instead via reparse point. Windows (e.g.
+ * their current NFS server) uses this approach to expose special files
+ * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions
+ */
+ return cifs_sfu_make_node(xid, inode, dentry, tcon,
+ full_path, mode, dev);
+}
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
@@ -5209,7 +5215,7 @@ struct smb_version_operations smb20_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .query_symlink = smb2_query_symlink,
+ .parse_reparse_point = smb2_parse_reparse_point,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.open = smb2_open_file,
@@ -5311,7 +5317,7 @@ struct smb_version_operations smb21_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .query_symlink = smb2_query_symlink,
+ .parse_reparse_point = smb2_parse_reparse_point,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.open = smb2_open_file,
@@ -5416,7 +5422,7 @@ struct smb_version_operations smb30_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .query_symlink = smb2_query_symlink,
+ .parse_reparse_point = smb2_parse_reparse_point,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.open = smb2_open_file,
@@ -5530,7 +5536,7 @@ struct smb_version_operations smb311_operations = {
.unlink = smb2_unlink,
.rename = smb2_rename_path,
.create_hardlink = smb2_create_hardlink,
- .query_symlink = smb2_query_symlink,
+ .parse_reparse_point = smb2_parse_reparse_point,
.query_mf_symlink = smb3_query_mf_symlink,
.create_mf_symlink = smb3_create_mf_symlink,
.open = smb2_open_file,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 2eb29fa278c3..395e1230ddbc 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -3472,12 +3472,10 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
} else {
trace_smb3_close_done(xid, persistent_fid, tcon->tid,
ses->Suid);
- /*
- * Note that have to subtract 4 since struct network_open_info
- * has a final 4 byte pad that close response does not have
- */
if (pbuf)
- memcpy(pbuf, (char *)&rsp->CreationTime, sizeof(*pbuf) - 4);
+ memcpy(&pbuf->network_open_info,
+ &rsp->network_open_info,
+ sizeof(pbuf->network_open_info));
}
atomic_dec(&tcon->num_remote_opens);
diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h
index 220994d0a0f7..db08194484e0 100644
--- a/fs/smb/client/smb2pdu.h
+++ b/fs/smb/client/smb2pdu.h
@@ -319,13 +319,15 @@ struct smb2_file_reparse_point_info {
} __packed;
struct smb2_file_network_open_info {
- __le64 CreationTime;
- __le64 LastAccessTime;
- __le64 LastWriteTime;
- __le64 ChangeTime;
- __le64 AllocationSize;
- __le64 EndOfFile;
- __le32 Attributes;
+ struct_group(network_open_info,
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize;
+ __le64 EndOfFile;
+ __le32 Attributes;
+ );
__le32 Reserved;
} __packed; /* level 34 Query also similar returned in close rsp and open rsp */
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 8983f45f8430..9fbaaa387dcc 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -702,13 +702,16 @@ struct smb2_close_rsp {
__le16 StructureSize; /* 60 */
__le16 Flags;
__le32 Reserved;
- __le64 CreationTime;
- __le64 LastAccessTime;
- __le64 LastWriteTime;
- __le64 ChangeTime;
- __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */
- __le64 EndOfFile;
- __le32 Attributes;
+ struct_group(network_open_info,
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ /* Beginning of FILE_STANDARD_INFO equivalent */
+ __le64 AllocationSize;
+ __le64 EndOfFile;
+ __le32 Attributes;
+ );
} __packed;
diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index a2ed441e837a..d7c676c151e2 100644
--- a/fs/smb/server/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
@@ -56,6 +56,9 @@ void ksmbd_free_work_struct(struct ksmbd_work *work)
kfree(work->tr_buf);
kvfree(work->request_buf);
kfree(work->iov);
+ if (!list_empty(&work->interim_entry))
+ list_del(&work->interim_entry);
+
if (work->async_id)
ksmbd_release_id(&work->conn->async_ida, work->async_id);
kmem_cache_free(work_cache, work);
@@ -106,7 +109,7 @@ static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib,
static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
void *aux_buf, unsigned int aux_size)
{
- struct aux_read *ar;
+ struct aux_read *ar = NULL;
int need_iov_cnt = 1;
if (aux_size) {
@@ -123,8 +126,11 @@ static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
new = krealloc(work->iov,
sizeof(struct kvec) * work->iov_alloc_cnt,
GFP_KERNEL | __GFP_ZERO);
- if (!new)
+ if (!new) {
+ kfree(ar);
+ work->iov_alloc_cnt -= 4;
return -ENOMEM;
+ }
work->iov = new;
}
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 9bc0103720f5..50c68beb71d6 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -833,7 +833,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
interim_entry);
setup_async_work(in_work, NULL, NULL);
smb2_send_interim_resp(in_work, STATUS_PENDING);
- list_del(&in_work->interim_entry);
+ list_del_init(&in_work->interim_entry);
+ release_async_work(in_work);
}
INIT_WORK(&work->work, __smb2_lease_break_noti);
ksmbd_queue_work(work);
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 658209839729..d369b98a6e10 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -657,13 +657,9 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
{
- struct smb2_hdr *rsp_hdr;
struct ksmbd_conn *conn = work->conn;
int id;
- rsp_hdr = ksmbd_resp_buf_next(work);
- rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
-
id = ksmbd_acquire_async_msg_id(&conn->async_ida);
if (id < 0) {
pr_err("Failed to alloc async message id\n");
@@ -671,7 +667,6 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
}
work->asynchronous = true;
work->async_id = id;
- rsp_hdr->Id.AsyncId = cpu_to_le64(id);
ksmbd_debug(SMB,
"Send interim Response to inform async request id : %d\n",
@@ -723,6 +718,8 @@ void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
__SMB2_HEADER_STRUCTURE_SIZE);
rsp_hdr = smb2_get_msg(in_work->response_buf);
+ rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
+ rsp_hdr->Id.AsyncId = cpu_to_le64(work->async_id);
smb2_set_err_rsp(in_work);
rsp_hdr->Status = status;
@@ -2380,7 +2377,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
rc = 0;
} else {
rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value,
- le16_to_cpu(eabuf->EaValueLength), 0);
+ le16_to_cpu(eabuf->EaValueLength),
+ 0, true);
if (rc < 0) {
ksmbd_debug(SMB,
"ksmbd_vfs_setxattr is failed(%d)\n",
@@ -2443,7 +2441,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
return -EBADF;
}
- rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0);
+ rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0, false);
if (rc < 0)
pr_err("Failed to store XATTR stream name :%d\n", rc);
return 0;
@@ -2518,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false);
if (rc)
ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
}
@@ -2608,7 +2606,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
sizeof(struct create_sd_buf_req))
return -EINVAL;
return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd,
- le32_to_cpu(sd_buf->ccontext.DataLength), true);
+ le32_to_cpu(sd_buf->ccontext.DataLength), true, false);
}
static void ksmbd_acls_fattr(struct smb_fattr *fattr,
@@ -2690,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work)
*(char *)req->Buffer == '\\') {
pr_err("not allow directory name included leading slash\n");
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
name = smb2_get_name(req->Buffer,
@@ -2701,7 +2699,7 @@ int smb2_open(struct ksmbd_work *work)
if (rc != -ENOMEM)
rc = -ENOENT;
name = NULL;
- goto err_out1;
+ goto err_out2;
}
ksmbd_debug(SMB, "converted name = %s\n", name);
@@ -2709,28 +2707,28 @@ int smb2_open(struct ksmbd_work *work)
if (!test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_STREAMS)) {
rc = -EBADF;
- goto err_out1;
+ goto err_out2;
}
rc = parse_stream_name(name, &stream_name, &s_type);
if (rc < 0)
- goto err_out1;
+ goto err_out2;
}
rc = ksmbd_validate_filename(name);
if (rc < 0)
- goto err_out1;
+ goto err_out2;
if (ksmbd_share_veto_filename(share, name)) {
rc = -ENOENT;
ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n",
name);
- goto err_out1;
+ goto err_out2;
}
} else {
name = kstrdup("", GFP_KERNEL);
if (!name) {
rc = -ENOMEM;
- goto err_out1;
+ goto err_out2;
}
}
@@ -2743,14 +2741,14 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(req->ImpersonationLevel));
rc = -EIO;
rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) {
pr_err("Invalid create options : 0x%x\n",
le32_to_cpu(req->CreateOptions));
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
} else {
if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE &&
req->CreateOptions & FILE_RANDOM_ACCESS_LE)
@@ -2760,13 +2758,13 @@ int smb2_open(struct ksmbd_work *work)
(FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION |
FILE_RESERVE_OPFILTER_LE)) {
rc = -EOPNOTSUPP;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) {
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
} else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) {
req->CreateOptions = ~(FILE_NO_COMPRESSION_LE);
}
@@ -2778,21 +2776,21 @@ int smb2_open(struct ksmbd_work *work)
pr_err("Invalid create disposition : 0x%x\n",
le32_to_cpu(req->CreateDisposition));
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) {
pr_err("Invalid desired access : 0x%x\n",
le32_to_cpu(req->DesiredAccess));
rc = -EACCES;
- goto err_out1;
+ goto err_out2;
}
if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) {
pr_err("Invalid file attribute : 0x%x\n",
le32_to_cpu(req->FileAttributes));
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateContextsOffset) {
@@ -2800,19 +2798,19 @@ int smb2_open(struct ksmbd_work *work)
context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
ea_buf = (struct create_ea_buf_req *)context;
if (le16_to_cpu(context->DataOffset) +
le32_to_cpu(context->DataLength) <
sizeof(struct create_ea_buf_req)) {
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) {
rsp->hdr.Status = STATUS_ACCESS_DENIED;
rc = -EACCES;
- goto err_out1;
+ goto err_out2;
}
}
@@ -2820,7 +2818,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
ksmbd_debug(SMB,
"get query maximal access context\n");
@@ -2831,11 +2829,11 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_TIMEWARP_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
ksmbd_debug(SMB, "get timewarp context\n");
rc = -EBADF;
- goto err_out1;
+ goto err_out2;
}
if (tcon->posix_extensions) {
@@ -2843,7 +2841,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_TAG_POSIX, 16);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
struct create_posix *posix =
(struct create_posix *)context;
@@ -2851,7 +2849,7 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(context->DataLength) <
sizeof(struct create_posix) - 4) {
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
ksmbd_debug(SMB, "get posix context\n");
@@ -2863,7 +2861,7 @@ int smb2_open(struct ksmbd_work *work)
if (ksmbd_override_fsids(work)) {
rc = -ENOMEM;
- goto err_out1;
+ goto err_out2;
}
rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
@@ -3038,7 +3036,7 @@ int smb2_open(struct ksmbd_work *work)
}
}
- rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent));
+ rc = ksmbd_query_inode_status(path.dentry->d_parent);
if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) {
rc = -EBUSY;
goto err_out;
@@ -3152,7 +3150,8 @@ int smb2_open(struct ksmbd_work *work)
idmap,
&path,
pntsd,
- pntsd_size);
+ pntsd_size,
+ false);
kfree(pntsd);
if (rc)
pr_err("failed to store ntacl in xattr : %d\n",
@@ -3175,11 +3174,6 @@ int smb2_open(struct ksmbd_work *work)
fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE |
FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE));
- if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
- !fp->attrib_only && !stream_name) {
- smb_break_all_oplock(work, fp);
- need_truncate = 1;
- }
/* fp should be searchable through ksmbd_inode.m_fp_list
* after daccess, saccess, attrib_only, and stream are
@@ -3195,13 +3189,39 @@ int smb2_open(struct ksmbd_work *work)
goto err_out;
}
+ rc = ksmbd_vfs_getattr(&path, &stat);
+ if (rc)
+ goto err_out;
+
+ if (stat.result_mask & STATX_BTIME)
+ fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
+ else
+ fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
+ if (req->FileAttributes || fp->f_ci->m_fattr == 0)
+ fp->f_ci->m_fattr =
+ cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
+
+ if (!created)
+ smb2_update_xattrs(tcon, &path, fp);
+ else
+ smb2_new_xattrs(tcon, &path, fp);
+
+ if (file_present || created)
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
+ if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
+ !fp->attrib_only && !stream_name) {
+ smb_break_all_oplock(work, fp);
+ need_truncate = 1;
+ }
+
share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
(req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
!(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) {
if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) {
rc = share_ret;
- goto err_out;
+ goto err_out1;
}
} else {
if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
@@ -3211,7 +3231,7 @@ int smb2_open(struct ksmbd_work *work)
name, req_op_level, lc->req_state);
rc = find_same_lease_key(sess, fp->f_ci, lc);
if (rc)
- goto err_out;
+ goto err_out1;
} else if (open_flags == O_RDONLY &&
(req_op_level == SMB2_OPLOCK_LEVEL_BATCH ||
req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
@@ -3222,16 +3242,16 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(req->hdr.Id.SyncId.TreeId),
lc, share_ret);
if (rc < 0)
- goto err_out;
+ goto err_out1;
}
if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)
ksmbd_fd_set_delete_on_close(fp, file_info);
if (need_truncate) {
- rc = smb2_create_truncate(&path);
+ rc = smb2_create_truncate(&fp->filp->f_path);
if (rc)
- goto err_out;
+ goto err_out1;
}
if (req->CreateContextsOffset) {
@@ -3241,7 +3261,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_ALLOCATION_SIZE, 4);
if (IS_ERR(az_req)) {
rc = PTR_ERR(az_req);
- goto err_out;
+ goto err_out1;
} else if (az_req) {
loff_t alloc_size;
int err;
@@ -3250,7 +3270,7 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(az_req->ccontext.DataLength) <
sizeof(struct create_alloc_size_req)) {
rc = -EINVAL;
- goto err_out;
+ goto err_out1;
}
alloc_size = le64_to_cpu(az_req->AllocationSize);
ksmbd_debug(SMB,
@@ -3268,30 +3288,13 @@ int smb2_open(struct ksmbd_work *work)
context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out;
+ goto err_out1;
} else if (context) {
ksmbd_debug(SMB, "get query on disk id context\n");
query_disk_id = 1;
}
}
- rc = ksmbd_vfs_getattr(&path, &stat);
- if (rc)
- goto err_out;
-
- if (stat.result_mask & STATX_BTIME)
- fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
- else
- fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
- if (req->FileAttributes || fp->f_ci->m_fattr == 0)
- fp->f_ci->m_fattr =
- cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
-
- if (!created)
- smb2_update_xattrs(tcon, &path, fp);
- else
- smb2_new_xattrs(tcon, &path, fp);
-
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
rsp->StructureSize = cpu_to_le16(89);
@@ -3398,13 +3401,13 @@ int smb2_open(struct ksmbd_work *work)
}
err_out:
- if (file_present || created) {
- inode_unlock(d_inode(parent_path.dentry));
- path_put(&path);
- path_put(&parent_path);
- }
- ksmbd_revert_fsids(work);
+ if (rc && (file_present || created))
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
err_out1:
+ ksmbd_revert_fsids(work);
+
+err_out2:
if (!rc) {
ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED);
rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
@@ -5537,7 +5540,7 @@ static int smb2_rename(struct ksmbd_work *work,
rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp),
&fp->filp->f_path,
xattr_stream_name,
- NULL, 0, 0);
+ NULL, 0, 0, true);
if (rc < 0) {
pr_err("failed to store stream name in xattr: %d\n",
rc);
@@ -5630,11 +5633,9 @@ static int smb2_create_link(struct ksmbd_work *work,
if (rc)
rc = -EINVAL;
out:
- if (file_present) {
- inode_unlock(d_inode(parent_path.dentry));
- path_put(&path);
- path_put(&parent_path);
- }
+ if (file_present)
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
if (!IS_ERR(link_name))
kfree(link_name);
kfree(pathname);
@@ -5701,7 +5702,8 @@ static int set_file_basic_info(struct ksmbd_file *fp,
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da,
+ true);
if (rc)
ksmbd_debug(SMB,
"failed to restore file attribute in EA\n");
@@ -6013,7 +6015,7 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
fp->saccess |= FILE_SHARE_DELETE_LE;
return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd,
- buf_len, false);
+ buf_len, false, true);
}
/**
@@ -7582,7 +7584,8 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
da.attr = le32_to_cpu(fp->f_ci->m_fattr);
ret = ksmbd_vfs_set_dos_attrib_xattr(idmap,
- &fp->filp->f_path, &da);
+ &fp->filp->f_path,
+ &da, true);
if (ret)
fp->f_ci->m_fattr = old_fattr;
}
@@ -8231,7 +8234,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
return;
err_out:
- opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
atomic_dec(&opinfo->breaking_cnt);
wake_up_interruptible_all(&opinfo->oplock_brk);
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index 51b8bfab7481..1164365533f0 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -1185,7 +1185,7 @@ pass:
pntsd_size += sizeof(struct smb_acl) + nt_size;
}
- ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size);
+ ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size, false);
kfree(pntsd);
}
@@ -1377,7 +1377,7 @@ err_out:
int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
- bool type_check)
+ bool type_check, bool get_write)
{
int rc;
struct smb_fattr fattr = {{0}};
@@ -1437,7 +1437,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
/* Update WinACL in xattr */
ksmbd_vfs_remove_sd_xattrs(idmap, path);
- ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len);
+ ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len,
+ get_write);
}
out:
diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h
index 49a8c292bd2e..2b52861707d8 100644
--- a/fs/smb/server/smbacl.h
+++ b/fs/smb/server/smbacl.h
@@ -207,7 +207,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
__le32 *pdaccess, int uid);
int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
- bool type_check);
+ bool type_check, bool get_write);
void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
void ksmbd_init_domain(u32 *sub_auth);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index c53dea5598fc..9091dcd7a310 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -97,6 +97,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
return -ENOENT;
}
+ err = mnt_want_write(parent_path->mnt);
+ if (err) {
+ path_put(parent_path);
+ putname(filename);
+ return -ENOENT;
+ }
+
inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
if (IS_ERR(d))
@@ -123,6 +130,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
err_out:
inode_unlock(d_inode(parent_path->dentry));
+ mnt_drop_write(parent_path->mnt);
path_put(parent_path);
putname(filename);
return -ENOENT;
@@ -451,7 +459,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
fp->stream.name,
(void *)stream_buf,
size,
- 0);
+ 0,
+ true);
if (err < 0)
goto out;
@@ -593,10 +602,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
goto out_err;
}
- err = mnt_want_write(path->mnt);
- if (err)
- goto out_err;
-
idmap = mnt_idmap(path->mnt);
if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
err = vfs_rmdir(idmap, d_inode(parent), path->dentry);
@@ -607,7 +612,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
if (err)
ksmbd_debug(VFS, "unlink failed, err %d\n", err);
}
- mnt_drop_write(path->mnt);
out_err:
ksmbd_revert_fsids(work);
@@ -715,7 +719,7 @@ retry:
goto out3;
}
- parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent));
+ parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
if (parent_fp) {
if (parent_fp->daccess & FILE_DELETE_LE) {
pr_err("parent dir is opened with delete access\n");
@@ -907,18 +911,22 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
* @attr_value: xattr value to set
* @attr_size: size of xattr value
* @flags: destination buffer length
+ * @get_write: get write access to a mount
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
const struct path *path, const char *attr_name,
- void *attr_value, size_t attr_size, int flags)
+ void *attr_value, size_t attr_size, int flags,
+ bool get_write)
{
int err;
- err = mnt_want_write(path->mnt);
- if (err)
- return err;
+ if (get_write == true) {
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+ }
err = vfs_setxattr(idmap,
path->dentry,
@@ -928,7 +936,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
flags);
if (err)
ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
- mnt_drop_write(path->mnt);
+ if (get_write == true)
+ mnt_drop_write(path->mnt);
return err;
}
@@ -1252,6 +1261,13 @@ out1:
}
if (!err) {
+ err = mnt_want_write(parent_path->mnt);
+ if (err) {
+ path_put(path);
+ path_put(parent_path);
+ return err;
+ }
+
err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
if (err) {
path_put(path);
@@ -1261,6 +1277,14 @@ out1:
return err;
}
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path)
+{
+ inode_unlock(d_inode(parent_path->dentry));
+ mnt_drop_write(parent_path->mnt);
+ path_put(path);
+ path_put(parent_path);
+}
+
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
@@ -1415,7 +1439,8 @@ out:
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
const struct path *path,
- struct smb_ntsd *pntsd, int len)
+ struct smb_ntsd *pntsd, int len,
+ bool get_write)
{
int rc;
struct ndr sd_ndr = {0}, acl_ndr = {0};
@@ -1475,7 +1500,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
rc = ksmbd_vfs_setxattr(idmap, path,
XATTR_NAME_SD, sd_ndr.data,
- sd_ndr.offset, 0);
+ sd_ndr.offset, 0, get_write);
if (rc < 0)
pr_err("Failed to store XATTR ntacl :%d\n", rc);
@@ -1564,7 +1589,8 @@ free_n_data:
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
const struct path *path,
- struct xattr_dos_attrib *da)
+ struct xattr_dos_attrib *da,
+ bool get_write)
{
struct ndr n;
int err;
@@ -1574,7 +1600,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
return err;
err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE,
- (void *)n.data, n.offset, 0);
+ (void *)n.data, n.offset, 0, get_write);
if (err)
ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
kfree(n.data);
@@ -1846,10 +1872,6 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
}
posix_state_to_acl(&acl_state, acls->a_entries);
- rc = mnt_want_write(path->mnt);
- if (rc)
- goto out_err;
-
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1861,9 +1883,7 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
- mnt_drop_write(path->mnt);
-out_err:
free_acl_state(&acl_state);
posix_acl_release(acls);
return rc;
@@ -1893,10 +1913,6 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
}
}
- rc = mnt_want_write(path->mnt);
- if (rc)
- goto out_err;
-
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1908,9 +1924,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
- mnt_drop_write(path->mnt);
-out_err:
posix_acl_release(acls);
return rc;
}
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h
index 00968081856e..cfe1c8092f23 100644
--- a/fs/smb/server/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -109,7 +109,8 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
int attr_name_len);
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
const struct path *path, const char *attr_name,
- void *attr_value, size_t attr_size, int flags);
+ void *attr_value, size_t attr_size, int flags,
+ bool get_write);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
@@ -117,6 +118,7 @@ int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
unsigned int flags, struct path *parent_path,
struct path *path, bool caseless);
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path);
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
@@ -144,14 +146,16 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path)
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
const struct path *path,
- struct smb_ntsd *pntsd, int len);
+ struct smb_ntsd *pntsd, int len,
+ bool get_write);
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd **pntsd);
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
const struct path *path,
- struct xattr_dos_attrib *da);
+ struct xattr_dos_attrib *da,
+ bool get_write);
int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da);
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index c91eac6514dd..ddf233994ddb 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -66,14 +66,14 @@ static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
return tmp & inode_hash_mask;
}
-static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
+static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de)
{
struct hlist_head *head = inode_hashtable +
- inode_hash(inode->i_sb, inode->i_ino);
+ inode_hash(d_inode(de)->i_sb, (unsigned long)de);
struct ksmbd_inode *ci = NULL, *ret_ci = NULL;
hlist_for_each_entry(ci, head, m_hash) {
- if (ci->m_inode == inode) {
+ if (ci->m_de == de) {
if (atomic_inc_not_zero(&ci->m_count))
ret_ci = ci;
break;
@@ -84,26 +84,16 @@ static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
{
- return __ksmbd_inode_lookup(file_inode(fp->filp));
+ return __ksmbd_inode_lookup(fp->filp->f_path.dentry);
}
-static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode)
-{
- struct ksmbd_inode *ci;
-
- read_lock(&inode_hash_lock);
- ci = __ksmbd_inode_lookup(inode);
- read_unlock(&inode_hash_lock);
- return ci;
-}
-
-int ksmbd_query_inode_status(struct inode *inode)
+int ksmbd_query_inode_status(struct dentry *dentry)
{
struct ksmbd_inode *ci;
int ret = KSMBD_INODE_STATUS_UNKNOWN;
read_lock(&inode_hash_lock);
- ci = __ksmbd_inode_lookup(inode);
+ ci = __ksmbd_inode_lookup(dentry);
if (ci) {
ret = KSMBD_INODE_STATUS_OK;
if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS))
@@ -143,7 +133,7 @@ void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
static void ksmbd_inode_hash(struct ksmbd_inode *ci)
{
struct hlist_head *b = inode_hashtable +
- inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino);
+ inode_hash(d_inode(ci->m_de)->i_sb, (unsigned long)ci->m_de);
hlist_add_head(&ci->m_hash, b);
}
@@ -157,7 +147,6 @@ static void ksmbd_inode_unhash(struct ksmbd_inode *ci)
static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
{
- ci->m_inode = file_inode(fp->filp);
atomic_set(&ci->m_count, 1);
atomic_set(&ci->op_count, 0);
atomic_set(&ci->sop_count, 0);
@@ -166,6 +155,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
INIT_LIST_HEAD(&ci->m_fp_list);
INIT_LIST_HEAD(&ci->m_op_list);
rwlock_init(&ci->m_lock);
+ ci->m_de = fp->filp->f_path.dentry;
return 0;
}
@@ -488,12 +478,15 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
return fp;
}
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry)
{
struct ksmbd_file *lfp;
struct ksmbd_inode *ci;
+ struct inode *inode = d_inode(dentry);
- ci = ksmbd_inode_lookup_by_vfsinode(inode);
+ read_lock(&inode_hash_lock);
+ ci = __ksmbd_inode_lookup(dentry);
+ read_unlock(&inode_hash_lock);
if (!ci)
return NULL;
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 03d0bf941216..8325cf4527c4 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -51,7 +51,7 @@ struct ksmbd_inode {
atomic_t op_count;
/* opinfo count for streams */
atomic_t sop_count;
- struct inode *m_inode;
+ struct dentry *m_de;
unsigned int m_flags;
struct hlist_node m_hash;
struct list_head m_fp_list;
@@ -140,7 +140,7 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode);
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
@@ -164,7 +164,7 @@ enum KSMBD_INODE_STATUS {
KSMBD_INODE_STATUS_PENDING_DELETE,
};
-int ksmbd_query_inode_status(struct inode *inode);
+int ksmbd_query_inode_status(struct dentry *dentry);
bool ksmbd_inode_pending_delete(struct ksmbd_file *fp);
void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index f8a594a50ae6..0b90869fd805 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -27,16 +27,16 @@
/*
* eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
* to the ei->dentry must be done under this mutex and after checking
- * if ei->is_freed is not set. The ei->dentry is released under the
- * mutex at the same time ei->is_freed is set. If ei->is_freed is set
- * then the ei->dentry is invalid.
+ * if ei->is_freed is not set. When ei->is_freed is set, the dentry
+ * is on its way to being freed after the last dput() is made on it.
*/
static DEFINE_MUTEX(eventfs_mutex);
/*
* The eventfs_inode (ei) itself is protected by SRCU. It is released from
* its parent's list and will have is_freed set (under eventfs_mutex).
- * After the SRCU grace period is over, the ei may be freed.
+ * After the SRCU grace period is over and the last dput() is called
+ * the ei is freed.
*/
DEFINE_STATIC_SRCU(eventfs_srcu);
@@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
if (!(dentry->d_inode->i_mode & S_IFDIR)) {
if (!ei->entry_attrs) {
ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
- GFP_KERNEL);
+ GFP_NOFS);
if (!ei->entry_attrs) {
ret = -ENOMEM;
goto out;
@@ -326,7 +326,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
struct eventfs_attr *attr = NULL;
struct dentry **e_dentry = &ei->d_children[idx];
struct dentry *dentry;
- bool invalidate = false;
+
+ WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
mutex_lock(&eventfs_mutex);
if (ei->is_freed) {
@@ -348,15 +349,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
mutex_unlock(&eventfs_mutex);
- /* The lookup already has the parent->d_inode locked */
- if (!lookup)
- inode_lock(parent->d_inode);
-
dentry = create_file(name, mode, attr, parent, data, fops);
- if (!lookup)
- inode_unlock(parent->d_inode);
-
mutex_lock(&eventfs_mutex);
if (IS_ERR_OR_NULL(dentry)) {
@@ -365,12 +359,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
* created the dentry for this e_dentry. In which case
* use that one.
*
- * Note, with the mutex held, the e_dentry cannot have content
- * and the ei->is_freed be true at the same time.
+ * If ei->is_freed is set, the e_dentry is currently on its
+ * way to being freed, don't return it. If e_dentry is NULL
+ * it means it was already freed.
*/
- dentry = *e_dentry;
- if (WARN_ON_ONCE(dentry && ei->is_freed))
+ if (ei->is_freed)
dentry = NULL;
+ else
+ dentry = *e_dentry;
/* The lookup does not need to up the dentry refcount */
if (dentry && !lookup)
dget(dentry);
@@ -387,17 +383,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
* Otherwise it means two dentries exist with the same name.
*/
WARN_ON_ONCE(!ei->is_freed);
- invalidate = true;
+ dentry = NULL;
}
mutex_unlock(&eventfs_mutex);
- if (invalidate)
- d_invalidate(dentry);
-
- if (lookup || invalidate)
+ if (lookup)
dput(dentry);
- return invalidate ? NULL : dentry;
+ return dentry;
}
/**
@@ -437,9 +430,10 @@ static struct dentry *
create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
struct dentry *parent, bool lookup)
{
- bool invalidate = false;
struct dentry *dentry = NULL;
+ WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+
mutex_lock(&eventfs_mutex);
if (pei->is_freed || ei->is_freed) {
mutex_unlock(&eventfs_mutex);
@@ -456,15 +450,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
}
mutex_unlock(&eventfs_mutex);
- /* The lookup already has the parent->d_inode locked */
- if (!lookup)
- inode_lock(parent->d_inode);
-
dentry = create_dir(ei, parent);
- if (!lookup)
- inode_unlock(parent->d_inode);
-
mutex_lock(&eventfs_mutex);
if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
@@ -473,8 +460,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
* created the dentry for this e_dentry. In which case
* use that one.
*
- * Note, with the mutex held, the e_dentry cannot have content
- * and the ei->is_freed be true at the same time.
+ * If ei->is_freed is set, the e_dentry is currently on its
+ * way to being freed.
*/
dentry = ei->dentry;
if (dentry && !lookup)
@@ -493,16 +480,14 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
* Otherwise it means two dentries exist with the same name.
*/
WARN_ON_ONCE(!ei->is_freed);
- invalidate = true;
+ dentry = NULL;
}
mutex_unlock(&eventfs_mutex);
- if (invalidate)
- d_invalidate(dentry);
- if (lookup || invalidate)
+ if (lookup)
dput(dentry);
- return invalidate ? NULL : dentry;
+ return dentry;
}
/**
@@ -632,7 +617,7 @@ static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
{
struct dentry **tmp;
- tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+ tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
if (!tmp)
return -1;
tmp[cnt] = d;
@@ -698,6 +683,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
return -ENOMEM;
}
+ inode_lock(parent->d_inode);
list_for_each_entry_srcu(ei_child, &ei->children, list,
srcu_read_lock_held(&eventfs_srcu)) {
d = create_dir_dentry(ei, ei_child, parent, false);
@@ -730,6 +716,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
cnt++;
}
}
+ inode_unlock(parent->d_inode);
srcu_read_unlock(&eventfs_srcu, idx);
ret = dcache_dir_open(inode, file);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 5b54948514fe..ae648deed019 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
struct dentry *dentry;
int error;
+ /* Must always have a parent. */
+ if (WARN_ON_ONCE(!parent))
+ return ERR_PTR(-EINVAL);
+
error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
&tracefs_mount_count);
if (error)
return ERR_PTR(error);
- /*
- * If the parent is not specified, we create it in the root.
- * We need the root dentry to do this, which is in the super
- * block. A pointer to that is in the struct vfsmount that we
- * have around.
- */
- if (!parent)
- parent = tracefs_mount->mnt_root;
-
if (unlikely(IS_DEADDIR(parent->d_inode)))
dentry = ERR_PTR(-ENOENT);
else
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index ac6ba646624d..a013b87ab8d5 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -562,7 +562,8 @@ xfs_dquot_from_disk(
struct xfs_dquot *dqp,
struct xfs_buf *bp)
{
- struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset;
+ struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset);
+ struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq;
/*
* Ensure that we got the type and ID we were looking for.
@@ -1250,7 +1251,7 @@ xfs_qm_dqflush(
}
/* Flush the incore dquot to the ondisk buffer. */
- dqblk = bp->b_addr + dqp->q_bufoffset;
+ dqblk = xfs_buf_offset(bp, dqp->q_bufoffset);
xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp);
/*
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 8966ba842395..2c2720ce6923 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -19,6 +19,7 @@
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_error.h"
STATIC void
xlog_recover_dquot_ra_pass2(
@@ -65,6 +66,7 @@ xlog_recover_dquot_commit_pass2(
{
struct xfs_mount *mp = log->l_mp;
struct xfs_buf *bp;
+ struct xfs_dqblk *dqb;
struct xfs_disk_dquot *ddq, *recddq;
struct xfs_dq_logformat *dq_f;
xfs_failaddr_t fa;
@@ -130,14 +132,14 @@ xlog_recover_dquot_commit_pass2(
return error;
ASSERT(bp);
- ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
+ dqb = xfs_buf_offset(bp, dq_f->qlf_boffset);
+ ddq = &dqb->dd_diskdq;
/*
* If the dquot has an LSN in it, recover the dquot only if it's less
* than the lsn of the transaction we are replaying.
*/
if (xfs_has_crc(mp)) {
- struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn);
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
@@ -147,10 +149,23 @@ xlog_recover_dquot_commit_pass2(
memcpy(ddq, recddq, item->ri_buf[1].i_len);
if (xfs_has_crc(mp)) {
- xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+ xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF);
}
+ /* Validate the recovered dquot. */
+ fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id);
+ if (fa) {
+ XFS_CORRUPTION_ERROR("Bad dquot after recovery",
+ XFS_ERRLEVEL_LOW, mp, dqb,
+ sizeof(struct xfs_dqblk));
+ xfs_alert(mp,
+ "Metadata corruption detected at %pS, dquot 0x%x",
+ fa, dq_f->qlf_id);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
+
ASSERT(dq_f->qlf_size == 2);
ASSERT(bp->b_mount == mp);
bp->b_flags |= _XBF_LOGRECOVERY;