diff options
Diffstat (limited to 'tools/testing/selftests/ublk')
70 files changed, 7724 insertions, 0 deletions
diff --git a/tools/testing/selftests/ublk/.gitignore b/tools/testing/selftests/ublk/.gitignore new file mode 100644 index 000000000000..e17bd28f27e0 --- /dev/null +++ b/tools/testing/selftests/ublk/.gitignore @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +*-verify.state +/tools +kublk +metadata_size diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile new file mode 100644 index 000000000000..6e4fe8d1fed1 --- /dev/null +++ b/tools/testing/selftests/ublk/Makefile @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)/usr/include +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +LDLIBS += -lpthread -lm -luring + +TEST_PROGS := test_generic_02.sh +TEST_PROGS += test_generic_03.sh +TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh + +TEST_PROGS += test_generic_08.sh +TEST_PROGS += test_generic_09.sh +TEST_PROGS += test_generic_10.sh +TEST_PROGS += test_generic_12.sh +TEST_PROGS += test_generic_13.sh +TEST_PROGS += test_generic_16.sh +TEST_PROGS += test_generic_17.sh + +TEST_PROGS += test_batch_01.sh +TEST_PROGS += test_batch_02.sh +TEST_PROGS += test_batch_03.sh + +TEST_PROGS += test_null_01.sh +TEST_PROGS += test_null_02.sh +TEST_PROGS += test_null_03.sh +TEST_PROGS += test_loop_01.sh +TEST_PROGS += test_loop_02.sh +TEST_PROGS += test_loop_03.sh +TEST_PROGS += test_loop_04.sh +TEST_PROGS += test_loop_05.sh +TEST_PROGS += test_loop_06.sh +TEST_PROGS += test_loop_07.sh + +TEST_PROGS += test_integrity_01.sh +TEST_PROGS += test_integrity_02.sh +TEST_PROGS += test_integrity_03.sh + +TEST_PROGS += test_recover_01.sh +TEST_PROGS += test_recover_02.sh +TEST_PROGS += test_recover_03.sh +TEST_PROGS += test_recover_04.sh +TEST_PROGS += test_stripe_01.sh +TEST_PROGS += test_stripe_02.sh +TEST_PROGS += test_stripe_03.sh +TEST_PROGS += test_stripe_04.sh +TEST_PROGS += test_stripe_05.sh +TEST_PROGS += test_stripe_06.sh + +TEST_PROGS += test_part_01.sh +TEST_PROGS += test_part_02.sh + +TEST_PROGS += test_shmemzc_01.sh +TEST_PROGS += test_shmemzc_02.sh +TEST_PROGS += test_shmemzc_03.sh +TEST_PROGS += test_shmemzc_04.sh + +TEST_PROGS += test_stress_01.sh +TEST_PROGS += test_stress_02.sh +TEST_PROGS += test_stress_03.sh +TEST_PROGS += test_stress_04.sh +TEST_PROGS += test_stress_05.sh +TEST_PROGS += test_stress_06.sh +TEST_PROGS += test_stress_07.sh +TEST_PROGS += test_stress_08.sh +TEST_PROGS += test_stress_09.sh + +TEST_FILES := settings + +TEST_GEN_PROGS_EXTENDED = kublk metadata_size +STANDALONE_UTILS := metadata_size.c + +LOCAL_HDRS += $(wildcard *.h) +include ../lib.mk + +$(OUTPUT)/kublk: $(filter-out $(STANDALONE_UTILS),$(wildcard *.c)) + +check: + shellcheck -x -f gcc *.sh + +# Test groups for running subsets of tests +# JOBS=1 (default): sequential with kselftest TAP output +# JOBS>1: parallel execution with xargs -P +# Usage: make run_null JOBS=4 +JOBS ?= 1 +export JOBS + +# Auto-detect test groups from TEST_PROGS (test_<group>_<num>.sh -> group) +TEST_GROUPS := $(shell echo "$(TEST_PROGS)" | tr ' ' '\n' | \ + sed 's/test_\([^_]*\)_.*/\1/' | sort -u) + +# Template for group test targets +# $(1) = group name (e.g., null, generic, stress) +define RUN_GROUP +run_$(1): all + @if [ $$(JOBS) -gt 1 ]; then \ + echo $$(filter test_$(1)_%.sh,$$(TEST_PROGS)) | tr ' ' '\n' | \ + xargs -P $$(JOBS) -n1 sh -c './"$$$$0"' || true; \ + else \ + $$(call RUN_TESTS, $$(filter test_$(1)_%.sh,$$(TEST_PROGS))); \ + fi +.PHONY: run_$(1) +endef + +# Generate targets for each discovered test group +$(foreach group,$(TEST_GROUPS),$(eval $(call RUN_GROUP,$(group)))) + +# Run all tests (parallel when JOBS>1) +run_all: all + @if [ $(JOBS) -gt 1 ]; then \ + echo $(TEST_PROGS) | tr ' ' '\n' | \ + xargs -P $(JOBS) -n1 sh -c './"$$0"' || true; \ + else \ + $(call RUN_TESTS, $(TEST_PROGS)); \ + fi +.PHONY: run_all diff --git a/tools/testing/selftests/ublk/batch.c b/tools/testing/selftests/ublk/batch.c new file mode 100644 index 000000000000..a54025b00917 --- /dev/null +++ b/tools/testing/selftests/ublk/batch.c @@ -0,0 +1,607 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: UBLK_F_BATCH_IO buffer management + */ + +#include "kublk.h" + +static inline void *ublk_get_commit_buf(struct ublk_thread *t, + unsigned short buf_idx) +{ + unsigned idx; + + if (buf_idx < t->commit_buf_start || + buf_idx >= t->commit_buf_start + t->nr_commit_buf) + return NULL; + idx = buf_idx - t->commit_buf_start; + return t->commit_buf + idx * t->commit_buf_size; +} + +/* + * Allocate one buffer for UBLK_U_IO_PREP_IO_CMDS or UBLK_U_IO_COMMIT_IO_CMDS + * + * Buffer index is returned. + */ +static inline unsigned short ublk_alloc_commit_buf(struct ublk_thread *t) +{ + int idx = allocator_get(&t->commit_buf_alloc); + + if (idx >= 0) + return idx + t->commit_buf_start; + return UBLKS_T_COMMIT_BUF_INV_IDX; +} + +/* + * Free one commit buffer which is used by UBLK_U_IO_PREP_IO_CMDS or + * UBLK_U_IO_COMMIT_IO_CMDS + */ +static inline void ublk_free_commit_buf(struct ublk_thread *t, + unsigned short i) +{ + unsigned short idx = i - t->commit_buf_start; + + ublk_assert(idx < t->nr_commit_buf); + ublk_assert(allocator_get_val(&t->commit_buf_alloc, idx) != 0); + + allocator_put(&t->commit_buf_alloc, idx); +} + +static unsigned char ublk_commit_elem_buf_size(struct ublk_dev *dev) +{ + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY | + UBLK_F_AUTO_BUF_REG)) + return 8; + + /* one extra 8bytes for carrying buffer address */ + return 16; +} + +static unsigned ublk_commit_buf_size(struct ublk_thread *t) +{ + struct ublk_dev *dev = t->dev; + unsigned elem_size = ublk_commit_elem_buf_size(dev); + unsigned int total = elem_size * dev->dev_info.queue_depth; + unsigned int page_sz = getpagesize(); + + return round_up(total, page_sz); +} + +static void free_batch_commit_buf(struct ublk_thread *t) +{ + if (t->commit_buf) { + unsigned buf_size = ublk_commit_buf_size(t); + unsigned int total = buf_size * t->nr_commit_buf; + + munlock(t->commit_buf, total); + free(t->commit_buf); + } + allocator_deinit(&t->commit_buf_alloc); + free(t->commit); +} + +static int alloc_batch_commit_buf(struct ublk_thread *t) +{ + unsigned buf_size = ublk_commit_buf_size(t); + unsigned int total = buf_size * t->nr_commit_buf; + unsigned int page_sz = getpagesize(); + void *buf = NULL; + int i, ret, j = 0; + + t->commit = calloc(t->nr_queues, sizeof(*t->commit)); + for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) { + if (t->q_map[i]) + t->commit[j++].q_id = i; + } + + allocator_init(&t->commit_buf_alloc, t->nr_commit_buf); + + t->commit_buf = NULL; + ret = posix_memalign(&buf, page_sz, total); + if (ret || !buf) + goto fail; + + t->commit_buf = buf; + + /* lock commit buffer pages for fast access */ + if (mlock(t->commit_buf, total)) + ublk_err("%s: can't lock commit buffer %s\n", __func__, + strerror(errno)); + + return 0; + +fail: + free_batch_commit_buf(t); + return ret; +} + +static unsigned int ublk_thread_nr_queues(const struct ublk_thread *t) +{ + int i; + int ret = 0; + + for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) + ret += !!t->q_map[i]; + + return ret; +} + +void ublk_batch_prepare(struct ublk_thread *t) +{ + /* + * We only handle single device in this thread context. + * + * All queues have same feature flags, so use queue 0's for + * calculate uring_cmd flags. + * + * This way looks not elegant, but it works so far. + */ + struct ublk_queue *q = &t->dev->q[0]; + + /* cache nr_queues because we don't support dynamic load-balance yet */ + t->nr_queues = ublk_thread_nr_queues(t); + + t->commit_buf_elem_size = ublk_commit_elem_buf_size(t->dev); + t->commit_buf_size = ublk_commit_buf_size(t); + t->commit_buf_start = t->nr_bufs; + t->nr_commit_buf = 2 * t->nr_queues; + t->nr_bufs += t->nr_commit_buf; + + t->cmd_flags = 0; + if (ublk_queue_use_auto_zc(q)) { + if (ublk_queue_auto_zc_fallback(q)) + t->cmd_flags |= UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK; + } else if (!ublk_queue_no_buf(q)) + t->cmd_flags |= UBLK_BATCH_F_HAS_BUF_ADDR; + + t->state |= UBLKS_T_BATCH_IO; + + ublk_log("%s: thread %d commit(nr_bufs %u, buf_size %u, start %u)\n", + __func__, t->idx, + t->nr_commit_buf, t->commit_buf_size, + t->nr_bufs); +} + +static void free_batch_fetch_buf(struct ublk_thread *t) +{ + int i; + + for (i = 0; i < t->nr_fetch_bufs; i++) { + io_uring_free_buf_ring(&t->ring, t->fetch[i].br, 1, i); + munlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size); + free(t->fetch[i].fetch_buf); + } + free(t->fetch); +} + +static int alloc_batch_fetch_buf(struct ublk_thread *t) +{ + /* page aligned fetch buffer, and it is mlocked for speedup delivery */ + unsigned pg_sz = getpagesize(); + unsigned buf_size = round_up(t->dev->dev_info.queue_depth * 2, pg_sz); + int ret; + int i = 0; + + /* double fetch buffer for each queue */ + t->nr_fetch_bufs = t->nr_queues * 2; + t->fetch = calloc(t->nr_fetch_bufs, sizeof(*t->fetch)); + + /* allocate one buffer for each queue */ + for (i = 0; i < t->nr_fetch_bufs; i++) { + t->fetch[i].fetch_buf_size = buf_size; + + if (posix_memalign((void **)&t->fetch[i].fetch_buf, pg_sz, + t->fetch[i].fetch_buf_size)) + return -ENOMEM; + + /* lock fetch buffer page for fast fetching */ + if (mlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size)) + ublk_err("%s: can't lock fetch buffer %s\n", __func__, + strerror(errno)); + t->fetch[i].br = io_uring_setup_buf_ring(&t->ring, 1, + i, IOU_PBUF_RING_INC, &ret); + if (!t->fetch[i].br) { + ublk_err("Buffer ring register failed %d\n", ret); + return ret; + } + } + + return 0; +} + +int ublk_batch_alloc_buf(struct ublk_thread *t) +{ + int ret; + + ublk_assert(t->nr_commit_buf < 2 * UBLK_MAX_QUEUES); + + ret = alloc_batch_commit_buf(t); + if (ret) + return ret; + return alloc_batch_fetch_buf(t); +} + +void ublk_batch_free_buf(struct ublk_thread *t) +{ + free_batch_commit_buf(t); + free_batch_fetch_buf(t); +} + +static void ublk_init_batch_cmd(struct ublk_thread *t, __u16 q_id, + struct io_uring_sqe *sqe, unsigned op, + unsigned short elem_bytes, + unsigned short nr_elem, + unsigned short buf_idx) +{ + struct ublk_batch_io *cmd; + __u64 user_data; + + cmd = (struct ublk_batch_io *)ublk_get_sqe_cmd(sqe); + + ublk_set_sqe_cmd_op(sqe, op); + + sqe->fd = 0; /* dev->fds[0] */ + sqe->opcode = IORING_OP_URING_CMD; + sqe->flags = IOSQE_FIXED_FILE; + + cmd->q_id = q_id; + cmd->flags = 0; + cmd->reserved = 0; + cmd->elem_bytes = elem_bytes; + cmd->nr_elem = nr_elem; + + user_data = build_user_data(buf_idx, _IOC_NR(op), nr_elem, q_id, 0); + io_uring_sqe_set_data64(sqe, user_data); + + t->cmd_inflight += 1; + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: thread %u qid %d cmd_op %x data %lx " + "nr_elem %u elem_bytes %u buf_size %u buf_idx %d " + "cmd_inflight %u\n", + __func__, t->idx, q_id, op, user_data, + cmd->nr_elem, cmd->elem_bytes, + nr_elem * elem_bytes, buf_idx, t->cmd_inflight); +} + +static void ublk_setup_commit_sqe(struct ublk_thread *t, + struct io_uring_sqe *sqe, + unsigned short buf_idx) +{ + struct ublk_batch_io *cmd; + + cmd = (struct ublk_batch_io *)ublk_get_sqe_cmd(sqe); + + /* Use plain user buffer instead of fixed buffer */ + cmd->flags |= t->cmd_flags; +} + +static void ublk_batch_queue_fetch(struct ublk_thread *t, + struct ublk_queue *q, + unsigned short buf_idx) +{ + unsigned short nr_elem = t->fetch[buf_idx].fetch_buf_size / 2; + struct io_uring_sqe *sqe; + + io_uring_buf_ring_add(t->fetch[buf_idx].br, t->fetch[buf_idx].fetch_buf, + t->fetch[buf_idx].fetch_buf_size, + 0, 0, 0); + io_uring_buf_ring_advance(t->fetch[buf_idx].br, 1); + + ublk_io_alloc_sqes(t, &sqe, 1); + + ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_FETCH_IO_CMDS, 2, nr_elem, + buf_idx); + + sqe->rw_flags= IORING_URING_CMD_MULTISHOT; + sqe->buf_group = buf_idx; + sqe->flags |= IOSQE_BUFFER_SELECT; + + t->fetch[buf_idx].fetch_buf_off = 0; +} + +void ublk_batch_start_fetch(struct ublk_thread *t) +{ + int i; + int j = 0; + + for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) { + if (t->q_map[i]) { + struct ublk_queue *q = &t->dev->q[i]; + + /* submit two fetch commands for each queue */ + ublk_batch_queue_fetch(t, q, j++); + ublk_batch_queue_fetch(t, q, j++); + } + } +} + +static unsigned short ublk_compl_batch_fetch(struct ublk_thread *t, + struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + unsigned short buf_idx = user_data_to_tag(cqe->user_data); + unsigned start = t->fetch[buf_idx].fetch_buf_off; + unsigned end = start + cqe->res; + void *buf = t->fetch[buf_idx].fetch_buf; + int i; + + if (cqe->res < 0) + return buf_idx; + + if ((end - start) / 2 > q->q_depth) { + ublk_err("%s: fetch duplicated ios offset %u count %u\n", __func__, start, cqe->res); + + for (i = start; i < end; i += 2) { + unsigned short tag = *(unsigned short *)(buf + i); + + ublk_err("%u ", tag); + } + ublk_err("\n"); + } + + for (i = start; i < end; i += 2) { + unsigned short tag = *(unsigned short *)(buf + i); + + if (tag >= q->q_depth) + ublk_err("%s: bad tag %u\n", __func__, tag); + + if (q->tgt_ops->queue_io) + q->tgt_ops->queue_io(t, q, tag); + } + t->fetch[buf_idx].fetch_buf_off = end; + return buf_idx; +} + +static int __ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q) +{ + unsigned short nr_elem = q->q_depth; + unsigned short buf_idx = ublk_alloc_commit_buf(t); + struct io_uring_sqe *sqe; + void *buf; + int i; + + ublk_assert(buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX); + + ublk_io_alloc_sqes(t, &sqe, 1); + + ublk_assert(nr_elem == q->q_depth); + buf = ublk_get_commit_buf(t, buf_idx); + for (i = 0; i < nr_elem; i++) { + struct ublk_batch_elem *elem = (struct ublk_batch_elem *)( + buf + i * t->commit_buf_elem_size); + struct ublk_io *io = &q->ios[i]; + + elem->tag = i; + elem->result = 0; + + if (ublk_queue_use_auto_zc(q)) + elem->buf_index = ublk_batch_io_buf_idx(t, q, i); + else if (!ublk_queue_no_buf(q)) + elem->buf_addr = (__u64)io->buf_addr; + } + + sqe->addr = (__u64)buf; + sqe->len = t->commit_buf_elem_size * nr_elem; + + ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_PREP_IO_CMDS, + t->commit_buf_elem_size, nr_elem, buf_idx); + ublk_setup_commit_sqe(t, sqe, buf_idx); + return 0; +} + +int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q) +{ + int ret = 0; + + pthread_spin_lock(&q->lock); + if (q->flags & UBLKS_Q_PREPARED) + goto unlock; + ret = __ublk_batch_queue_prep_io_cmds(t, q); + if (!ret) + q->flags |= UBLKS_Q_PREPARED; +unlock: + pthread_spin_unlock(&q->lock); + + return ret; +} + +static void ublk_batch_compl_commit_cmd(struct ublk_thread *t, + const struct io_uring_cqe *cqe, + unsigned op) +{ + unsigned short buf_idx = user_data_to_tag(cqe->user_data); + + if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS)) + ublk_assert(cqe->res == 0); + else if (op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) { + int nr_elem = user_data_to_tgt_data(cqe->user_data); + + ublk_assert(cqe->res == t->commit_buf_elem_size * nr_elem); + } else + ublk_assert(0); + + ublk_free_commit_buf(t, buf_idx); +} + +void ublk_batch_compl_cmd(struct ublk_thread *t, + const struct io_uring_cqe *cqe) +{ + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_queue *q; + unsigned buf_idx; + unsigned q_id; + + if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS) || + op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) { + t->cmd_inflight--; + ublk_batch_compl_commit_cmd(t, cqe, op); + return; + } + + /* FETCH command is per queue */ + q_id = user_data_to_q_id(cqe->user_data); + q = &t->dev->q[q_id]; + buf_idx = ublk_compl_batch_fetch(t, q, cqe); + + if (cqe->res < 0 && cqe->res != -ENOBUFS) { + t->cmd_inflight--; + t->state |= UBLKS_T_STOPPING; + } else if (!(cqe->flags & IORING_CQE_F_MORE) || cqe->res == -ENOBUFS) { + t->cmd_inflight--; + ublk_batch_queue_fetch(t, q, buf_idx); + } +} + +static void __ublk_batch_commit_io_cmds(struct ublk_thread *t, + struct batch_commit_buf *cb) +{ + struct io_uring_sqe *sqe; + unsigned short buf_idx; + unsigned short nr_elem = cb->done; + + /* nothing to commit */ + if (!nr_elem) { + ublk_free_commit_buf(t, cb->buf_idx); + return; + } + + ublk_io_alloc_sqes(t, &sqe, 1); + buf_idx = cb->buf_idx; + sqe->addr = (__u64)cb->elem; + sqe->len = nr_elem * t->commit_buf_elem_size; + + /* commit isn't per-queue command */ + ublk_init_batch_cmd(t, cb->q_id, sqe, UBLK_U_IO_COMMIT_IO_CMDS, + t->commit_buf_elem_size, nr_elem, buf_idx); + ublk_setup_commit_sqe(t, sqe, buf_idx); +} + +void ublk_batch_commit_io_cmds(struct ublk_thread *t) +{ + int i; + + for (i = 0; i < t->nr_queues; i++) { + struct batch_commit_buf *cb = &t->commit[i]; + + if (cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX) + __ublk_batch_commit_io_cmds(t, cb); + } + +} + +static void __ublk_batch_init_commit(struct ublk_thread *t, + struct batch_commit_buf *cb, + unsigned short buf_idx) +{ + /* so far only support 1:1 queue/thread mapping */ + cb->buf_idx = buf_idx; + cb->elem = ublk_get_commit_buf(t, buf_idx); + cb->done = 0; + cb->count = t->commit_buf_size / + t->commit_buf_elem_size; +} + +/* COMMIT_IO_CMDS is per-queue command, so use its own commit buffer */ +static void ublk_batch_init_commit(struct ublk_thread *t, + struct batch_commit_buf *cb) +{ + unsigned short buf_idx = ublk_alloc_commit_buf(t); + + ublk_assert(buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX); + ublk_assert(!ublk_batch_commit_prepared(cb)); + + __ublk_batch_init_commit(t, cb, buf_idx); +} + +void ublk_batch_prep_commit(struct ublk_thread *t) +{ + int i; + + for (i = 0; i < t->nr_queues; i++) + t->commit[i].buf_idx = UBLKS_T_COMMIT_BUF_INV_IDX; +} + +void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int res) +{ + unsigned q_t_idx = ublk_queue_idx_in_thread(t, q); + struct batch_commit_buf *cb = &t->commit[q_t_idx]; + struct ublk_batch_elem *elem; + struct ublk_io *io = &q->ios[tag]; + + if (!ublk_batch_commit_prepared(cb)) + ublk_batch_init_commit(t, cb); + + ublk_assert(q->q_id == cb->q_id); + + elem = (struct ublk_batch_elem *)(cb->elem + cb->done * t->commit_buf_elem_size); + elem->tag = tag; + elem->buf_index = ublk_batch_io_buf_idx(t, q, tag); + elem->result = res; + + if (!ublk_queue_no_buf(q)) + elem->buf_addr = (__u64) (uintptr_t) io->buf_addr; + + cb->done += 1; + ublk_assert(cb->done <= cb->count); +} + +void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES], + int nthreads, int queues) +{ + int i, j; + + /* + * Setup round-robin queue-to-thread mapping for arbitrary N:M combinations. + * + * This algorithm distributes queues across threads (and threads across queues) + * in a balanced round-robin fashion to ensure even load distribution. + * + * Examples: + * - 2 threads, 4 queues: T0=[Q0,Q2], T1=[Q1,Q3] + * - 4 threads, 2 queues: T0=[Q0], T1=[Q1], T2=[Q0], T3=[Q1] + * - 3 threads, 3 queues: T0=[Q0], T1=[Q1], T2=[Q2] (1:1 mapping) + * + * Phase 1: Mark which queues each thread handles (boolean mapping) + */ + for (i = 0, j = 0; i < queues || j < nthreads; i++, j++) { + q_thread_map[j % nthreads][i % queues] = 1; + } + + /* + * Phase 2: Convert boolean mapping to sequential indices within each thread. + * + * Transform from: q_thread_map[thread][queue] = 1 (handles queue) + * To: q_thread_map[thread][queue] = N (queue index within thread) + * + * This allows each thread to know the local index of each queue it handles, + * which is essential for buffer allocation and management. For example: + * - Thread 0 handling queues [0,2] becomes: q_thread_map[0][0]=1, q_thread_map[0][2]=2 + * - Thread 1 handling queues [1,3] becomes: q_thread_map[1][1]=1, q_thread_map[1][3]=2 + */ + for (j = 0; j < nthreads; j++) { + unsigned char seq = 1; + + for (i = 0; i < queues; i++) { + if (q_thread_map[j][i]) + q_thread_map[j][i] = seq++; + } + } + +#if 0 + for (j = 0; j < nthreads; j++) { + printf("thread %0d: ", j); + for (i = 0; i < queues; i++) { + if (q_thread_map[j][i]) + printf("%03u ", i); + } + printf("\n"); + } + printf("\n"); + for (j = 0; j < nthreads; j++) { + for (i = 0; i < queues; i++) { + printf("%03u ", q_thread_map[j][i]); + } + printf("\n"); + } +#endif +} diff --git a/tools/testing/selftests/ublk/common.c b/tools/testing/selftests/ublk/common.c new file mode 100644 index 000000000000..530f9877c9dd --- /dev/null +++ b/tools/testing/selftests/ublk/common.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kublk.h" + +void backing_file_tgt_deinit(struct ublk_dev *dev) +{ + int i; + + for (i = 1; i < dev->nr_fds; i++) { + fsync(dev->fds[i]); + close(dev->fds[i]); + } +} + +int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct) +{ + int fd, i; + + ublk_assert(dev->nr_fds == 1); + + for (i = 0; i < dev->tgt.nr_backing_files; i++) { + char *file = dev->tgt.backing_file[i]; + unsigned long bytes; + struct stat st; + + ublk_dbg(UBLK_DBG_DEV, "%s: file %d: %s\n", __func__, i, file); + + fd = open(file, O_RDWR | (i < nr_direct ? O_DIRECT : 0)); + if (fd < 0) { + ublk_err("%s: backing file %s can't be opened: %s\n", + __func__, file, strerror(errno)); + return -EBADF; + } + + if (fstat(fd, &st) < 0) { + close(fd); + return -EBADF; + } + + if (S_ISREG(st.st_mode)) + bytes = st.st_size; + else if (S_ISBLK(st.st_mode)) { + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) + return -1; + } else { + return -EINVAL; + } + + dev->tgt.backing_file_size[i] = bytes; + dev->fds[dev->nr_fds] = fd; + dev->nr_fds += 1; + } + + return 0; +} diff --git a/tools/testing/selftests/ublk/config b/tools/testing/selftests/ublk/config new file mode 100644 index 000000000000..592b0ba4d661 --- /dev/null +++ b/tools/testing/selftests/ublk/config @@ -0,0 +1 @@ +CONFIG_BLK_DEV_UBLK=m diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c new file mode 100644 index 000000000000..150896e02ff8 --- /dev/null +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Fault injection ublk target. Hack this up however you like for + * testing specific behaviors of ublk_drv. Currently is a null target + * with a configurable delay before completing each I/O. This delay can + * be used to test ublk_drv's handling of I/O outstanding to the ublk + * server when it dies. + */ + +#include "kublk.h" + +struct fi_opts { + long long delay_ns; + bool die_during_fetch; +}; + +static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, + struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + struct fi_opts *opts = NULL; + + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + ublk_set_integrity_params(ctx, &dev->tgt.params); + + opts = calloc(1, sizeof(*opts)); + if (!opts) { + ublk_err("%s: couldn't allocate memory for opts\n", __func__); + return -ENOMEM; + } + + opts->delay_ns = ctx->fault_inject.delay_us * 1000; + opts->die_during_fetch = ctx->fault_inject.die_during_fetch; + dev->private_data = opts; + + return 0; +} + +static void ublk_fault_inject_pre_fetch_io(struct ublk_thread *t, + struct ublk_queue *q, int tag, + bool batch) +{ + struct fi_opts *opts = q->dev->private_data; + + if (!opts->die_during_fetch) + return; + + /* + * Each queue fetches its IOs in increasing order of tags, so + * dying just before we're about to fetch tag 1 (regardless of + * what queue we're on) guarantees that we've fetched a nonempty + * proper subset of the tags on that queue. + */ + if (tag == 1) { + /* + * Ensure our commands are actually live in the kernel + * before we die. + */ + io_uring_submit(&t->ring); + raise(SIGKILL); + } +} + +static int ublk_fault_inject_queue_io(struct ublk_thread *t, + struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe; + struct fi_opts *opts = q->dev->private_data; + struct __kernel_timespec ts = { + .tv_nsec = opts->delay_ns, + }; + + ublk_io_alloc_sqes(t, &sqe, 1); + io_uring_prep_timeout(sqe, &ts, 1, 0); + sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1); + + ublk_queued_tgt_io(t, q, tag, 1); + + return 0; +} + +static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t, + struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + unsigned tag = user_data_to_tag(cqe->user_data); + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + if (cqe->res != -ETIME) + ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); + + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); + else + ublk_err("%s: io not complete after 1 cqe\n", __func__); +} + +static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "delay_us", 1, NULL, 0 }, + { "die_during_fetch", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->fault_inject.delay_us = 0; + ctx->fault_inject.die_during_fetch = false; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "delay_us")) + ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); + if (!strcmp(longopts[option_idx].name, "die_during_fetch")) + ctx->fault_inject.die_during_fetch = strtoll(optarg, NULL, 10); + } + } +} + +static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tfault_inject: [--delay_us us (default 0)] [--die_during_fetch 1]\n"); +} + +const struct ublk_tgt_ops fault_inject_tgt_ops = { + .name = "fault_inject", + .init_tgt = ublk_fault_inject_tgt_init, + .pre_fetch_io = ublk_fault_inject_pre_fetch_io, + .queue_io = ublk_fault_inject_queue_io, + .tgt_io_done = ublk_fault_inject_tgt_io_done, + .parse_cmd_line = ublk_fault_inject_cmd_line, + .usage = ublk_fault_inject_usage, +}; diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c new file mode 100644 index 000000000000..d28da98f917a --- /dev/null +++ b/tools/testing/selftests/ublk/file_backed.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kublk.h" + +static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + if (ublk_op == UBLK_IO_OP_READ) + return zc ? IORING_OP_READ_FIXED : IORING_OP_READ; + else if (ublk_op == UBLK_IO_OP_WRITE) + return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE; + ublk_assert(0); +} + +static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) +{ + unsigned ublk_op = ublksrv_get_op(iod); + struct io_uring_sqe *sqe[1]; + + ublk_io_alloc_sqes(t, sqe, 1); + io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC); + io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); + /* bit63 marks us as tgt io */ + sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); + return 1; +} + +/* + * Shared memory zero-copy I/O: when UBLK_IO_F_SHMEM_ZC is set, the + * request's data lives in a registered shared memory buffer. Decode + * index + offset from iod->addr and use the server's mmap of that + * buffer as the I/O buffer for the backing file. + */ +static int loop_queue_shmem_zc_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) +{ + unsigned ublk_op = ublksrv_get_op(iod); + enum io_uring_op op = ublk_to_uring_op(iod, 0); + __u64 file_offset = iod->start_sector << 9; + __u32 len = iod->nr_sectors << 9; + __u32 shmem_idx = ublk_shmem_zc_index(iod->addr); + __u32 shmem_off = ublk_shmem_zc_offset(iod->addr); + struct io_uring_sqe *sqe[1]; + void *addr; + + if (shmem_idx >= UBLK_BUF_MAX || !shmem_table[shmem_idx].mmap_base) + return -EINVAL; + + addr = shmem_table[shmem_idx].mmap_base + shmem_off; + + ublk_io_alloc_sqes(t, sqe, 1); + if (!sqe[0]) + return -ENOMEM; + + io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1), + addr, len, file_offset); + io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); + sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); + return 1; +} + +static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) +{ + unsigned ublk_op = ublksrv_get_op(iod); + unsigned zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc); + struct ublk_io *io = ublk_get_io(q, tag); + __u64 offset = iod->start_sector << 9; + __u32 len = iod->nr_sectors << 9; + struct io_uring_sqe *sqe[3]; + void *addr = io->buf_addr; + unsigned short buf_index = ublk_io_buf_idx(t, q, tag); + + /* shared memory zero-copy path */ + if (iod->op_flags & UBLK_IO_F_SHMEM_ZC) + return loop_queue_shmem_zc_io(t, q, iod, tag); + + if (iod->op_flags & UBLK_IO_F_INTEGRITY) { + ublk_io_alloc_sqes(t, sqe, 1); + /* Use second backing file for integrity data */ + io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 2), + io->integrity_buf, + ublk_integrity_len(q, len), + ublk_integrity_len(q, offset)); + sqe[0]->flags = IOSQE_FIXED_FILE; + /* tgt_data = 1 indicates integrity I/O */ + sqe[0]->user_data = build_user_data(tag, ublk_op, 1, q->q_id, 1); + } + + if (!zc || auto_zc) { + ublk_io_alloc_sqes(t, sqe, 1); + if (!sqe[0]) + return -ENOMEM; + + io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, + addr, + len, + offset); + if (auto_zc) + sqe[0]->buf_index = buf_index; + io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); + /* bit63 marks us as tgt io */ + sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); + return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 1; + } + + ublk_io_alloc_sqes(t, sqe, 3); + + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_index); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); + + io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0, + len, + offset); + sqe[1]->buf_index = buf_index; + sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; + sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); + + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_index); + sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); + + return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 2; +} + +static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned ublk_op = ublksrv_get_op(iod); + int ret; + + switch (ublk_op) { + case UBLK_IO_OP_FLUSH: + ret = loop_queue_flush_io(t, q, iod, tag); + break; + case UBLK_IO_OP_WRITE_ZEROES: + case UBLK_IO_OP_DISCARD: + ret = -ENOTSUP; + break; + case UBLK_IO_OP_READ: + case UBLK_IO_OP_WRITE: + ret = loop_queue_tgt_rw_io(t, q, iod, tag); + break; + default: + ret = -EINVAL; + break; + } + + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, + iod->op_flags, iod->start_sector, iod->nr_sectors << 9); + return ret; +} + +static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) +{ + int queued = loop_queue_tgt_io(t, q, tag); + + ublk_queued_tgt_io(t, q, tag, queued); + return 0; +} + +static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + unsigned tag = user_data_to_tag(cqe->user_data); + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_io *io = ublk_get_io(q, tag); + + if (cqe->res < 0) { + io->result = cqe->res; + ublk_err("%s: io failed op %x user_data %lx\n", + __func__, op, cqe->user_data); + } else if (op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { + __s32 data_len = user_data_to_tgt_data(cqe->user_data) + ? ublk_integrity_data_len(q, cqe->res) + : cqe->res; + + if (!io->result || data_len < io->result) + io->result = data_len; + } + + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); +} + +static int ublk_loop_memset_file(int fd, __u8 byte, size_t len) +{ + off_t offset = 0; + __u8 buf[4096]; + + memset(buf, byte, sizeof(buf)); + while (len) { + int ret = pwrite(fd, buf, min(len, sizeof(buf)), offset); + + if (ret < 0) + return -errno; + if (!ret) + return -EIO; + + len -= ret; + offset += ret; + } + return 0; +} + +static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + unsigned long long bytes; + unsigned long blocks; + int ret; + struct ublk_params p = { + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN, + .basic = { + .attrs = UBLK_ATTR_VOLATILE_CACHE, + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, + }, + .dma = { + .alignment = 511, + }, + }; + + ublk_set_integrity_params(ctx, &p); + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + + /* Use O_DIRECT only for data file */ + ret = backing_file_tgt_init(dev, 1); + if (ret) + return ret; + + /* Expect a second file for integrity data */ + if (dev->tgt.nr_backing_files != 1 + !!ctx->metadata_size) + return -EINVAL; + + blocks = dev->tgt.backing_file_size[0] >> p.basic.logical_bs_shift; + if (ctx->metadata_size) { + unsigned long metadata_blocks = + dev->tgt.backing_file_size[1] / ctx->metadata_size; + unsigned long integrity_len; + + /* Ensure both data and integrity data fit in backing files */ + blocks = min(blocks, metadata_blocks); + integrity_len = blocks * ctx->metadata_size; + /* + * Initialize PI app tag and ref tag to 0xFF + * to disable bio-integrity-auto checks + */ + ret = ublk_loop_memset_file(dev->fds[2], 0xFF, integrity_len); + if (ret) + return ret; + } + bytes = blocks << p.basic.logical_bs_shift; + dev->tgt.dev_size = bytes; + p.basic.dev_sectors = bytes >> 9; + dev->tgt.params = p; + + return 0; +} + +const struct ublk_tgt_ops loop_tgt_ops = { + .name = "loop", + .init_tgt = ublk_loop_tgt_init, + .deinit_tgt = backing_file_tgt_deinit, + .queue_io = ublk_loop_queue_io, + .tgt_io_done = ublk_loop_io_done, +}; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c new file mode 100644 index 000000000000..0b23c09daea5 --- /dev/null +++ b/tools/testing/selftests/ublk/kublk.c @@ -0,0 +1,2380 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: uring_cmd based ublk + */ + +#include <linux/fs.h> +#include <sys/un.h> +#include "kublk.h" + +#define MAX_NR_TGT_ARG 64 + +unsigned int ublk_dbg_mask = UBLK_LOG; +static const struct ublk_tgt_ops *tgt_ops_list[] = { + &null_tgt_ops, + &loop_tgt_ops, + &stripe_tgt_ops, + &fault_inject_tgt_ops, +}; + +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) +{ + int i; + + if (name == NULL) + return NULL; + + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) + if (strcmp(tgt_ops_list[i]->name, name) == 0) + return tgt_ops_list[i]; + return NULL; +} + +static inline int ublk_setup_ring(struct io_uring *r, int depth, + int cq_depth, unsigned flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags | IORING_SETUP_CQSIZE; + p.cq_entries = cq_depth; + + return io_uring_queue_init_params(depth, r, &p); +} + +static void ublk_ctrl_init_cmd(struct ublk_dev *dev, + struct io_uring_sqe *sqe, + struct ublk_ctrl_cmd_data *data) +{ + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe); + + sqe->fd = dev->ctrl_fd; + sqe->opcode = IORING_OP_URING_CMD; + sqe->ioprio = 0; + + if (data->flags & CTRL_CMD_HAS_BUF) { + cmd->addr = data->addr; + cmd->len = data->len; + } + + if (data->flags & CTRL_CMD_HAS_DATA) + cmd->data[0] = data->data[0]; + + cmd->dev_id = info->dev_id; + cmd->queue_id = -1; + + ublk_set_sqe_cmd_op(sqe, data->cmd_op); + + io_uring_sqe_set_data(sqe, cmd); +} + +static int __ublk_ctrl_cmd(struct ublk_dev *dev, + struct ublk_ctrl_cmd_data *data) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret = -EINVAL; + + sqe = io_uring_get_sqe(&dev->ring); + if (!sqe) { + ublk_err("%s: can't get sqe ret %d\n", __func__, ret); + return ret; + } + + ublk_ctrl_init_cmd(dev, sqe, data); + + ret = io_uring_submit(&dev->ring); + if (ret < 0) { + ublk_err("uring submit ret %d\n", ret); + return ret; + } + + ret = io_uring_wait_cqe(&dev->ring, &cqe); + if (ret < 0) { + ublk_err("wait cqe: %s\n", strerror(-ret)); + return ret; + } + io_uring_cqe_seen(&dev->ring, cqe); + + return cqe->res; +} + +static int ublk_ctrl_stop_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_STOP_DEV, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_try_stop_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_TRY_STOP_DEV, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_start_dev(struct ublk_dev *dev, + int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, + .flags = CTRL_CMD_HAS_DATA, + }; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_add_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_ADD_DEV, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) &dev->dev_info, + .len = sizeof(struct ublksrv_ctrl_dev_info), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_del_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_DEL_DEV, + .flags = 0, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_info(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_DEV_INFO, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) &dev->dev_info, + .len = sizeof(struct ublksrv_ctrl_dev_info), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_set_params(struct ublk_dev *dev, + struct ublk_params *params) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_SET_PARAMS, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) params, + .len = sizeof(*params), + }; + params->len = sizeof(*params); + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_params(struct ublk_dev *dev, + struct ublk_params *params) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_PARAMS, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64)params, + .len = sizeof(*params), + }; + + params->len = sizeof(*params); + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_features(struct ublk_dev *dev, + __u64 *features) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_FEATURES, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) features, + .len = sizeof(*features), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_update_size(struct ublk_dev *dev, + __u64 nr_sects) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_UPDATE_SIZE, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = nr_sects; + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev, + unsigned int timeout_ms) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_QUIESCE_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = timeout_ms; + return __ublk_ctrl_cmd(dev, &data); +} + +static const char *ublk_dev_state_desc(struct ublk_dev *dev) +{ + switch (dev->dev_info.state) { + case UBLK_S_DEV_DEAD: + return "DEAD"; + case UBLK_S_DEV_LIVE: + return "LIVE"; + case UBLK_S_DEV_QUIESCED: + return "QUIESCED"; + default: + return "UNKNOWN"; + }; +} + +static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) +{ + unsigned done = 0; + int i; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, set)) + done += snprintf(&buf[done], len - done, "%d ", i); + } +} + +static void ublk_adjust_affinity(cpu_set_t *set) +{ + int j, updated = 0; + + /* + * Just keep the 1st CPU now. + * + * In future, auto affinity selection can be tried. + */ + for (j = 0; j < CPU_SETSIZE; j++) { + if (CPU_ISSET(j, set)) { + if (!updated) { + updated = 1; + continue; + } + CPU_CLR(j, set); + } + } +} + +/* Caller must free the allocated buffer */ +static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, + .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, + }; + cpu_set_t *buf; + int i, ret; + + buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); + if (!buf) + return -ENOMEM; + + for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { + data.data[0] = i; + data.len = sizeof(cpu_set_t); + data.addr = (__u64)&buf[i]; + + ret = __ublk_ctrl_cmd(ctrl_dev, &data); + if (ret < 0) { + free(buf); + return ret; + } + ublk_adjust_affinity(&buf[i]); + } + + *ptr_buf = buf; + return 0; +} + +static void ublk_ctrl_dump(struct ublk_dev *dev) +{ + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + struct ublk_params p; + cpu_set_t *affinity; + int ret; + + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); + return; + } + + ret = ublk_ctrl_get_affinity(dev, &affinity); + if (ret < 0) { + ublk_err("failed to get affinity %m\n"); + return; + } + + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", + info->dev_id, info->nr_hw_queues, info->queue_depth, + 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); + ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", + info->max_io_buf_bytes, info->ublksrv_pid, info->flags, + ublk_dev_state_desc(dev)); + + if (affinity) { + char buf[512]; + int i; + + for (i = 0; i < info->nr_hw_queues; i++) { + ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); + printf("\tqueue %u: affinity(%s)\n", + i, buf); + } + free(affinity); + } + + fflush(stdout); +} + +static void ublk_ctrl_deinit(struct ublk_dev *dev) +{ + close(dev->ctrl_fd); + free(dev); +} + +static struct ublk_dev *ublk_ctrl_init(void) +{ + struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev)); + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + int ret; + + dev->ctrl_fd = open(CTRL_DEV, O_RDWR); + if (dev->ctrl_fd < 0) { + free(dev); + return NULL; + } + + info->max_io_buf_bytes = UBLK_IO_MAX_BYTES; + + ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH, + UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128); + if (ret < 0) { + ublk_err("queue_init: %s\n", strerror(-ret)); + free(dev); + return NULL; + } + dev->nr_fds = 1; + + return dev; +} + +static int __ublk_queue_cmd_buf_sz(unsigned depth) +{ + int size = depth * sizeof(struct ublksrv_io_desc); + unsigned int page_sz = getpagesize(); + + return round_up(size, page_sz); +} + +static int ublk_queue_max_cmd_buf_sz(void) +{ + return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH); +} + +static int ublk_queue_cmd_buf_sz(struct ublk_queue *q) +{ + return __ublk_queue_cmd_buf_sz(q->q_depth); +} + +static void ublk_queue_deinit(struct ublk_queue *q) +{ + int i; + int nr_ios = q->q_depth; + + if (q->io_cmd_buf) + munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q)); + + for (i = 0; i < nr_ios; i++) { + free(q->ios[i].buf_addr); + free(q->ios[i].integrity_buf); + } +} + +static void ublk_thread_deinit(struct ublk_thread *t) +{ + io_uring_unregister_buffers(&t->ring); + + ublk_batch_free_buf(t); + + io_uring_unregister_ring_fd(&t->ring); + + if (t->ring.ring_fd > 0) { + io_uring_unregister_files(&t->ring); + close(t->ring.ring_fd); + t->ring.ring_fd = -1; + } +} + +static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags, + __u8 metadata_size) +{ + struct ublk_dev *dev = q->dev; + int depth = dev->dev_info.queue_depth; + int i; + int cmd_buf_size, io_buf_size, integrity_size; + unsigned long off; + + pthread_spin_init(&q->lock, PTHREAD_PROCESS_PRIVATE); + q->tgt_ops = dev->tgt.ops; + q->flags = 0; + q->q_depth = depth; + q->flags = dev->dev_info.flags; + q->flags |= extra_flags; + q->metadata_size = metadata_size; + + /* Cache fd in queue for fast path access */ + q->ublk_fd = dev->fds[0]; + + cmd_buf_size = ublk_queue_cmd_buf_sz(q); + off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); + q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, + MAP_SHARED | MAP_POPULATE, dev->fds[0], off); + if (q->io_cmd_buf == MAP_FAILED) { + ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n", + q->dev->dev_info.dev_id, q->q_id); + goto fail; + } + + io_buf_size = dev->dev_info.max_io_buf_bytes; + integrity_size = ublk_integrity_len(q, io_buf_size); + for (i = 0; i < q->q_depth; i++) { + q->ios[i].buf_addr = NULL; + q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE; + q->ios[i].tag = i; + + if (integrity_size) { + q->ios[i].integrity_buf = malloc(integrity_size); + if (!q->ios[i].integrity_buf) { + ublk_err("ublk dev %d queue %d io %d malloc(%d) failed: %m\n", + dev->dev_info.dev_id, q->q_id, i, + integrity_size); + goto fail; + } + } + + + if (ublk_queue_no_buf(q)) + continue; + + if (posix_memalign((void **)&q->ios[i].buf_addr, + getpagesize(), io_buf_size)) { + ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n", + dev->dev_info.dev_id, q->q_id, i); + goto fail; + } + } + + return 0; + fail: + ublk_queue_deinit(q); + ublk_err("ublk dev %d queue %d failed\n", + dev->dev_info.dev_id, q->q_id); + return -ENOMEM; +} + +static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags) +{ + struct ublk_dev *dev = t->dev; + unsigned long long flags = dev->dev_info.flags | extra_flags; + int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; + int ret; + + /* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */ + if (ublk_dev_batch_io(dev)) + cq_depth += dev->dev_info.queue_depth * 2; + + ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth, + IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_SINGLE_ISSUER | + IORING_SETUP_DEFER_TASKRUN); + if (ret < 0) { + ublk_err("ublk dev %d thread %d setup io_uring failed %d\n", + dev->dev_info.dev_id, t->idx, ret); + goto fail; + } + + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { + unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues; + unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads; + max_nr_ios_per_thread += !!(nr_ios % dev->nthreads); + + t->nr_bufs = max_nr_ios_per_thread; + } else { + t->nr_bufs = 0; + } + + if (ublk_dev_batch_io(dev)) + ublk_batch_prepare(t); + + if (t->nr_bufs) { + ret = io_uring_register_buffers_sparse(&t->ring, t->nr_bufs); + if (ret) { + ublk_err("ublk dev %d thread %d register spare buffers failed %d\n", + dev->dev_info.dev_id, t->idx, ret); + goto fail; + } + } + + if (ublk_dev_batch_io(dev)) { + ret = ublk_batch_alloc_buf(t); + if (ret) { + ublk_err("ublk dev %d thread %d alloc batch buf failed %d\n", + dev->dev_info.dev_id, t->idx, ret); + goto fail; + } + } + + io_uring_register_ring_fd(&t->ring); + + if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + /* Register only backing files starting from index 1, exclude ublk control device */ + if (dev->nr_fds > 1) { + ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1); + } else { + /* No backing files to register, skip file registration */ + ret = 0; + } + } else { + ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + } + if (ret) { + ublk_err("ublk dev %d thread %d register files failed %d\n", + t->dev->dev_info.dev_id, t->idx, ret); + goto fail; + } + + return 0; +fail: + ublk_thread_deinit(t); + ublk_err("ublk dev %d thread %d init failed\n", + dev->dev_info.dev_id, t->idx); + return -ENOMEM; +} + +#define WAIT_USEC 100000 +#define MAX_WAIT_USEC (3 * 1000000) +static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + int dev_id = dev->dev_info.dev_id; + unsigned int wait_usec = 0; + int ret = 0, fd = -1; + char buf[64]; + + snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id); + + while (wait_usec < MAX_WAIT_USEC) { + fd = open(buf, O_RDWR); + if (fd >= 0) + break; + usleep(WAIT_USEC); + wait_usec += WAIT_USEC; + } + if (fd < 0) { + ublk_err("can't open %s %s\n", buf, strerror(errno)); + return -1; + } + + dev->fds[0] = fd; + if (dev->tgt.ops->init_tgt) + ret = dev->tgt.ops->init_tgt(ctx, dev); + if (ret) + close(dev->fds[0]); + return ret; +} + +static void ublk_dev_unprep(struct ublk_dev *dev) +{ + if (dev->tgt.ops->deinit_tgt) + dev->tgt.ops->deinit_tgt(dev); + close(dev->fds[0]); +} + +static void ublk_set_auto_buf_reg(const struct ublk_thread *t, + const struct ublk_queue *q, + struct io_uring_sqe *sqe, + unsigned short tag) +{ + struct ublk_auto_buf_reg buf = {}; + + if (q->tgt_ops->buf_index) + buf.index = q->tgt_ops->buf_index(t, q, tag); + else + buf.index = ublk_io_buf_idx(t, q, tag); + + if (ublk_queue_auto_zc_fallback(q)) + buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; + + sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); +} + +/* Copy in pieces to test the buffer offset logic */ +#define UBLK_USER_COPY_LEN 2048 + +static void ublk_user_copy(const struct ublk_io *io, __u8 match_ublk_op) +{ + const struct ublk_queue *q = ublk_io_to_queue(io); + const struct ublksrv_io_desc *iod = ublk_get_iod(q, io->tag); + __u64 off = ublk_user_copy_offset(q->q_id, io->tag); + __u8 ublk_op = ublksrv_get_op(iod); + __u32 len = iod->nr_sectors << 9; + void *addr = io->buf_addr; + ssize_t copied; + + if (ublk_op != match_ublk_op) + return; + + while (len) { + __u32 copy_len = min(len, UBLK_USER_COPY_LEN); + + if (ublk_op == UBLK_IO_OP_WRITE) + copied = pread(q->ublk_fd, addr, copy_len, off); + else if (ublk_op == UBLK_IO_OP_READ) + copied = pwrite(q->ublk_fd, addr, copy_len, off); + else + assert(0); + assert(copied == (ssize_t)copy_len); + addr += copy_len; + off += copy_len; + len -= copy_len; + } + + if (!(iod->op_flags & UBLK_IO_F_INTEGRITY)) + return; + + len = ublk_integrity_len(q, iod->nr_sectors << 9); + off = ublk_user_copy_offset(q->q_id, io->tag); + off |= UBLKSRV_IO_INTEGRITY_FLAG; + if (ublk_op == UBLK_IO_OP_WRITE) + copied = pread(q->ublk_fd, io->integrity_buf, len, off); + else if (ublk_op == UBLK_IO_OP_READ) + copied = pwrite(q->ublk_fd, io->integrity_buf, len, off); + else + assert(0); + assert(copied == (ssize_t)len); +} + +int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) +{ + struct ublk_queue *q = ublk_io_to_queue(io); + struct ublksrv_io_cmd *cmd; + struct io_uring_sqe *sqe[1]; + unsigned int cmd_op = 0; + __u64 user_data; + + /* only freed io can be issued */ + if (!(io->flags & UBLKS_IO_FREE)) + return 0; + + /* + * we issue because we need either fetching or committing or + * getting data + */ + if (!(io->flags & + (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA))) + return 0; + + if (io->flags & UBLKS_IO_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) { + if (ublk_queue_use_user_copy(q)) + ublk_user_copy(io, UBLK_IO_OP_READ); + + cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; + } else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) + cmd_op = UBLK_U_IO_FETCH_REQ; + + if (io_uring_sq_space_left(&t->ring) < 1) + io_uring_submit(&t->ring); + + ublk_io_alloc_sqes(t, sqe, 1); + if (!sqe[0]) { + ublk_err("%s: run out of sqe. thread %u, tag %d\n", + __func__, t->idx, io->tag); + return -1; + } + + cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]); + + if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ) + cmd->result = io->result; + + /* These fields should be written once, never change */ + ublk_set_sqe_cmd_op(sqe[0], cmd_op); + sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */ + sqe[0]->opcode = IORING_OP_URING_CMD; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe[0]->flags = 0; /* Use raw FD, not fixed file */ + else + sqe[0]->flags = IOSQE_FIXED_FILE; + sqe[0]->rw_flags = 0; + cmd->tag = io->tag; + cmd->q_id = q->q_id; + if (!ublk_queue_no_buf(q) && !ublk_queue_use_user_copy(q)) + cmd->addr = (__u64) (uintptr_t) io->buf_addr; + else + cmd->addr = 0; + + if (ublk_queue_use_auto_zc(q)) + ublk_set_auto_buf_reg(t, q, sqe[0], io->tag); + + user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0); + io_uring_sqe_set_data64(sqe[0], user_data); + + io->flags = 0; + + t->cmd_inflight += 1; + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n", + __func__, t->idx, q->q_id, io->tag, cmd_op, + io->flags, !!(t->state & UBLKS_T_STOPPING)); + return 1; +} + +static void ublk_submit_fetch_commands(struct ublk_thread *t) +{ + struct ublk_queue *q; + struct ublk_io *io; + int i = 0, j = 0; + + if (t->dev->per_io_tasks) { + /* + * Lexicographically order all the (qid,tag) pairs, with + * qid taking priority (so (1,0) > (0,1)). Then make + * this thread the daemon for every Nth entry in this + * list (N is the number of threads), starting at this + * thread's index. This ensures that each queue is + * handled by as many ublk server threads as possible, + * so that load that is concentrated on one or a few + * queues can make use of all ublk server threads. + */ + const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info; + int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth; + for (i = t->idx; i < nr_ios; i += t->dev->nthreads) { + int q_id = i / dinfo->queue_depth; + int tag = i % dinfo->queue_depth; + q = &t->dev->q[q_id]; + io = &q->ios[tag]; + io->buf_index = j++; + if (q->tgt_ops->pre_fetch_io) + q->tgt_ops->pre_fetch_io(t, q, tag, false); + ublk_queue_io_cmd(t, io); + } + } else { + /* + * Service exclusively the queue whose q_id matches our + * thread index. + */ + struct ublk_queue *q = &t->dev->q[t->idx]; + for (i = 0; i < q->q_depth; i++) { + io = &q->ios[i]; + io->buf_index = i; + if (q->tgt_ops->pre_fetch_io) + q->tgt_ops->pre_fetch_io(t, q, i, false); + ublk_queue_io_cmd(t, io); + } + } +} + +static int ublk_thread_is_idle(struct ublk_thread *t) +{ + return !io_uring_sq_ready(&t->ring) && !t->io_inflight; +} + +static int ublk_thread_is_done(struct ublk_thread *t) +{ + return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t) && !t->cmd_inflight; +} + +static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, + struct ublk_queue *q, + struct io_uring_cqe *cqe) +{ + if (cqe->res < 0 && cqe->res != -EAGAIN) + ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", + __func__, cqe->res, q->q_id, + user_data_to_tag(cqe->user_data), + user_data_to_op(cqe->user_data)); + + if (q->tgt_ops->tgt_io_done) + q->tgt_ops->tgt_io_done(t, q, cqe); +} + +static void ublk_handle_uring_cmd(struct ublk_thread *t, + struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && + !(t->state & UBLKS_T_STOPPING); + unsigned tag = user_data_to_tag(cqe->user_data); + struct ublk_io *io = &q->ios[tag]; + + t->cmd_inflight--; + + if (!fetch) { + t->state |= UBLKS_T_STOPPING; + io->flags &= ~UBLKS_IO_NEED_FETCH_RQ; + } + + if (cqe->res == UBLK_IO_RES_OK) { + ublk_assert(tag < q->q_depth); + + if (ublk_queue_use_user_copy(q)) + ublk_user_copy(io, UBLK_IO_OP_WRITE); + + if (q->tgt_ops->queue_io) + q->tgt_ops->queue_io(t, q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE; + ublk_queue_io_cmd(t, io); + } else { + /* + * COMMIT_REQ will be completed immediately since no fetching + * piggyback is required. + * + * Marking IO_FREE only, then this io won't be issued since + * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*) + * + * */ + io->flags = UBLKS_IO_FREE; + } +} + +static void ublk_handle_cqe(struct ublk_thread *t, + struct io_uring_cqe *cqe, void *data) +{ + struct ublk_dev *dev = t->dev; + unsigned q_id = user_data_to_q_id(cqe->user_data); + unsigned cmd_op = user_data_to_op(cqe->user_data); + + if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS) + ublk_err("%s: res %d userdata %llx thread state %x\n", __func__, + cqe->res, cqe->user_data, t->state); + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (thread %d qid %d tag %u cmd_op %x " + "data %lx target %d/%d) stopping %d\n", + __func__, cqe->res, t->idx, q_id, + user_data_to_tag(cqe->user_data), + cmd_op, cqe->user_data, is_target_io(cqe->user_data), + user_data_to_tgt_data(cqe->user_data), + (t->state & UBLKS_T_STOPPING)); + + /* Don't retrieve io in case of target io */ + if (is_target_io(cqe->user_data)) { + ublksrv_handle_tgt_cqe(t, &dev->q[q_id], cqe); + return; + } + + if (ublk_thread_batch_io(t)) + ublk_batch_compl_cmd(t, cqe); + else + ublk_handle_uring_cmd(t, &dev->q[q_id], cqe); +} + +static int ublk_reap_events_uring(struct ublk_thread *t) +{ + struct io_uring_cqe *cqe; + unsigned head; + int count = 0; + + io_uring_for_each_cqe(&t->ring, head, cqe) { + ublk_handle_cqe(t, cqe, NULL); + count += 1; + } + io_uring_cq_advance(&t->ring, count); + + return count; +} + +static int ublk_process_io(struct ublk_thread *t) +{ + int ret, reapped; + + ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n", + t->dev->dev_info.dev_id, + t->idx, io_uring_sq_ready(&t->ring), + t->cmd_inflight, + (t->state & UBLKS_T_STOPPING)); + + if (ublk_thread_is_done(t)) + return -ENODEV; + + ret = io_uring_submit_and_wait(&t->ring, 1); + if (ublk_thread_batch_io(t)) { + ublk_batch_prep_commit(t); + reapped = ublk_reap_events_uring(t); + ublk_batch_commit_io_cmds(t); + } else { + reapped = ublk_reap_events_uring(t); + } + + ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n", + ret, reapped, (t->state & UBLKS_T_STOPPING), + (t->state & UBLKS_T_IDLE)); + + return reapped; +} + +struct ublk_thread_info { + struct ublk_dev *dev; + pthread_t thread; + unsigned idx; + sem_t *ready; + cpu_set_t *affinity; + unsigned long long extra_flags; + unsigned char (*q_thread_map)[UBLK_MAX_QUEUES]; +}; + +static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info) +{ + if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) < 0) + ublk_err("ublk dev %u thread %u set affinity failed", + info->dev->dev_info.dev_id, info->idx); +} + +static void ublk_batch_setup_queues(struct ublk_thread *t) +{ + int i; + + for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) { + struct ublk_queue *q = &t->dev->q[i]; + int ret; + + /* + * Only prepare io commands in the mapped thread context, + * otherwise io command buffer index may not work as expected + */ + if (t->q_map[i] == 0) + continue; + + if (q->tgt_ops->pre_fetch_io) + q->tgt_ops->pre_fetch_io(t, q, 0, true); + + ret = ublk_batch_queue_prep_io_cmds(t, q); + ublk_assert(ret >= 0); + } +} + +static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info) +{ + struct ublk_thread t = { + .dev = info->dev, + .idx = info->idx, + }; + int dev_id = info->dev->dev_info.dev_id; + int ret; + + /* Copy per-thread queue mapping into thread-local variable */ + if (info->q_thread_map) + memcpy(t.q_map, info->q_thread_map[info->idx], sizeof(t.q_map)); + + ret = ublk_thread_init(&t, info->extra_flags); + if (ret) { + ublk_err("ublk dev %d thread %u init failed\n", + dev_id, t.idx); + return ret; + } + sem_post(info->ready); + + ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n", + gettid(), dev_id, t.idx); + + if (!ublk_thread_batch_io(&t)) { + /* submit all io commands to ublk driver */ + ublk_submit_fetch_commands(&t); + } else { + ublk_batch_setup_queues(&t); + ublk_batch_start_fetch(&t); + } + + do { + if (ublk_process_io(&t) < 0) + break; + } while (1); + + ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n", + gettid(), dev_id, t.idx); + ublk_thread_deinit(&t); + return 0; +} + +static void *ublk_io_handler_fn(void *data) +{ + struct ublk_thread_info *info = data; + + /* + * IO perf is sensitive with queue pthread affinity on NUMA machine + * + * Set sched_affinity at beginning, so following allocated memory/pages + * could be CPU/NUMA aware. + */ + if (info->affinity) + ublk_thread_set_sched_affinity(info); + + __ublk_io_handler_fn(info); + + return NULL; +} + +static void ublk_set_parameters(struct ublk_dev *dev) +{ + int ret; + + ret = ublk_ctrl_set_params(dev, &dev->tgt.params); + if (ret) + ublk_err("dev %d set basic parameter failed %d\n", + dev->dev_info.dev_id, ret); +} + +static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) +{ + uint64_t id; + int evtfd = ctx->_evtfd; + + if (evtfd < 0) + return -EBADF; + + if (dev_id >= 0) + id = dev_id + 1; + else + id = ERROR_EVTFD_DEVID; + + if (dev && ctx->shadow_dev) + memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); + + if (write(evtfd, &id, sizeof(id)) != sizeof(id)) + return -EINVAL; + + close(evtfd); + shmdt(ctx->shadow_dev); + + return 0; +} + + +/* + * Shared memory registration socket listener. + * + * The parent daemon context listens on a per-device unix socket at + * /run/ublk/ublkb<dev_id>.sock for shared memory registration requests + * from clients. Clients send a memfd via SCM_RIGHTS; the server + * registers it with the kernel, mmaps it, and returns the assigned index. + */ +#define UBLK_SHMEM_SOCK_DIR "/run/ublk" + +/* defined in kublk.h, shared with file_backed.c (loop target) */ +struct ublk_shmem_entry shmem_table[UBLK_BUF_MAX]; +int shmem_count; + +static void ublk_shmem_sock_path(int dev_id, char *buf, size_t len) +{ + snprintf(buf, len, "%s/ublkb%d.sock", UBLK_SHMEM_SOCK_DIR, dev_id); +} + +static int ublk_shmem_sock_create(int dev_id) +{ + struct sockaddr_un addr = { .sun_family = AF_UNIX }; + char path[108]; + int fd; + + mkdir(UBLK_SHMEM_SOCK_DIR, 0755); + ublk_shmem_sock_path(dev_id, path, sizeof(path)); + unlink(path); + + fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (fd < 0) + return -1; + + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path); + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + close(fd); + return -1; + } + + listen(fd, 4); + ublk_dbg(UBLK_DBG_DEV, "shmem socket created: %s\n", path); + return fd; +} + +static void ublk_shmem_sock_destroy(int dev_id, int sock_fd) +{ + char path[108]; + + if (sock_fd >= 0) + close(sock_fd); + ublk_shmem_sock_path(dev_id, path, sizeof(path)); + unlink(path); +} + +/* Receive a memfd from a client via SCM_RIGHTS */ +static int ublk_shmem_recv_fd(int client_fd) +{ + char buf[1]; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + union { + char cmsg_buf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr align; + } u; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = u.cmsg_buf, + .msg_controllen = sizeof(u.cmsg_buf), + }; + struct cmsghdr *cmsg; + + if (recvmsg(client_fd, &msg, 0) <= 0) + return -1; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg || cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS) + return -1; + + return *(int *)CMSG_DATA(cmsg); +} + +/* Register a shared memory buffer: store fd, mmap it, return index */ +static int ublk_shmem_register(int shmem_fd) +{ + off_t size; + void *base; + int idx; + + if (shmem_count >= UBLK_BUF_MAX) + return -1; + + size = lseek(shmem_fd, 0, SEEK_END); + if (size <= 0) + return -1; + + base = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + shmem_fd, 0); + if (base == MAP_FAILED) + return -1; + + idx = shmem_count++; + shmem_table[idx].fd = shmem_fd; + shmem_table[idx].mmap_base = base; + shmem_table[idx].size = size; + + ublk_dbg(UBLK_DBG_DEV, "shmem registered: index=%d fd=%d size=%zu\n", + idx, shmem_fd, (size_t)size); + return idx; +} + +static void ublk_shmem_unregister_all(void) +{ + int i; + + for (i = 0; i < shmem_count; i++) { + if (shmem_table[i].mmap_base) { + munmap(shmem_table[i].mmap_base, + shmem_table[i].size); + close(shmem_table[i].fd); + shmem_table[i].mmap_base = NULL; + } + } + shmem_count = 0; +} + +static int ublk_ctrl_reg_buf(struct ublk_dev *dev, void *addr, size_t size, + __u32 flags) +{ + struct ublk_shmem_buf_reg buf_reg = { + .addr = (unsigned long)addr, + .len = size, + .flags = flags, + }; + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_REG_BUF, + .flags = CTRL_CMD_HAS_BUF, + .addr = (unsigned long)&buf_reg, + .len = sizeof(buf_reg), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +/* + * Handle one client connection: receive memfd, mmap it, register + * the VA range with kernel, send back the assigned index. + */ +static void ublk_shmem_handle_client(int sock_fd, struct ublk_dev *dev) +{ + int client_fd, memfd, idx, ret; + int32_t reply; + off_t size; + void *base; + + client_fd = accept(sock_fd, NULL, NULL); + if (client_fd < 0) + return; + + memfd = ublk_shmem_recv_fd(client_fd); + if (memfd < 0) { + reply = -1; + goto out; + } + + /* mmap the memfd in server address space */ + size = lseek(memfd, 0, SEEK_END); + if (size <= 0) { + reply = -1; + close(memfd); + goto out; + } + base = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, memfd, 0); + if (base == MAP_FAILED) { + reply = -1; + close(memfd); + goto out; + } + + /* Register server's VA range with kernel for PFN matching */ + ret = ublk_ctrl_reg_buf(dev, base, size, 0); + if (ret < 0) { + ublk_dbg(UBLK_DBG_DEV, + "shmem_zc: kernel reg failed %d\n", ret); + munmap(base, size); + close(memfd); + reply = ret; + goto out; + } + + /* Store in table for I/O handling */ + idx = ublk_shmem_register(memfd); + if (idx >= 0) { + shmem_table[idx].mmap_base = base; + shmem_table[idx].size = size; + } + reply = idx; +out: + send(client_fd, &reply, sizeof(reply), 0); + close(client_fd); +} + +struct shmem_listener_info { + int dev_id; + int stop_efd; /* eventfd to signal listener to stop */ + int sock_fd; /* listener socket fd (output) */ + struct ublk_dev *dev; +}; + +/* + * Socket listener thread: runs in the parent daemon context alongside + * the I/O threads. Accepts shared memory registration requests from + * clients via SCM_RIGHTS. Exits when stop_efd is signaled. + */ +static void *ublk_shmem_listener_fn(void *data) +{ + struct shmem_listener_info *info = data; + struct pollfd pfds[2]; + + info->sock_fd = ublk_shmem_sock_create(info->dev_id); + if (info->sock_fd < 0) + return NULL; + + pfds[0].fd = info->sock_fd; + pfds[0].events = POLLIN; + pfds[1].fd = info->stop_efd; + pfds[1].events = POLLIN; + + while (1) { + int ret = poll(pfds, 2, -1); + + if (ret < 0) + break; + + /* Stop signal from parent */ + if (pfds[1].revents & POLLIN) + break; + + /* Client connection */ + if (pfds[0].revents & POLLIN) + ublk_shmem_handle_client(info->sock_fd, info->dev); + } + + return NULL; +} + +static int ublk_shmem_htlb_setup(const struct dev_ctx *ctx, + struct ublk_dev *dev) +{ + int fd, idx, ret; + struct stat st; + void *base; + + fd = open(ctx->htlb_path, O_RDWR); + if (fd < 0) { + ublk_err("htlb: can't open %s\n", ctx->htlb_path); + return -errno; + } + + if (fstat(fd, &st) < 0 || st.st_size <= 0) { + ublk_err("htlb: invalid file size\n"); + close(fd); + return -EINVAL; + } + + base = mmap(NULL, st.st_size, + ctx->rdonly_shmem_buf ? PROT_READ : PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (base == MAP_FAILED) { + ublk_err("htlb: mmap failed\n"); + close(fd); + return -ENOMEM; + } + + ret = ublk_ctrl_reg_buf(dev, base, st.st_size, + ctx->rdonly_shmem_buf ? UBLK_SHMEM_BUF_READ_ONLY : 0); + if (ret < 0) { + ublk_err("htlb: reg_buf failed: %d\n", ret); + munmap(base, st.st_size); + close(fd); + return ret; + } + + if (shmem_count >= UBLK_BUF_MAX) { + munmap(base, st.st_size); + close(fd); + return -ENOMEM; + } + + idx = shmem_count++; + shmem_table[idx].fd = fd; + shmem_table[idx].mmap_base = base; + shmem_table[idx].size = st.st_size; + + ublk_dbg(UBLK_DBG_DEV, "htlb registered: index=%d size=%zu\n", + idx, (size_t)st.st_size); + return 0; +} + +static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + struct shmem_listener_info linfo = {}; + struct ublk_thread_info *tinfo; + unsigned long long extra_flags = 0; + cpu_set_t *affinity_buf; + unsigned char (*q_thread_map)[UBLK_MAX_QUEUES] = NULL; + uint64_t stop_val = 1; + pthread_t listener; + void *thread_ret; + sem_t ready; + int ret, i; + + ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + + tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads); + if (!tinfo) + return -ENOMEM; + + sem_init(&ready, 0, 0); + ret = ublk_dev_prep(ctx, dev); + if (ret) + return ret; + + ret = ublk_ctrl_get_affinity(dev, &affinity_buf); + if (ret) + return ret; + + if (ublk_dev_batch_io(dev)) { + q_thread_map = calloc(dev->nthreads, sizeof(*q_thread_map)); + if (!q_thread_map) { + ret = -ENOMEM; + goto fail; + } + ublk_batch_setup_map(q_thread_map, dev->nthreads, + dinfo->nr_hw_queues); + } + + if (ctx->auto_zc_fallback) + extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; + if (ctx->no_ublk_fixed_fd) + extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD; + + for (i = 0; i < dinfo->nr_hw_queues; i++) { + dev->q[i].dev = dev; + dev->q[i].q_id = i; + + ret = ublk_queue_init(&dev->q[i], extra_flags, + ctx->metadata_size); + if (ret) { + ublk_err("ublk dev %d queue %d init queue failed\n", + dinfo->dev_id, i); + goto fail; + } + } + + for (i = 0; i < dev->nthreads; i++) { + tinfo[i].dev = dev; + tinfo[i].idx = i; + tinfo[i].ready = &ready; + tinfo[i].extra_flags = extra_flags; + tinfo[i].q_thread_map = q_thread_map; + + /* + * If threads are not tied 1:1 to queues, setting thread + * affinity based on queue affinity makes little sense. + * However, thread CPU affinity has significant impact + * on performance, so to compare fairly, we'll still set + * thread CPU affinity based on queue affinity where + * possible. + */ + if (dev->nthreads == dinfo->nr_hw_queues) + tinfo[i].affinity = &affinity_buf[i]; + pthread_create(&tinfo[i].thread, NULL, + ublk_io_handler_fn, + &tinfo[i]); + } + + for (i = 0; i < dev->nthreads; i++) + sem_wait(&ready); + free(affinity_buf); + free(q_thread_map); + + /* everything is fine now, start us */ + if (ctx->recovery) + ret = ublk_ctrl_end_user_recovery(dev, getpid()); + else { + ublk_set_parameters(dev); + ret = ublk_ctrl_start_dev(dev, getpid()); + } + if (ret < 0) { + ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); + /* stop device so that inflight uring_cmd can be cancelled */ + ublk_ctrl_stop_dev(dev); + goto fail_start; + } + + if (ctx->htlb_path) { + ret = ublk_shmem_htlb_setup(ctx, dev); + if (ret < 0) { + ublk_err("htlb setup failed: %d\n", ret); + ublk_ctrl_stop_dev(dev); + goto fail_start; + } + } + + ublk_ctrl_get_info(dev); + if (ctx->fg) + ublk_ctrl_dump(dev); + else + ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id); +fail_start: + /* + * Wait for I/O threads to exit. While waiting, a listener + * thread accepts shared memory registration requests from + * clients via a per-device unix socket (SCM_RIGHTS fd passing). + */ + linfo.dev_id = dinfo->dev_id; + linfo.dev = dev; + linfo.stop_efd = eventfd(0, 0); + if (linfo.stop_efd >= 0) + pthread_create(&listener, NULL, + ublk_shmem_listener_fn, &linfo); + + for (i = 0; i < (int)dev->nthreads; i++) + pthread_join(tinfo[i].thread, &thread_ret); + + /* Signal listener thread to stop and wait for it */ + if (linfo.stop_efd >= 0) { + write(linfo.stop_efd, &stop_val, sizeof(stop_val)); + pthread_join(listener, NULL); + close(linfo.stop_efd); + ublk_shmem_sock_destroy(dinfo->dev_id, linfo.sock_fd); + } + ublk_shmem_unregister_all(); + free(tinfo); + fail: + for (i = 0; i < dinfo->nr_hw_queues; i++) + ublk_queue_deinit(&dev->q[i]); + ublk_dev_unprep(dev); + ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__); + + return ret; +} + +static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout) +{ +#define EV_SIZE (sizeof(struct inotify_event)) +#define EV_BUF_LEN (128 * (EV_SIZE + 16)) + struct pollfd pfd; + int fd, wd; + int ret = -EINVAL; + const char *dev_name = basename(path); + + fd = inotify_init(); + if (fd < 0) { + ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__); + return fd; + } + + wd = inotify_add_watch(fd, "/dev", evt_mask); + if (wd == -1) { + ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__); + goto fail; + } + + pfd.fd = fd; + pfd.events = POLL_IN; + while (1) { + int i = 0; + char buffer[EV_BUF_LEN]; + ret = poll(&pfd, 1, 1000 * timeout); + + if (ret == -1) { + ublk_err("%s: poll inotify failed: %d\n", __func__, ret); + goto rm_watch; + } else if (ret == 0) { + ublk_err("%s: poll inotify timeout\n", __func__); + ret = -ETIMEDOUT; + goto rm_watch; + } + + ret = read(fd, buffer, EV_BUF_LEN); + if (ret < 0) { + ublk_err("%s: read inotify fd failed\n", __func__); + goto rm_watch; + } + + while (i < ret) { + struct inotify_event *event = (struct inotify_event *)&buffer[i]; + + ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n", + __func__, event->mask, event->name); + if (event->mask & evt_mask) { + if (!strcmp(event->name, dev_name)) { + ret = 0; + goto rm_watch; + } + } + i += EV_SIZE + event->len; + } + } +rm_watch: + inotify_rm_watch(fd, wd); +fail: + close(fd); + return ret; +} + +static int ublk_stop_io_daemon(const struct ublk_dev *dev) +{ + int daemon_pid = dev->dev_info.ublksrv_pid; + int dev_id = dev->dev_info.dev_id; + char ublkc[64]; + int ret = 0; + + if (daemon_pid < 0) + return 0; + + /* daemon may be dead already */ + if (kill(daemon_pid, 0) < 0) + goto wait; + + snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id); + + /* ublk char device may be gone already */ + if (access(ublkc, F_OK) != 0) + goto wait; + + /* Wait until ublk char device is closed, when the daemon is shutdown */ + ret = wait_ublk_dev(ublkc, IN_CLOSE, 10); + /* double check and since it may be closed before starting inotify */ + if (ret == -ETIMEDOUT) + ret = kill(daemon_pid, 0) < 0; +wait: + waitpid(daemon_pid, NULL, 0); + ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n", + __func__, daemon_pid, dev_id, ret); + + return ret; +} + +static int __cmd_dev_add(const struct dev_ctx *ctx) +{ + unsigned nthreads = ctx->nthreads; + unsigned nr_queues = ctx->nr_hw_queues; + const char *tgt_type = ctx->tgt_type; + unsigned depth = ctx->queue_depth; + __u64 features; + const struct ublk_tgt_ops *ops; + struct ublksrv_ctrl_dev_info *info; + struct ublk_dev *dev = NULL; + int dev_id = ctx->dev_id; + int ret, i; + + ops = ublk_find_tgt(tgt_type); + if (!ops) { + ublk_err("%s: no such tgt type, type %s\n", + __func__, tgt_type); + ret = -ENODEV; + goto fail; + } + + if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) { + ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n", + __func__, nr_queues, depth); + ret = -EINVAL; + goto fail; + } + + /* default to 1:1 threads:queues if nthreads is unspecified */ + if (!nthreads) + nthreads = nr_queues; + + if (nthreads > UBLK_MAX_THREADS) { + ublk_err("%s: %u is too many threads (max %u)\n", + __func__, nthreads, UBLK_MAX_THREADS); + ret = -EINVAL; + goto fail; + } + + if (nthreads != nr_queues && (!ctx->per_io_tasks && + !(ctx->flags & UBLK_F_BATCH_IO))) { + ublk_err("%s: threads %u must be same as queues %u if " + "not using per_io_tasks\n", + __func__, nthreads, nr_queues); + ret = -EINVAL; + goto fail; + } + + dev = ublk_ctrl_init(); + if (!dev) { + ublk_err("%s: can't alloc dev id %d, type %s\n", + __func__, dev_id, tgt_type); + ret = -ENOMEM; + goto fail; + } + + /* kernel doesn't support get_features */ + ret = ublk_ctrl_get_features(dev, &features); + if (ret < 0) { + ret = -EINVAL; + goto fail; + } + + if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) { + ret = -ENOTSUP; + goto fail; + } + + info = &dev->dev_info; + info->dev_id = ctx->dev_id; + info->nr_hw_queues = nr_queues; + info->queue_depth = depth; + info->flags = ctx->flags; + if ((features & UBLK_F_QUIESCE) && + (info->flags & UBLK_F_USER_RECOVERY)) + info->flags |= UBLK_F_QUIESCE; + dev->nthreads = nthreads; + dev->per_io_tasks = ctx->per_io_tasks; + dev->tgt.ops = ops; + dev->tgt.sq_depth = depth; + dev->tgt.cq_depth = depth; + + for (i = 0; i < MAX_BACK_FILES; i++) { + if (ctx->files[i]) { + strcpy(dev->tgt.backing_file[i], ctx->files[i]); + dev->tgt.nr_backing_files++; + } + } + + if (ctx->recovery) + ret = ublk_ctrl_start_user_recovery(dev); + else + ret = ublk_ctrl_add_dev(dev); + if (ret < 0) { + ublk_err("%s: can't add dev id %d, type %s ret %d\n", + __func__, dev_id, tgt_type, ret); + goto fail; + } + + /* + * The kernel may reduce nr_hw_queues (e.g. capped to nr_cpu_ids). + * Cap nthreads to the actual queue count to avoid creating extra + * handler threads that will hang during device removal. + * + * per_io_tasks mode is excluded: threads interleave across all + * queues so nthreads > nr_hw_queues is valid and intentional. + */ + if (!ctx->per_io_tasks && dev->nthreads > info->nr_hw_queues) + dev->nthreads = info->nr_hw_queues; + + ret = ublk_start_daemon(ctx, dev); + ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret); + if (ret < 0) + ublk_ctrl_del_dev(dev); + +fail: + if (ret < 0) + ublk_send_dev_event(ctx, dev, -1); + if (dev) + ublk_ctrl_deinit(dev); + return ret; +} + +static int __cmd_dev_list(struct dev_ctx *ctx); + +static int cmd_dev_add(struct dev_ctx *ctx) +{ + int res; + + if (ctx->fg) + goto run; + + ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666); + if (ctx->_shmid < 0) { + ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); + exit(-1); + } + ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); + if (ctx->shadow_dev == (struct ublk_dev *)-1) { + ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); + exit(-1); + } + ctx->_evtfd = eventfd(0, 0); + if (ctx->_evtfd < 0) { + ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); + exit(-1); + } + + res = fork(); + if (res == 0) { + int res2; + + setsid(); + res2 = fork(); + if (res2 == 0) { + /* prepare for detaching */ + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); +run: + res = __cmd_dev_add(ctx); + return res; + } else { + /* detached from the foreground task */ + exit(EXIT_SUCCESS); + } + } else if (res > 0) { + uint64_t id; + int exit_code = EXIT_FAILURE; + + res = read(ctx->_evtfd, &id, sizeof(id)); + close(ctx->_evtfd); + if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { + ctx->dev_id = id - 1; + if (__cmd_dev_list(ctx) >= 0) + exit_code = EXIT_SUCCESS; + } + shmdt(ctx->shadow_dev); + shmctl(ctx->_shmid, IPC_RMID, NULL); + /* wait for child and detach from it */ + wait(NULL); + if (exit_code == EXIT_FAILURE) + ublk_err("%s: command failed\n", __func__); + exit(exit_code); + } else { + exit(EXIT_FAILURE); + } +} + +static int __cmd_dev_del(struct dev_ctx *ctx) +{ + int number = ctx->dev_id; + struct ublk_dev *dev; + int ret; + + dev = ublk_ctrl_init(); + dev->dev_info.dev_id = number; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) + goto fail; + + ret = ublk_ctrl_stop_dev(dev); + if (ret < 0) + ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret); + + ret = ublk_stop_io_daemon(dev); + if (ret < 0) + ublk_err("%s: stop daemon id %d dev %d, ret %d\n", + __func__, dev->dev_info.ublksrv_pid, number, ret); + ublk_ctrl_del_dev(dev); +fail: + ublk_ctrl_deinit(dev); + + return (ret >= 0) ? 0 : ret; +} + +static int cmd_dev_del(struct dev_ctx *ctx) +{ + int i; + + if (ctx->dev_id >= 0 || !ctx->all) + return __cmd_dev_del(ctx); + + for (i = 0; i < 255; i++) { + ctx->dev_id = i; + __cmd_dev_del(ctx); + } + return 0; +} + +static int cmd_dev_stop(struct dev_ctx *ctx) +{ + int number = ctx->dev_id; + struct ublk_dev *dev; + int ret; + + if (number < 0) { + ublk_err("%s: device id is required\n", __func__); + return -EINVAL; + } + + dev = ublk_ctrl_init(); + dev->dev_info.dev_id = number; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) + goto fail; + + if (ctx->safe_stop) { + ret = ublk_ctrl_try_stop_dev(dev); + if (ret < 0) + ublk_err("%s: try_stop dev %d failed ret %d\n", + __func__, number, ret); + } else { + ret = ublk_ctrl_stop_dev(dev); + if (ret < 0) + ublk_err("%s: stop dev %d failed ret %d\n", + __func__, number, ret); + } + +fail: + ublk_ctrl_deinit(dev); + + return ret; +} + +static int __cmd_dev_list(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret; + + if (!dev) + return -ENODEV; + + dev->dev_info.dev_id = ctx->dev_id; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) { + if (ctx->logging) + ublk_err("%s: can't get dev info from %d: %d\n", + __func__, ctx->dev_id, ret); + } else { + if (ctx->shadow_dev) + memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); + + ublk_ctrl_dump(dev); + } + + ublk_ctrl_deinit(dev); + + return ret; +} + +static int cmd_dev_list(struct dev_ctx *ctx) +{ + int i; + + if (ctx->dev_id >= 0 || !ctx->all) + return __cmd_dev_list(ctx); + + ctx->logging = false; + for (i = 0; i < 255; i++) { + ctx->dev_id = i; + __cmd_dev_list(ctx); + } + return 0; +} + +static int cmd_dev_get_features(void) +{ +#define const_ilog2(x) (63 - __builtin_clzll(x)) +#define FEAT_NAME(f) [const_ilog2(f)] = #f + static const char *feat_map[] = { + FEAT_NAME(UBLK_F_SUPPORT_ZERO_COPY), + FEAT_NAME(UBLK_F_URING_CMD_COMP_IN_TASK), + FEAT_NAME(UBLK_F_NEED_GET_DATA), + FEAT_NAME(UBLK_F_USER_RECOVERY), + FEAT_NAME(UBLK_F_USER_RECOVERY_REISSUE), + FEAT_NAME(UBLK_F_UNPRIVILEGED_DEV), + FEAT_NAME(UBLK_F_CMD_IOCTL_ENCODE), + FEAT_NAME(UBLK_F_USER_COPY), + FEAT_NAME(UBLK_F_ZONED), + FEAT_NAME(UBLK_F_USER_RECOVERY_FAIL_IO), + FEAT_NAME(UBLK_F_UPDATE_SIZE), + FEAT_NAME(UBLK_F_AUTO_BUF_REG), + FEAT_NAME(UBLK_F_QUIESCE), + FEAT_NAME(UBLK_F_PER_IO_DAEMON), + FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON), + FEAT_NAME(UBLK_F_INTEGRITY), + FEAT_NAME(UBLK_F_SAFE_STOP_DEV), + FEAT_NAME(UBLK_F_BATCH_IO), + FEAT_NAME(UBLK_F_NO_AUTO_PART_SCAN), + FEAT_NAME(UBLK_F_SHMEM_ZC), + }; + struct ublk_dev *dev; + __u64 features = 0; + int ret; + + dev = ublk_ctrl_init(); + if (!dev) { + fprintf(stderr, "ublksrv_ctrl_init failed id\n"); + return -EOPNOTSUPP; + } + + ret = ublk_ctrl_get_features(dev, &features); + if (!ret) { + int i; + + printf("ublk_drv features: 0x%llx\n", features); + + for (i = 0; i < sizeof(features) * 8; i++) { + const char *feat; + + if (!((1ULL << i) & features)) + continue; + if (i < ARRAY_SIZE(feat_map)) + feat = feat_map[i]; + else + feat = "unknown"; + printf("0x%-16llx: %s\n", 1ULL << i, feat); + } + } + + return ret; +} + +static int cmd_dev_update_size(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + struct ublk_params p; + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided\n"); + goto out; + } + + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); + goto out; + } + + if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) { + ublk_err("size isn't aligned with logical block size\n"); + ret = -EINVAL; + goto out; + } + + ret = ublk_ctrl_update_size(dev, ctx->size >> 9); +out: + ublk_ctrl_deinit(dev); + return ret; +} + +static int cmd_dev_quiesce(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided for quiesce\n"); + goto out; + } + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_quiesce_dev(dev, 10000); + +out: + ublk_ctrl_deinit(dev); + return ret; +} + +static void __cmd_create_help(char *exe, bool recovery) +{ + int i; + + printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", + exe, recovery ? "recover" : "add"); + printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g] [-u]\n"); + printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); + printf("\t[--nthreads threads] [--per_io_tasks]\n"); + printf("\t[--integrity_capable] [--integrity_reftag] [--metadata_size SIZE] " + "[--pi_offset OFFSET] [--csum_type ip|t10dif|nvme] [--tag_size SIZE]\n"); + printf("\t[--batch|-b] [--no_auto_part_scan]\n"); + printf("\t[target options] [backfile1] [backfile2] ...\n"); + printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + printf("\tdefault: nthreads=nr_queues"); + + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { + const struct ublk_tgt_ops *ops = tgt_ops_list[i]; + + if (ops->usage) + ops->usage(ops); + } +} + +static void cmd_add_help(char *exe) +{ + __cmd_create_help(exe, false); + printf("\n"); +} + +static void cmd_recover_help(char *exe) +{ + __cmd_create_help(exe, true); + printf("\tPlease provide exact command line for creating this device with real dev_id\n"); + printf("\n"); +} + +static int cmd_dev_help(char *exe) +{ + cmd_add_help(exe); + cmd_recover_help(exe); + + printf("%s del [-n dev_id] -a \n", exe); + printf("\t -a delete all devices -n delete specified device\n\n"); + printf("%s stop -n dev_id [--safe]\n", exe); + printf("\t --safe only stop if device has no active openers\n\n"); + printf("%s list [-n dev_id] -a \n", exe); + printf("\t -a list all devices, -n list specified device, default -a \n\n"); + printf("%s features\n", exe); + printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe); + printf("%s quiesce -n dev_id\n", exe); + return 0; +} + +int main(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "all", 0, NULL, 'a' }, + { "type", 1, NULL, 't' }, + { "number", 1, NULL, 'n' }, + { "queues", 1, NULL, 'q' }, + { "depth", 1, NULL, 'd' }, + { "debug_mask", 1, NULL, 0 }, + { "quiet", 0, NULL, 0 }, + { "zero_copy", 0, NULL, 'z' }, + { "foreground", 0, NULL, 0 }, + { "recovery", 1, NULL, 'r' }, + { "recovery_fail_io", 1, NULL, 'e'}, + { "recovery_reissue", 1, NULL, 'i'}, + { "get_data", 1, NULL, 'g'}, + { "auto_zc", 0, NULL, 0 }, + { "auto_zc_fallback", 0, NULL, 0 }, + { "user_copy", 0, NULL, 'u'}, + { "size", 1, NULL, 's'}, + { "nthreads", 1, NULL, 0 }, + { "per_io_tasks", 0, NULL, 0 }, + { "no_ublk_fixed_fd", 0, NULL, 0 }, + { "integrity_capable", 0, NULL, 0 }, + { "integrity_reftag", 0, NULL, 0 }, + { "metadata_size", 1, NULL, 0 }, + { "pi_offset", 1, NULL, 0 }, + { "csum_type", 1, NULL, 0 }, + { "tag_size", 1, NULL, 0 }, + { "safe", 0, NULL, 0 }, + { "batch", 0, NULL, 'b'}, + { "no_auto_part_scan", 0, NULL, 0 }, + { "shmem_zc", 0, NULL, 0 }, + { "htlb", 1, NULL, 0 }, + { "rdonly_shmem_buf", 0, NULL, 0 }, + { 0, 0, 0, 0 } + }; + const struct ublk_tgt_ops *ops = NULL; + int option_idx, opt; + const char *cmd = argv[1]; + struct dev_ctx ctx = { + ._evtfd = -1, + .queue_depth = 128, + .nr_hw_queues = 2, + .dev_id = -1, + .tgt_type = "unknown", + .csum_type = LBMD_PI_CSUM_NONE, + }; + int ret = -EINVAL, i; + int tgt_argc = 1; + char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; + int value; + + if (argc == 1) + return ret; + + opterr = 0; + optind = 2; + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gazub", + longopts, &option_idx)) != -1) { + switch (opt) { + case 'a': + ctx.all = 1; + break; + case 'b': + ctx.flags |= UBLK_F_BATCH_IO; + break; + case 'n': + ctx.dev_id = strtol(optarg, NULL, 10); + break; + case 't': + if (strlen(optarg) < sizeof(ctx.tgt_type)) + strcpy(ctx.tgt_type, optarg); + break; + case 'q': + ctx.nr_hw_queues = strtol(optarg, NULL, 10); + break; + case 'd': + ctx.queue_depth = strtol(optarg, NULL, 10); + break; + case 'z': + ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY; + break; + case 'r': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY; + break; + case 'e': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; + break; + case 'i': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; + break; + case 'g': + ctx.flags |= UBLK_F_NEED_GET_DATA; + break; + case 'u': + ctx.flags |= UBLK_F_USER_COPY; + break; + case 's': + ctx.size = strtoull(optarg, NULL, 10); + break; + case 0: + if (!strcmp(longopts[option_idx].name, "debug_mask")) + ublk_dbg_mask = strtol(optarg, NULL, 16); + if (!strcmp(longopts[option_idx].name, "quiet")) + ublk_dbg_mask = 0; + if (!strcmp(longopts[option_idx].name, "foreground")) + ctx.fg = 1; + if (!strcmp(longopts[option_idx].name, "auto_zc")) + ctx.flags |= UBLK_F_AUTO_BUF_REG; + if (!strcmp(longopts[option_idx].name, "auto_zc_fallback")) + ctx.auto_zc_fallback = 1; + if (!strcmp(longopts[option_idx].name, "nthreads")) + ctx.nthreads = strtol(optarg, NULL, 10); + if (!strcmp(longopts[option_idx].name, "per_io_tasks")) + ctx.per_io_tasks = 1; + if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd")) + ctx.no_ublk_fixed_fd = 1; + if (!strcmp(longopts[option_idx].name, "integrity_capable")) + ctx.integrity_flags |= LBMD_PI_CAP_INTEGRITY; + if (!strcmp(longopts[option_idx].name, "integrity_reftag")) + ctx.integrity_flags |= LBMD_PI_CAP_REFTAG; + if (!strcmp(longopts[option_idx].name, "metadata_size")) + ctx.metadata_size = strtoul(optarg, NULL, 0); + if (!strcmp(longopts[option_idx].name, "pi_offset")) + ctx.pi_offset = strtoul(optarg, NULL, 0); + if (!strcmp(longopts[option_idx].name, "csum_type")) { + if (!strcmp(optarg, "ip")) { + ctx.csum_type = LBMD_PI_CSUM_IP; + } else if (!strcmp(optarg, "t10dif")) { + ctx.csum_type = LBMD_PI_CSUM_CRC16_T10DIF; + } else if (!strcmp(optarg, "nvme")) { + ctx.csum_type = LBMD_PI_CSUM_CRC64_NVME; + } else { + ublk_err("invalid csum_type: %s\n", optarg); + return -EINVAL; + } + } + if (!strcmp(longopts[option_idx].name, "tag_size")) + ctx.tag_size = strtoul(optarg, NULL, 0); + if (!strcmp(longopts[option_idx].name, "safe")) + ctx.safe_stop = 1; + if (!strcmp(longopts[option_idx].name, "no_auto_part_scan")) + ctx.flags |= UBLK_F_NO_AUTO_PART_SCAN; + if (!strcmp(longopts[option_idx].name, "shmem_zc")) + ctx.flags |= UBLK_F_SHMEM_ZC; + if (!strcmp(longopts[option_idx].name, "htlb")) + ctx.htlb_path = strdup(optarg); + if (!strcmp(longopts[option_idx].name, "rdonly_shmem_buf")) + ctx.rdonly_shmem_buf = 1; + break; + case '?': + /* + * target requires every option must have argument + */ + if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { + fprintf(stderr, "every target option requires argument: %s %s\n", + argv[optind - 1], argv[optind]); + exit(EXIT_FAILURE); + } + + if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { + tgt_argv[tgt_argc++] = argv[optind - 1]; + tgt_argv[tgt_argc++] = argv[optind]; + } else { + fprintf(stderr, "too many target options\n"); + exit(EXIT_FAILURE); + } + optind += 1; + break; + } + } + + if (ctx.per_io_tasks && (ctx.flags & UBLK_F_BATCH_IO)) { + ublk_err("per_io_task and F_BATCH_IO conflict\n"); + return -EINVAL; + } + + /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */ + if (ctx.auto_zc_fallback && + !((ctx.flags & UBLK_F_AUTO_BUF_REG) && + (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) { + ublk_err("%s: auto_zc_fallback is set but neither " + "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n", + __func__); + return -EINVAL; + } + + if (!!(ctx.flags & UBLK_F_NEED_GET_DATA) + + !!(ctx.flags & UBLK_F_USER_COPY) + + (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY && !ctx.auto_zc_fallback) + + (ctx.flags & UBLK_F_AUTO_BUF_REG && !ctx.auto_zc_fallback) + + ctx.auto_zc_fallback > 1) { + fprintf(stderr, "too many data copy modes specified\n"); + return -EINVAL; + } + + if (ctx.metadata_size) { + if (!(ctx.flags & UBLK_F_USER_COPY)) { + ublk_err("integrity requires user_copy\n"); + return -EINVAL; + } + + ctx.flags |= UBLK_F_INTEGRITY; + } else if (ctx.integrity_flags || + ctx.pi_offset || + ctx.csum_type != LBMD_PI_CSUM_NONE || + ctx.tag_size) { + ublk_err("integrity parameters require metadata_size\n"); + return -EINVAL; + } + + if ((ctx.flags & UBLK_F_AUTO_BUF_REG) && + (ctx.flags & UBLK_F_BATCH_IO) && + (ctx.nthreads > ctx.nr_hw_queues)) { + ublk_err("too many threads for F_AUTO_BUF_REG & F_BATCH_IO\n"); + return -EINVAL; + } + + i = optind; + while (i < argc && ctx.nr_files < MAX_BACK_FILES) { + ctx.files[ctx.nr_files++] = argv[i++]; + } + + ops = ublk_find_tgt(ctx.tgt_type); + if (ops && ops->parse_cmd_line) { + optind = 0; + + tgt_argv[0] = ctx.tgt_type; + ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); + } + + if (!strcmp(cmd, "add")) + ret = cmd_dev_add(&ctx); + else if (!strcmp(cmd, "recover")) { + if (ctx.dev_id < 0) { + fprintf(stderr, "device id isn't provided for recovering\n"); + ret = -EINVAL; + } else { + ctx.recovery = 1; + ret = cmd_dev_add(&ctx); + } + } else if (!strcmp(cmd, "del")) + ret = cmd_dev_del(&ctx); + else if (!strcmp(cmd, "stop")) + ret = cmd_dev_stop(&ctx); + else if (!strcmp(cmd, "list")) { + ctx.all = 1; + ret = cmd_dev_list(&ctx); + } else if (!strcmp(cmd, "help")) + ret = cmd_dev_help(argv[0]); + else if (!strcmp(cmd, "features")) + ret = cmd_dev_get_features(); + else if (!strcmp(cmd, "update_size")) + ret = cmd_dev_update_size(&ctx); + else if (!strcmp(cmd, "quiesce")) + ret = cmd_dev_quiesce(&ctx); + else + cmd_dev_help(argv[0]); + + return ret; +} diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h new file mode 100644 index 000000000000..742c41d77df1 --- /dev/null +++ b/tools/testing/selftests/ublk/kublk.h @@ -0,0 +1,628 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KUBLK_INTERNAL_H +#define KUBLK_INTERNAL_H + +#include <unistd.h> +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <pthread.h> +#include <getopt.h> +#include <limits.h> +#include <poll.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/inotify.h> +#include <sys/wait.h> +#include <sys/eventfd.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <linux/io_uring.h> +#include <liburing.h> +#include <semaphore.h> + +/* allow ublk_dep.h to override ublk_cmd.h */ +#include "ublk_dep.h" +#include <linux/ublk_cmd.h> + +#include "utils.h" + +#define MAX_BACK_FILES 4 + +/****************** part 1: libublk ********************/ + +#define CTRL_DEV "/dev/ublk-control" +#define UBLKC_DEV "/dev/ublkc" +#define UBLKB_DEV "/dev/ublkb" +#define UBLK_CTRL_RING_DEPTH 32 +#define ERROR_EVTFD_DEVID -2 + +#define UBLK_IO_MAX_BYTES (1 << 20) +#define UBLK_MAX_QUEUES_SHIFT 5 +#define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT) +#define UBLK_MAX_THREADS_SHIFT 5 +#define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT) +#define UBLK_QUEUE_DEPTH 1024 + +struct ublk_dev; +struct ublk_queue; +struct ublk_thread; + +struct stripe_ctx { + /* stripe */ + unsigned int chunk_size; +}; + +struct fault_inject_ctx { + /* fault_inject */ + unsigned long delay_us; + bool die_during_fetch; +}; + +struct dev_ctx { + char tgt_type[16]; + unsigned long flags; + unsigned nr_hw_queues; + unsigned short nthreads; + unsigned queue_depth; + int dev_id; + int nr_files; + char *files[MAX_BACK_FILES]; + unsigned int logging:1; + unsigned int all:1; + unsigned int fg:1; + unsigned int recovery:1; + unsigned int auto_zc_fallback:1; + unsigned int per_io_tasks:1; + unsigned int no_ublk_fixed_fd:1; + unsigned int safe_stop:1; + unsigned int no_auto_part_scan:1; + unsigned int rdonly_shmem_buf:1; + __u32 integrity_flags; + __u8 metadata_size; + __u8 pi_offset; + __u8 csum_type; + __u8 tag_size; + + int _evtfd; + int _shmid; + + /* built from shmem, only for ublk_dump_dev() */ + struct ublk_dev *shadow_dev; + + /* for 'update_size' command */ + unsigned long long size; + + char *htlb_path; + + union { + struct stripe_ctx stripe; + struct fault_inject_ctx fault_inject; + }; +}; + +struct ublk_ctrl_cmd_data { + __u32 cmd_op; +#define CTRL_CMD_HAS_DATA 1 +#define CTRL_CMD_HAS_BUF 2 + __u32 flags; + + __u64 data[2]; + __u64 addr; + __u32 len; +}; + +struct ublk_io { + char *buf_addr; + void *integrity_buf; + +#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0) +#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1) +#define UBLKS_IO_FREE (1UL << 2) +#define UBLKS_IO_NEED_GET_DATA (1UL << 3) +#define UBLKS_IO_NEED_REG_BUF (1UL << 4) + unsigned short flags; + unsigned short refs; /* used by target code only */ + + int tag; + + int result; + + unsigned short buf_index; + unsigned short tgt_ios; + void *private_data; +}; + +struct ublk_tgt_ops { + const char *name; + int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); + void (*deinit_tgt)(struct ublk_dev *); + + void (*pre_fetch_io)(struct ublk_thread *t, struct ublk_queue *q, + int tag, bool batch); + int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); + void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, + const struct io_uring_cqe *); + + /* + * Target specific command line handling + * + * each option requires argument for target command line + */ + void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); + void (*usage)(const struct ublk_tgt_ops *ops); + + /* return buffer index for UBLK_F_AUTO_BUF_REG */ + unsigned short (*buf_index)(const struct ublk_thread *t, + const struct ublk_queue *, int tag); +}; + +struct ublk_tgt { + unsigned long dev_size; + unsigned int sq_depth; + unsigned int cq_depth; + const struct ublk_tgt_ops *ops; + struct ublk_params params; + + int nr_backing_files; + unsigned long backing_file_size[MAX_BACK_FILES]; + char backing_file[MAX_BACK_FILES][PATH_MAX]; +}; + +struct ublk_queue { + int q_id; + int q_depth; + struct ublk_dev *dev; + const struct ublk_tgt_ops *tgt_ops; + struct ublksrv_io_desc *io_cmd_buf; + +/* borrow three bit of ublk uapi flags, which may never be used */ +#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) +#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) +#define UBLKS_Q_PREPARED (1ULL << 61) + __u64 flags; + int ublk_fd; /* cached ublk char device fd */ + __u8 metadata_size; + struct ublk_io ios[UBLK_QUEUE_DEPTH]; + + /* used for prep io commands */ + pthread_spinlock_t lock; +}; + +/* align with `ublk_elem_header` */ +struct ublk_batch_elem { + __u16 tag; + __u16 buf_index; + __s32 result; + __u64 buf_addr; +}; + +struct batch_commit_buf { + unsigned short q_id; + unsigned short buf_idx; + void *elem; + unsigned short done; + unsigned short count; +}; + +struct batch_fetch_buf { + struct io_uring_buf_ring *br; + void *fetch_buf; + unsigned int fetch_buf_size; + unsigned int fetch_buf_off; +}; + +struct ublk_thread { + /* Thread-local copy of queue-to-thread mapping for this thread */ + unsigned char q_map[UBLK_MAX_QUEUES]; + + struct ublk_dev *dev; + unsigned short idx; + unsigned short nr_queues; + +#define UBLKS_T_STOPPING (1U << 0) +#define UBLKS_T_IDLE (1U << 1) +#define UBLKS_T_BATCH_IO (1U << 31) /* readonly */ + unsigned state; + unsigned int cmd_inflight; + unsigned int io_inflight; + + unsigned short nr_bufs; + + /* followings are for BATCH_IO */ + unsigned short commit_buf_start; + unsigned char commit_buf_elem_size; + /* + * We just support single device, so pre-calculate commit/prep flags + */ + unsigned short cmd_flags; + unsigned int nr_commit_buf; + unsigned int commit_buf_size; + void *commit_buf; +#define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1) + struct allocator commit_buf_alloc; + struct batch_commit_buf *commit; + /* FETCH_IO_CMDS buffer */ + unsigned short nr_fetch_bufs; + struct batch_fetch_buf *fetch; + + struct io_uring ring; +}; + +struct ublk_dev { + struct ublk_tgt tgt; + struct ublksrv_ctrl_dev_info dev_info; + struct ublk_queue q[UBLK_MAX_QUEUES]; + unsigned nthreads; + unsigned per_io_tasks; + + int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */ + int nr_fds; + int ctrl_fd; + struct io_uring ring; + + void *private_data; +}; + +extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); + +static inline int __ublk_use_batch_io(__u64 flags) +{ + return flags & UBLK_F_BATCH_IO; +} + +static inline int ublk_queue_batch_io(const struct ublk_queue *q) +{ + return __ublk_use_batch_io(q->flags); +} + +static inline int ublk_dev_batch_io(const struct ublk_dev *dev) +{ + return __ublk_use_batch_io(dev->dev_info.flags); +} + +/* only work for handle single device in this pthread context */ +static inline int ublk_thread_batch_io(const struct ublk_thread *t) +{ + return t->state & UBLKS_T_BATCH_IO; +} + +static inline void ublk_set_integrity_params(const struct dev_ctx *ctx, + struct ublk_params *params) +{ + if (!ctx->metadata_size) + return; + + params->types |= UBLK_PARAM_TYPE_INTEGRITY; + params->integrity = (struct ublk_param_integrity) { + .flags = ctx->integrity_flags, + .interval_exp = params->basic.logical_bs_shift, + .metadata_size = ctx->metadata_size, + .pi_offset = ctx->pi_offset, + .csum_type = ctx->csum_type, + .tag_size = ctx->tag_size, + }; +} + +static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len) +{ + /* All targets currently use interval_exp = logical_bs_shift = 9 */ + return (len >> 9) * q->metadata_size; +} + +static inline size_t +ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len) +{ + return (integrity_len / q->metadata_size) << 9; +} + +static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) +{ + return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); +} + +static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag) +{ + return UBLKSRV_IO_BUF_OFFSET + + ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF); +} + +static inline int is_target_io(__u64 user_data) +{ + return (user_data & (1ULL << 63)) != 0; +} + +static inline __u64 build_user_data(unsigned tag, unsigned op, + unsigned tgt_data, unsigned q_id, unsigned is_target_io) +{ + /* we only have 7 bits to encode q_id */ + _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7"); + ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7)); + + return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) | + (__u64)q_id << 56 | (__u64)is_target_io << 63; +} + +static inline unsigned int user_data_to_tag(__u64 user_data) +{ + return user_data & 0xffff; +} + +static inline unsigned int user_data_to_op(__u64 user_data) +{ + return (user_data >> 16) & 0xff; +} + +static inline unsigned int user_data_to_tgt_data(__u64 user_data) +{ + return (user_data >> 24) & 0xffff; +} + +static inline unsigned int user_data_to_q_id(__u64 user_data) +{ + return (user_data >> 56) & 0x7f; +} + +static inline unsigned short ublk_cmd_op_nr(unsigned int op) +{ + return _IOC_NR(op); +} + +static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) +{ + return container_of(io, struct ublk_queue, ios[io->tag]); +} + +static inline int ublk_io_alloc_sqes(struct ublk_thread *t, + struct io_uring_sqe *sqes[], int nr_sqes) +{ + struct io_uring *ring = &t->ring; + unsigned left = io_uring_sq_space_left(ring); + int i; + + if (left < nr_sqes) + io_uring_submit(ring); + + for (i = 0; i < nr_sqes; i++) { + sqes[i] = io_uring_get_sqe(ring); + if (!sqes[i]) + return i; + } + + return nr_sqes; +} + +static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) +{ + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + if (fd_index == 0) + /* Return the raw ublk FD for index 0 */ + return q->ublk_fd; + /* Adjust index for backing files (index 1 becomes 0, etc.) */ + return fd_index - 1; + } + return fd_index; +} + +static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + int dev_fd = ublk_get_registered_fd(q, 0); + + io_uring_prep_read(sqe, dev_fd, 0, 0, 0); + sqe->opcode = IORING_OP_URING_CMD; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe->flags &= ~IOSQE_FIXED_FILE; + else + sqe->flags |= IOSQE_FIXED_FILE; + + cmd->tag = tag; + cmd->addr = index; + cmd->q_id = q_id; +} + +static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); + sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; +} + +static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); + sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; +} + +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return (void *)&sqe->cmd; +} + +static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res) +{ + q->ios[tag].result = res; +} + +static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) +{ + return q->ios[tag].result; +} + +static inline void ublk_mark_io_done(struct ublk_io *io, int res) +{ + io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE); + io->result = res; +} + +static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) +{ + return &q->io_cmd_buf[tag]; +} + +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) +{ + __u32 *addr = (__u32 *)&sqe->off; + + addr[0] = cmd_op; + addr[1] = 0; +} + +static inline unsigned short ublk_batch_io_buf_idx( + const struct ublk_thread *t, const struct ublk_queue *q, + unsigned tag); + +static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t, + const struct ublk_queue *q, + unsigned tag) +{ + if (ublk_queue_batch_io(q)) + return ublk_batch_io_buf_idx(t, q, tag); + return q->ios[tag].buf_index; +} + +static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) +{ + return &q->ios[tag]; +} + +static inline int ublk_completed_tgt_io(struct ublk_thread *t, + struct ublk_queue *q, unsigned tag) +{ + struct ublk_io *io = ublk_get_io(q, tag); + + t->io_inflight--; + + return --io->tgt_ios == 0; +} + +static inline bool ublk_queue_use_zc(const struct ublk_queue *q) +{ + return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY); +} + +static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q) +{ + return !!(q->flags & UBLK_F_AUTO_BUF_REG); +} + +static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q) +{ + return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK); +} + +static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q) +{ + return !!(q->flags & UBLK_F_USER_COPY); +} + +static inline int ublk_queue_no_buf(const struct ublk_queue *q) +{ + return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); +} + +static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb) +{ + return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX; +} + +static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t, + const struct ublk_queue *q) +{ + unsigned char idx; + + idx = t->q_map[q->q_id]; + ublk_assert(idx != 0); + return idx - 1; +} + +/* + * Each IO's buffer index has to be calculated by this helper for + * UBLKS_T_BATCH_IO + */ +static inline unsigned short ublk_batch_io_buf_idx( + const struct ublk_thread *t, const struct ublk_queue *q, + unsigned tag) +{ + return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag; +} + +/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */ +int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q); +/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */ +void ublk_batch_start_fetch(struct ublk_thread *t); +/* Handle completion of batch I/O commands (prep/commit) */ +void ublk_batch_compl_cmd(struct ublk_thread *t, + const struct io_uring_cqe *cqe); +/* Initialize batch I/O state and calculate buffer parameters */ +void ublk_batch_prepare(struct ublk_thread *t); +/* Allocate and register commit buffers for batch operations */ +int ublk_batch_alloc_buf(struct ublk_thread *t); +/* Free commit buffers and cleanup batch allocator */ +void ublk_batch_free_buf(struct ublk_thread *t); + +/* Prepare a new commit buffer for batching completed I/O operations */ +void ublk_batch_prep_commit(struct ublk_thread *t); +/* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */ +void ublk_batch_commit_io_cmds(struct ublk_thread *t); +/* Add a completed I/O operation to the current batch commit buffer */ +void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int res); +void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES], + int nthreads, int queues); + +static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int res) +{ + if (ublk_queue_batch_io(q)) { + ublk_batch_complete_io(t, q, tag, res); + return 0; + } else { + struct ublk_io *io = &q->ios[tag]; + + ublk_mark_io_done(io, res); + return ublk_queue_io_cmd(t, io); + } +} + +static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int queued) +{ + if (queued < 0) + ublk_complete_io(t, q, tag, queued); + else { + struct ublk_io *io = ublk_get_io(q, tag); + + t->io_inflight += queued; + io->tgt_ios = queued; + io->result = 0; + } +} + +/* shared memory zero-copy support */ +#define UBLK_BUF_MAX 256 + +struct ublk_shmem_entry { + int fd; + void *mmap_base; + size_t size; +}; + +extern struct ublk_shmem_entry shmem_table[UBLK_BUF_MAX]; +extern int shmem_count; + +extern const struct ublk_tgt_ops null_tgt_ops; +extern const struct ublk_tgt_ops loop_tgt_ops; +extern const struct ublk_tgt_ops stripe_tgt_ops; +extern const struct ublk_tgt_ops fault_inject_tgt_ops; + +void backing_file_tgt_deinit(struct ublk_dev *dev); +int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct); + +#endif diff --git a/tools/testing/selftests/ublk/metadata_size.c b/tools/testing/selftests/ublk/metadata_size.c new file mode 100644 index 000000000000..76ecddf04d25 --- /dev/null +++ b/tools/testing/selftests/ublk/metadata_size.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <linux/fs.h> +#include <stdio.h> +#include <sys/ioctl.h> + +int main(int argc, char **argv) +{ + struct logical_block_metadata_cap cap = {}; + const char *filename; + int fd; + int result; + + if (argc != 2) { + fprintf(stderr, "Usage: %s BLOCK_DEVICE\n", argv[0]); + return 1; + } + + filename = argv[1]; + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror(filename); + return 1; + } + + result = ioctl(fd, FS_IOC_GETLBMD_CAP, &cap); + if (result < 0) { + perror("ioctl"); + return 1; + } + + printf("metadata_size: %u\n", cap.lbmd_size); + printf("pi_offset: %u\n", cap.lbmd_pi_offset); + printf("pi_tuple_size: %u\n", cap.lbmd_pi_size); + return 0; +} diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c new file mode 100644 index 000000000000..7656888f4149 --- /dev/null +++ b/tools/testing/selftests/ublk/null.c @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "kublk.h" + +#ifndef IORING_NOP_INJECT_RESULT +#define IORING_NOP_INJECT_RESULT (1U << 0) +#endif + +#ifndef IORING_NOP_FIXED_BUFFER +#define IORING_NOP_FIXED_BUFFER (1U << 3) +#endif + +static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN | + UBLK_PARAM_TYPE_SEGMENT, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + .dma = { + .alignment = 4095, + }, + .seg = { + .seg_boundary_mask = 4095, + .max_segment_size = 32 << 10, + .max_segments = 32, + }, + }; + ublk_set_integrity_params(ctx, &dev->tgt.params); + + if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) + dev->tgt.sq_depth = dev->tgt.cq_depth = 2 * info->queue_depth; + return 0; +} + +static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod, + struct io_uring_sqe *sqe, int q_id, unsigned buf_idx) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + io_uring_prep_nop(sqe); + sqe->buf_index = buf_idx; + sqe->flags |= IOSQE_FIXED_FILE; + sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; + sqe->len = iod->nr_sectors << 9; /* injected result */ + sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1); +} + +static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe[3]; + unsigned short buf_idx = ublk_io_buf_idx(t, q, tag); + + ublk_io_alloc_sqes(t, sqe, 3); + + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx); + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + + __setup_nop_io(tag, iod, sqe[1], q->q_id, buf_idx); + sqe[1]->flags |= IOSQE_IO_HARDLINK; + + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_idx); + sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); + + // buf register is marked as IOSQE_CQE_SKIP_SUCCESS + return 2; +} + +static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe[1]; + + ublk_io_alloc_sqes(t, sqe, 1); + __setup_nop_io(tag, iod, sqe[0], q->q_id, ublk_io_buf_idx(t, q, tag)); + return 1; +} + +static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + unsigned tag = user_data_to_tag(cqe->user_data); + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_io *io = ublk_get_io(q, tag); + + if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { + if (!io->result) + io->result = cqe->res; + if (cqe->res < 0) + ublk_err("%s: io failed op %x user_data %lx\n", + __func__, op, cqe->user_data); + } + + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); +} + +static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + unsigned zc = ublk_queue_use_zc(q); + int queued; + + if (auto_zc && !ublk_io_auto_zc_fallback(iod)) + queued = null_queue_auto_zc_io(t, q, tag); + else if (zc) + queued = null_queue_zc_io(t, q, tag); + else { + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); + return 0; + } + ublk_queued_tgt_io(t, q, tag, queued); + return 0; +} + +/* + * return invalid buffer index for triggering auto buffer register failure, + * then UBLK_IO_RES_NEED_REG_BUF handling is covered + */ +static unsigned short ublk_null_buf_index(const struct ublk_thread *t, + const struct ublk_queue *q, int tag) +{ + if (ublk_queue_auto_zc_fallback(q)) + return (unsigned short)-1; + return ublk_io_buf_idx(t, q, tag); +} + +const struct ublk_tgt_ops null_tgt_ops = { + .name = "null", + .init_tgt = ublk_null_tgt_init, + .queue_io = ublk_null_queue_io, + .tgt_io_done = ublk_null_io_done, + .buf_index = ublk_null_buf_index, +}; diff --git a/tools/testing/selftests/ublk/settings b/tools/testing/selftests/ublk/settings new file mode 100644 index 000000000000..682a40f1c8e6 --- /dev/null +++ b/tools/testing/selftests/ublk/settings @@ -0,0 +1 @@ +timeout=150 diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c new file mode 100644 index 000000000000..dca819f5366e --- /dev/null +++ b/tools/testing/selftests/ublk/stripe.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kublk.h" + +#define NR_STRIPE MAX_BACK_FILES + +struct stripe_conf { + unsigned nr_files; + unsigned shift; +}; + +struct stripe { + loff_t start; + unsigned nr_sects; + int seq; + + struct iovec *vec; + unsigned nr_vec; + unsigned cap; +}; + +struct stripe_array { + struct stripe s[NR_STRIPE]; + unsigned nr; + struct iovec _vec[]; +}; + +static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q) +{ + return (struct stripe_conf *)q->dev->private_data; +} + +static inline unsigned calculate_nr_vec(const struct stripe_conf *conf, + const struct ublksrv_io_desc *iod) +{ + const unsigned shift = conf->shift - 9; + const unsigned unit_sects = conf->nr_files << shift; + loff_t start = iod->start_sector; + loff_t end = start + iod->nr_sectors; + + return (end / unit_sects) - (start / unit_sects) + 1; +} + +static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf, + const struct ublksrv_io_desc *iod) +{ + unsigned nr_vecs = calculate_nr_vec(conf, iod); + unsigned total = nr_vecs * conf->nr_files; + struct stripe_array *s; + int i; + + s = malloc(sizeof(*s) + total * sizeof(struct iovec)); + + s->nr = 0; + for (i = 0; i < conf->nr_files; i++) { + struct stripe *t = &s->s[i]; + + t->nr_vec = 0; + t->vec = &s->_vec[i * nr_vecs]; + t->nr_sects = 0; + t->cap = nr_vecs; + } + + return s; +} + +static void free_stripe_array(struct stripe_array *s) +{ + free(s); +} + +static void calculate_stripe_array(const struct stripe_conf *conf, + const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base) +{ + const unsigned shift = conf->shift - 9; + const unsigned chunk_sects = 1 << shift; + const unsigned unit_sects = conf->nr_files << shift; + off64_t start = iod->start_sector; + off64_t end = start + iod->nr_sectors; + unsigned long done = 0; + unsigned idx = 0; + + while (start < end) { + unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1)); + loff_t unit_off = (start / unit_sects) * unit_sects; + unsigned seq = (start - unit_off) >> shift; + struct stripe *this = &s->s[idx]; + loff_t stripe_off = (unit_off / conf->nr_files) + + (start & (chunk_sects - 1)); + + if (nr_sects > end - start) + nr_sects = end - start; + if (this->nr_sects == 0) { + this->nr_sects = nr_sects; + this->start = stripe_off; + this->seq = seq; + s->nr += 1; + } else { + ublk_assert(seq == this->seq); + ublk_assert(this->start + this->nr_sects == stripe_off); + this->nr_sects += nr_sects; + } + + ublk_assert(this->nr_vec < this->cap); + this->vec[this->nr_vec].iov_base = (void *)(base + done); + this->vec[this->nr_vec++].iov_len = nr_sects << 9; + + start += nr_sects; + done += nr_sects << 9; + idx = (idx + 1) % conf->nr_files; + } +} + +static inline enum io_uring_op stripe_to_uring_op( + const struct ublksrv_io_desc *iod, int zc) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + if (ublk_op == UBLK_IO_OP_READ) + return zc ? IORING_OP_READV_FIXED : IORING_OP_READV; + else if (ublk_op == UBLK_IO_OP_WRITE) + return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV; + ublk_assert(0); +} + +static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) +{ + const struct stripe_conf *conf = get_chunk_shift(q); + unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); + unsigned zc = (ublk_queue_use_zc(q) != 0); + enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc); + struct io_uring_sqe *sqe[NR_STRIPE]; + struct stripe_array *s = alloc_stripe_array(conf, iod); + struct ublk_io *io = ublk_get_io(q, tag); + int i, extra = zc ? 2 : 0; + void *base = io->buf_addr; + unsigned short buf_idx = ublk_io_buf_idx(t, q, tag); + + io->private_data = s; + calculate_stripe_array(conf, iod, s, base); + + ublk_io_alloc_sqes(t, sqe, s->nr + extra); + + if (zc) { + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); + } + + for (i = zc; i < s->nr + extra - zc; i++) { + struct stripe *t = &s->s[i - zc]; + + io_uring_prep_rw(op, sqe[i], + t->seq + 1, + (void *)t->vec, + t->nr_vec, + t->start << 9); + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (auto_zc || zc) { + sqe[i]->buf_index = buf_idx; + if (zc) + sqe[i]->flags |= IOSQE_IO_HARDLINK; + } + /* bit63 marks us as tgt io */ + sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, q->q_id, 1); + } + if (zc) { + struct io_uring_sqe *unreg = sqe[s->nr + 1]; + + io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, buf_idx); + unreg->user_data = build_user_data( + tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1); + } + + /* register buffer is skip_success */ + return s->nr + zc; +} + +static int handle_flush(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) +{ + const struct stripe_conf *conf = get_chunk_shift(q); + struct io_uring_sqe *sqe[NR_STRIPE]; + int i; + + ublk_io_alloc_sqes(t, sqe, conf->nr_files); + for (i = 0; i < conf->nr_files; i++) { + io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, q->q_id, 1); + } + return conf->nr_files; +} + +static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned ublk_op = ublksrv_get_op(iod); + int ret = 0; + + switch (ublk_op) { + case UBLK_IO_OP_FLUSH: + ret = handle_flush(t, q, iod, tag); + break; + case UBLK_IO_OP_WRITE_ZEROES: + case UBLK_IO_OP_DISCARD: + ret = -ENOTSUP; + break; + case UBLK_IO_OP_READ: + case UBLK_IO_OP_WRITE: + ret = stripe_queue_tgt_rw_io(t, q, iod, tag); + break; + default: + ret = -EINVAL; + break; + } + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag, + iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret); + return ret; +} + +static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) +{ + int queued = stripe_queue_tgt_io(t, q, tag); + + ublk_queued_tgt_io(t, q, tag, queued); + return 0; +} + +static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + unsigned tag = user_data_to_tag(cqe->user_data); + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_io *io = ublk_get_io(q, tag); + int res = cqe->res; + + if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { + if (!io->result) + io->result = res; + if (res < 0) + ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); + } + + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + + /* fail short READ/WRITE simply */ + if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) { + unsigned seq = user_data_to_tgt_data(cqe->user_data); + struct stripe_array *s = io->private_data; + + if (res < s->s[seq].nr_sects << 9) { + io->result = -EIO; + ublk_err("%s: short rw op %u res %d exp %u tag %u\n", + __func__, op, res, s->s[seq].vec->iov_len, tag); + } + } + + if (ublk_completed_tgt_io(t, q, tag)) { + int res = io->result; + + if (!res) + res = iod->nr_sectors << 9; + + ublk_complete_io(t, q, tag, res); + + free_stripe_array(io->private_data); + io->private_data = NULL; + } +} + +static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + struct ublk_params p = { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .attrs = UBLK_ATTR_VOLATILE_CACHE, + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, + }, + }; + unsigned chunk_size = ctx->stripe.chunk_size; + struct stripe_conf *conf; + unsigned chunk_shift; + loff_t bytes = 0; + int ret, i, mul = 1; + + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + if (ctx->metadata_size) { + ublk_err("%s: integrity not supported\n", __func__); + return -EINVAL; + } + + if ((chunk_size & (chunk_size - 1)) || !chunk_size) { + ublk_err("invalid chunk size %u\n", chunk_size); + return -EINVAL; + } + + if (chunk_size < 4096 || chunk_size > 512 * 1024) { + ublk_err("invalid chunk size %u\n", chunk_size); + return -EINVAL; + } + + chunk_shift = ilog2(chunk_size); + + ret = backing_file_tgt_init(dev, dev->tgt.nr_backing_files); + if (ret) + return ret; + + if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE) + return -EINVAL; + + ublk_assert(dev->nr_fds == dev->tgt.nr_backing_files + 1); + + for (i = 0; i < dev->tgt.nr_backing_files; i++) + dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1); + + for (i = 0; i < dev->tgt.nr_backing_files; i++) { + unsigned long size = dev->tgt.backing_file_size[i]; + + if (size != dev->tgt.backing_file_size[0]) + return -EINVAL; + bytes += size; + } + + conf = malloc(sizeof(*conf)); + conf->shift = chunk_shift; + conf->nr_files = dev->tgt.nr_backing_files; + + dev->private_data = conf; + dev->tgt.dev_size = bytes; + p.basic.dev_sectors = bytes >> 9; + dev->tgt.params = p; + + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) + mul = 2; + dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; + dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; + + printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); + + return 0; +} + +static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) +{ + free(dev->private_data); + backing_file_tgt_deinit(dev); +} + +static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "chunk_size", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->stripe.chunk_size = 65536; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "chunk_size")) + ctx->stripe.chunk_size = strtol(optarg, NULL, 10); + } + } +} + +static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); +} + +const struct ublk_tgt_ops stripe_tgt_ops = { + .name = "stripe", + .init_tgt = ublk_stripe_tgt_init, + .deinit_tgt = ublk_stripe_tgt_deinit, + .queue_io = ublk_stripe_queue_io, + .tgt_io_done = ublk_stripe_io_done, + .parse_cmd_line = ublk_stripe_cmd_line, + .usage = ublk_stripe_usage, +}; diff --git a/tools/testing/selftests/ublk/test_batch_01.sh b/tools/testing/selftests/ublk/test_batch_01.sh new file mode 100755 index 000000000000..6e19303706a9 --- /dev/null +++ b/tools/testing/selftests/ublk/test_batch_01.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_feature "BATCH_IO"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test basic function of UBLK_F_BATCH_IO" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t loop -q 2 -b "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then + _cleanup_test + _show_result $TID 255 +fi + +dev_id=$(_add_ublk_dev -t stripe -b --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_batch_02.sh b/tools/testing/selftests/ublk/test_batch_02.sh new file mode 100755 index 000000000000..7c683f755379 --- /dev/null +++ b/tools/testing/selftests/ublk/test_batch_02.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_feature "BATCH_IO"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_BATCH_IO with 4_threads vs. 1_queues" + +_create_backfile 0 512M + +dev_id=$(_add_ublk_dev -t loop -q 1 --nthreads 4 -b "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite \ + --iodepth=32 --size=100M --numjobs=4 > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_batch_03.sh b/tools/testing/selftests/ublk/test_batch_03.sh new file mode 100755 index 000000000000..914ccd6a335d --- /dev/null +++ b/tools/testing/selftests/ublk/test_batch_03.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_feature "BATCH_IO"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_BATCH_IO with 1_threads vs. 4_queues" + +_create_backfile 0 512M + +dev_id=$(_add_ublk_dev -t loop -q 4 --nthreads 1 -b "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite \ + --iodepth=32 --size=100M --numjobs=4 > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh new file mode 100755 index 000000000000..af2ea4fa1111 --- /dev/null +++ b/tools/testing/selftests/ublk/test_common.sh @@ -0,0 +1,443 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Derive TID from script name: test_<type>_<num>.sh -> <type>_<num> +# Can be overridden in test script after sourcing this file +TID=$(basename "$0" .sh) +TID=${TID#test_} + +UBLK_SKIP_CODE=4 + +_have_program() { + if command -v "$1" >/dev/null 2>&1; then + return 0 + fi + return 1 +} + +# Sleep with awareness of parallel execution. +# Usage: _ublk_sleep <normal_secs> <parallel_secs> +_ublk_sleep() { + if [ "${JOBS:-1}" -gt 1 ]; then + sleep "$2" + else + sleep "$1" + fi +} + +_get_disk_dev_t() { + local dev_id=$1 + local dev + local major + local minor + + dev=/dev/ublkb"${dev_id}" + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") + + echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) +} + +_get_disk_size() +{ + lsblk -b -o SIZE -n "$1" +} + +_run_fio_verify_io() { + fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ + --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ + --verify_state_save=0 "$@" > /dev/null +} + +_create_backfile() { + local index=$1 + local new_size=$2 + local old_file + local new_file + + old_file="${UBLK_BACKFILES[$index]}" + [ -f "$old_file" ] && rm -f "$old_file" + + new_file=$(mktemp ${UBLK_TEST_DIR}/ublk_file_"${new_size}"_XXXXX) + truncate -s "${new_size}" "${new_file}" + UBLK_BACKFILES["$index"]="$new_file" +} + +_remove_files() { + local file + + for file in "${UBLK_BACKFILES[@]}"; do + [ -f "$file" ] && rm -f "$file" + done + [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP" +} + +_create_tmp_dir() { + local my_file; + + my_file=$(mktemp -d ${UBLK_TEST_DIR}/ublk_dir_XXXXX) + echo "$my_file" +} + +_remove_tmp_dir() { + local dir=$1 + + [ -d "$dir" ] && rmdir "$dir" +} + +_mkfs_mount_test() +{ + local dev=$1 + shift + local err_code=0 + local mnt_dir; + + mnt_dir=$(_create_tmp_dir) + mkfs.ext4 -F "$dev" > /dev/null 2>&1 + err_code=$? + if [ $err_code -ne 0 ]; then + return $err_code + fi + + mount -t ext4 "$dev" "$mnt_dir" > /dev/null 2>&1 + if [ $# -gt 0 ]; then + cd "$mnt_dir" && "$@" + err_code=$? + cd - > /dev/null + fi + umount "$dev" + if [ $err_code -eq 0 ]; then + err_code=$? + fi + _remove_tmp_dir "$mnt_dir" + return $err_code +} + +_check_root() { + local ksft_skip=4 + + if [ $UID != 0 ]; then + echo please run this as root >&2 + exit $ksft_skip + fi +} + +_get_ublk_dev_state() { + ${UBLK_PROG} list -n "$1" | grep "state" | awk '{print $11}' +} + +_get_ublk_daemon_pid() { + ${UBLK_PROG} list -n "$1" | grep "pid" | awk '{print $7}' +} + +_prep_test() { + _check_root + local type=$1 + shift 1 + modprobe ublk_drv > /dev/null 2>&1 + local base_dir=${TMPDIR:-./ublktest-dir} + mkdir -p "$base_dir" + UBLK_TEST_DIR=$(mktemp -d ${base_dir}/${TID}.XXXXXX) + UBLK_TEST_DIR=$(realpath ${UBLK_TEST_DIR}) + UBLK_TMP=$(mktemp ${UBLK_TEST_DIR}/ublk_test_XXXXX) + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" + echo "ublk selftest: $TID starting at $(date '+%F %T')" | tee /dev/kmsg +} + +_remove_test_files() +{ + local files=$* + + for file in ${files}; do + [ -f "${file}" ] && rm -f "${file}" + done +} + +_show_result() +{ + if [ "$UBLK_TEST_SHOW_RESULT" -ne 0 ]; then + if [ "$2" -eq 0 ]; then + echo "$1 : [PASS]" + elif [ "$2" -eq 4 ]; then + echo "$1 : [SKIP]" + else + echo "$1 : [FAIL]" + fi + fi + if [ "$2" -ne 0 ]; then + _remove_files + exit "$2" + fi + return 0 +} + +# don't call from sub-shell, otherwise can't exit +_check_add_dev() +{ + local tid=$1 + local code=$2 + + if [ "${code}" -ne 0 ]; then + _show_result "${tid}" "${code}" + fi +} + +_cleanup_test() { + if [ -f "${UBLK_TEST_DIR}/.ublk_devs" ]; then + while read -r dev_id; do + ${UBLK_PROG} del -n "${dev_id}" + done < "${UBLK_TEST_DIR}/.ublk_devs" + rm -f "${UBLK_TEST_DIR}/.ublk_devs" + fi + + _remove_files + rmdir ${UBLK_TEST_DIR} + echo "ublk selftest: $TID done at $(date '+%F %T')" | tee /dev/kmsg +} + +_have_feature() +{ + if $UBLK_PROG "features" | grep "$1" > /dev/null 2>&1; then + return 0 + fi + return 1 +} + +_create_ublk_dev() { + local dev_id; + local cmd=$1 + local settle=$2 + + shift 2 + + if [ ! -c /dev/ublk-control ]; then + return ${UBLK_SKIP_CODE} + fi + if echo "$@" | grep -q "\-z"; then + if ! _have_feature "ZERO_COPY"; then + return ${UBLK_SKIP_CODE} + fi + fi + + if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then + echo "fail to add ublk dev $*" + return 255 + fi + + if [ "$settle" = "yes" ]; then + udevadm settle --timeout=20 + fi + + if [[ "$dev_id" =~ ^[0-9]+$ ]]; then + echo "$dev_id" >> "${UBLK_TEST_DIR}/.ublk_devs" + echo "${dev_id}" + else + return 255 + fi +} + +_add_ublk_dev() { + _create_ublk_dev "add" "yes" "$@" +} + +_add_ublk_dev_no_settle() { + _create_ublk_dev "add" "no" "$@" +} + +_recover_ublk_dev() { + local dev_id + local state + + dev_id=$(_create_ublk_dev "recover" "yes" "$@") + for ((j=0;j<100;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "LIVE" ] && break + sleep 1 + done + echo "$state" +} + +# quiesce device and return ublk device state +__ublk_quiesce_dev() +{ + local dev_id=$1 + local exp_state=$2 + local state + + if ! ${UBLK_PROG} quiesce -n "${dev_id}"; then + state=$(_get_ublk_dev_state "${dev_id}") + return "$state" + fi + + for ((j=0;j<100;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "$exp_state" ] && break + sleep 1 + done + echo "$state" +} + +# kill the ublk daemon and return ublk device state +__ublk_kill_daemon() +{ + local dev_id=$1 + local exp_state=$2 + local daemon_pid + local state + + daemon_pid=$(_get_ublk_daemon_pid "${dev_id}") + state=$(_get_ublk_dev_state "${dev_id}") + + for ((j=0;j<100;j++)); do + [ "$state" == "$exp_state" ] && break + kill -9 "$daemon_pid" > /dev/null 2>&1 + sleep 1 + state=$(_get_ublk_dev_state "${dev_id}") + done + echo "$state" +} + +_ublk_del_dev() { + local dev_id=$1 + + ${UBLK_PROG} del -n "${dev_id}" + + # Remove from tracking file + if [ -f "${UBLK_TEST_DIR}/.ublk_devs" ]; then + sed -i "/^${dev_id}$/d" "${UBLK_TEST_DIR}/.ublk_devs" + fi +} + +__remove_ublk_dev_return() { + local dev_id=$1 + + _ublk_del_dev "${dev_id}" + local res=$? + udevadm settle --timeout=20 + return ${res} +} + +__run_io_and_remove() +{ + local dev_id=$1 + local size=$2 + local kill_server=$3 + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \ + --runtime=20 --time_based > /dev/null 2>&1 & + fio --name=batchjob --filename=/dev/ublkb"${dev_id}" --ioengine=io_uring \ + --rw=randrw --norandommap --iodepth=256 --size="${size}" \ + --numjobs="$(nproc)" --runtime=20 --time_based \ + --iodepth_batch_submit=32 --iodepth_batch_complete_min=32 \ + --force_async=7 > /dev/null 2>&1 & + sleep 2 + if [ "${kill_server}" = "yes" ]; then + local state + state=$(__ublk_kill_daemon "${dev_id}" "DEAD") + if [ "$state" != "DEAD" ]; then + echo "device isn't dead($state) after killing daemon" + return 255 + fi + fi + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + +run_io_and_remove() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" + exit 255 + fi +} + +run_io_and_kill_daemon() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" + exit 255 + fi +} + +run_io_and_recover() +{ + local size=$1 + local action=$2 + local state + local dev_id + + shift 2 + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=randread --iodepth=256 --size="${size}" --numjobs=4 \ + --runtime=20 --time_based > /dev/null 2>&1 & + sleep 4 + + if [ "$action" == "kill_daemon" ]; then + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + elif [ "$action" == "quiesce_dev" ]; then + state=$(__ublk_quiesce_dev "${dev_id}" "QUIESCED") + fi + if [ "$state" != "QUIESCED" ]; then + echo "device isn't quiesced($state) after $action" + return 255 + fi + + state=$(_recover_ublk_dev -n "$dev_id" "$@") + if [ "$state" != "LIVE" ]; then + echo "faile to recover to LIVE($state)" + return 255 + fi + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + + +_ublk_test_top_dir() +{ + cd "$(dirname "$0")" && pwd +} + +METADATA_SIZE_PROG="$(_ublk_test_top_dir)/metadata_size" + +_get_metadata_size() +{ + local dev_id=$1 + local field=$2 + + "$METADATA_SIZE_PROG" "/dev/ublkb$dev_id" | grep "$field" | grep -o "[0-9]*" +} + +UBLK_PROG=$(_ublk_test_top_dir)/kublk +UBLK_TEST_QUIET=1 +UBLK_TEST_SHOW_RESULT=1 +UBLK_BACKFILES=() +export UBLK_PROG +export UBLK_TEST_QUIET +export UBLK_TEST_SHOW_RESULT diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh new file mode 100755 index 000000000000..2afc8cdbed8f --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program bpftrace; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "ublk dispatch won't reorder IO for MQ" + +dev_id=$(_add_ublk_dev -t null -q 2) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & +btrace_pid=$! + +# Wait for bpftrace probes to be attached (BEGIN block prints BPFTRACE_READY) +for _ in $(seq 100); do + grep -q "BPFTRACE_READY" "$UBLK_TMP" 2>/dev/null && break + sleep 0.1 +done + +if ! kill -0 "$btrace_pid" 2>/dev/null; then + _cleanup_test + exit "$UBLK_SKIP_CODE" +fi + +# run fio over this ublk disk (pinned to CPU 0) +taskset -c 0 fio --name=write_seq \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait + +# Check for out-of-order completions detected by bpftrace +if grep -q "^out_of_order:" "$UBLK_TMP"; then + echo "I/O reordering detected:" + grep "^out_of_order:" "$UBLK_TMP" + ERR_CODE=255 +fi +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh new file mode 100755 index 000000000000..8e78be860d34 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_03.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "null" "check dma & segment limits for zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +sysfs_path=/sys/block/ublkb"${dev_id}" +dma_align=$(cat "$sysfs_path"/queue/dma_alignment) +max_segments=$(cat "$sysfs_path"/queue/max_segments) +max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size) +if [ "$dma_align" != "4095" ]; then + ERR_CODE=255 +fi +if [ "$max_segments" != "32" ]; then + ERR_CODE=255 +fi +if [ "$max_segment_size" != "32768" ]; then + ERR_CODE=255 +fi +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh new file mode 100755 index 000000000000..a8b3634b6b4b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server" + +# configure ublk server to sleep 2s before completing each I/O +dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000) +_check_add_dev $TID $? + +STARTTIME=${SECONDS} + +dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & +dd_pid=$! + +__ublk_kill_daemon ${dev_id} "DEAD" >/dev/null + +wait $dd_pid +dd_exitcode=$? + +ENDTIME=${SECONDS} +ELAPSED=$(($ENDTIME - $STARTTIME)) + +# assert that dd sees an error and exits quickly after ublk server is +# killed. previously this relied on seeing an I/O timeout and so would +# take ~30s +if [ $dd_exitcode -eq 0 ]; then + echo "dd unexpectedly exited successfully!" + ERR_CODE=255 +fi +if [ $ELAPSED -ge 5 ]; then + echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!" + ERR_CODE=255 +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 000000000000..d2c5e65bd124 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh new file mode 100755 index 000000000000..77a18b313f3d --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_08.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_AUTO_BUF_REG" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then + _cleanup_test + _show_result $TID 255 +fi + +dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh new file mode 100755 index 000000000000..6c25242f245f --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_09.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test" + +dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh new file mode 100755 index 000000000000..fdabc9d9075e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_10.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_feature "UPDATE_SIZE"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "check update size" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +size=$(_get_disk_size /dev/ublkb"${dev_id}") +size=$(( size / 2 )) +if ! "$UBLK_PROG" update_size -n "$dev_id" -s "$size"; then + ERR_CODE=255 +fi + +new_size=$(_get_disk_size /dev/ublkb"${dev_id}") +if [ "$new_size" != "$size" ]; then + ERR_CODE=255 +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh new file mode 100755 index 000000000000..435497f8da8d --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_12.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program bpftrace; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "do imbalanced load, it should be balanced over I/O threads" + +NTHREADS=6 +dev_id=$(_add_ublk_dev -t null -q 4 -d 16 --nthreads $NTHREADS --per_io_tasks) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/count_ios_per_tid.bt "$dev_t" > "$UBLK_TMP" 2>&1 & +btrace_pid=$! +sleep 2 + +if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then + _cleanup_test + exit "$UBLK_SKIP_CODE" +fi + +# do imbalanced I/O on the ublk device +# pin to cpu 0 to prevent migration/only target one queue +fio --name=write_seq \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --bs=4k \ + --cpus_allowed=0 > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait + +# check that every task handles some I/O, even though all I/O was issued +# from a single CPU. when ublk gets support for round-robin tag +# allocation, this check can be strengthened to assert that every thread +# handles the same number of I/Os +NR_THREADS_THAT_HANDLED_IO=$(grep -c '@' ${UBLK_TMP}) +if [[ $NR_THREADS_THAT_HANDLED_IO -ne $NTHREADS ]]; then + echo "only $NR_THREADS_THAT_HANDLED_IO handled I/O! expected $NTHREADS" + cat "$UBLK_TMP" + ERR_CODE=255 +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_13.sh b/tools/testing/selftests/ublk/test_generic_13.sh new file mode 100755 index 000000000000..2c1be6286db8 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_13.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "null" "check that feature list is complete" + +if ${UBLK_PROG} features | grep -q unknown; then + echo "# unknown feature detected!" + echo "# did you add a feature and forget to update feat_map in kublk?" + echo "# this failure is expected if running an older test suite against" + echo "# a newer kernel with new features added" + ERR_CODE=255 +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_16.sh b/tools/testing/selftests/ublk/test_generic_16.sh new file mode 100755 index 000000000000..6a4952146ea1 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_16.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "null" "stop --safe command" + +# Check if SAFE_STOP_DEV feature is supported +if ! _have_feature "SAFE_STOP_DEV"; then + _cleanup_test + exit "$UBLK_SKIP_CODE" +fi + +# Test 1: stop --safe on idle device should succeed +dev_id=$(_add_ublk_dev -t null -q 2 -d 32) +_check_add_dev $TID $? + +# Device is idle (no openers), stop --safe should succeed +if ! ${UBLK_PROG} stop -n "${dev_id}" --safe; then + echo "stop --safe on idle device failed unexpectedly!" + ERR_CODE=255 +fi + +# Clean up device +_ublk_del_dev "${dev_id}" > /dev/null 2>&1 +udevadm settle + +# Test 2: stop --safe on device with active opener should fail +dev_id=$(_add_ublk_dev -t null -q 2 -d 32) +_check_add_dev $TID $? + +# Open device in background (dd reads indefinitely) +dd if=/dev/ublkb${dev_id} of=/dev/null bs=4k iflag=direct > /dev/null 2>&1 & +dd_pid=$! + +# Give dd time to start +sleep 0.2 + +# Device has active opener, stop --safe should fail with -EBUSY +if ${UBLK_PROG} stop -n "${dev_id}" --safe 2>/dev/null; then + echo "stop --safe on busy device succeeded unexpectedly!" + ERR_CODE=255 +fi + +# Kill dd and clean up +kill $dd_pid 2>/dev/null +wait $dd_pid 2>/dev/null + +# Now device should be idle, regular delete should work +_ublk_del_dev "${dev_id}" +udevadm settle + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_17.sh b/tools/testing/selftests/ublk/test_generic_17.sh new file mode 100755 index 000000000000..b483d53a897a --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_17.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "fault_inject" "teardown after incomplete recovery" + +# First start and stop a ublk server with device configured for recovery +dev_id=$(_add_ublk_dev -t fault_inject -r 1) +_check_add_dev $TID $? +state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") +if [ "$state" != "QUIESCED" ]; then + echo "device isn't quiesced($state) after $action" + ERR_CODE=255 +fi + +# Then recover the device, but use --die_during_fetch to have the ublk +# server die while a queue has some (but not all) I/Os fetched +${UBLK_PROG} recover -n "${dev_id}" --foreground -t fault_inject --die_during_fetch 1 +RECOVER_RES=$? +# 137 is the result when dying of SIGKILL +if (( RECOVER_RES != 137 )); then + echo "recover command exited with unexpected code ${RECOVER_RES}!" + ERR_CODE=255 +fi + +# Clean up the device. This can only succeed once teardown of the above +# exited ublk server completes. So if teardown never completes, we will +# time out here +_ublk_del_dev "${dev_id}" + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_integrity_01.sh b/tools/testing/selftests/ublk/test_integrity_01.sh new file mode 100755 index 000000000000..6713b280a6ff --- /dev/null +++ b/tools/testing/selftests/ublk/test_integrity_01.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_check_value() { + local name=$1 + local actual=$2 + local expected=$3 + + if [ "$actual" != "$expected" ]; then + echo "$name $actual != $expected" + ERR_CODE=255 + return 1 + fi + return 0 +} + +_test_metadata_only() { + local dev_id + + dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --metadata_size 8) + _check_add_dev "$TID" $? + + _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 8 && + _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 0 && + _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 0 && + _check_value "device_is_integrity_capable" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 0 && + _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" nop && + _check_value "protection_interval_bytes" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 && + _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 0 + + _ublk_del_dev "${dev_id}" +} + +_test_integrity_capable_ip() { + local dev_id + + dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --integrity_capable --metadata_size 64 --pi_offset 56 --csum_type ip) + _check_add_dev "$TID" $? + + _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 64 && + _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 56 && + _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 8 && + _check_value "device_is_integrity_capable" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 1 && + _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" T10-DIF-TYPE3-IP && + _check_value "protection_interval_bytes" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 && + _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 0 + + _ublk_del_dev "${dev_id}" +} + +_test_integrity_reftag_t10dif() { + local dev_id + + dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --integrity_reftag --metadata_size 8 --csum_type t10dif) + _check_add_dev "$TID" $? + + _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 8 && + _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 0 && + _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 8 && + _check_value "device_is_integrity_capable" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 0 && + _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" T10-DIF-TYPE1-CRC && + _check_value "protection_interval_bytes" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 && + _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 0 + + _ublk_del_dev "${dev_id}" +} + +_test_nvme_csum() { + local dev_id + + dev_id=$(_add_ublk_dev -t null -u --no_auto_part_scan --metadata_size 16 --csum_type nvme --tag_size 8) + _check_add_dev "$TID" $? + + _check_value "metadata_size" "$(_get_metadata_size "$dev_id" metadata_size)" 16 && + _check_value "pi_offset" "$(_get_metadata_size "$dev_id" pi_offset)" 0 && + _check_value "pi_tuple_size" "$(_get_metadata_size "$dev_id" pi_tuple_size)" 16 && + _check_value "device_is_integrity_capable" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/device_is_integrity_capable")" 0 && + _check_value "format" "$(cat "/sys/block/ublkb$dev_id/integrity/format")" EXT-DIF-TYPE3-CRC64 && + _check_value "protection_interval_bytes" \ + "$(cat "/sys/block/ublkb$dev_id/integrity/protection_interval_bytes")" 512 && + _check_value "tag_size" "$(cat "/sys/block/ublkb$dev_id/integrity/tag_size")" 8 + + _ublk_del_dev "${dev_id}" +} + +_prep_test "null" "integrity params" + +_test_metadata_only +_test_integrity_capable_ip +_test_integrity_reftag_t10dif +_test_nvme_csum + +_cleanup_test +_show_result "$TID" $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_integrity_02.sh b/tools/testing/selftests/ublk/test_integrity_02.sh new file mode 100755 index 000000000000..2c35fbc8a7cc --- /dev/null +++ b/tools/testing/selftests/ublk/test_integrity_02.sh @@ -0,0 +1,142 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +if ! _have_program fio; then + exit $UBLK_SKIP_CODE +fi + +min_fio_version=fio-3.42 +fio_version=$(fio --version) +if ! sort --version-sort --check=quiet <(printf "%s\n%s\n" "$min_fio_version" "$fio_version"); then + echo "Requires fio version with https://github.com/axboe/fio/pull/1992" + exit $UBLK_SKIP_CODE +fi + +ERR_CODE=0 + +# Global variables set during device setup +dev_id="" +fio_args="" +fio_err="" + +_setup_device() { + _create_backfile 0 256M + _create_backfile 1 32M # 256M * (64 integrity bytes / 512 data bytes) + + local integrity_params="--integrity_capable --integrity_reftag + --metadata_size 64 --pi_offset 56 --csum_type t10dif" + dev_id=$(_add_ublk_dev -t loop -u $integrity_params "${UBLK_BACKFILES[@]}") + _check_add_dev "$TID" $? + + # 1M * (64 integrity bytes / 512 data bytes) = 128K + fio_args="--ioengine io_uring --direct 1 --bsrange 512-1M --iodepth 32 + --md_per_io_size 128K --pi_act 0 --pi_chk GUARD,REFTAG,APPTAG + --filename /dev/ublkb$dev_id" + + fio_err=$(mktemp "${UBLK_TEST_DIR}"/fio_err_XXXXX) +} + +_test_fill_and_verify() { + fio --name fill --rw randwrite $fio_args > /dev/null + if [ $? != 0 ]; then + echo "fio fill failed" + ERR_CODE=255 + return 1 + fi + + fio --name verify --rw randread $fio_args > /dev/null + if [ $? != 0 ]; then + echo "fio verify failed" + ERR_CODE=255 + return 1 + fi +} + +_test_corrupted_reftag() { + local dd_reftag_args="bs=1 seek=60 count=4 oflag=dsync conv=notrunc status=none" + local expected_err="REFTAG compare error: LBA: 0 Expected=0, Actual=" + + # Overwrite 4-byte reftag at offset 56 + 4 = 60 + dd if=/dev/urandom "of=${UBLK_BACKFILES[1]}" $dd_reftag_args + if [ $? != 0 ]; then + echo "dd corrupted_reftag failed" + ERR_CODE=255 + return 1 + fi + + if fio --name corrupted_reftag --rw randread $fio_args > /dev/null 2> "$fio_err"; then + echo "fio corrupted_reftag unexpectedly succeeded" + ERR_CODE=255 + return 1 + fi + + if ! grep -q "$expected_err" "$fio_err"; then + echo "fio corrupted_reftag message not found: $expected_err" + ERR_CODE=255 + return 1 + fi + + # Reset to 0 + dd if=/dev/zero "of=${UBLK_BACKFILES[1]}" $dd_reftag_args + if [ $? != 0 ]; then + echo "dd restore corrupted_reftag failed" + ERR_CODE=255 + return 1 + fi +} + +_test_corrupted_data() { + local dd_data_args="bs=512 count=1 oflag=direct,dsync conv=notrunc status=none" + local expected_err="Guard compare error: LBA: 0 Expected=0, Actual=" + + dd if=/dev/zero "of=${UBLK_BACKFILES[0]}" $dd_data_args + if [ $? != 0 ]; then + echo "dd corrupted_data failed" + ERR_CODE=255 + return 1 + fi + + if fio --name corrupted_data --rw randread $fio_args > /dev/null 2> "$fio_err"; then + echo "fio corrupted_data unexpectedly succeeded" + ERR_CODE=255 + return 1 + fi + + if ! grep -q "$expected_err" "$fio_err"; then + echo "fio corrupted_data message not found: $expected_err" + ERR_CODE=255 + return 1 + fi +} + +_test_bad_apptag() { + local expected_err="APPTAG compare error: LBA: [0-9]* Expected=4321, Actual=1234" + + if fio --name bad_apptag --rw randread $fio_args --apptag 0x4321 > /dev/null 2> "$fio_err"; then + echo "fio bad_apptag unexpectedly succeeded" + ERR_CODE=255 + return 1 + fi + + if ! grep -q "$expected_err" "$fio_err"; then + echo "fio bad_apptag message not found: $expected_err" + ERR_CODE=255 + return 1 + fi +} + +_prep_test "loop" "end-to-end integrity" + +_setup_device + +_test_fill_and_verify && \ +_test_corrupted_reftag && \ +_test_corrupted_data && \ +_test_bad_apptag + +rm -f "$fio_err" + +_cleanup_test +_show_result "$TID" $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_integrity_03.sh b/tools/testing/selftests/ublk/test_integrity_03.sh new file mode 100755 index 000000000000..10f02339ea2d --- /dev/null +++ b/tools/testing/selftests/ublk/test_integrity_03.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +if ! _have_program fio; then + exit $UBLK_SKIP_CODE +fi + +_test_fill_and_verify() { + fio --name fill --rw randwrite $fio_args > /dev/null + if [ $? != 0 ]; then + echo "fio fill failed" + ERR_CODE=255 + return 1 + fi + + fio --name verify --rw randread $fio_args > /dev/null + if [ $? != 0 ]; then + echo "fio verify failed" + ERR_CODE=255 + return 1 + fi +} + +_test_corrupted_reftag() { + local dd_reftag_args="bs=1 seek=58 count=6 oflag=dsync conv=notrunc status=none" + + # Overwrite 6-byte reftag at offset 48 + 10 = 58 + dd if=/dev/urandom "of=${UBLK_BACKFILES[1]}" $dd_reftag_args + if [ $? != 0 ]; then + echo "dd corrupted_reftag failed" + ERR_CODE=255 + return 1 + fi + + if fio --name corrupted_reftag --rw randread $fio_args > /dev/null 2> "$fio_err"; then + echo "fio corrupted_reftag unexpectedly succeeded" + ERR_CODE=255 + return 1 + fi + + if ! grep -q "$expected_err" "$fio_err"; then + echo "fio corrupted_reftag message not found: $expected_err" + ERR_CODE=255 + return 1 + fi + + # Reset to 0 + dd if=/dev/zero "of=${UBLK_BACKFILES[1]}" $dd_reftag_args + if [ $? != 0 ]; then + echo "dd restore corrupted_reftag failed" + ERR_CODE=255 + return 1 + fi +} + +_test_corrupted_data() { + local dd_data_args="bs=512 count=1 oflag=direct,dsync conv=notrunc status=none" + + dd if=/dev/zero "of=${UBLK_BACKFILES[0]}" $dd_data_args + if [ $? != 0 ]; then + echo "dd corrupted_data failed" + ERR_CODE=255 + return 1 + fi + + if fio --name corrupted_data --rw randread $fio_args > /dev/null 2> "$fio_err"; then + echo "fio corrupted_data unexpectedly succeeded" + ERR_CODE=255 + return 1 + fi + + if ! grep -q "$expected_err" "$fio_err"; then + echo "fio corrupted_data message not found: $expected_err" + ERR_CODE=255 + return 1 + fi +} + +_prep_test "loop" "end-to-end auto integrity" + +_create_backfile 0 256M +_create_backfile 1 32M # 256M * (64 integrity bytes / 512 data bytes) +integrity_params="--integrity_capable --integrity_reftag + --metadata_size 64 --pi_offset 48 --csum_type nvme" +dev_id=$(_add_ublk_dev -t loop -u $integrity_params "${UBLK_BACKFILES[@]}") +_check_add_dev "$TID" $? + +fio_args="--ioengine libaio --direct 1 --bsrange 512-1M --iodepth 32 + --filename /dev/ublkb$dev_id" +fio_err=$(mktemp "${UBLK_TEST_DIR}"/fio_err_XXXXX) +ERR_CODE=0 + +expected_err="Invalid or incomplete multibyte or wide character: read offset=0" +_test_fill_and_verify && \ +_test_corrupted_reftag && \ +_test_corrupted_data + +rm -f "$fio_err" + +_cleanup_test +_show_result "$TID" $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh new file mode 100755 index 000000000000..c0f5b619ad6e --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify test" + +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh new file mode 100755 index 000000000000..f4191ea71f50 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_02.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "loop" "mkfs & mount & umount" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh new file mode 100755 index 000000000000..aaac0c59a5ad --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify over zero copy" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh new file mode 100755 index 000000000000..f584c119f1d2 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_04.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "loop" "mkfs & mount & umount with zero copy" + +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh new file mode 100755 index 000000000000..ca1a5df5f9de --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify test" + +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_06.sh b/tools/testing/selftests/ublk/test_loop_06.sh new file mode 100755 index 000000000000..26f710ba9db7 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_06.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify over user copy" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_07.sh b/tools/testing/selftests/ublk/test_loop_07.sh new file mode 100755 index 000000000000..a9ab0b671cb2 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_07.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "loop" "mkfs & mount & umount with user copy" + +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh new file mode 100755 index 000000000000..d2c38cbb2dd5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_null_01.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh new file mode 100755 index 000000000000..7b205ca56367 --- /dev/null +++ b/tools/testing/selftests/ublk/test_null_02.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test with zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_03.sh b/tools/testing/selftests/ublk/test_null_03.sh new file mode 100755 index 000000000000..eee7a87a60da --- /dev/null +++ b/tools/testing/selftests/ublk/test_null_03.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test with user copy" + +dev_id=$(_add_ublk_dev -t null -u) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_part_01.sh b/tools/testing/selftests/ublk/test_part_01.sh new file mode 100755 index 000000000000..fa3b1a9af894 --- /dev/null +++ b/tools/testing/selftests/ublk/test_part_01.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +format_backing_file() +{ + local backing_file=$1 + + # Create ublk device to write partition table + local tmp_dev=$(_add_ublk_dev -t loop "${backing_file}") + [ $? -ne 0 ] && return 1 + + # Write partition table with sfdisk + sfdisk /dev/ublkb"${tmp_dev}" > /dev/null 2>&1 <<EOF +label: dos +start=2048, size=100MiB, type=83 +start=206848, size=100MiB, type=83 +EOF + local ret=$? + + "${UBLK_PROG}" del -n "${tmp_dev}" + + return $ret +} + +test_auto_part_scan() +{ + local backing_file=$1 + + # Create device WITHOUT --no_auto_part_scan + local dev_id=$(_add_ublk_dev -t loop "${backing_file}") + [ $? -ne 0 ] && return 1 + + udevadm settle + + # Partitions should be auto-detected + if [ ! -e /dev/ublkb"${dev_id}"p1 ] || [ ! -e /dev/ublkb"${dev_id}"p2 ]; then + "${UBLK_PROG}" del -n "${dev_id}" + return 1 + fi + + "${UBLK_PROG}" del -n "${dev_id}" + return 0 +} + +test_no_auto_part_scan() +{ + local backing_file=$1 + + # Create device WITH --no_auto_part_scan + local dev_id=$(_add_ublk_dev -t loop --no_auto_part_scan "${backing_file}") + [ $? -ne 0 ] && return 1 + + udevadm settle + + # Partitions should NOT be auto-detected + if [ -e /dev/ublkb"${dev_id}"p1 ]; then + "${UBLK_PROG}" del -n "${dev_id}" + return 1 + fi + + # Manual scan should work + blockdev --rereadpt /dev/ublkb"${dev_id}" > /dev/null 2>&1 + udevadm settle + + if [ ! -e /dev/ublkb"${dev_id}"p1 ] || [ ! -e /dev/ublkb"${dev_id}"p2 ]; then + "${UBLK_PROG}" del -n "${dev_id}" + return 1 + fi + + "${UBLK_PROG}" del -n "${dev_id}" + return 0 +} + +if ! _have_program sfdisk || ! _have_program blockdev; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NO_AUTO_PART_SCAN" + +if ! _have_feature "UBLK_F_NO_AUTO_PART_SCAN"; then + _cleanup_test + exit "$UBLK_SKIP_CODE" +fi + + +# Create and format backing file with partition table +_create_backfile 0 256M +format_backing_file "${UBLK_BACKFILES[0]}" +[ $? -ne 0 ] && ERR_CODE=255 + +# Test normal auto partition scan +[ "$ERR_CODE" -eq 0 ] && test_auto_part_scan "${UBLK_BACKFILES[0]}" +[ $? -ne 0 ] && ERR_CODE=255 + +# Test no auto partition scan with manual scan +[ "$ERR_CODE" -eq 0 ] && test_no_auto_part_scan "${UBLK_BACKFILES[0]}" +[ $? -ne 0 ] && ERR_CODE=255 + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_part_02.sh b/tools/testing/selftests/ublk/test_part_02.sh new file mode 100755 index 000000000000..d9ec06f36aee --- /dev/null +++ b/tools/testing/selftests/ublk/test_part_02.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_test_partition_scan_no_hang() +{ + local recovery_flag=$1 + local expected_state=$2 + local dev_id + local state + local daemon_pid + local start_time + local elapsed + + # Create ublk device with fault_inject target and very large delay + # to simulate hang during partition table read + # --delay_us 60000000 = 60 seconds delay + # Use _add_ublk_dev_no_settle to avoid udevadm settle hang waiting + # for partition scan events to complete + if [ "$recovery_flag" = "yes" ]; then + echo "Testing partition scan with recovery support..." + dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000 -r 1) + else + echo "Testing partition scan without recovery..." + dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000) + fi + + _check_add_dev "$TID" $? + + # The add command should return quickly because partition scan is async. + # Now sleep briefly to let the async partition scan work start and hit + # the delay in the fault_inject handler. + _ublk_sleep 1 5 + + # Kill the ublk daemon while partition scan is potentially blocked + # And check state transitions properly + start_time=${SECONDS} + daemon_pid=$(_get_ublk_daemon_pid "${dev_id}") + state=$(__ublk_kill_daemon "${dev_id}" "${expected_state}") + elapsed=$((SECONDS - start_time)) + + # Verify the device transitioned to expected state + if [ "$state" != "${expected_state}" ]; then + echo "FAIL: Device state is $state, expected ${expected_state}" + ERR_CODE=255 + _ublk_del_dev "${dev_id}" > /dev/null 2>&1 + return + fi + echo "PASS: Device transitioned to ${expected_state} in ${elapsed}s without hanging" + + # Clean up the device + _ublk_del_dev "${dev_id}" > /dev/null 2>&1 +} + +_prep_test "partition_scan" "verify async partition scan prevents IO hang" + +# Test 1: Without recovery support - should transition to DEAD +_test_partition_scan_no_hang "no" "DEAD" + +# Test 2: With recovery support - should transition to QUIESCED +_test_partition_scan_no_hang "yes" "QUIESCED" + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_recover_01.sh b/tools/testing/selftests/ublk/test_recover_01.sh new file mode 100755 index 000000000000..1cddc2345dab --- /dev/null +++ b/tools/testing/selftests/ublk/test_recover_01.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover 256M "kill_daemon" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -b & +ublk_run_recover_test -t loop -q 2 -r 1 -b "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 & +ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_recover_02.sh b/tools/testing/selftests/ublk/test_recover_02.sh new file mode 100755 index 000000000000..9c3f481880d3 --- /dev/null +++ b/tools/testing/selftests/ublk/test_recover_02.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover 256M "kill_daemon" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -z -b & +ublk_run_recover_test -t loop -q 2 -r 1 -z -b "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -z & +ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_recover_03.sh b/tools/testing/selftests/ublk/test_recover_03.sh new file mode 100755 index 000000000000..2554805e5b02 --- /dev/null +++ b/tools/testing/selftests/ublk/test_recover_03.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +ublk_run_quiesce_recover() +{ + run_io_and_recover 256M "quiesce_dev" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_feature "QUIESCE"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "quiesce" "basic quiesce & recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_quiesce_recover -t null -q 2 -r 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_quiesce_recover -t null -q 2 -r 1 -i 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_recover_04.sh b/tools/testing/selftests/ublk/test_recover_04.sh new file mode 100755 index 000000000000..4c83c1840c68 --- /dev/null +++ b/tools/testing/selftests/ublk/test_recover_04.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover 256M "kill_daemon" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (user copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -u & +ublk_run_recover_test -t loop -q 2 -r 1 -u "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -u -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_shmemzc_01.sh b/tools/testing/selftests/ublk/test_shmemzc_01.sh new file mode 100755 index 000000000000..b244ab3479a2 --- /dev/null +++ b/tools/testing/selftests/ublk/test_shmemzc_01.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Test: shmem_zc with hugetlbfs buffer on null target +# +# kublk and fio both mmap the same hugetlbfs file (MAP_SHARED), +# so they share physical pages. The kernel PFN match enables +# zero-copy I/O without socket-based fd passing. + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "shmem_zc" "null target hugetlbfs shmem zero-copy test" + +if ! _have_program fio; then + echo "SKIP: fio not available" + exit "$UBLK_SKIP_CODE" +fi + +if ! grep -q hugetlbfs /proc/filesystems; then + echo "SKIP: hugetlbfs not supported" + exit "$UBLK_SKIP_CODE" +fi + +# Allocate hugepages +OLD_NR_HP=$(cat /proc/sys/vm/nr_hugepages) +echo 10 > /proc/sys/vm/nr_hugepages +NR_HP=$(cat /proc/sys/vm/nr_hugepages) +if [ "$NR_HP" -lt 2 ]; then + echo "SKIP: cannot allocate hugepages" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +# Mount hugetlbfs +HTLB_MNT=$(mktemp -d "${UBLK_TEST_DIR}/htlb_mnt_XXXXXX") +if ! mount -t hugetlbfs none "$HTLB_MNT"; then + echo "SKIP: cannot mount hugetlbfs" + rmdir "$HTLB_MNT" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +HTLB_FILE="$HTLB_MNT/ublk_buf" +fallocate -l 4M "$HTLB_FILE" + +dev_id=$(_add_ublk_dev -t null --shmem_zc --htlb "$HTLB_FILE") +_check_add_dev $TID $? + +fio --name=htlb_zc \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=io_uring \ + --rw=randwrite \ + --direct=1 \ + --bs=4k \ + --size=4M \ + --iodepth=32 \ + --mem=mmaphuge:"$HTLB_FILE" \ + > /dev/null 2>&1 +ERR_CODE=$? + +# Delete device first so daemon releases the htlb mmap +_ublk_del_dev "${dev_id}" + +rm -f "$HTLB_FILE" +umount "$HTLB_MNT" +rmdir "$HTLB_MNT" +echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_shmemzc_02.sh b/tools/testing/selftests/ublk/test_shmemzc_02.sh new file mode 100755 index 000000000000..810dccba6d84 --- /dev/null +++ b/tools/testing/selftests/ublk/test_shmemzc_02.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Test: shmem_zc with hugetlbfs buffer on loop target +# +# kublk and fio both mmap the same hugetlbfs file (MAP_SHARED), +# so they share physical pages. The kernel PFN match enables +# zero-copy I/O without socket-based fd passing. + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "shmem_zc" "loop target hugetlbfs shmem zero-copy test" + +if ! _have_program fio; then + echo "SKIP: fio not available" + exit "$UBLK_SKIP_CODE" +fi + +if ! grep -q hugetlbfs /proc/filesystems; then + echo "SKIP: hugetlbfs not supported" + exit "$UBLK_SKIP_CODE" +fi + +# Allocate hugepages +OLD_NR_HP=$(cat /proc/sys/vm/nr_hugepages) +echo 10 > /proc/sys/vm/nr_hugepages +NR_HP=$(cat /proc/sys/vm/nr_hugepages) +if [ "$NR_HP" -lt 2 ]; then + echo "SKIP: cannot allocate hugepages" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +# Mount hugetlbfs +HTLB_MNT=$(mktemp -d "${UBLK_TEST_DIR}/htlb_mnt_XXXXXX") +if ! mount -t hugetlbfs none "$HTLB_MNT"; then + echo "SKIP: cannot mount hugetlbfs" + rmdir "$HTLB_MNT" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +HTLB_FILE="$HTLB_MNT/ublk_buf" +fallocate -l 4M "$HTLB_FILE" + +_create_backfile 0 128M +BACKFILE="${UBLK_BACKFILES[0]}" + +dev_id=$(_add_ublk_dev -t loop --shmem_zc --htlb "$HTLB_FILE" "$BACKFILE") +_check_add_dev $TID $? + +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" \ + --size=128M \ + --mem=mmaphuge:"$HTLB_FILE" +ERR_CODE=$? + +# Delete device first so daemon releases the htlb mmap +_ublk_del_dev "${dev_id}" + +rm -f "$HTLB_FILE" +umount "$HTLB_MNT" +rmdir "$HTLB_MNT" +echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_shmemzc_03.sh b/tools/testing/selftests/ublk/test_shmemzc_03.sh new file mode 100755 index 000000000000..606362491a32 --- /dev/null +++ b/tools/testing/selftests/ublk/test_shmemzc_03.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Test: shmem_zc with fio verify over filesystem on loop target +# +# mkfs + mount ext4 on the ublk device, then run fio verify on a +# file inside that filesystem. Exercises the full stack: +# filesystem -> block layer -> ublk shmem_zc -> loop target backing file. + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "shmem_zc" "loop target hugetlbfs shmem zero-copy fs verify test" + +if ! _have_program fio; then + echo "SKIP: fio not available" + exit "$UBLK_SKIP_CODE" +fi + +if ! grep -q hugetlbfs /proc/filesystems; then + echo "SKIP: hugetlbfs not supported" + exit "$UBLK_SKIP_CODE" +fi + +# Allocate hugepages +OLD_NR_HP=$(cat /proc/sys/vm/nr_hugepages) +echo 10 > /proc/sys/vm/nr_hugepages +NR_HP=$(cat /proc/sys/vm/nr_hugepages) +if [ "$NR_HP" -lt 2 ]; then + echo "SKIP: cannot allocate hugepages" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +# Mount hugetlbfs +HTLB_MNT=$(mktemp -d "${UBLK_TEST_DIR}/htlb_mnt_XXXXXX") +if ! mount -t hugetlbfs none "$HTLB_MNT"; then + echo "SKIP: cannot mount hugetlbfs" + rmdir "$HTLB_MNT" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +HTLB_FILE="$HTLB_MNT/ublk_buf" +fallocate -l 4M "$HTLB_FILE" + +_create_backfile 0 256M +BACKFILE="${UBLK_BACKFILES[0]}" + +dev_id=$(_add_ublk_dev -t loop --shmem_zc --htlb "$HTLB_FILE" "$BACKFILE") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" \ + _run_fio_verify_io --filename=testfile \ + --size=128M \ + --mem=mmaphuge:"$HTLB_FILE" +ERR_CODE=$? + +# Delete device first so daemon releases the htlb mmap +_ublk_del_dev "${dev_id}" + +rm -f "$HTLB_FILE" +umount "$HTLB_MNT" +rmdir "$HTLB_MNT" +echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_shmemzc_04.sh b/tools/testing/selftests/ublk/test_shmemzc_04.sh new file mode 100755 index 000000000000..9a2a6c2e8abe --- /dev/null +++ b/tools/testing/selftests/ublk/test_shmemzc_04.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Test: shmem_zc with read-only buffer registration on null target +# +# Same as test_shmemzc_01 but with --rdonly_shmem_buf: pages are pinned +# without FOLL_WRITE (UBLK_BUF_F_READ). Write I/O works because +# the server only reads from the shared buffer. + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "shmem_zc" "null target hugetlbfs shmem zero-copy rdonly_buf test" + +if ! _have_program fio; then + echo "SKIP: fio not available" + exit "$UBLK_SKIP_CODE" +fi + +if ! grep -q hugetlbfs /proc/filesystems; then + echo "SKIP: hugetlbfs not supported" + exit "$UBLK_SKIP_CODE" +fi + +# Allocate hugepages +OLD_NR_HP=$(cat /proc/sys/vm/nr_hugepages) +echo 10 > /proc/sys/vm/nr_hugepages +NR_HP=$(cat /proc/sys/vm/nr_hugepages) +if [ "$NR_HP" -lt 2 ]; then + echo "SKIP: cannot allocate hugepages" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +# Mount hugetlbfs +HTLB_MNT=$(mktemp -d "${UBLK_TEST_DIR}/htlb_mnt_XXXXXX") +if ! mount -t hugetlbfs none "$HTLB_MNT"; then + echo "SKIP: cannot mount hugetlbfs" + rmdir "$HTLB_MNT" + echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + exit "$UBLK_SKIP_CODE" +fi + +HTLB_FILE="$HTLB_MNT/ublk_buf" +fallocate -l 4M "$HTLB_FILE" + +dev_id=$(_add_ublk_dev -t null --shmem_zc --htlb "$HTLB_FILE" --rdonly_shmem_buf) +_check_add_dev $TID $? + +fio --name=htlb_zc_rdonly \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=io_uring \ + --rw=randwrite \ + --direct=1 \ + --bs=4k \ + --size=4M \ + --iodepth=32 \ + --mem=mmaphuge:"$HTLB_FILE" \ + > /dev/null 2>&1 +ERR_CODE=$? + +# Delete device first so daemon releases the htlb mmap +_ublk_del_dev "${dev_id}" + +rm -f "$HTLB_FILE" +umount "$HTLB_MNT" +rmdir "$HTLB_MNT" +echo "$OLD_NR_HP" > /proc/sys/vm/nr_hugepages + +_cleanup_test + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh new file mode 100755 index 000000000000..f91783f27649 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 & +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh new file mode 100755 index 000000000000..b128d11658a8 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +_prep_test "stress" "run IO and kill ublk server" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +for nr_queue in 1 4; do + ublk_io_and_kill_daemon 8G -t null -q "$nr_queue" & + ublk_io_and_kill_daemon 256M -t loop -q "$nr_queue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q "$nr_queue" "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + wait +done + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh new file mode 100755 index 000000000000..a0f0aba8eebc --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -z & +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_remove 8G -t null -q 4 --auto_zc & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait +fi + +if _have_feature "PER_IO_DAEMON"; then + ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh new file mode 100755 index 000000000000..896eae68d444 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & + ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait +fi + +if _have_feature "PER_IO_DAEMON"; then + ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & + ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait +fi + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh new file mode 100755 index 000000000000..d6c00c72080d --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +run_io_and_remove() +{ + local size=$1 + local dev_id + local dev_pid + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev $TID $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \ + --runtime=40 --time_based > /dev/null 2>&1 & + sleep 4 + + dev_pid=$(_get_ublk_daemon_pid "$dev_id") + kill -9 "$dev_pid" + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi +} + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +_prep_test "stress" "run IO and remove device with recovery enabled" + +_create_backfile 0 256M +_create_backfile 1 256M + +for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + wait +done + +if _have_feature "ZERO_COPY"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + wait + done +fi + +if _have_feature "AUTO_BUF_REG"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 --auto_zc -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" & + wait + done +fi + +if _have_feature "PER_IO_DAEMON"; then + ublk_io_and_remove 8G -t null -q 4 --nthreads 8 --per_io_tasks -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 --nthreads 8 --per_io_tasks -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 8G -t null -q 4 --nthreads 8 --per_io_tasks -r 1 -i "$reissue" & +fi +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_06.sh b/tools/testing/selftests/ublk/test_stress_06.sh new file mode 100755 index 000000000000..9481a273a4b4 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_06.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device (user copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -u & +ublk_io_and_remove 256M -t loop -q 4 -u "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_io_and_remove 8G -t null -q 4 -u --nthreads 8 --per_io_tasks & +ublk_io_and_remove 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_07.sh b/tools/testing/selftests/ublk/test_stress_07.sh new file mode 100755 index 000000000000..3e01c037cffb --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_07.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server (user copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -u --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -u --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_io_and_kill_daemon 8G -t null -q 4 -u --nthreads 8 --per_io_tasks & +ublk_io_and_kill_daemon 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_08.sh b/tools/testing/selftests/ublk/test_stress_08.sh new file mode 100755 index 000000000000..5f32424d2892 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_08.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "BATCH_IO"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -b & +ublk_io_and_remove 256M -t loop -q 4 --auto_zc -b "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 --auto_zc -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -b & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_09.sh b/tools/testing/selftests/ublk/test_stress_09.sh new file mode 100755 index 000000000000..64cb8d9b0438 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_09.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "BATCH_IO"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -z -b & +ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc -b "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -b "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -b & +wait + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh new file mode 100755 index 000000000000..9ffce477b461 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh new file mode 100755 index 000000000000..4c172950a247 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_02.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh new file mode 100755 index 000000000000..2cdf9f958988 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh new file mode 100755 index 000000000000..e24120eaca0e --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on zero copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_05.sh b/tools/testing/selftests/ublk/test_stripe_05.sh new file mode 100755 index 000000000000..f3de2d5cdfe4 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_05.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test on user copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -q 2 -u "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_06.sh b/tools/testing/selftests/ublk/test_stripe_06.sh new file mode 100755 index 000000000000..3fd5cd902956 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_06.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on user copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -u -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/trace/count_ios_per_tid.bt b/tools/testing/selftests/ublk/trace/count_ios_per_tid.bt new file mode 100644 index 000000000000..f4aa63ff2938 --- /dev/null +++ b/tools/testing/selftests/ublk/trace/count_ios_per_tid.bt @@ -0,0 +1,11 @@ +/* + * Tabulates and prints I/O completions per thread for the given device + * + * $1: dev_t +*/ +tracepoint:block:block_rq_complete +{ + if (args.dev == $1) { + @[tid] = count(); + } +} diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt new file mode 100644 index 000000000000..9d36ba35468f --- /dev/null +++ b/tools/testing/selftests/ublk/trace/seq_io.bt @@ -0,0 +1,53 @@ +/* + $1: dev_t + $2: RWBS + $3: strlen($2) + + Track request order between block_io_start and block_rq_complete. + Sequence starts at 1 so 0 means "never seen". On first valid + completion, sync complete_seq to handle probe attachment races. + block_rq_complete listed first to reduce missed completion window. +*/ + +BEGIN { + @start_seq = (uint64)1; + @complete_seq = (uint64)0; + @out_of_order = (uint64)0; + @start_order[0] = (uint64)0; + delete(@start_order[0]); + printf("BPFTRACE_READY\n"); +} + +tracepoint:block:block_rq_complete +/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/ +{ + $expected = @start_order[args.sector]; + if ($expected > 0) { + if (@complete_seq == 0) { + @complete_seq = $expected; + } + if ($expected != @complete_seq) { + printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n", + args.sector, $expected, @complete_seq); + @out_of_order = @out_of_order + 1; + } + delete(@start_order[args.sector]); + @complete_seq = @complete_seq + 1; + } +} + +tracepoint:block:block_io_start +/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/ +{ + @start_order[args.sector] = @start_seq; + @start_seq = @start_seq + 1; +} + +END { + printf("total_start: %llu total_complete: %llu out_of_order: %llu\n", + @start_seq - 1, @complete_seq, @out_of_order); + clear(@start_order); + clear(@start_seq); + clear(@complete_seq); + clear(@out_of_order); +} diff --git a/tools/testing/selftests/ublk/ublk_dep.h b/tools/testing/selftests/ublk/ublk_dep.h new file mode 100644 index 000000000000..f68fa7eac939 --- /dev/null +++ b/tools/testing/selftests/ublk/ublk_dep.h @@ -0,0 +1,18 @@ +#ifndef UBLK_DEP_H +#define UBLK_DEP_H + +#ifndef UBLK_U_IO_REGISTER_IO_BUF +#define UBLK_U_IO_REGISTER_IO_BUF \ + _IOWR('u', 0x23, struct ublksrv_io_cmd) +#define UBLK_U_IO_UNREGISTER_IO_BUF \ + _IOWR('u', 0x24, struct ublksrv_io_cmd) +#endif + +#ifndef UBLK_F_USER_RECOVERY_FAIL_IO +#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) +#endif + +#ifndef UBLK_F_ZONED +#define UBLK_F_ZONED (1ULL << 8) +#endif +#endif diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h new file mode 100644 index 000000000000..aab522f26167 --- /dev/null +++ b/tools/testing/selftests/ublk/utils.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KUBLK_UTILS_H +#define KUBLK_UTILS_H + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + unsigned long __mptr = (unsigned long)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#define round_up(val, rnd) \ + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) + +/* small sized & per-thread allocator */ +struct allocator { + unsigned int size; + cpu_set_t *set; +}; + +static inline int allocator_init(struct allocator *a, unsigned size) +{ + a->set = CPU_ALLOC(size); + a->size = size; + + if (a->set) + return 0; + return -ENOMEM; +} + +static inline void allocator_deinit(struct allocator *a) +{ + CPU_FREE(a->set); + a->set = NULL; + a->size = 0; +} + +static inline int allocator_get(struct allocator *a) +{ + int i; + + for (i = 0; i < a->size; i += 1) { + size_t set_size = CPU_ALLOC_SIZE(a->size); + + if (!CPU_ISSET_S(i, set_size, a->set)) { + CPU_SET_S(i, set_size, a->set); + return i; + } + } + + return -1; +} + +static inline void allocator_put(struct allocator *a, int i) +{ + size_t set_size = CPU_ALLOC_SIZE(a->size); + + if (i >= 0 && i < a->size) + CPU_CLR_S(i, set_size, a->set); +} + +static inline int allocator_get_val(struct allocator *a, int i) +{ + size_t set_size = CPU_ALLOC_SIZE(a->size); + + return CPU_ISSET_S(i, set_size, a->set); +} + +static inline unsigned int ilog2(unsigned int x) +{ + if (x == 0) + return 0; + return (sizeof(x) * 8 - 1) - __builtin_clz(x); +} + +#define UBLK_DBG_DEV (1U << 0) +#define UBLK_DBG_THREAD (1U << 1) +#define UBLK_DBG_IO_CMD (1U << 2) +#define UBLK_DBG_IO (1U << 3) +#define UBLK_DBG_CTRL_CMD (1U << 4) +#define UBLK_LOG (1U << 5) + +extern unsigned int ublk_dbg_mask; + +static inline void ublk_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static inline void ublk_log(const char *fmt, ...) +{ + if (ublk_dbg_mask & UBLK_LOG) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + } +} + +static inline void ublk_dbg(int level, const char *fmt, ...) +{ + if (level & ublk_dbg_mask) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + } +} + +#define ublk_assert(x) do { \ + if (!(x)) { \ + ublk_err("%s %d: assert!\n", __func__, __LINE__); \ + assert(x); \ + } \ +} while (0) + +#endif |
