summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2021-03-28 11:42:05 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2021-03-28 11:42:05 -0700
commit b44d1ddcf835b39a8dc14276d770074deaed297c (patch)
tree 3d3beeb763cfc8f3eb6ee16e8d92637aeb4bd62e
parent abed516ecd02ceb30fbd091e9b26205ea3192c65 (diff)
parent 2b8ed1c94182dbbd0163d0eb443a934cbf6b0d85 (diff)
download lwn-b44d1ddcf835b39a8dc14276d770074deaed297c.tar.gz
lwn-b44d1ddcf835b39a8dc14276d770074deaed297c.zip
Merge tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:

 - Use thread info versions of flag testing, as discussed last week.

 - The series enabling PF_IO_WORKER to just take signals, instead of
   needing to special case that they do not in a bunch of places. Ends
   up being pretty trivial to do, and then we can revert all the special
   casing we're currently doing.

 - Kill dead pointer assignment

 - Fix hashed part of async work queue trace

 - Fix sign extension issue for IORING_OP_PROVIDE_BUFFERS

 - Fix a link completion ordering regression in this merge window

 - Cancellation fixes

* tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block:
  io_uring: remove unsued assignment to pointer io
  io_uring: don't cancel extra on files match
  io_uring: don't cancel-track common timeouts
  io_uring: do post-completion chore on t-out cancel
  io_uring: fix timeout cancel return code
  Revert "signal: don't allow STOP on PF_IO_WORKER threads"
  Revert "kernel: freezer should treat PF_IO_WORKER like PF_KTHREAD for freezing"
  Revert "kernel: treat PF_IO_WORKER like PF_KTHREAD for ptrace/signals"
  Revert "signal: don't allow sending any signals to PF_IO_WORKER threads"
  kernel: stop masking signals in create_io_thread()
  io_uring: handle signals for IO threads like a normal thread
  kernel: don't call do_exit() for PF_IO_WORKER threads
  io_uring: maintain CQE order of a failed link
  io-wq: fix race around pending work on teardown
  io_uring: do ctx sqd ejection in a clear context
  io_uring: fix provide_buffers sign extension
  io_uring: don't skip file_end_write() on reissue
  io_uring: correct io_queue_async_work() traces
  io_uring: don't use {test,clear}_tsk_thread_flag() for current
-rw-r--r-- fs/io-wq.c 32
-rw-r--r-- fs/io_uring.c 98
-rw-r--r-- kernel/fork.c 16
-rw-r--r-- kernel/freezer.c 2
-rw-r--r-- kernel/ptrace.c 2
-rw-r--r-- kernel/signal.c 20
6 files changed, 94 insertions, 76 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 3dc10bfd8c3b..7434eb40ca8c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -16,7 +16,6 @@
#include <linux/rculist_nulls.h>
#include <linux/cpu.h>
#include <linux/tracehook.h>
-#include <linux/freezer.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h"
@@ -388,11 +387,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
static bool io_flush_signals(void)
{
- if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) {
+ if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
__set_current_state(TASK_RUNNING);
- if (current->task_works)
- task_work_run();
- clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL);
+ tracehook_notify_signal();
return true;
}
return false;
@@ -505,10 +502,15 @@ loop:
if (io_flush_signals())
continue;
ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
- if (try_to_freeze() || ret)
- continue;
- if (fatal_signal_pending(current))
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ if (!get_signal(&ksig))
+ continue;
break;
+ }
+ if (ret)
+ continue;
/* timed out, exit unless we're the fixed worker */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
!(worker->flags & IO_WORKER_F_FIXED))
@@ -716,9 +718,13 @@ static int io_wq_manager(void *data)
set_current_state(TASK_INTERRUPTIBLE);
io_wq_check_workers(wq);
schedule_timeout(HZ);
- try_to_freeze();
- if (fatal_signal_pending(current))
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ if (!get_signal(&ksig))
+ continue;
set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ }
} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
io_wq_check_workers(wq);
@@ -1065,7 +1071,11 @@ static void io_wq_destroy(struct io_wq *wq)
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
- WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
+ struct io_cb_cancel_data match = {
+ .fn = io_wq_work_match_all,
+ .cancel_all = true,
+ };
+ io_wqe_cancel_pending_work(wqe, &match);
kfree(wqe);
}
io_wq_put_hash(wq->hash);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 543551d70327..1949b80677e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -78,7 +78,6 @@
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
-#include <linux/freezer.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -1095,8 +1094,6 @@ static bool io_match_task(struct io_kiocb *head,
io_for_each_link(req, head) {
if (req->flags & REQ_F_INFLIGHT)
return true;
- if (req->task->files == files)
- return true;
}
return false;
}
@@ -1239,16 +1236,16 @@ static void io_queue_async_work(struct io_kiocb *req)
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
- trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
- &req->work, req->flags);
/* init ->work of the whole link before punting */
io_prep_async_link(req);
+ trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+ &req->work, req->flags);
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
}
-static void io_kill_timeout(struct io_kiocb *req)
+static void io_kill_timeout(struct io_kiocb *req, int status)
{
struct io_timeout_data *io = req->async_data;
int ret;
@@ -1258,31 +1255,11 @@ static void io_kill_timeout(struct io_kiocb *req)
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
- io_cqring_fill_event(req, 0);
+ io_cqring_fill_event(req, status);
io_put_req_deferred(req, 1);
}
}
-/*
- * Returns true if we found and killed one or more timeouts
- */
-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
- struct files_struct *files)
-{
- struct io_kiocb *req, *tmp;
- int canceled = 0;
-
- spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
- if (io_match_task(req, tsk, files)) {
- io_kill_timeout(req);
- canceled++;
- }
- }
- spin_unlock_irq(&ctx->completion_lock);
- return canceled != 0;
-}
-
static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
do {
@@ -1327,7 +1304,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
break;
list_del_init(&req->timeout.list);
- io_kill_timeout(req);
+ io_kill_timeout(req, 0);
} while (!list_empty(&ctx->timeout_list));
ctx->cq_last_tm_flush = seq;
@@ -2524,13 +2501,12 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
int cflags = 0;
+ if (req->rw.kiocb.ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
return;
if (res != req->result)
req_set_fail_links(req);
-
- if (req->rw.kiocb.ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_rw_kbuf(req);
__io_req_complete(req, issue_flags, res, cflags);
@@ -3978,6 +3954,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
static int io_provide_buffers_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
+ unsigned long size;
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
@@ -3991,7 +3968,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len);
- if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
+ size = (unsigned long)p->len * p->nbufs;
+ if (!access_ok(u64_to_user_ptr(p->addr), size))
return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group);
@@ -4820,7 +4798,6 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
ret = -ENOMEM;
goto out;
}
- io = req->async_data;
memcpy(req->async_data, &__io, sizeof(__io));
return -EAGAIN;
}
@@ -5583,7 +5560,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
data->mode = io_translate_timeout_mode(flags);
hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
- io_req_track_inflight(req);
+ if (is_timeout_link)
+ io_req_track_inflight(req);
return 0;
}
@@ -6479,8 +6457,6 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
fail_req:
- io_put_req(req);
- io_req_complete(req, ret);
if (link->head) {
/* fail even hard links since we don't submit */
link->head->flags |= REQ_F_FAIL_LINK;
@@ -6488,6 +6464,8 @@ fail_req:
io_req_complete(link->head, -ECANCELED);
link->head = NULL;
}
+ io_put_req(req);
+ io_req_complete(req, ret);
return ret;
}
ret = io_req_prep(req, sqe);
@@ -6764,8 +6742,13 @@ static int io_sq_thread(void *data)
timeout = jiffies + sqd->sq_thread_idle;
continue;
}
- if (fatal_signal_pending(current))
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ if (!get_signal(&ksig))
+ continue;
break;
+ }
sqt_spin = false;
cap_entries = !list_is_singular(&sqd->ctx_list);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -6808,7 +6791,6 @@ static int io_sq_thread(void *data)
mutex_unlock(&sqd->lock);
schedule();
- try_to_freeze();
mutex_lock(&sqd->lock);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_clear_wakeup_flag(ctx);
@@ -6873,7 +6855,7 @@ static int io_run_task_work_sig(void)
return 1;
if (!signal_pending(current))
return 0;
- if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))
+ if (test_thread_flag(TIF_NOTIFY_SIGNAL))
return -ERESTARTSYS;
return -EINTR;
}
@@ -8563,6 +8545,14 @@ static void io_ring_exit_work(struct work_struct *work)
struct io_tctx_node *node;
int ret;
+ /* prevent SQPOLL from submitting new requests */
+ if (ctx->sq_data) {
+ io_sq_thread_park(ctx->sq_data);
+ list_del_init(&ctx->sqd_list);
+ io_sqd_update_thread_idle(ctx->sq_data);
+ io_sq_thread_unpark(ctx->sq_data);
+ }
+
/*
* If we're doing polled IO and end up having requests being
* submitted async (out-of-line), then completions can come in while
@@ -8599,6 +8589,28 @@ static void io_ring_exit_work(struct work_struct *work)
io_ring_ctx_free(ctx);
}
+/* Returns true if we found and killed one or more timeouts */
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+ struct files_struct *files)
+{
+ struct io_kiocb *req, *tmp;
+ int canceled = 0;
+
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+ if (io_match_task(req, tsk, files)) {
+ io_kill_timeout(req, -ECANCELED);
+ canceled++;
+ }
+ }
+ io_commit_cqring(ctx);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ if (canceled != 0)
+ io_cqring_ev_posted(ctx);
+ return canceled != 0;
+}
+
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
unsigned long index;
@@ -8614,14 +8626,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_unregister_personality(ctx, index);
mutex_unlock(&ctx->uring_lock);
- /* prevent SQPOLL from submitting new requests */
- if (ctx->sq_data) {
- io_sq_thread_park(ctx->sq_data);
- list_del_init(&ctx->sqd_list);
- io_sqd_update_thread_idle(ctx->sq_data);
- io_sq_thread_unpark(ctx->sq_data);
- }
-
io_kill_timeouts(ctx, NULL, NULL);
io_poll_remove_all(ctx, NULL, NULL);
diff --git a/kernel/fork.c b/kernel/fork.c
index 54cc905e5fe0..426cd0c51f9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1948,8 +1948,14 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
- if (args->io_thread)
+ if (args->io_thread) {
+ /*
+ * Mark us an IO worker, and block any signal that isn't
+ * fatal or STOP
+ */
p->flags |= PF_IO_WORKER;
+ siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
+ }
/*
* This _must_ happen before we call free_task(), i.e. before we jump
@@ -2438,14 +2444,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
.stack_size = (unsigned long)arg,
.io_thread = 1,
};
- struct task_struct *tsk;
- tsk = copy_process(NULL, 0, node, &args);
- if (!IS_ERR(tsk)) {
- sigfillset(&tsk->blocked);
- sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
- }
- return tsk;
+ return copy_process(NULL, 0, node, &args);
}
/*
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 1a2d57d1327c..dc520f01f99d 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -134,7 +134,7 @@ bool freeze_task(struct task_struct *p)
return false;
}
- if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))
+ if (!(p->flags & PF_KTHREAD))
fake_signal_wake_up(p);
else
wake_up_state(p, TASK_INTERRUPTIBLE);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 821cf1723814..61db50f7ca86 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -375,7 +375,7 @@ static int ptrace_attach(struct task_struct *task, long request,
audit_ptrace(task);
retval = -EPERM;
- if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER)))
+ if (unlikely(task->flags & PF_KTHREAD))
goto out;
if (same_thread_group(task, current))
goto out;
diff --git a/kernel/signal.c b/kernel/signal.c
index f2a1b898da29..f2718350bf4b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -91,7 +91,7 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
return true;
/* Only allow kernel generated signals to this kthread */
- if (unlikely((t->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
+ if (unlikely((t->flags & PF_KTHREAD) &&
(handler == SIG_KTHREAD_KERNEL) && !force))
return true;
@@ -288,8 +288,7 @@ bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask)
JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING));
BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK));
- if (unlikely(fatal_signal_pending(task) ||
- (task->flags & (PF_EXITING | PF_IO_WORKER))))
+ if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING)))
return false;
if (mask & JOBCTL_STOP_SIGMASK)
@@ -834,9 +833,6 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info,
if (!valid_signal(sig))
return -EINVAL;
- /* PF_IO_WORKER threads don't take any signals */
- if (t->flags & PF_IO_WORKER)
- return -ESRCH;
if (!si_fromuser(info))
return 0;
@@ -1100,7 +1096,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
/*
* Skip useless siginfo allocation for SIGKILL and kernel threads.
*/
- if ((sig == SIGKILL) || (t->flags & (PF_KTHREAD | PF_IO_WORKER)))
+ if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
goto out_set;
/*
@@ -2772,13 +2768,21 @@ relock:
}
/*
+ * PF_IO_WORKER threads will catch and exit on fatal signals
+ * themselves. They have cleanup that must be performed, so
+ * we cannot call do_exit() on their behalf.
+ */
+ if (current->flags & PF_IO_WORKER)
+ goto out;
+
+ /*
* Death signals, no core dump.
*/
do_group_exit(ksig->info.si_signo);
/* NOTREACHED */
}
spin_unlock_irq(&sighand->siglock);
-
+out:
ksig->sig = signr;
if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))