summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@gmail.com>2019-12-03 10:12:10 +0200
committerOded Gabbay <oded.gabbay@gmail.com>2019-12-14 15:12:21 +0200
commit018e0e3594f7dcd029d258e368c485e742fa9cdb (patch)
treef6ea45a670b7d7e81b16b831e864ca731fe5f987
parent16981742717b04644a41052570fb502682a315d2 (diff)
downloadlwn-018e0e3594f7dcd029d258e368c485e742fa9cdb.tar.gz
lwn-018e0e3594f7dcd029d258e368c485e742fa9cdb.zip
habanalabs: rate limit error msg on waiting for CS
In case a user submits a CS, and the submission fails, and the user doesn't check the return value and instead use the error return value as a valid sequence number of a CS and ask to wait on it, the driver will print an error and return an error code for that wait. The real problem happens if now the user ignores the error of the wait, and try to wait again and again. This can lead to a flood of error messages from the driver and even soft lockup event. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Tomer Tayar <ttayar@habana.ai>
-rw-r--r--drivers/misc/habanalabs/command_submission.c5
-rw-r--r--drivers/misc/habanalabs/context.c2
2 files changed, 4 insertions, 3 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 8850f475a413..0bf08678431b 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -824,8 +824,9 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
memset(args, 0, sizeof(*args));
if (rc < 0) {
- dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n",
- rc, seq);
+ dev_err_ratelimited(hdev->dev,
+ "Error %ld on waiting for CS handle %llu\n",
+ rc, seq);
if (rc == -ERESTARTSYS) {
args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
rc = -EINTR;
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index 17db7b3dfb4c..2df6fb87e7ff 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -176,7 +176,7 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
spin_lock(&ctx->cs_lock);
if (seq >= ctx->cs_sequence) {
- dev_notice(hdev->dev,
+ dev_notice_ratelimited(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu\n",
seq, ctx->cs_sequence);
spin_unlock(&ctx->cs_lock);