summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
diff options
context:
space:
mode:
authorThomas Hellstrom <thellstrom@vmware.com>2017-08-24 08:06:29 +0200
committerSinclair Yeh <syeh@vmware.com>2017-08-28 17:40:46 +0200
commit65b97a2bec2fc75e9137e8a17954cb66809215bf (patch)
tree44d3b6e344866347de986da999e0d4003bfc8b17 /drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
parentef369904aaf717e0390b483efd47daba9ba8ddf2 (diff)
downloadlwn-65b97a2bec2fc75e9137e8a17954cb66809215bf.tar.gz
lwn-65b97a2bec2fc75e9137e8a17954cb66809215bf.zip
drm/vmwgfx: Restart command buffers after errors
Previously we skipped the command buffer and added an extra fence to avoid hangs due to skipped fence commands. Now we instead restart the command buffer after the failing command, if there are any commands left. In addition we print out some information about the failing command and its location in the command buffer. Testing Done: ran glxgears using mesa modified to send the NOP_ERROR command before each 10th clear and verified that we detected the device error properly and that there were no other device errors caused by incorrectly ordered command buffers. Also ran the piglit "quick" test suite which generates a couple of device errors and verified that they were handled as intended. Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> Reviewed-by: Brian Paul <brianp@vmware.com> Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Diffstat (limited to 'drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c')
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c183
1 files changed, 158 insertions, 25 deletions
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index 2e5718509c23..847490bbb2e2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -51,6 +51,7 @@ struct vmw_cmdbuf_context {
struct list_head hw_submitted;
struct list_head preempted;
unsigned num_hw_submitted;
+ bool block_submission;
};
/**
@@ -60,6 +61,9 @@ struct vmw_cmdbuf_context {
* kernel command submissions, @cur.
* @space_mutex: Mutex to protect against starvation when we allocate
* main pool buffer space.
+ * @error_mutex: Mutex to serialize the work queue error handling.
+ * Note this is not needed if the same workqueue handler
+ * can't race with itself...
* @work: A struct work_struct implementeing command buffer error handling.
* Immutable.
* @dev_priv: Pointer to the device private struct. Immutable.
@@ -101,6 +105,7 @@ struct vmw_cmdbuf_context {
struct vmw_cmdbuf_man {
struct mutex cur_mutex;
struct mutex space_mutex;
+ struct mutex error_mutex;
struct work_struct work;
struct vmw_private *dev_priv;
struct vmw_cmdbuf_context ctx[SVGA_CB_CONTEXT_MAX];
@@ -179,12 +184,13 @@ struct vmw_cmdbuf_alloc_info {
};
/* Loop over each context in the command buffer manager. */
-#define for_each_cmdbuf_ctx(_man, _i, _ctx) \
+#define for_each_cmdbuf_ctx(_man, _i, _ctx) \
for (_i = 0, _ctx = &(_man)->ctx[0]; (_i) < SVGA_CB_CONTEXT_MAX; \
++(_i), ++(_ctx))
-static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man, bool enable);
-
+static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man, u32 context,
+ bool enable);
+static int vmw_cmdbuf_preempt(struct vmw_cmdbuf_man *man, u32 context);
/**
* vmw_cmdbuf_cur_lock - Helper to lock the cur_mutex.
@@ -329,7 +335,8 @@ static void vmw_cmdbuf_ctx_submit(struct vmw_cmdbuf_man *man,
struct vmw_cmdbuf_context *ctx)
{
while (ctx->num_hw_submitted < man->max_hw_submitted &&
- !list_empty(&ctx->submitted)) {
+ !list_empty(&ctx->submitted) &&
+ !ctx->block_submission) {
struct vmw_cmdbuf_header *entry;
SVGACBStatus status;
@@ -384,12 +391,17 @@ static void vmw_cmdbuf_ctx_process(struct vmw_cmdbuf_man *man,
__vmw_cmdbuf_header_free(entry);
break;
case SVGA_CB_STATUS_COMMAND_ERROR:
- case SVGA_CB_STATUS_CB_HEADER_ERROR:
+ entry->cb_header->status = SVGA_CB_STATUS_NONE;
list_add_tail(&entry->list, &man->error);
schedule_work(&man->work);
break;
case SVGA_CB_STATUS_PREEMPTED:
- list_add(&entry->list, &ctx->preempted);
+ entry->cb_header->status = SVGA_CB_STATUS_NONE;
+ list_add_tail(&entry->list, &ctx->preempted);
+ break;
+ case SVGA_CB_STATUS_CB_HEADER_ERROR:
+ WARN_ONCE(true, "Command buffer header error.\n");
+ __vmw_cmdbuf_header_free(entry);
break;
default:
WARN_ONCE(true, "Undefined command buffer status.\n");
@@ -497,24 +509,111 @@ static void vmw_cmdbuf_work_func(struct work_struct *work)
container_of(work, struct vmw_cmdbuf_man, work);
struct vmw_cmdbuf_header *entry, *next;
uint32_t dummy;
- bool restart = false;
+ bool restart[SVGA_CB_CONTEXT_MAX];
+ bool send_fence = false;
+ struct list_head restart_head[SVGA_CB_CONTEXT_MAX];
+ int i;
+ struct vmw_cmdbuf_context *ctx;
+ for_each_cmdbuf_ctx(man, i, ctx) {
+ INIT_LIST_HEAD(&restart_head[i]);
+ restart[i] = false;
+ }
+
+ mutex_lock(&man->error_mutex);
spin_lock(&man->lock);
list_for_each_entry_safe(entry, next, &man->error, list) {
- restart = true;
- DRM_ERROR("Command buffer error.\n");
+ SVGACBHeader *cb_hdr = entry->cb_header;
+ SVGA3dCmdHeader *header = (SVGA3dCmdHeader *)
+ (entry->cmd + cb_hdr->errorOffset);
+ u32 error_cmd_size, new_start_offset;
+ const char *cmd_name;
+
+ list_del_init(&entry->list);
+ restart[entry->cb_context] = true;
+
+ if (!vmw_cmd_describe(header, &error_cmd_size, &cmd_name)) {
+ DRM_ERROR("Unknown command causing device error.\n");
+ DRM_ERROR("Command buffer offset is %lu\n",
+ (unsigned long) cb_hdr->errorOffset);
+ __vmw_cmdbuf_header_free(entry);
+ send_fence = true;
+ continue;
+ }
- list_del(&entry->list);
- __vmw_cmdbuf_header_free(entry);
- wake_up_all(&man->idle_queue);
+ DRM_ERROR("Command \"%s\" causing device error.\n", cmd_name);
+ DRM_ERROR("Command buffer offset is %lu\n",
+ (unsigned long) cb_hdr->errorOffset);
+ DRM_ERROR("Command size is %lu\n",
+ (unsigned long) error_cmd_size);
+
+ new_start_offset = cb_hdr->errorOffset + error_cmd_size;
+
+ if (new_start_offset >= cb_hdr->length) {
+ __vmw_cmdbuf_header_free(entry);
+ send_fence = true;
+ continue;
+ }
+
+ if (man->using_mob)
+ cb_hdr->ptr.mob.mobOffset += new_start_offset;
+ else
+ cb_hdr->ptr.pa += (u64) new_start_offset;
+
+ entry->cmd += new_start_offset;
+ cb_hdr->length -= new_start_offset;
+ cb_hdr->errorOffset = 0;
+ list_add_tail(&entry->list, &restart_head[entry->cb_context]);
+ man->ctx[entry->cb_context].block_submission = true;
+ }
+ spin_unlock(&man->lock);
+
+ /* Preempt all contexts with errors */
+ for_each_cmdbuf_ctx(man, i, ctx) {
+ if (ctx->block_submission && vmw_cmdbuf_preempt(man, i))
+ DRM_ERROR("Failed preempting command buffer "
+ "context %u.\n", i);
+ }
+
+ spin_lock(&man->lock);
+ for_each_cmdbuf_ctx(man, i, ctx) {
+ if (!ctx->block_submission)
+ continue;
+
+ /* Move preempted command buffers to the preempted queue. */
+ vmw_cmdbuf_ctx_process(man, ctx, &dummy);
+
+ /*
+ * Add the preempted queue after the command buffer
+ * that caused an error.
+ */
+ list_splice_init(&ctx->preempted, restart_head[i].prev);
+
+ /*
+ * Finally add all command buffers first in the submitted
+ * queue, to rerun them.
+ */
+ list_splice_init(&restart_head[i], &ctx->submitted);
+
+ ctx->block_submission = false;
}
+
+ vmw_cmdbuf_man_process(man);
spin_unlock(&man->lock);
- if (restart && vmw_cmdbuf_startstop(man, true))
- DRM_ERROR("Failed restarting command buffer context 0.\n");
+ for_each_cmdbuf_ctx(man, i, ctx) {
+ if (restart[i] && vmw_cmdbuf_startstop(man, i, true))
+ DRM_ERROR("Failed restarting command buffer "
+ "context %u.\n", i);
+ }
/* Send a new fence in case one was removed */
- vmw_fifo_send_fence(man->dev_priv, &dummy);
+ if (send_fence) {
+ vmw_fifo_send_fence(man->dev_priv, &dummy);
+ wake_up_all(&man->idle_queue);
+ }
+
+ mutex_unlock(&man->error_mutex);
}
/**
@@ -1057,6 +1156,29 @@ static int vmw_cmdbuf_send_device_command(struct vmw_cmdbuf_man *man,
}
/**
+ * vmw_cmdbuf_preempt - Send a preempt command through the device
+ * context.
+ *
+ * @man: The command buffer manager.
+ *
+ * Synchronously sends a preempt command.
+ */
+static int vmw_cmdbuf_preempt(struct vmw_cmdbuf_man *man, u32 context)
+{
+ struct {
+ uint32 id;
+ SVGADCCmdPreempt body;
+ } __packed cmd;
+
+ cmd.id = SVGA_DC_CMD_PREEMPT;
+ cmd.body.context = SVGA_CB_CONTEXT_0 + context;
+ cmd.body.ignoreIDZero = 0;
+
+ return vmw_cmdbuf_send_device_command(man, &cmd, sizeof(cmd));
+}
+
+
+/**
* vmw_cmdbuf_startstop - Send a start / stop command through the device
* context.
*
@@ -1065,7 +1187,7 @@ static int vmw_cmdbuf_send_device_command(struct vmw_cmdbuf_man *man,
*
* Synchronously sends a device start / stop context command.
*/
-static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man,
+static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man, u32 context,
bool enable)
{
struct {
@@ -1075,7 +1197,7 @@ static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man,
cmd.id = SVGA_DC_CMD_START_STOP_CONTEXT;
cmd.body.enable = (enable) ? 1 : 0;
- cmd.body.context = SVGA_CB_CONTEXT_0;
+ cmd.body.context = SVGA_CB_CONTEXT_0 + context;
return vmw_cmdbuf_send_device_command(man, &cmd, sizeof(cmd));
}
@@ -1174,7 +1296,7 @@ struct vmw_cmdbuf_man *vmw_cmdbuf_man_create(struct vmw_private *dev_priv)
{
struct vmw_cmdbuf_man *man;
struct vmw_cmdbuf_context *ctx;
- int i;
+ unsigned int i;
int ret;
if (!(dev_priv->capabilities & SVGA_CAP_COMMAND_BUFFERS))
@@ -1209,6 +1331,7 @@ struct vmw_cmdbuf_man *vmw_cmdbuf_man_create(struct vmw_private *dev_priv)
spin_lock_init(&man->lock);
mutex_init(&man->cur_mutex);
mutex_init(&man->space_mutex);
+ mutex_init(&man->error_mutex);
man->default_size = VMW_CMDBUF_INLINE_SIZE;
init_waitqueue_head(&man->alloc_queue);
init_waitqueue_head(&man->idle_queue);
@@ -1217,11 +1340,14 @@ struct vmw_cmdbuf_man *vmw_cmdbuf_man_create(struct vmw_private *dev_priv)
INIT_WORK(&man->work, &vmw_cmdbuf_work_func);
vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_ERROR,
&dev_priv->error_waiters);
- ret = vmw_cmdbuf_startstop(man, true);
- if (ret) {
- DRM_ERROR("Failed starting command buffer context 0.\n");
- vmw_cmdbuf_man_destroy(man);
- return ERR_PTR(ret);
+ for_each_cmdbuf_ctx(man, i, ctx) {
+ ret = vmw_cmdbuf_startstop(man, i, true);
+ if (ret) {
+ DRM_ERROR("Failed starting command buffer "
+ "context %u.\n", i);
+ vmw_cmdbuf_man_destroy(man);
+ return ERR_PTR(ret);
+ }
}
return man;
@@ -1271,10 +1397,16 @@ void vmw_cmdbuf_remove_pool(struct vmw_cmdbuf_man *man)
*/
void vmw_cmdbuf_man_destroy(struct vmw_cmdbuf_man *man)
{
+ struct vmw_cmdbuf_context *ctx;
+ unsigned int i;
+
WARN_ON_ONCE(man->has_pool);
(void) vmw_cmdbuf_idle(man, false, 10*HZ);
- if (vmw_cmdbuf_startstop(man, false))
- DRM_ERROR("Failed stopping command buffer context 0.\n");
+
+ for_each_cmdbuf_ctx(man, i, ctx)
+ if (vmw_cmdbuf_startstop(man, i, false))
+ DRM_ERROR("Failed stopping command buffer "
+ "context %u.\n", i);
vmw_generic_waiter_remove(man->dev_priv, SVGA_IRQFLAG_ERROR,
&man->dev_priv->error_waiters);
@@ -1283,5 +1415,6 @@ void vmw_cmdbuf_man_destroy(struct vmw_cmdbuf_man *man)
dma_pool_destroy(man->headers);
mutex_destroy(&man->cur_mutex);
mutex_destroy(&man->space_mutex);
+ mutex_destroy(&man->error_mutex);
kfree(man);
}