1 files changed, 35 insertions, 269 deletions
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 4745537fed25..0df86402e9ce 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -66,7 +66,6 @@ enum stdu_content_type {
  */
 struct vmw_stdu_dirty {
 	struct vmw_kms_dirty base;
-	SVGA3dTransferType  transfer;
 	s32 left, right, top, bottom;
 	s32 fb_left, fb_top;
 	u32 pitch;
@@ -137,12 +136,6 @@ static void vmw_stdu_destroy(struct vmw_screen_target_display_unit *stdu);
  * Screen Target Display Unit CRTC Functions
  *****************************************************************************/
 
-static bool vmw_stdu_use_cpu_blit(const struct vmw_private *vmw)
-{
-	return !(vmw->capabilities & SVGA_CAP_3D) || vmw->vram_size < (32 * 1024 * 1024);
-}
-
-
 /**
  * vmw_stdu_crtc_destroy - cleans up the STDU
  *
@@ -452,93 +445,6 @@ static void vmw_stdu_crtc_atomic_disable(struct drm_crtc *crtc,
 }
 
 /**
- * vmw_stdu_bo_clip - Callback to encode a suface DMA command cliprect
- *
- * @dirty: The closure structure.
- *
- * Encodes a surface DMA command cliprect and updates the bounding box
- * for the DMA.
- */
-static void vmw_stdu_bo_clip(struct vmw_kms_dirty *dirty)
-{
-	struct vmw_stdu_dirty *ddirty =
-		container_of(dirty, struct vmw_stdu_dirty, base);
-	struct vmw_stdu_dma *cmd = dirty->cmd;
-	struct SVGA3dCopyBox *blit = (struct SVGA3dCopyBox *) &cmd[1];
-
-	blit += dirty->num_hits;
-	blit->srcx = dirty->fb_x;
-	blit->srcy = dirty->fb_y;
-	blit->x = dirty->unit_x1;
-	blit->y = dirty->unit_y1;
-	blit->d = 1;
-	blit->w = dirty->unit_x2 - dirty->unit_x1;
-	blit->h = dirty->unit_y2 - dirty->unit_y1;
-	dirty->num_hits++;
-
-	if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM)
-		return;
-
-	/* Destination bounding box */
-	ddirty->left = min_t(s32, ddirty->left, dirty->unit_x1);
-	ddirty->top = min_t(s32, ddirty->top, dirty->unit_y1);
-	ddirty->right = max_t(s32, ddirty->right, dirty->unit_x2);
-	ddirty->bottom = max_t(s32, ddirty->bottom, dirty->unit_y2);
-}
-
-/**
- * vmw_stdu_bo_fifo_commit - Callback to fill in and submit a DMA command.
- *
- * @dirty: The closure structure.
- *
- * Fills in the missing fields in a DMA command, and optionally encodes
- * a screen target update command, depending on transfer direction.
- */
-static void vmw_stdu_bo_fifo_commit(struct vmw_kms_dirty *dirty)
-{
-	struct vmw_stdu_dirty *ddirty =
-		container_of(dirty, struct vmw_stdu_dirty, base);
-	struct vmw_screen_target_display_unit *stdu =
-		container_of(dirty->unit, typeof(*stdu), base);
-	struct vmw_stdu_dma *cmd = dirty->cmd;
-	struct SVGA3dCopyBox *blit = (struct SVGA3dCopyBox *) &cmd[1];
-	SVGA3dCmdSurfaceDMASuffix *suffix =
-		(SVGA3dCmdSurfaceDMASuffix *) &blit[dirty->num_hits];
-	size_t blit_size = sizeof(*blit) * dirty->num_hits + sizeof(*suffix);
-
-	if (!dirty->num_hits) {
-		vmw_cmd_commit(dirty->dev_priv, 0);
-		return;
-	}
-
-	cmd->header.id = SVGA_3D_CMD_SURFACE_DMA;
-	cmd->header.size = sizeof(cmd->body) + blit_size;
-	vmw_bo_get_guest_ptr(&ddirty->buf->base, &cmd->body.guest.ptr);
-	cmd->body.guest.pitch = ddirty->pitch;
-	cmd->body.host.sid = stdu->display_srf->res.id;
-	cmd->body.host.face = 0;
-	cmd->body.host.mipmap = 0;
-	cmd->body.transfer = ddirty->transfer;
-	suffix->suffixSize = sizeof(*suffix);
-	suffix->maximumOffset = ddirty->buf->base.base.size;
-
-	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
-		blit_size += sizeof(struct vmw_stdu_update);
-
-		vmw_stdu_populate_update(&suffix[1], stdu->base.unit,
-					 ddirty->left, ddirty->right,
-					 ddirty->top, ddirty->bottom);
-	}
-
-	vmw_cmd_commit(dirty->dev_priv, sizeof(*cmd) + blit_size);
-
-	stdu->display_srf->res.res_dirty = true;
-	ddirty->left = ddirty->top = S32_MAX;
-	ddirty->right = ddirty->bottom = S32_MIN;
-}
-
-
-/**
  * vmw_stdu_bo_cpu_clip - Callback to encode a CPU blit
  *
  * @dirty: The closure structure.
@@ -598,62 +504,21 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
 		return;
 
 	/* Assume we are blitting from Guest (bo) to Host (display_srf) */
-	dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
-	dst_bo = &stdu->display_srf->res.backup->base;
-	dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
-
-	src_pitch = ddirty->pitch;
-	src_bo = &ddirty->buf->base;
-	src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
+	src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
+	src_bo = &stdu->display_srf->res.backup->base;
+	src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
-	/* Swap src and dst if the assumption was wrong. */
-	if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
-		swap(dst_pitch, src_pitch);
-		swap(dst_bo, src_bo);
-		swap(src_offset, dst_offset);
-	}
+	dst_pitch = ddirty->pitch;
+	dst_bo = &ddirty->buf->base;
+	dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 	(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
 			       src_bo, src_offset, src_pitch,
 			       width * stdu->cpp, height, &diff);
-
-	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
-	    drm_rect_visible(&diff.rect)) {
-		struct vmw_private *dev_priv;
-		struct vmw_stdu_update *cmd;
-		struct drm_clip_rect region;
-		int ret;
-
-		/* We are updating the actual surface, not a proxy */
-		region.x1 = diff.rect.x1;
-		region.x2 = diff.rect.x2;
-		region.y1 = diff.rect.y1;
-		region.y2 = diff.rect.y2;
-		ret = vmw_kms_update_proxy(&stdu->display_srf->res, &region,
-					   1, 1);
-		if (ret)
-			goto out_cleanup;
-
-
-		dev_priv = vmw_priv(stdu->base.crtc.dev);
-		cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd));
-		if (!cmd)
-			goto out_cleanup;
-
-		vmw_stdu_populate_update(cmd, stdu->base.unit,
-					 region.x1, region.x2,
-					 region.y1, region.y2);
-
-		vmw_cmd_commit(dev_priv, sizeof(*cmd));
-	}
-
-out_cleanup:
-	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
-	ddirty->right = ddirty->bottom = S32_MIN;
 }
 
 /**
- * vmw_kms_stdu_dma - Perform a DMA transfer between a buffer-object backed
+ * vmw_kms_stdu_readback - Perform a readback from a buffer-object backed
  * framebuffer and the screen target system.
  *
  * @dev_priv: Pointer to the device private structure.
@@ -666,9 +531,6 @@ out_cleanup:
  * be NULL.
  * @num_clips: Number of clip rects in @clips or @vclips.
  * @increment: Increment to use when looping over @clips or @vclips.
- * @to_surface: Whether to DMA to the screen target system as opposed to
- * from the screen target system.
- * @interruptible: Whether to perform waits interruptible if possible.
  * @crtc: If crtc is passed, perform stdu dma on that crtc only.
  *
  * If DMA-ing till the screen target system, the function will also notify
@@ -677,59 +539,49 @@ out_cleanup:
  * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
  * interrupted.
  */
-int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
-		     struct drm_file *file_priv,
-		     struct vmw_framebuffer *vfb,
-		     struct drm_vmw_fence_rep __user *user_fence_rep,
-		     struct drm_clip_rect *clips,
-		     struct drm_vmw_rect *vclips,
-		     uint32_t num_clips,
-		     int increment,
-		     bool to_surface,
-		     bool interruptible,
-		     struct drm_crtc *crtc)
+int vmw_kms_stdu_readback(struct vmw_private *dev_priv,
+			  struct drm_file *file_priv,
+			  struct vmw_framebuffer *vfb,
+			  struct drm_vmw_fence_rep __user *user_fence_rep,
+			  struct drm_clip_rect *clips,
+			  struct drm_vmw_rect *vclips,
+			  uint32_t num_clips,
+			  int increment,
+			  struct drm_crtc *crtc)
 {
 	struct vmw_bo *buf =
 		container_of(vfb, struct vmw_framebuffer_bo, base)->buffer;
 	struct vmw_stdu_dirty ddirty;
 	int ret;
-	bool cpu_blit = vmw_stdu_use_cpu_blit(dev_priv);
 	DECLARE_VAL_CONTEXT(val_ctx, NULL, 0);
 
 	/*
-	 * VMs without 3D support don't have the surface DMA command and
-	 * we'll be using a CPU blit, and the framebuffer should be moved out
-	 * of VRAM.
+	 * Allowing the GMR domain here may look odd, since screen targets would
+	 * not normally use it, but e.g. the xorg vmware driver issues
+	 * CMD_SURFACE_DMA for various pixmap updates, which can transition our bo
+	 * to a GMR. Rather than forcing another transition, optimize the readback
+	 * by reading directly from the GMR.
 	 */
-	ret = vmw_validation_add_bo(&val_ctx, buf, false, cpu_blit);
+	vmw_bo_placement_set(buf,
+			     VMW_BO_DOMAIN_MOB | VMW_BO_DOMAIN_SYS | VMW_BO_DOMAIN_GMR,
+			     VMW_BO_DOMAIN_MOB | VMW_BO_DOMAIN_SYS | VMW_BO_DOMAIN_GMR);
+	ret = vmw_validation_add_bo(&val_ctx, buf);
 	if (ret)
 		return ret;
 
-	ret = vmw_validation_prepare(&val_ctx, NULL, interruptible);
+	ret = vmw_validation_prepare(&val_ctx, NULL, true);
 	if (ret)
 		goto out_unref;
 
-	ddirty.transfer = (to_surface) ? SVGA3D_WRITE_HOST_VRAM :
-		SVGA3D_READ_HOST_VRAM;
 	ddirty.left = ddirty.top = S32_MAX;
 	ddirty.right = ddirty.bottom = S32_MIN;
 	ddirty.fb_left = ddirty.fb_top = S32_MAX;
 	ddirty.pitch = vfb->base.pitches[0];
 	ddirty.buf = buf;
-	ddirty.base.fifo_commit = vmw_stdu_bo_fifo_commit;
-	ddirty.base.clip = vmw_stdu_bo_clip;
-	ddirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_dma) +
-		num_clips * sizeof(SVGA3dCopyBox) +
-		sizeof(SVGA3dCmdSurfaceDMASuffix);
-	if (to_surface)
-		ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);
-
-
-	if (cpu_blit) {
-		ddirty.base.fifo_commit = vmw_stdu_bo_cpu_commit;
-		ddirty.base.clip = vmw_stdu_bo_cpu_clip;
-		ddirty.base.fifo_reserve_size = 0;
-	}
+
+	ddirty.base.fifo_commit = vmw_stdu_bo_cpu_commit;
+	ddirty.base.clip = vmw_stdu_bo_cpu_clip;
+	ddirty.base.fifo_reserve_size = 0;
 
 	ddirty.base.crtc = crtc;
 
@@ -1161,11 +1013,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	/*
 	 * This should only happen if the buffer object is too large to create a
 	 * proxy surface for.
-	 * If we are a 2D VM with a buffer object then we have to use CPU blit
-	 * so cache these mappings
 	 */
-	if (vps->content_fb_type == SEPARATE_BO &&
-	    vmw_stdu_use_cpu_blit(dev_priv))
+	if (vps->content_fb_type == SEPARATE_BO)
 		vps->cpp = new_fb->pitches[0] / new_fb->width;
 
 	return 0;
@@ -1175,14 +1024,6 @@ out_srf_unref:
 	return ret;
 }
 
-static uint32_t vmw_stdu_bo_fifo_size(struct vmw_du_update_plane *update,
-				      uint32_t num_hits)
-{
-	return sizeof(struct vmw_stdu_dma) + sizeof(SVGA3dCopyBox) * num_hits +
-		sizeof(SVGA3dCmdSurfaceDMASuffix) +
-		sizeof(struct vmw_stdu_update);
-}
-
 static uint32_t vmw_stdu_bo_fifo_size_cpu(struct vmw_du_update_plane *update,
 					  uint32_t num_hits)
 {
@@ -1190,68 +1031,6 @@ static uint32_t vmw_stdu_bo_fifo_size_cpu(struct vmw_du_update_plane *update,
 		sizeof(struct vmw_stdu_update);
 }
 
-static uint32_t vmw_stdu_bo_populate_dma(struct vmw_du_update_plane  *update,
-					 void *cmd, uint32_t num_hits)
-{
-	struct vmw_screen_target_display_unit *stdu;
-	struct vmw_framebuffer_bo *vfbbo;
-	struct vmw_stdu_dma *cmd_dma = cmd;
-
-	stdu = container_of(update->du, typeof(*stdu), base);
-	vfbbo = container_of(update->vfb, typeof(*vfbbo), base);
-
-	cmd_dma->header.id = SVGA_3D_CMD_SURFACE_DMA;
-	cmd_dma->header.size = sizeof(cmd_dma->body) +
-		sizeof(struct SVGA3dCopyBox) * num_hits +
-		sizeof(SVGA3dCmdSurfaceDMASuffix);
-	vmw_bo_get_guest_ptr(&vfbbo->buffer->base, &cmd_dma->body.guest.ptr);
-	cmd_dma->body.guest.pitch = update->vfb->base.pitches[0];
-	cmd_dma->body.host.sid = stdu->display_srf->res.id;
-	cmd_dma->body.host.face = 0;
-	cmd_dma->body.host.mipmap = 0;
-	cmd_dma->body.transfer = SVGA3D_WRITE_HOST_VRAM;
-
-	return sizeof(*cmd_dma);
-}
-
-static uint32_t vmw_stdu_bo_populate_clip(struct vmw_du_update_plane  *update,
-					  void *cmd, struct drm_rect *clip,
-					  uint32_t fb_x, uint32_t fb_y)
-{
-	struct SVGA3dCopyBox *box = cmd;
-
-	box->srcx = fb_x;
-	box->srcy = fb_y;
-	box->srcz = 0;
-	box->x = clip->x1;
-	box->y = clip->y1;
-	box->z = 0;
-	box->w = drm_rect_width(clip);
-	box->h = drm_rect_height(clip);
-	box->d = 1;
-
-	return sizeof(*box);
-}
-
-static uint32_t vmw_stdu_bo_populate_update(struct vmw_du_update_plane  *update,
-					    void *cmd, struct drm_rect *bb)
-{
-	struct vmw_screen_target_display_unit *stdu;
-	struct vmw_framebuffer_bo *vfbbo;
-	SVGA3dCmdSurfaceDMASuffix *suffix = cmd;
-
-	stdu = container_of(update->du, typeof(*stdu), base);
-	vfbbo = container_of(update->vfb, typeof(*vfbbo), base);
-
-	suffix->suffixSize = sizeof(*suffix);
-	suffix->maximumOffset = vfbbo->buffer->base.base.size;
-
-	vmw_stdu_populate_update(&suffix[1], stdu->base.unit, bb->x1, bb->x2,
-				 bb->y1, bb->y2);
-
-	return sizeof(*suffix) + sizeof(struct vmw_stdu_update);
-}
-
 static uint32_t vmw_stdu_bo_pre_clip_cpu(struct vmw_du_update_plane  *update,
 					 void *cmd, uint32_t num_hits)
 {
@@ -1369,24 +1148,12 @@ static int vmw_stdu_plane_update_bo(struct vmw_private *dev_priv,
 	bo_update.base.vfb = vfb;
 	bo_update.base.out_fence = out_fence;
 	bo_update.base.mutex = NULL;
-	bo_update.base.cpu_blit = vmw_stdu_use_cpu_blit(dev_priv);
 	bo_update.base.intr = false;
 
-	/*
-	 * VM without 3D support don't have surface DMA command and framebuffer
-	 * should be moved out of VRAM.
-	 */
-	if (bo_update.base.cpu_blit) {
-		bo_update.base.calc_fifo_size = vmw_stdu_bo_fifo_size_cpu;
-		bo_update.base.pre_clip = vmw_stdu_bo_pre_clip_cpu;
-		bo_update.base.clip = vmw_stdu_bo_clip_cpu;
-		bo_update.base.post_clip = vmw_stdu_bo_populate_update_cpu;
-	} else {
-		bo_update.base.calc_fifo_size = vmw_stdu_bo_fifo_size;
-		bo_update.base.pre_clip = vmw_stdu_bo_populate_dma;
-		bo_update.base.clip = vmw_stdu_bo_populate_clip;
-		bo_update.base.post_clip = vmw_stdu_bo_populate_update;
-	}
+	bo_update.base.calc_fifo_size = vmw_stdu_bo_fifo_size_cpu;
+	bo_update.base.pre_clip = vmw_stdu_bo_pre_clip_cpu;
+	bo_update.base.clip = vmw_stdu_bo_clip_cpu;
+	bo_update.base.post_clip = vmw_stdu_bo_populate_update_cpu;
 
 	return vmw_du_helper_plane_update(&bo_update.base);
 }
@@ -1549,7 +1316,6 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv,
 	srf_update.vfb = vfb;
 	srf_update.out_fence = out_fence;
 	srf_update.mutex = &dev_priv->cmdbuf_mutex;
-	srf_update.cpu_blit = false;
 	srf_update.intr = true;
 
 	if (vfbs->is_bo_proxy)