diff options
author | Francisco Jerez <currojerez@riseup.net> | 2010-10-02 17:04:46 +0200 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2010-10-05 09:59:28 +1000 |
commit | 8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba (patch) | |
tree | 1b572625051c09fccca446db2966f6b07fa837f8 /drivers | |
parent | 647988175234a733cc0d4cf968949344803a77a7 (diff) | |
download | lwn-8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba.tar.gz lwn-8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba.zip |
drm/nv50: Fix large 3D performance regression caused by the interchannel sync patches.
Reported-by: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Maarten Maathuis <madman2003@gmail.com>
Tested-by: Xavier Chantry <chantry.xavier@gmail.com>
Tested-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_drv.h | 1 |
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_fence.c | 46 |
2 files changed, 40 insertions, 7 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index a308c132c19b..3a07e580d27a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -1473,6 +1473,7 @@ nv_match_device(struct drm_device *dev, unsigned device, #define NV_SW_SEMAPHORE_OFFSET 0x00000064 #define NV_SW_SEMAPHORE_ACQUIRE 0x00000068 #define NV_SW_SEMAPHORE_RELEASE 0x0000006c +#define NV_SW_YIELD 0x00000080 #define NV_SW_DMA_VBLSEM 0x0000018c #define NV_SW_VBLSEM_OFFSET 0x00000400 #define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404 diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index f42675cc9d14..441b12420bb1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -308,21 +308,54 @@ emit_semaphore(struct nouveau_channel *chan, int method, { struct drm_nouveau_private *dev_priv = sema->dev->dev_private; struct nouveau_fence *fence; + bool smart = (dev_priv->card_type >= NV_50); int ret; - ret = RING_SPACE(chan, dev_priv->card_type >= NV_50 ? 6 : 4); + ret = RING_SPACE(chan, smart ? 8 : 4); if (ret) return ret; - if (dev_priv->card_type >= NV_50) { + if (smart) { BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); OUT_RING(chan, NvSema); } BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); OUT_RING(chan, sema->mem->start); + + if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) { + /* + * NV50 tries to be too smart and context-switch + * between semaphores instead of doing a "first come, + * first served" strategy like previous cards + * do. + * + * That's bad because the ACQUIRE latency can get as + * large as the PFIFO context time slice in the + * typical DRI2 case where you have several + * outstanding semaphores at the same moment. + * + * If we're going to ACQUIRE, force the card to + * context switch before, just in case the matching + * RELEASE is already scheduled to be executed in + * another channel. 
+ */ + BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); + OUT_RING(chan, 0); + } + BEGIN_RING(chan, NvSubSw, method, 1); OUT_RING(chan, 1); + if (smart && method == NV_SW_SEMAPHORE_RELEASE) { + /* + * Force the card to context switch, there may be + * another channel waiting for the semaphore we just + * released. + */ + BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); + OUT_RING(chan, 0); + } + /* Delay semaphore destruction until its work is done */ ret = nouveau_fence_new(chan, &fence, true); if (ret) @@ -355,14 +388,13 @@ nouveau_fence_sync(struct nouveau_fence *fence, return nouveau_fence_wait(fence, NULL, false, false); } - /* Signal the semaphore from chan */ - ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); - if (ret) - goto out; - /* Make wchan wait until it gets signalled */ ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); + if (ret) + goto out; + /* Signal the semaphore from chan */ + ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); out: kref_put(&sema->ref, free_semaphore); return ret; |