diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 316 |
1 files changed, 223 insertions, 93 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 94c84d744100..a546a71fb060 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -31,6 +31,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "intel_drv.h" +#include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/pci.h> @@ -359,8 +360,7 @@ i915_gem_shmem_pread_fast(struct drm_device *dev, if ((page_offset + remain) > PAGE_SIZE) page_length = PAGE_SIZE - page_offset; - page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, - GFP_HIGHUSER | __GFP_RECLAIMABLE); + page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); @@ -463,8 +463,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, - GFP_HIGHUSER | __GFP_RECLAIMABLE); + page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; @@ -797,8 +796,7 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev, if ((page_offset + remain) > PAGE_SIZE) page_length = PAGE_SIZE - page_offset; - page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, - GFP_HIGHUSER | __GFP_RECLAIMABLE); + page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); @@ -907,8 +905,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, - GFP_HIGHUSER | __GFP_RECLAIMABLE); + page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; @@ -1219,11 +1216,11 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = i915_gem_object_bind_to_gtt(obj, 0, true); if (ret) goto unlock; - } - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) - goto unlock; + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto unlock; + } if (obj->tiling_mode == I915_TILING_NONE) ret = i915_gem_object_put_fence(obj); @@ -1377,25 +1374,24 @@ i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj) } static uint32_t -i915_gem_get_gtt_size(struct drm_i915_gem_object *obj) +i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) { - struct drm_device *dev = obj->base.dev; - uint32_t size; + uint32_t gtt_size; if (INTEL_INFO(dev)->gen >= 4 || - obj->tiling_mode == I915_TILING_NONE) - return obj->base.size; + tiling_mode == I915_TILING_NONE) + return size; /* Previous chips need a power-of-two fence region when tiling */ if (INTEL_INFO(dev)->gen == 3) - size = 1024*1024; + gtt_size = 1024*1024; else - size = 512*1024; + gtt_size = 512*1024; - while (size < obj->base.size) - size <<= 1; + while (gtt_size < size) + gtt_size <<= 1; - return size; + return gtt_size; } /** @@ -1406,59 +1402,52 @@ i915_gem_get_gtt_size(struct drm_i915_gem_object *obj) * potential fence register mapping. */ static uint32_t -i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj) +i915_gem_get_gtt_alignment(struct drm_device *dev, + uint32_t size, + int tiling_mode) { - struct drm_device *dev = obj->base.dev; - /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ if (INTEL_INFO(dev)->gen >= 4 || - obj->tiling_mode == I915_TILING_NONE) + tiling_mode == I915_TILING_NONE) return 4096; /* * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. */ - return i915_gem_get_gtt_size(obj); + return i915_gem_get_gtt_size(dev, size, tiling_mode); } /** * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an * unfenced object - * @obj: object to check + * @dev: the device + * @size: size of the object + * @tiling_mode: tiling mode of the object * * Return the required GTT alignment for an object, only taking into account * unfenced tiled surface requirements. */ uint32_t -i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj) +i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, + uint32_t size, + int tiling_mode) { - struct drm_device *dev = obj->base.dev; - int tile_height; - /* * Minimum alignment is 4k (GTT page size) for sane hw. */ if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || - obj->tiling_mode == I915_TILING_NONE) + tiling_mode == I915_TILING_NONE) return 4096; - /* - * Older chips need unfenced tiled buffers to be aligned to the left - * edge of an even tile row (where tile rows are counted as if the bo is - * placed in a fenced gtt region). + /* Previous hardware however needs to be aligned to a power-of-two + * tile height. The simplest method for determining this is to reuse + * the power-of-tile object size. */ - if (IS_GEN2(dev)) - tile_height = 16; - else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) - tile_height = 32; - else - tile_height = 8; - - return tile_height * obj->stride * 2; + return i915_gem_get_gtt_size(dev, size, tiling_mode); } int @@ -1558,12 +1547,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, inode = obj->base.filp->f_path.dentry->d_inode; mapping = inode->i_mapping; + gfpmask |= mapping_gfp_mask(mapping); + for (i = 0; i < page_count; i++) { - page = read_cache_page_gfp(mapping, i, - GFP_HIGHUSER | - __GFP_COLD | - __GFP_RECLAIMABLE | - gfpmask); + page = shmem_read_mapping_page_gfp(mapping, i, gfpmask); if (IS_ERR(page)) goto err_pages; @@ -1701,13 +1688,10 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) /* Our goal here is to return as much of the memory as * is possible back to the system as we are called from OOM. * To do this we must instruct the shmfs to drop all of its - * backing pages, *now*. Here we mirror the actions taken - * when by shmem_delete_inode() to release the backing store. + * backing pages, *now*. */ inode = obj->base.filp->f_path.dentry->d_inode; - truncate_inode_pages(inode->i_mapping, 0); - if (inode->i_op->truncate_range) - inode->i_op->truncate_range(inode, 0, (loff_t)-1); + shmem_truncate_range(inode, 0, (loff_t)-1); obj->madv = __I915_MADV_PURGED; } @@ -1779,8 +1763,11 @@ i915_add_request(struct intel_ring_buffer *ring, ring->outstanding_lazy_request = false; if (!dev_priv->mm.suspended) { - mod_timer(&dev_priv->hangcheck_timer, - jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + if (i915_enable_hangcheck) { + mod_timer(&dev_priv->hangcheck_timer, + jiffies + + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + } if (was_empty) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); @@ -2080,8 +2067,8 @@ i915_wait_request(struct intel_ring_buffer *ring, if (!ier) { DRM_ERROR("something (likely vbetool) disabled " "interrupts, re-enabling\n"); - i915_driver_irq_preinstall(ring->dev); - i915_driver_irq_postinstall(ring->dev); + ring->dev->driver->irq_preinstall(ring->dev); + ring->dev->driver->irq_postinstall(ring->dev); } trace_i915_gem_request_wait_begin(ring, seqno); @@ -2151,6 +2138,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) return 0; } +static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) +{ + u32 old_write_domain, old_read_domains; + + /* Act a barrier for all accesses through the GTT */ + mb(); + + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + + if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + return; + + old_read_domains = obj->base.read_domains; + old_write_domain = obj->base.write_domain; + + obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; + obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); +} + /** * Unbinds an object from the GTT aperture. */ @@ -2167,23 +2178,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return -EINVAL; } - /* blow away mappings if mapped through GTT */ - i915_gem_release_mmap(obj); - - /* Move the object to the CPU domain to ensure that - * any possible CPU writes while it's not in the GTT - * are flushed when we go to remap it. This will - * also ensure that all pending GPU writes are finished - * before we unbind. - */ - ret = i915_gem_object_set_to_cpu_domain(obj, 1); + ret = i915_gem_object_finish_gpu(obj); if (ret == -ERESTARTSYS) return ret; /* Continue on if we fail due to EIO, the GPU is hung so we * should be safe and we need to cleanup or else we might * cause memory corruption through use-after-free. */ + + i915_gem_object_finish_gtt(obj); + + /* Move the object to the CPU domain to ensure that + * any possible CPU writes while it's not in the GTT + * are flushed when we go to remap it. + */ + if (ret == 0) + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret == -ERESTARTSYS) + return ret; if (ret) { + /* In the event of a disaster, abandon all caches and + * hope for the best. + */ i915_gem_clflush_object(obj); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -2752,9 +2768,16 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, return -EINVAL; } - fence_size = i915_gem_get_gtt_size(obj); - fence_alignment = i915_gem_get_gtt_alignment(obj); - unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj); + fence_size = i915_gem_get_gtt_size(dev, + obj->base.size, + obj->tiling_mode); + fence_alignment = i915_gem_get_gtt_alignment(dev, + obj->base.size, + obj->tiling_mode); + unfenced_alignment = + i915_gem_get_unfenced_gtt_alignment(dev, + obj->base.size, + obj->tiling_mode); if (alignment == 0) alignment = map_and_fenceable ? fence_alignment : @@ -2926,8 +2949,6 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) */ wmb(); - i915_gem_release_mmap(obj); - old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3007,51 +3028,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) return 0; } +int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level) +{ + int ret; + + if (obj->cache_level == cache_level) + return 0; + + if (obj->pin_count) { + DRM_DEBUG("can not change the cache level of pinned objects\n"); + return -EBUSY; + } + + if (obj->gtt_space) { + ret = i915_gem_object_finish_gpu(obj); + if (ret) + return ret; + + i915_gem_object_finish_gtt(obj); + + /* Before SandyBridge, you could not use tiling or fence + * registers with snooped memory, so relinquish any fences + * currently pointing to our region in the aperture. + */ + if (INTEL_INFO(obj->base.dev)->gen < 6) { + ret = i915_gem_object_put_fence(obj); + if (ret) + return ret; + } + + i915_gem_gtt_rebind_object(obj, cache_level); + } + + if (cache_level == I915_CACHE_NONE) { + u32 old_read_domains, old_write_domain; + + /* If we're coming from LLC cached, then we haven't + * actually been tracking whether the data is in the + * CPU cache or not, since we only allow one bit set + * in obj->write_domain and have been skipping the clflushes. + * Just set it to the CPU cache for now. + */ + WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); + WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); + + old_read_domains = obj->base.read_domains; + old_write_domain = obj->base.write_domain; + + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + } + + obj->cache_level = cache_level; + return 0; +} + /* - * Prepare buffer for display plane. Use uninterruptible for possible flush - * wait, as in modesetting process we're not supposed to be interrupted. + * Prepare buffer for display plane (scanout, cursors, etc). + * Can be called from an uninterruptible phase (modesetting) and allows + * any flushes to be pipelined (for pageflips). + * + * For the display plane, we want to be in the GTT but out of any write + * domains. So in many ways this looks like set_to_gtt_domain() apart from the + * ability to pipeline the waits, pinning and any additional subtleties + * that may differentiate the display plane from ordinary buffers. */ int -i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, +i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + u32 alignment, struct intel_ring_buffer *pipelined) { - uint32_t old_read_domains; + u32 old_read_domains, old_write_domain; int ret; - /* Not valid to be called on unbound objects. */ - if (obj->gtt_space == NULL) - return -EINVAL; - ret = i915_gem_object_flush_gpu_write_domain(obj); if (ret) return ret; - - /* Currently, we are always called from an non-interruptible context. */ if (pipelined != obj->ring) { ret = i915_gem_object_wait_rendering(obj); - if (ret) + if (ret == -ERESTARTSYS) return ret; } + /* The display engine is not coherent with the LLC cache on gen6. As + * a result, we make sure that the pinning that is about to occur is + * done with uncached PTEs. This is lowest common denominator for all + * chipsets. + * + * However for gen6+, we could do better by using the GFDT bit instead + * of uncaching, which would allow us to flush all the LLC-cached data + * with that bit in the PTE to main memory with just one PIPE_CONTROL. + */ + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + if (ret) + return ret; + + /* As the user may map the buffer once pinned in the display plane + * (e.g. libkms for the bootup splash), we have to ensure that we + * always use map_and_fenceable for all scanout buffers. + */ + ret = i915_gem_object_pin(obj, alignment, true); + if (ret) + return ret; + i915_gem_object_flush_cpu_write_domain(obj); + old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->base.read_domains |= I915_GEM_DOMAIN_GTT; trace_i915_gem_object_change_domain(obj, old_read_domains, - obj->base.write_domain); + old_write_domain); return 0; } int -i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj) +i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) { int ret; - if (!obj->active) + if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) return 0; if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { @@ -3060,6 +3169,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj) return ret; } + /* Ensure that we invalidate the GPU's caches and TLBs. */ + obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; + return i915_gem_object_wait_rendering(obj); } @@ -3567,6 +3679,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; + struct address_space *mapping; obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (obj == NULL) @@ -3577,12 +3690,31 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, return NULL; } + mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; + mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); + i915_gem_info_add_obj(dev_priv, size); obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + if (IS_GEN6(dev)) { + /* On Gen6, we can have the GPU use the LLC (the CPU + * cache) for about a 10% performance improvement + * compared to uncached. Graphics requests other than + * display scanout are coherent with the CPU in + * accessing this cache. This means in this mode we + * don't need to clflush on the CPU side, and on the + * GPU side we only need to flush internal caches to + * get data visible to the CPU. + * + * However, we maintain the display planes as UC, and so + * need to rebind when first used as such. + */ + obj->cache_level = I915_CACHE_LLC; + } else + obj->cache_level = I915_CACHE_NONE; + obj->base.driver_private = NULL; obj->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj->mm_list); @@ -3952,8 +4084,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev, page_count = obj->base.size / PAGE_SIZE; for (i = 0; i < page_count; i++) { - struct page *page = read_cache_page_gfp(mapping, i, - GFP_HIGHUSER | __GFP_RECLAIMABLE); + struct page *page = shmem_read_mapping_page(mapping, i); if (!IS_ERR(page)) { char *dst = kmap_atomic(page); memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); @@ -4014,8 +4145,7 @@ i915_gem_attach_phys_object(struct drm_device *dev, struct page *page; char *dst, *src; - page = read_cache_page_gfp(mapping, i, - GFP_HIGHUSER | __GFP_RECLAIMABLE); + page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) return PTR_ERR(page); |