git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.8-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 28 Oct 2016 08:28:09 +0000 (04:28 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 28 Oct 2016 08:28:09 +0000 (04:28 -0400)
added patches:
drm-prime-pass-the-right-module-owner-through-to-dma_buf_export.patch
drm-vc4-fix-races-when-the-cs-reads-from-render-targets.patch

queue-4.8/drm-prime-pass-the-right-module-owner-through-to-dma_buf_export.patch [new file with mode: 0644]
queue-4.8/drm-vc4-fix-races-when-the-cs-reads-from-render-targets.patch [new file with mode: 0644]

diff --git a/queue-4.8/drm-prime-pass-the-right-module-owner-through-to-dma_buf_export.patch b/queue-4.8/drm-prime-pass-the-right-module-owner-through-to-dma_buf_export.patch
new file mode 100644 (file)
index 0000000..242e1f8
--- /dev/null
@@ -0,0 +1,78 @@
+From 56a76c0123d6cb034975901c80fce2627338ef9e Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed, 5 Oct 2016 13:21:43 +0100
+Subject: drm/prime: Pass the right module owner through to dma_buf_export()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 56a76c0123d6cb034975901c80fce2627338ef9e upstream.
+
+dma_buf_export() adds a reference to the owning module to the dmabuf (to
+prevent the driver from being unloaded whilst a third party still refers
+to the dmabuf). However, drm_gem_prime_export() was passing its own
+THIS_MODULE (i.e. drm.ko) rather than the driver. Extract the right
+owner from the device->fops instead.
+
+v2: Use C99 initializers to zero out unset elements of
+dma_buf_export_info
+v3: Extract the right module from dev->fops.
+
+Testcase: igt/vgem_basic/unload
+Reported-by: Petri Latvala <petri.latvala@intel.com>
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Petri Latvala <petri.latvala@intel.com>
+Cc: Christian König <christian.koenig@amd.com>
+Tested-by: Petri Latvala <petri.latvala@intel.com>
+Reviewed-by: Petri Latvala <petri.latvala@intel.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: http://patchwork.freedesktop.org/patch/msgid/20161005122145.1507-1-chris@chris-wilson.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/drm_prime.c |   17 ++++++++++-------
+ include/drm/drmP.h          |    3 ++-
+ 2 files changed, 12 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/drm_prime.c
++++ b/drivers/gpu/drm/drm_prime.c
+@@ -335,14 +335,17 @@ static const struct dma_buf_ops drm_gem_
+  * using the PRIME helpers.
+  */
+ struct dma_buf *drm_gem_prime_export(struct drm_device *dev,
+-                                   struct drm_gem_object *obj, int flags)
++                                   struct drm_gem_object *obj,
++                                   int flags)
+ {
+-      DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+-
+-      exp_info.ops = &drm_gem_prime_dmabuf_ops;
+-      exp_info.size = obj->size;
+-      exp_info.flags = flags;
+-      exp_info.priv = obj;
++      struct dma_buf_export_info exp_info = {
++              .exp_name = KBUILD_MODNAME, /* white lie for debug */
++              .owner = dev->driver->fops->owner,
++              .ops = &drm_gem_prime_dmabuf_ops,
++              .size = obj->size,
++              .flags = flags,
++              .priv = obj,
++      };
+       if (dev->driver->gem_prime_res_obj)
+               exp_info.resv = dev->driver->gem_prime_res_obj(obj);
+--- a/include/drm/drmP.h
++++ b/include/drm/drmP.h
+@@ -938,7 +938,8 @@ static inline int drm_debugfs_remove_fil
+ #endif
+ extern struct dma_buf *drm_gem_prime_export(struct drm_device *dev,
+-              struct drm_gem_object *obj, int flags);
++                                          struct drm_gem_object *obj,
++                                          int flags);
+ extern int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+               struct drm_file *file_priv, uint32_t handle, uint32_t flags,
+               int *prime_fd);
diff --git a/queue-4.8/drm-vc4-fix-races-when-the-cs-reads-from-render-targets.patch b/queue-4.8/drm-vc4-fix-races-when-the-cs-reads-from-render-targets.patch
new file mode 100644 (file)
index 0000000..a473400
--- /dev/null
@@ -0,0 +1,225 @@
+From 7edabee06a5622190d59689a64f5e17d1c343cc3 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Tue, 27 Sep 2016 09:03:13 -0700
+Subject: drm/vc4: Fix races when the CS reads from render targets.
+
+From: Eric Anholt <eric@anholt.net>
+
+commit 7edabee06a5622190d59689a64f5e17d1c343cc3 upstream.
+
+With the introduction of bin/render pipelining, the previous job may
+not be completed when we start binning the next one.  If the previous
+job wrote our VBO, IB, or CS textures, then the binning stage might
+get stale or uninitialized results.
+
+Fixes the major rendering failure in glmark2 -b terrain.
+
+Signed-off-by: Eric Anholt <eric@anholt.net>
+Fixes: ca26d28bbaa3 ("drm/vc4: improve throughput by pipelining binning and rendering jobs")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/vc4/vc4_drv.h       |   19 ++++++++++++++++++-
+ drivers/gpu/drm/vc4/vc4_gem.c       |   13 +++++++++++++
+ drivers/gpu/drm/vc4/vc4_render_cl.c |   21 +++++++++++++++++----
+ drivers/gpu/drm/vc4/vc4_validate.c  |   17 ++++++++++++++---
+ 4 files changed, 62 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -122,9 +122,16 @@ to_vc4_dev(struct drm_device *dev)
+ struct vc4_bo {
+       struct drm_gem_cma_object base;
+-      /* seqno of the last job to render to this BO. */
++      /* seqno of the last job to render using this BO. */
+       uint64_t seqno;
++      /* seqno of the last job to use the RCL to write to this BO.
++       *
++       * Note that this doesn't include binner overflow memory
++       * writes.
++       */
++      uint64_t write_seqno;
++
+       /* List entry for the BO's position in either
+        * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
+        */
+@@ -216,6 +223,9 @@ struct vc4_exec_info {
+       /* Sequence number for this bin/render job. */
+       uint64_t seqno;
++      /* Latest write_seqno of any BO that binning depends on. */
++      uint64_t bin_dep_seqno;
++
+       /* Last current addresses the hardware was processing when the
+        * hangcheck timer checked on us.
+        */
+@@ -230,6 +240,13 @@ struct vc4_exec_info {
+       struct drm_gem_cma_object **bo;
+       uint32_t bo_count;
++      /* List of BOs that are being written by the RCL.  Other than
++       * the binner temporary storage, this is all the BOs written
++       * by the job.
++       */
++      struct drm_gem_cma_object *rcl_write_bo[4];
++      uint32_t rcl_write_bo_count;
++
+       /* Pointers for our position in vc4->job_list */
+       struct list_head head;
+--- a/drivers/gpu/drm/vc4/vc4_gem.c
++++ b/drivers/gpu/drm/vc4/vc4_gem.c
+@@ -471,6 +471,11 @@ vc4_update_bo_seqnos(struct vc4_exec_inf
+       list_for_each_entry(bo, &exec->unref_list, unref_head) {
+               bo->seqno = seqno;
+       }
++
++      for (i = 0; i < exec->rcl_write_bo_count; i++) {
++              bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
++              bo->write_seqno = seqno;
++      }
+ }
+ /* Queues a struct vc4_exec_info for execution.  If no job is
+@@ -673,6 +678,14 @@ vc4_get_bcl(struct drm_device *dev, stru
+               goto fail;
+       ret = vc4_validate_shader_recs(dev, exec);
++      if (ret)
++              goto fail;
++
++      /* Block waiting on any previous rendering into the CS's VBO,
++       * IB, or textures, so that pixels are actually written by the
++       * time we try to read them.
++       */
++      ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
+ fail:
+       drm_free_large(temp);
+--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
++++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
+@@ -45,6 +45,8 @@ struct vc4_rcl_setup {
+       struct drm_gem_cma_object *rcl;
+       u32 next_offset;
++
++      u32 next_write_bo_index;
+ };
+ static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
+@@ -407,6 +409,8 @@ static int vc4_rcl_msaa_surface_setup(st
+       if (!*obj)
+               return -EINVAL;
++      exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
++
+       if (surf->offset & 0xf) {
+               DRM_ERROR("MSAA write must be 16b aligned.\n");
+               return -EINVAL;
+@@ -417,7 +421,8 @@ static int vc4_rcl_msaa_surface_setup(st
+ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
+                                struct drm_gem_cma_object **obj,
+-                               struct drm_vc4_submit_rcl_surface *surf)
++                               struct drm_vc4_submit_rcl_surface *surf,
++                               bool is_write)
+ {
+       uint8_t tiling = VC4_GET_FIELD(surf->bits,
+                                      VC4_LOADSTORE_TILE_BUFFER_TILING);
+@@ -440,6 +445,9 @@ static int vc4_rcl_surface_setup(struct
+       if (!*obj)
+               return -EINVAL;
++      if (is_write)
++              exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
++
+       if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+               if (surf == &exec->args->zs_write) {
+                       DRM_ERROR("general zs write may not be a full-res.\n");
+@@ -542,6 +550,8 @@ vc4_rcl_render_config_surface_setup(stru
+       if (!*obj)
+               return -EINVAL;
++      exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
++
+       if (tiling > VC4_TILING_FORMAT_LT) {
+               DRM_ERROR("Bad tiling format\n");
+               return -EINVAL;
+@@ -599,15 +609,18 @@ int vc4_get_rcl(struct drm_device *dev,
+       if (ret)
+               return ret;
+-      ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
++      ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read,
++                                  false);
+       if (ret)
+               return ret;
+-      ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
++      ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read,
++                                  false);
+       if (ret)
+               return ret;
+-      ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
++      ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write,
++                                  true);
+       if (ret)
+               return ret;
+--- a/drivers/gpu/drm/vc4/vc4_validate.c
++++ b/drivers/gpu/drm/vc4/vc4_validate.c
+@@ -267,6 +267,9 @@ validate_indexed_prim_list(VALIDATE_ARGS
+       if (!ib)
+               return -EINVAL;
++      exec->bin_dep_seqno = max(exec->bin_dep_seqno,
++                                to_vc4_bo(&ib->base)->write_seqno);
++
+       if (offset > ib->base.size ||
+           (ib->base.size - offset) / index_size < length) {
+               DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
+@@ -555,8 +558,7 @@ static bool
+ reloc_tex(struct vc4_exec_info *exec,
+         void *uniform_data_u,
+         struct vc4_texture_sample_info *sample,
+-        uint32_t texture_handle_index)
+-
++        uint32_t texture_handle_index, bool is_cs)
+ {
+       struct drm_gem_cma_object *tex;
+       uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
+@@ -714,6 +716,11 @@ reloc_tex(struct vc4_exec_info *exec,
+       *validated_p0 = tex->paddr + p0;
++      if (is_cs) {
++              exec->bin_dep_seqno = max(exec->bin_dep_seqno,
++                                        to_vc4_bo(&tex->base)->write_seqno);
++      }
++
+       return true;
+  fail:
+       DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+@@ -835,7 +842,8 @@ validate_gl_shader_rec(struct drm_device
+                       if (!reloc_tex(exec,
+                                      uniform_data_u,
+                                      &validated_shader->texture_samples[tex],
+-                                     texture_handles_u[tex])) {
++                                     texture_handles_u[tex],
++                                     i == 2)) {
+                               return -EINVAL;
+                       }
+               }
+@@ -867,6 +875,9 @@ validate_gl_shader_rec(struct drm_device
+               uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
+               uint32_t max_index;
++              exec->bin_dep_seqno = max(exec->bin_dep_seqno,
++                                        to_vc4_bo(&vbo->base)->write_seqno);
++
+               if (state->addr & 0x8)
+                       stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;