git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author    Sasha Levin <sashal@kernel.org>
          Mon, 2 Aug 2021 22:03:21 +0000 (18:03 -0400)
committer Sasha Levin <sashal@kernel.org>
          Mon, 2 Aug 2021 22:03:21 +0000 (18:03 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.10/btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch [new file with mode: 0644]
queue-5.10/btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch [new file with mode: 0644]
queue-5.10/drm-i915-revert-drm-i915-gem-asynchronous-cmdparser.patch [new file with mode: 0644]
queue-5.10/revert-drm-i915-propagate-errors-on-awaiting-already.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch b/queue-5.10/btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch
new file mode 100644 (file)
index 0000000..5f58062
--- /dev/null
@@ -0,0 +1,106 @@
+From 2db3c15efa89ea9d597d19c148c516060df49e57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jul 2021 11:24:43 +0100
+Subject: btrfs: fix lost inode on log replay after mix of fsync, rename and
+ inode eviction
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit ecc64fab7d49c678e70bd4c35fe64d2ab3e3d212 ]
+
+When checking if we need to log the new name of a renamed inode, we are
+checking if the inode and its parent inode have been logged before, and if
+not we don't log the new name. The check however is buggy, as it directly
+compares the logged_trans field of the inodes versus the ID of the current
+transaction. The problem is that logged_trans is a transient field, only
+stored in memory and never persisted in the inode item, so if an inode
+was logged before, evicted and reloaded, its logged_trans field is set to
+a value of 0, meaning the check will return false and the new name of the
+renamed inode is not logged. If the old parent directory was previously
+fsynced and we deleted the logged directory entries corresponding to the
+old name, we end up with a log that when replayed will delete the renamed
+inode.
+
+The following example triggers the problem:
+
+  $ mkfs.btrfs -f /dev/sdc
+  $ mount /dev/sdc /mnt
+
+  $ mkdir /mnt/A
+  $ mkdir /mnt/B
+  $ echo -n "hello world" > /mnt/A/foo
+
+  $ sync
+
+  # Add some new file to A and fsync directory A.
+  $ touch /mnt/A/bar
+  $ xfs_io -c "fsync" /mnt/A
+
+  # Now trigger inode eviction. We are only interested in triggering
+  # eviction for the inode of directory A.
+  $ echo 2 > /proc/sys/vm/drop_caches
+
+  # Move foo from directory A to directory B.
+  # This deletes the directory entries for foo in A from the log, and
+  # does not add the new name for foo in directory B to the log, because
+  # logged_trans of A is 0, which is less than the current transaction ID.
+  $ mv /mnt/A/foo /mnt/B/foo
+
+  # Now make an fsync to anything except A, B or any file inside them,
+  # like for example create a file at the root directory and fsync this
+  # new file. This syncs the log that contains all the changes done by
+  # previous rename operation.
+  $ touch /mnt/baz
+  $ xfs_io -c "fsync" /mnt/baz
+
+  <power fail>
+
+  # Mount the filesystem and replay the log.
+  $ mount /dev/sdc /mnt
+
+  # Check the filesystem content.
+  $ ls -1R /mnt
+  /mnt/:
+  A
+  B
+  baz
+
+  /mnt/A:
+  bar
+
+  /mnt/B:
+  $
+
+  # File foo is gone, it's neither in A/ nor in B/.
+
+Fix this by using the inode_logged() helper at btrfs_log_new_name(), which
+safely checks if an inode was logged before in the current transaction.
+
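+For illustration only, a tiny userspace model of the two checks (this is
+not kernel code: struct fake_inode, should_log_new_name_old/new and the
+in_log_tree field are invented names, with in_log_tree standing in,
+conceptually, for whatever fallback state lets inode_logged() still treat
+the inode as logged after eviction has reset logged_trans to 0):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  /* Stand-in for struct btrfs_inode: logged_trans lives only in memory,
+   * so an inode that was evicted and reloaded starts again at 0. */
+  struct fake_inode {
+          unsigned long long logged_trans;
+          bool in_log_tree;   /* conceptual: log state that survives eviction */
+  };
+
+  /* Old check: trusts the transient field alone. */
+  static bool should_log_new_name_old(const struct fake_inode *inode,
+                                      unsigned long long transid)
+  {
+          return inode->logged_trans >= transid;
+  }
+
+  /* Conceptual fixed check: also consult state that survives eviction,
+   * which is what makes inode_logged() safer than the raw comparison. */
+  static bool should_log_new_name_new(const struct fake_inode *inode,
+                                      unsigned long long transid)
+  {
+          return inode->logged_trans == transid || inode->in_log_tree;
+  }
+
+  int main(void)
+  {
+          /* Directory A: logged in transaction 5, then evicted and reloaded. */
+          struct fake_inode dir_a = { .logged_trans = 0, .in_log_tree = true };
+
+          printf("old check logs the new name: %d\n",
+                 should_log_new_name_old(&dir_a, 5));   /* 0: name is lost */
+          printf("new check logs the new name: %d\n",
+                 should_log_new_name_new(&dir_a, 5));   /* 1: name is logged */
+          return 0;
+  }
+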
+A test case for fstests will follow soon.
+
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index d3a2bec931ca..f36928efcf92 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6456,8 +6456,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+        * if this inode hasn't been logged and directory we're renaming it
+        * from hasn't been logged, we don't need to log it
+        */
+-      if (inode->logged_trans < trans->transid &&
+-          (!old_dir || old_dir->logged_trans < trans->transid))
++      if (!inode_logged(trans, inode) &&
++          (!old_dir || !inode_logged(trans, old_dir)))
+               return;
+       btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
+-- 
+2.30.2
+
diff --git a/queue-5.10/btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch b/queue-5.10/btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch
new file mode 100644 (file)
index 0000000..d60686a
--- /dev/null
@@ -0,0 +1,93 @@
+From 2ba947720296f96acbf3ee27ac00282f070e3d54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Nov 2020 12:19:23 +0000
+Subject: btrfs: fix race causing unnecessary inode logging during link and
+ rename
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit de53d892e5c51dfa0a158e812575a75a6c991f39 ]
+
+When we are doing a rename or a link operation for an inode that was logged
+in the previous transaction and that transaction is still committing, we
+have a time window where we incorrectly consider that the inode was logged
+previously in the current transaction and therefore decide to log it to
+update it in the log. The following steps give an example on how this
+happens during a link operation:
+
+1) Inode X is logged in transaction 1000, so its logged_trans field is set
+   to 1000;
+
+2) Task A starts to commit transaction 1000;
+
+3) The state of transaction 1000 is changed to TRANS_STATE_UNBLOCKED;
+
+4) Task B starts a link operation for inode X, and as a consequence it
+   starts transaction 1001;
+
+5) Task A is still committing transaction 1000, therefore the value stored
+   at fs_info->last_trans_committed is still 999;
+
+6) Task B calls btrfs_log_new_name(), it reads a value of 999 from
+   fs_info->last_trans_committed and because the logged_trans field of
+   inode X has a value of 1000, the function does not return immediately,
+   instead it proceeds to logging the inode, which should not happen
+   because the inode was logged in the previous transaction (1000) and
+   not in the current one (1001).
+
+This is not a functional problem, just wasted time and space logging an
+inode that does not need to be logged, contributing to higher latency
+for link and rename operations.
+
+So fix this by comparing the inodes' logged_trans field with the
+generation of the current transaction instead of comparing with the value
+stored in fs_info->last_trans_committed.
+
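+As a rough standalone illustration of the two comparisons, using the
+numbers from the steps above (plain userspace C, not the kernel code
+paths; the variable names are invented):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          /* Numbers from the example: inode X was logged in transaction
+           * 1000, which is still committing, so last_trans_committed has
+           * not been bumped yet; the link runs in transaction 1001. */
+          unsigned long long logged_trans = 1000;
+          unsigned long long last_trans_committed = 999;
+          unsigned long long current_transid = 1001;
+
+          /* Old check: "logged after the last committed transaction?"
+           * True during the race, so the inode is logged again needlessly. */
+          bool old_check_logs = logged_trans > last_trans_committed;
+
+          /* New check: "logged in this very transaction?"
+           * False here, so btrfs_log_new_name() can return early. */
+          bool new_check_logs = logged_trans >= current_transid;
+
+          printf("old comparison logs the inode: %d\n", old_check_logs);
+          printf("new comparison logs the inode: %d\n", new_check_logs);
+          return 0;
+  }
+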
+This case is often hit when running dbench for a long enough duration, as
+it does lots of rename operations.
+
+This patch belongs to a patch set that is comprised of the following
+patches:
+
+  btrfs: fix race causing unnecessary inode logging during link and rename
+  btrfs: fix race that results in logging old extents during a fast fsync
+  btrfs: fix race that causes unnecessary logging of ancestor inodes
+  btrfs: fix race that makes inode logging fallback to transaction commit
+  btrfs: fix race leading to unnecessary transaction commit when logging inode
+  btrfs: do not block inode logging for so long during transaction commit
+
+Performance results are mentioned in the change log of the last patch.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 4b913de2f24f..d3a2bec931ca 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6443,7 +6443,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+                       struct btrfs_inode *inode, struct btrfs_inode *old_dir,
+                       struct dentry *parent)
+ {
+-      struct btrfs_fs_info *fs_info = trans->fs_info;
+       struct btrfs_log_ctx ctx;
+       /*
+@@ -6457,8 +6456,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+        * if this inode hasn't been logged and directory we're renaming it
+        * from hasn't been logged, we don't need to log it
+        */
+-      if (inode->logged_trans <= fs_info->last_trans_committed &&
+-          (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
++      if (inode->logged_trans < trans->transid &&
++          (!old_dir || old_dir->logged_trans < trans->transid))
+               return;
+       btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
+-- 
+2.30.2
+
diff --git a/queue-5.10/drm-i915-revert-drm-i915-gem-asynchronous-cmdparser.patch b/queue-5.10/drm-i915-revert-drm-i915-gem-asynchronous-cmdparser.patch
new file mode 100644 (file)
index 0000000..d220a92
--- /dev/null
@@ -0,0 +1,345 @@
+From 0eaa17f57395147b97fc55a4a161f3dd6a6c00eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Aug 2021 13:48:01 -0500
+Subject: drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"
+
+From: Jason Ekstrand <jason@jlekstrand.net>
+
+commit c9d9fdbc108af8915d3f497bbdf3898bf8f321b8 upstream.
+
+This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
+justification for this commit in the git history was a vague comment
+about getting it out from under the struct_mutex.  While this may
+improve perf for some workloads on Gen7 platforms where we rely on the
+command parser for features such as indirect rendering, no numbers were
+provided to prove such an improvement.  It claims to have closed two
+gitlab/bugzilla issues but gives no explanation whatsoever as to why or
+what bug it fixes.
+
+Meanwhile, by moving command parsing off to an async callback, it leaves
+us with a problem of what to do on error.  When things were synchronous,
+EXECBUFFER2 would fail with an error code if parsing failed.  When
+moving it to async, we needed another way to handle that error and the
+solution employed was to set an error on the dma_fence and then trust
+that said error gets propagated to the client eventually.  Moving back
+to synchronous will help us untangle the fence error propagation mess.
+
+This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
+pinning to execbuffer") which is a refactor of some of our allocation
+paths for asynchronous parsing.  Now that everything is synchronous, we
+don't need it.
+
+v2 (Daniel Vetter):
+ - Add stable Cc and Fixes tag
+
+Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
+Cc: <stable@vger.kernel.org> # v5.6+
+Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences")
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-2-jason@jlekstrand.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 164 +-----------------
+ drivers/gpu/drm/i915/i915_cmd_parser.c        |  28 +--
+ 2 files changed, 25 insertions(+), 167 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+index bd3046e5a934..e5ac0936a587 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+@@ -24,7 +24,6 @@
+ #include "i915_gem_clflush.h"
+ #include "i915_gem_context.h"
+ #include "i915_gem_ioctls.h"
+-#include "i915_sw_fence_work.h"
+ #include "i915_trace.h"
+ #include "i915_user_extensions.h"
+@@ -1401,6 +1400,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
+               int err;
+               struct intel_engine_cs *engine = eb->engine;
++              /* If we need to copy for the cmdparser, we will stall anyway */
++              if (eb_use_cmdparser(eb))
++                      return ERR_PTR(-EWOULDBLOCK);
++
+               if (!reloc_can_use_engine(engine)) {
+                       engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+                       if (!engine)
+@@ -2267,152 +2270,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
+       return vma;
+ }
+-struct eb_parse_work {
+-      struct dma_fence_work base;
+-      struct intel_engine_cs *engine;
+-      struct i915_vma *batch;
+-      struct i915_vma *shadow;
+-      struct i915_vma *trampoline;
+-      unsigned long batch_offset;
+-      unsigned long batch_length;
+-};
+-
+-static int __eb_parse(struct dma_fence_work *work)
+-{
+-      struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+-
+-      return intel_engine_cmd_parser(pw->engine,
+-                                     pw->batch,
+-                                     pw->batch_offset,
+-                                     pw->batch_length,
+-                                     pw->shadow,
+-                                     pw->trampoline);
+-}
+-
+-static void __eb_parse_release(struct dma_fence_work *work)
+-{
+-      struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+-
+-      if (pw->trampoline)
+-              i915_active_release(&pw->trampoline->active);
+-      i915_active_release(&pw->shadow->active);
+-      i915_active_release(&pw->batch->active);
+-}
+-
+-static const struct dma_fence_work_ops eb_parse_ops = {
+-      .name = "eb_parse",
+-      .work = __eb_parse,
+-      .release = __eb_parse_release,
+-};
+-
+-static inline int
+-__parser_mark_active(struct i915_vma *vma,
+-                   struct intel_timeline *tl,
+-                   struct dma_fence *fence)
+-{
+-      struct intel_gt_buffer_pool_node *node = vma->private;
+-
+-      return i915_active_ref(&node->active, tl->fence_context, fence);
+-}
+-
+-static int
+-parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
+-{
+-      int err;
+-
+-      mutex_lock(&tl->mutex);
+-
+-      err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
+-      if (err)
+-              goto unlock;
+-
+-      if (pw->trampoline) {
+-              err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
+-              if (err)
+-                      goto unlock;
+-      }
+-
+-unlock:
+-      mutex_unlock(&tl->mutex);
+-      return err;
+-}
+-
+-static int eb_parse_pipeline(struct i915_execbuffer *eb,
+-                           struct i915_vma *shadow,
+-                           struct i915_vma *trampoline)
+-{
+-      struct eb_parse_work *pw;
+-      int err;
+-
+-      GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
+-      GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
+-
+-      pw = kzalloc(sizeof(*pw), GFP_KERNEL);
+-      if (!pw)
+-              return -ENOMEM;
+-
+-      err = i915_active_acquire(&eb->batch->vma->active);
+-      if (err)
+-              goto err_free;
+-
+-      err = i915_active_acquire(&shadow->active);
+-      if (err)
+-              goto err_batch;
+-
+-      if (trampoline) {
+-              err = i915_active_acquire(&trampoline->active);
+-              if (err)
+-                      goto err_shadow;
+-      }
+-
+-      dma_fence_work_init(&pw->base, &eb_parse_ops);
+-
+-      pw->engine = eb->engine;
+-      pw->batch = eb->batch->vma;
+-      pw->batch_offset = eb->batch_start_offset;
+-      pw->batch_length = eb->batch_len;
+-      pw->shadow = shadow;
+-      pw->trampoline = trampoline;
+-
+-      /* Mark active refs early for this worker, in case we get interrupted */
+-      err = parser_mark_active(pw, eb->context->timeline);
+-      if (err)
+-              goto err_commit;
+-
+-      err = dma_resv_reserve_shared(pw->batch->resv, 1);
+-      if (err)
+-              goto err_commit;
+-
+-      /* Wait for all writes (and relocs) into the batch to complete */
+-      err = i915_sw_fence_await_reservation(&pw->base.chain,
+-                                            pw->batch->resv, NULL, false,
+-                                            0, I915_FENCE_GFP);
+-      if (err < 0)
+-              goto err_commit;
+-
+-      /* Keep the batch alive and unwritten as we parse */
+-      dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
+-
+-      /* Force execution to wait for completion of the parser */
+-      dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
+-
+-      dma_fence_work_commit_imm(&pw->base);
+-      return 0;
+-
+-err_commit:
+-      i915_sw_fence_set_error_once(&pw->base.chain, err);
+-      dma_fence_work_commit_imm(&pw->base);
+-      return err;
+-
+-err_shadow:
+-      i915_active_release(&shadow->active);
+-err_batch:
+-      i915_active_release(&eb->batch->vma->active);
+-err_free:
+-      kfree(pw);
+-      return err;
+-}
+-
+ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+ {
+       /*
+@@ -2494,13 +2351,11 @@ static int eb_parse(struct i915_execbuffer *eb)
+               eb->batch_flags |= I915_DISPATCH_SECURE;
+       }
+-      batch = eb_dispatch_secure(eb, shadow);
+-      if (IS_ERR(batch)) {
+-              err = PTR_ERR(batch);
+-              goto err_trampoline;
+-      }
+-
+-      err = eb_parse_pipeline(eb, shadow, trampoline);
++      err = intel_engine_cmd_parser(eb->engine,
++                                    eb->batch->vma,
++                                    eb->batch_start_offset,
++                                    eb->batch_len,
++                                    shadow, trampoline);
+       if (err)
+               goto err_unpin_batch;
+@@ -2522,7 +2377,6 @@ static int eb_parse(struct i915_execbuffer *eb)
+ err_unpin_batch:
+       if (batch)
+               i915_vma_unpin(batch);
+-err_trampoline:
+       if (trampoline)
+               i915_vma_unpin(trampoline);
+ err_shadow:
+diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
+index 9ce174950340..635aae9145cb 100644
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -1143,27 +1143,30 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
+ /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
+ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
+                      struct drm_i915_gem_object *src_obj,
+-                     unsigned long offset, unsigned long length)
++                     u32 offset, u32 length)
+ {
+-      bool needs_clflush;
++      unsigned int src_needs_clflush;
++      unsigned int dst_needs_clflush;
+       void *dst, *src;
+       int ret;
++      ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
++      if (ret)
++              return ERR_PTR(ret);
++
+       dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
++      i915_gem_object_finish_access(dst_obj);
+       if (IS_ERR(dst))
+               return dst;
+-      ret = i915_gem_object_pin_pages(src_obj);
++      ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+       if (ret) {
+               i915_gem_object_unpin_map(dst_obj);
+               return ERR_PTR(ret);
+       }
+-      needs_clflush =
+-              !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
+-
+       src = ERR_PTR(-ENODEV);
+-      if (needs_clflush && i915_has_memcpy_from_wc()) {
++      if (src_needs_clflush && i915_has_memcpy_from_wc()) {
+               src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
+               if (!IS_ERR(src)) {
+                       i915_unaligned_memcpy_from_wc(dst,
+@@ -1185,7 +1188,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
+                * validate up to the end of the batch.
+                */
+               remain = length;
+-              if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
++              if (dst_needs_clflush & CLFLUSH_BEFORE)
+                       remain = round_up(remain,
+                                         boot_cpu_data.x86_clflush_size);
+@@ -1195,7 +1198,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
+                       int len = min(remain, PAGE_SIZE - x);
+                       src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+-                      if (needs_clflush)
++                      if (src_needs_clflush)
+                               drm_clflush_virt_range(src + x, len);
+                       memcpy(ptr, src + x, len);
+                       kunmap_atomic(src);
+@@ -1206,11 +1209,10 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
+               }
+       }
+-      i915_gem_object_unpin_pages(src_obj);
++      i915_gem_object_finish_access(src_obj);
+       memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
+-      /* dst_obj is returned with vmap pinned */
+       return dst;
+ }
+@@ -1417,6 +1419,7 @@ static unsigned long *alloc_whitelist(u32 batch_length)
+  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
+  * if the batch appears legal but should use hardware parsing
+  */
++
+ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+                           struct i915_vma *batch,
+                           unsigned long batch_offset,
+@@ -1437,7 +1440,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+                                    batch->size));
+       GEM_BUG_ON(!batch_length);
+-      cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
++      cmd = copy_batch(shadow->obj, batch->obj,
++                       batch_offset, batch_length);
+       if (IS_ERR(cmd)) {
+               DRM_DEBUG("CMD: Failed to copy batch\n");
+               return PTR_ERR(cmd);
+-- 
+2.30.2
+
diff --git a/queue-5.10/revert-drm-i915-propagate-errors-on-awaiting-already.patch b/queue-5.10/revert-drm-i915-propagate-errors-on-awaiting-already.patch
new file mode 100644 (file)
index 0000000..bd898c5
--- /dev/null
@@ -0,0 +1,94 @@
+From 259a4f369cd71414a13c123033310cb4a5099967 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Aug 2021 13:48:02 -0500
+Subject: Revert "drm/i915: Propagate errors on awaiting already signaled
+ fences"
+
+From: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 3761baae908a7b5012be08d70fa553cc2eb82305 upstream.
+
+This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7.  Ever
+since that commit, we've been having issues where a hang in one client
+can propagate to another.  In particular, a hang in an app can propagate
+to the X server which causes the whole desktop to lock up.
+
+Error propagation along fences sounds like a good idea, but as this bug
+shows, it has surprising consequences, since propagating errors across
+security boundaries is not a good thing.
+
+What we do have is tracking of hangs on the ctx, and reporting of that
+information to userspace using RESET_STATS. That's how arb_robustness
+works. Also, if my understanding is still correct, the EIO from execbuf
+is returned when your context is banned (because it is not recoverable or
+has hit too many hangs). And in all these cases it's up to userspace to
+figure out everything that is impacted and should be reported to the
+application; that's not on the kernel to guess and automatically
+propagate.
+
+What's more, we're also building more features on top of ctx error
+reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the
+userspace fence wait also relies on that mechanism. So it is the path
+going forward for reporting gpu hangs and resets to userspace.
+
+So altogether that's why I think we should just bury this idea again, as
+it's not quite the direction we want to go in; hence the revert is the
+right option here.
+
+For backporters: Please note that you _must_ have a backport of
+https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.net/
+for otherwise backporting just this patch opens up a security bug.
+
+v2: Augment commit message. Also restore Jason's sob that I
+accidentally lost.
+
+v3: Add a note for backporters
+
+Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
+Reported-by: Marcin Slusarz <marcin.slusarz@intel.com>
+Cc: <stable@vger.kernel.org> # v5.6+
+Cc: Jason Ekstrand <jason.ekstrand@intel.com>
+Cc: Marcin Slusarz <marcin.slusarz@intel.com>
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080
+Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences")
+Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jason@jlekstrand.net
+(cherry picked from commit 93a2711cddd5760e2f0f901817d71c93183c3b87)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/i915_request.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
+index 0e813819b041..d8fef42ca38e 100644
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -1285,10 +1285,8 @@ i915_request_await_execution(struct i915_request *rq,
+       do {
+               fence = *child++;
+-              if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+-                      i915_sw_fence_set_error_once(&rq->submit, fence->error);
++              if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+                       continue;
+-              }
+               if (fence->context == rq->fence.context)
+                       continue;
+@@ -1386,10 +1384,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+       do {
+               fence = *child++;
+-              if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+-                      i915_sw_fence_set_error_once(&rq->submit, fence->error);
++              if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+                       continue;
+-              }
+               /*
+                * Requests on the same timeline are explicitly ordered, along
+-- 
+2.30.2
+
diff --git a/queue-5.10/series b/queue-5.10/series
index 438b3abbbe8f323eac75036bb675166eee93f4a3..d7a18f6d343a65652c0a2089b1eba9f1f70320d5 100644 (file)
@@ -65,3 +65,7 @@ powerpc-pseries-fix-regression-while-building-external-modules.patch
 revert-perf-map-fix-dso-nsinfo-refcounting.patch
 i40e-add-additional-info-to-phy-type-error.patch
 can-j1939-j1939_session_deactivate-clarify-lifetime-of-session-object.patch
+drm-i915-revert-drm-i915-gem-asynchronous-cmdparser.patch
+revert-drm-i915-propagate-errors-on-awaiting-already.patch
+btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch
+btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch