git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 May 2023 14:17:42 +0000 (16:17 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 May 2023 14:17:42 +0000 (16:17 +0200)
added patches:
mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock.patch

queue-4.19/mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock.patch b/queue-4.19/mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock.patch
new file mode 100644 (file)
index 0000000..ba39149
--- /dev/null
@@ -0,0 +1,184 @@
+From 1007843a91909a4995ee78a538f62d8665705b66 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Tue, 4 Apr 2023 23:31:58 +0900
+Subject: mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit 1007843a91909a4995ee78a538f62d8665705b66 upstream.
+
+syzbot is reporting circular locking dependency which involves
+zonelist_update_seq seqlock [1], for this lock is checked by memory
+allocation requests which do not need to be retried.
+
+One deadlock scenario is kmalloc(GFP_ATOMIC) from an interrupt handler.
+
+  CPU0
+  ----
+  __build_all_zonelists() {
+    write_seqlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount odd
+    // e.g. timer interrupt handler runs at this moment
+      some_timer_func() {
+        kmalloc(GFP_ATOMIC) {
+          __alloc_pages_slowpath() {
+            read_seqbegin(&zonelist_update_seq) {
+              // spins forever because zonelist_update_seq.seqcount is odd
+            }
+          }
+        }
+      }
+    // e.g. timer interrupt handler finishes
+    write_sequnlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount even
+  }
+
+This deadlock scenario can be easily eliminated by not calling
+read_seqbegin(&zonelist_update_seq) from !__GFP_DIRECT_RECLAIM allocation
+requests, for retry is applicable to only __GFP_DIRECT_RECLAIM allocation
+requests.  But Michal Hocko does not know whether we should go with this
+approach.
+
+Another deadlock scenario which syzbot is reporting is a race between
+kmalloc(GFP_ATOMIC) from tty_insert_flip_string_and_push_buffer() with
+port->lock held and printk() from __build_all_zonelists() with
+zonelist_update_seq held.
+
+  CPU0                                   CPU1
+  ----                                   ----
+  pty_write() {
+    tty_insert_flip_string_and_push_buffer() {
+                                         __build_all_zonelists() {
+                                           write_seqlock(&zonelist_update_seq);
+                                           build_zonelists() {
+                                             printk() {
+                                               vprintk() {
+                                                 vprintk_default() {
+                                                   vprintk_emit() {
+                                                     console_unlock() {
+                                                       console_flush_all() {
+                                                         console_emit_next_record() {
+                                                           con->write() = serial8250_console_write() {
+      spin_lock_irqsave(&port->lock, flags);
+      tty_insert_flip_string() {
+        tty_insert_flip_string_fixed_flag() {
+          __tty_buffer_request_room() {
+            tty_buffer_alloc() {
+              kmalloc(GFP_ATOMIC | __GFP_NOWARN) {
+                __alloc_pages_slowpath() {
+                  zonelist_iter_begin() {
+                    read_seqbegin(&zonelist_update_seq); // spins forever because zonelist_update_seq.seqcount is odd
+                                                             spin_lock_irqsave(&port->lock, flags); // spins forever because port->lock is held
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      spin_unlock_irqrestore(&port->lock, flags);
+                                                             // message is printed to console
+                                                             spin_unlock_irqrestore(&port->lock, flags);
+                                                           }
+                                                         }
+                                                       }
+                                                     }
+                                                   }
+                                                 }
+                                               }
+                                             }
+                                           }
+                                           write_sequnlock(&zonelist_update_seq);
+                                         }
+    }
+  }
+
+This deadlock scenario can be eliminated by
+
+  preventing interrupt context from calling kmalloc(GFP_ATOMIC)
+
+and
+
+  preventing printk() from calling console_flush_all()
+
+while zonelist_update_seq.seqcount is odd.
+
+Since Petr Mladek thinks that __build_all_zonelists() can become a
+candidate for deferring printk() [2], let's address this problem by
+
+  disabling local interrupts in order to avoid kmalloc(GFP_ATOMIC)
+
+and
+
+  disabling synchronous printk() in order to avoid console_flush_all()
+
+.
+
+As a side effect of minimizing duration of zonelist_update_seq.seqcount
+being odd by disabling synchronous printk(), latency at
+read_seqbegin(&zonelist_update_seq) for both !__GFP_DIRECT_RECLAIM and
+__GFP_DIRECT_RECLAIM allocation requests will be reduced.  Although, from
+lockdep perspective, not calling read_seqbegin(&zonelist_update_seq) (i.e.
+do not record unnecessary locking dependency) from interrupt context is
+still preferable, even if we don't allow calling kmalloc(GFP_ATOMIC)
+inside
+write_seqlock(&zonelist_update_seq)/write_sequnlock(&zonelist_update_seq)
+section...
+
+Link: https://lkml.kernel.org/r/8796b95c-3da3-5885-fddd-6ef55f30e4d3@I-love.SAKURA.ne.jp
+Fixes: 3d36424b3b58 ("mm/page_alloc: fix race condition between build_all_zonelists and page allocation")
+Link: https://lkml.kernel.org/r/ZCrs+1cDqPWTDFNM@alley [2]
+Reported-by: syzbot <syzbot+223c7461c58c58a4cb10@syzkaller.appspotmail.com>
+  Link: https://syzkaller.appspot.com/bug?extid=223c7461c58c58a4cb10 [1]
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Cc: John Ogness <john.ogness@linutronix.de>
+Cc: Patrick Daly <quic_pdaly@quicinc.com>
+Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5425,7 +5425,21 @@ static void __build_all_zonelists(void *
+       int nid;
+       int __maybe_unused cpu;
+       pg_data_t *self = data;
++      unsigned long flags;
++      /*
++       * Explicitly disable this CPU's interrupts before taking seqlock
++       * to prevent any IRQ handler from calling into the page allocator
++       * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
++       */
++      local_irq_save(flags);
++      /*
++       * Explicitly disable this CPU's synchronous printk() before taking
++       * seqlock to prevent any printk() from trying to hold port->lock, for
++       * tty_insert_flip_string_and_push_buffer() on other CPU might be
++       * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
++       */
++      printk_deferred_enter();
+       write_seqlock(&zonelist_update_seq);
+ #ifdef CONFIG_NUMA
+@@ -5460,6 +5474,8 @@ static void __build_all_zonelists(void *
+       }
+       write_sequnlock(&zonelist_update_seq);
++      printk_deferred_exit();
++      local_irq_restore(flags);
+ }
+ static noinline void __init
index c0046c15e8660e5ca6c1ead85fea78232c16f5c1..1744757dfd8985b23e133dac699ad2e5ffbfe83f 100644 (file)
@@ -188,3 +188,4 @@ drbd-correctly-submit-flush-bio-on-barrier.patch
 pci-pciehp-use-down_read-write_nested-reset_lock-to-fix-lockdep-errors.patch
 pci-pciehp-fix-ab-ba-deadlock-between-reset_lock-and-device_lock.patch
 printk-declare-printk_deferred_-enter-safe-in-include-linux-printk.h.patch
+mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock.patch