git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 11 Jan 2018 14:04:44 +0000 (15:04 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 11 Jan 2018 14:04:44 +0000 (15:04 +0100)
added patches:
futex-replace-barrier-in-unqueue_me-with-read_once.patch
locking-mutex-allow-next-waiter-lockless-wakeup.patch
locks-don-t-check-for-race-with-close-when-setting-ofd-lock.patch
mm-compaction-fix-invalid-free_pfn-and-compact_cached_free_pfn.patch
mm-compaction-pass-only-pageblock-aligned-range-to-pageblock_pfn_to_page.patch
mm-page-writeback-fix-dirty_ratelimit-calculation.patch
mm-zswap-use-workqueue-to-destroy-pool.patch
zswap-don-t-param_set_charp-while-holding-spinlock.patch

queue-4.4/futex-replace-barrier-in-unqueue_me-with-read_once.patch [new file with mode: 0644]
queue-4.4/locking-mutex-allow-next-waiter-lockless-wakeup.patch [new file with mode: 0644]
queue-4.4/locks-don-t-check-for-race-with-close-when-setting-ofd-lock.patch [new file with mode: 0644]
queue-4.4/mm-compaction-fix-invalid-free_pfn-and-compact_cached_free_pfn.patch [new file with mode: 0644]
queue-4.4/mm-compaction-pass-only-pageblock-aligned-range-to-pageblock_pfn_to_page.patch [new file with mode: 0644]
queue-4.4/mm-page-writeback-fix-dirty_ratelimit-calculation.patch [new file with mode: 0644]
queue-4.4/mm-zswap-use-workqueue-to-destroy-pool.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/zswap-don-t-param_set_charp-while-holding-spinlock.patch [new file with mode: 0644]

diff --git a/queue-4.4/futex-replace-barrier-in-unqueue_me-with-read_once.patch b/queue-4.4/futex-replace-barrier-in-unqueue_me-with-read_once.patch
new file mode 100644 (file)
index 0000000..56f865e
--- /dev/null
@@ -0,0 +1,61 @@
+From 29b75eb2d56a714190a93d7be4525e617591077a Mon Sep 17 00:00:00 2001
+From: Jianyu Zhan <nasa4836@gmail.com>
+Date: Mon, 7 Mar 2016 09:32:24 +0800
+Subject: futex: Replace barrier() in unqueue_me() with READ_ONCE()
+
+From: Jianyu Zhan <nasa4836@gmail.com>
+
+commit 29b75eb2d56a714190a93d7be4525e617591077a upstream.
+
+Commit e91467ecd1ef ("bug in futex unqueue_me") introduced a barrier() in
+unqueue_me() to prevent the compiler from rereading the lock pointer which
+might change after a check for NULL.
+
+Replace the barrier() with a READ_ONCE() for the following reasons:
+
+1) READ_ONCE() is a weaker form of barrier() that affects only the specific
+   load operation, while barrier() is a general compiler level memory barrier.
+   READ_ONCE() was not available at the time when the barrier was added.
+
+2) Aside from that, READ_ONCE() is descriptive and self-explanatory while a
+   barrier without comment is not clear to the casual reader.
+
+No functional change.
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Jianyu Zhan <nasa4836@gmail.com>
+Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Acked-by: Darren Hart <dvhart@linux.intel.com>
+Cc: dave@stgolabs.net
+Cc: peterz@infradead.org
+Cc: linux@rasmusvillemoes.dk
+Cc: akpm@linux-foundation.org
+Cc: fengguang.wu@intel.com
+Cc: bigeasy@linutronix.de
+Link: http://lkml.kernel.org/r/1457314344-5685-1-git-send-email-nasa4836@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1939,8 +1939,12 @@ static int unqueue_me(struct futex_q *q)
+       /* In the common case we don't take the spinlock, which is nice. */
+ retry:
+-      lock_ptr = q->lock_ptr;
+-      barrier();
++      /*
++       * q->lock_ptr can change between this read and the following spin_lock.
++       * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
++       * optimizing lock_ptr out of the logic below.
++       */
++      lock_ptr = READ_ONCE(q->lock_ptr);
+       if (lock_ptr != NULL) {
+               spin_lock(lock_ptr);
+               /*
diff --git a/queue-4.4/locking-mutex-allow-next-waiter-lockless-wakeup.patch b/queue-4.4/locking-mutex-allow-next-waiter-lockless-wakeup.patch
new file mode 100644 (file)
index 0000000..1292101
--- /dev/null
@@ -0,0 +1,62 @@
+From 1329ce6fbbe4536592dfcfc8d64d61bfeb598fe6 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <dave@stgolabs.net>
+Date: Sun, 24 Jan 2016 18:23:43 -0800
+Subject: locking/mutex: Allow next waiter lockless wakeup
+
+From: Davidlohr Bueso <dave@stgolabs.net>
+
+commit 1329ce6fbbe4536592dfcfc8d64d61bfeb598fe6 upstream.
+
+Make use of wake-queues and enable the wakeup to occur after releasing the
+wait_lock. This is similar to what we do with rtmutex top waiter,
+slightly shortening the critical region and allow other waiters to
+acquire the wait_lock sooner. In low contention cases it can also help
+the recently woken waiter to find the wait_lock available (fastpath)
+when it continues execution.
+
+Reviewed-by: Waiman Long <Waiman.Long@hpe.com>
+Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ding Tianhong <dingtianhong@huawei.com>
+Cc: Jason Low <jason.low2@hp.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Paul E. McKenney <paulmck@us.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Waiman Long <waiman.long@hpe.com>
+Cc: Will Deacon <Will.Deacon@arm.com>
+Link: http://lkml.kernel.org/r/20160125022343.GA3322@linux-uzut.site
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/locking/mutex.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/mutex.c
++++ b/kernel/locking/mutex.c
+@@ -719,6 +719,7 @@ static inline void
+ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
+ {
+       unsigned long flags;
++      WAKE_Q(wake_q);
+       /*
+        * As a performance measurement, release the lock before doing other
+@@ -746,11 +747,11 @@ __mutex_unlock_common_slowpath(struct mu
+                                          struct mutex_waiter, list);
+               debug_mutex_wake_waiter(lock, waiter);
+-
+-              wake_up_process(waiter->task);
++              wake_q_add(&wake_q, waiter->task);
+       }
+       spin_unlock_mutex(&lock->wait_lock, flags);
++      wake_up_q(&wake_q);
+ }
+ /*
diff --git a/queue-4.4/locks-don-t-check-for-race-with-close-when-setting-ofd-lock.patch b/queue-4.4/locks-don-t-check-for-race-with-close-when-setting-ofd-lock.patch
new file mode 100644 (file)
index 0000000..fd64002
--- /dev/null
@@ -0,0 +1,57 @@
+From 0752ba807b04ccd69cb4bc8bbf829a80ee208a3c Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jeff.layton@primarydata.com>
+Date: Fri, 8 Jan 2016 07:30:43 -0500
+Subject: locks: don't check for race with close when setting OFD lock
+
+From: Jeff Layton <jeff.layton@primarydata.com>
+
+commit 0752ba807b04ccd69cb4bc8bbf829a80ee208a3c upstream.
+
+We don't clean out OFD locks on close(), so there's no need to check
+for a race with them here. They'll get cleaned out at the same time
+that flock locks are.
+
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+
+
+---
+ fs/locks.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2220,10 +2220,12 @@ int fcntl_setlk(unsigned int fd, struct
+       error = do_lock_file_wait(filp, cmd, file_lock);
+       /*
+-       * Attempt to detect a close/fcntl race and recover by
+-       * releasing the lock that was just acquired.
++       * Attempt to detect a close/fcntl race and recover by releasing the
++       * lock that was just acquired. There is no need to do that when we're
++       * unlocking though, or for OFD locks.
+        */
+-      if (!error && file_lock->fl_type != F_UNLCK) {
++      if (!error && file_lock->fl_type != F_UNLCK &&
++          !(file_lock->fl_flags & FL_OFDLCK)) {
+               /*
+                * We need that spin_lock here - it prevents reordering between
+                * update of i_flctx->flc_posix and check for it done in
+@@ -2362,10 +2364,12 @@ int fcntl_setlk64(unsigned int fd, struc
+       error = do_lock_file_wait(filp, cmd, file_lock);
+       /*
+-       * Attempt to detect a close/fcntl race and recover by
+-       * releasing the lock that was just acquired.
++       * Attempt to detect a close/fcntl race and recover by releasing the
++       * lock that was just acquired. There is no need to do that when we're
++       * unlocking though, or for OFD locks.
+        */
+-      if (!error && file_lock->fl_type != F_UNLCK) {
++      if (!error && file_lock->fl_type != F_UNLCK &&
++          !(file_lock->fl_flags & FL_OFDLCK)) {
+               /*
+                * We need that spin_lock here - it prevents reordering between
+                * update of i_flctx->flc_posix and check for it done in
diff --git a/queue-4.4/mm-compaction-fix-invalid-free_pfn-and-compact_cached_free_pfn.patch b/queue-4.4/mm-compaction-fix-invalid-free_pfn-and-compact_cached_free_pfn.patch
new file mode 100644 (file)
index 0000000..b8499b5
--- /dev/null
@@ -0,0 +1,69 @@
+From 623446e4dc45b37740268165107cc63abb3022f0 Mon Sep 17 00:00:00 2001
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Date: Tue, 15 Mar 2016 14:57:45 -0700
+Subject: mm/compaction: fix invalid free_pfn and compact_cached_free_pfn
+
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+
+commit 623446e4dc45b37740268165107cc63abb3022f0 upstream.
+
+free_pfn and compact_cached_free_pfn are the pointers that remember the
+restart position of the freepage scanner.  When they are reset or invalid,
+we set them to zone_end_pfn because the freepage scanner works in the reverse
+direction.  But because the zone range is defined as [zone_start_pfn,
+zone_end_pfn), zone_end_pfn is invalid to access.  Therefore, we should
+not store it in free_pfn and compact_cached_free_pfn.  Instead, we need
+to store zone_end_pfn - 1 in them.  There is one more thing we should
+consider.  The freepage scanner scans in reverse, one pageblock at a time.
+If free_pfn and compact_cached_free_pfn are set to the middle of a pageblock,
+the scanner treats the front part of that pageblock as already scanned, so
+we lose the opportunity to scan there.  To fix this, the patch applies
+round_down() to guarantee that the reset position is pageblock aligned.
+
+Note that thanks to the current pageblock_pfn_to_page() implementation,
+actual access to zone_end_pfn doesn't happen until now.  But, following
+patch will change pageblock_pfn_to_page() so this patch is needed from
+now on.
+
+Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Aaron Lu <aaron.lu@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -200,7 +200,8 @@ static void reset_cached_positions(struc
+ {
+       zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
+       zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
+-      zone->compact_cached_free_pfn = zone_end_pfn(zone);
++      zone->compact_cached_free_pfn =
++                      round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages);
+ }
+ /*
+@@ -1358,11 +1359,11 @@ static int compact_zone(struct zone *zon
+        */
+       cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
+       cc->free_pfn = zone->compact_cached_free_pfn;
+-      if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
+-              cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
++      if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
++              cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages);
+               zone->compact_cached_free_pfn = cc->free_pfn;
+       }
+-      if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
++      if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
+               cc->migrate_pfn = start_pfn;
+               zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
+               zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
diff --git a/queue-4.4/mm-compaction-pass-only-pageblock-aligned-range-to-pageblock_pfn_to_page.patch b/queue-4.4/mm-compaction-pass-only-pageblock-aligned-range-to-pageblock_pfn_to_page.patch
new file mode 100644 (file)
index 0000000..b5506f3
--- /dev/null
@@ -0,0 +1,151 @@
+From e1409c325fdc1fef7b3d8025c51892355f065d15 Mon Sep 17 00:00:00 2001
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Date: Tue, 15 Mar 2016 14:57:48 -0700
+Subject: mm/compaction: pass only pageblock aligned range to pageblock_pfn_to_page
+
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+
+commit e1409c325fdc1fef7b3d8025c51892355f065d15 upstream.
+
+pageblock_pfn_to_page() is used to check that there is a valid pfn and that
+all pages in the pageblock are in a single zone.  If there is a hole in the
+pageblock, passing an arbitrary position to pageblock_pfn_to_page() could
+cause scanning of the whole pageblock to be skipped, instead of just the
+hole page.  For deterministic behaviour, it's better to always pass a
+pageblock aligned range to pageblock_pfn_to_page().  It will also help
+further optimization of pageblock_pfn_to_page() in the following patch.
+
+Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Aaron Lu <aaron.lu@intel.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |   41 ++++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -553,13 +553,17 @@ unsigned long
+ isolate_freepages_range(struct compact_control *cc,
+                       unsigned long start_pfn, unsigned long end_pfn)
+ {
+-      unsigned long isolated, pfn, block_end_pfn;
++      unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
+       LIST_HEAD(freelist);
+       pfn = start_pfn;
++      block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
++      if (block_start_pfn < cc->zone->zone_start_pfn)
++              block_start_pfn = cc->zone->zone_start_pfn;
+       block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+       for (; pfn < end_pfn; pfn += isolated,
++                              block_start_pfn = block_end_pfn,
+                               block_end_pfn += pageblock_nr_pages) {
+               /* Protect pfn from changing by isolate_freepages_block */
+               unsigned long isolate_start_pfn = pfn;
+@@ -572,11 +576,13 @@ isolate_freepages_range(struct compact_c
+                * scanning range to right one.
+                */
+               if (pfn >= block_end_pfn) {
++                      block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+                       block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+                       block_end_pfn = min(block_end_pfn, end_pfn);
+               }
+-              if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
++              if (!pageblock_pfn_to_page(block_start_pfn,
++                                      block_end_pfn, cc->zone))
+                       break;
+               isolated = isolate_freepages_block(cc, &isolate_start_pfn,
+@@ -862,18 +868,23 @@ unsigned long
+ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
+                                                       unsigned long end_pfn)
+ {
+-      unsigned long pfn, block_end_pfn;
++      unsigned long pfn, block_start_pfn, block_end_pfn;
+       /* Scan block by block. First and last block may be incomplete */
+       pfn = start_pfn;
++      block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
++      if (block_start_pfn < cc->zone->zone_start_pfn)
++              block_start_pfn = cc->zone->zone_start_pfn;
+       block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+       for (; pfn < end_pfn; pfn = block_end_pfn,
++                              block_start_pfn = block_end_pfn,
+                               block_end_pfn += pageblock_nr_pages) {
+               block_end_pfn = min(block_end_pfn, end_pfn);
+-              if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
++              if (!pageblock_pfn_to_page(block_start_pfn,
++                                      block_end_pfn, cc->zone))
+                       continue;
+               pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
+@@ -1091,7 +1102,9 @@ int sysctl_compact_unevictable_allowed _
+ static isolate_migrate_t isolate_migratepages(struct zone *zone,
+                                       struct compact_control *cc)
+ {
+-      unsigned long low_pfn, end_pfn;
++      unsigned long block_start_pfn;
++      unsigned long block_end_pfn;
++      unsigned long low_pfn;
+       unsigned long isolate_start_pfn;
+       struct page *page;
+       const isolate_mode_t isolate_mode =
+@@ -1103,16 +1116,21 @@ static isolate_migrate_t isolate_migrate
+        * initialized by compact_zone()
+        */
+       low_pfn = cc->migrate_pfn;
++      block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1);
++      if (block_start_pfn < zone->zone_start_pfn)
++              block_start_pfn = zone->zone_start_pfn;
+       /* Only scan within a pageblock boundary */
+-      end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
++      block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
+       /*
+        * Iterate over whole pageblocks until we find the first suitable.
+        * Do not cross the free scanner.
+        */
+-      for (; end_pfn <= cc->free_pfn;
+-                      low_pfn = end_pfn, end_pfn += pageblock_nr_pages) {
++      for (; block_end_pfn <= cc->free_pfn;
++                      low_pfn = block_end_pfn,
++                      block_start_pfn = block_end_pfn,
++                      block_end_pfn += pageblock_nr_pages) {
+               /*
+                * This can potentially iterate a massively long zone with
+@@ -1123,7 +1141,8 @@ static isolate_migrate_t isolate_migrate
+                                               && compact_should_abort(cc))
+                       break;
+-              page = pageblock_pfn_to_page(low_pfn, end_pfn, zone);
++              page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
++                                                                      zone);
+               if (!page)
+                       continue;
+@@ -1142,8 +1161,8 @@ static isolate_migrate_t isolate_migrate
+               /* Perform the isolation */
+               isolate_start_pfn = low_pfn;
+-              low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn,
+-                                                              isolate_mode);
++              low_pfn = isolate_migratepages_block(cc, low_pfn,
++                                              block_end_pfn, isolate_mode);
+               if (!low_pfn || cc->contended) {
+                       acct_isolated(zone, cc);
diff --git a/queue-4.4/mm-page-writeback-fix-dirty_ratelimit-calculation.patch b/queue-4.4/mm-page-writeback-fix-dirty_ratelimit-calculation.patch
new file mode 100644 (file)
index 0000000..721e185
--- /dev/null
@@ -0,0 +1,59 @@
+From d59b1087a98e402ed9a7cc577f4da435f9a555f5 Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Tue, 15 Mar 2016 14:55:27 -0700
+Subject: mm/page-writeback: fix dirty_ratelimit calculation
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit d59b1087a98e402ed9a7cc577f4da435f9a555f5 upstream.
+
+Calculation of dirty_ratelimit sometimes is not correct.  E.g.  initial
+values of dirty_ratelimit == INIT_BW and step == 0, lead to the
+following result:
+
+   UBSAN: Undefined behaviour in ../mm/page-writeback.c:1286:7
+   shift exponent 25600 is too large for 64-bit type 'long unsigned int'
+
+The fix is straightforward - make step 0 if the shift exponent is too
+big.
+
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page-writeback.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1162,6 +1162,7 @@ static void wb_update_dirty_ratelimit(st
+       unsigned long balanced_dirty_ratelimit;
+       unsigned long step;
+       unsigned long x;
++      unsigned long shift;
+       /*
+        * The dirty rate will match the writeout rate in long term, except
+@@ -1286,11 +1287,11 @@ static void wb_update_dirty_ratelimit(st
+        * rate itself is constantly fluctuating. So decrease the track speed
+        * when it gets close to the target. Helps eliminate pointless tremors.
+        */
+-      step >>= dirty_ratelimit / (2 * step + 1);
+-      /*
+-       * Limit the tracking speed to avoid overshooting.
+-       */
+-      step = (step + 7) / 8;
++      shift = dirty_ratelimit / (2 * step + 1);
++      if (shift < BITS_PER_LONG)
++              step = DIV_ROUND_UP(step >> shift, 8);
++      else
++              step = 0;
+       if (dirty_ratelimit < balanced_dirty_ratelimit)
+               dirty_ratelimit += step;
diff --git a/queue-4.4/mm-zswap-use-workqueue-to-destroy-pool.patch b/queue-4.4/mm-zswap-use-workqueue-to-destroy-pool.patch
new file mode 100644 (file)
index 0000000..7f2d780
--- /dev/null
@@ -0,0 +1,101 @@
+From 200867af4dedfe7cb707f96773684de1d1fd21e6 Mon Sep 17 00:00:00 2001
+From: Dan Streetman <ddstreet@ieee.org>
+Date: Fri, 20 May 2016 16:59:54 -0700
+Subject: mm/zswap: use workqueue to destroy pool
+
+From: Dan Streetman <ddstreet@ieee.org>
+
+commit 200867af4dedfe7cb707f96773684de1d1fd21e6 upstream.
+
+Add a work_struct to struct zswap_pool, and change __zswap_pool_empty to
+use the workqueue instead of using call_rcu().
+
+When zswap destroys a pool no longer in use, it uses call_rcu() to
+perform the destruction/freeing.  Since that executes in softirq
+context, it must not sleep.  However, actually destroying the pool
+involves freeing the per-cpu compressors (which requires locking the
+cpu_add_remove_lock mutex) and freeing the zpool, for which the
+implementation may sleep (e.g.  zsmalloc calls kmem_cache_destroy, which
+locks the slab_mutex).  So if either mutex is currently taken, or any
+other part of the compressor or zpool implementation sleeps, it will
+result in a BUG().
+
+It's not easy to reproduce this when changing zswap's params normally.
+In testing with a loaded system, this does not fail:
+
+  $ cd /sys/module/zswap/parameters
+  $ echo lz4 > compressor ; echo zsmalloc > zpool
+
+nor does this:
+
+  $ while true ; do
+  > echo lzo > compressor ; echo zbud > zpool
+  > sleep 1
+  > echo lz4 > compressor ; echo zsmalloc > zpool
+  > sleep 1
+  > done
+
+although it's still possible either of those might fail, depending on
+whether anything else besides zswap has locked the mutexes.
+
+However, changing a parameter with no delay immediately causes the
+schedule while atomic BUG:
+
+  $ while true ; do
+  > echo lzo > compressor ; echo lz4 > compressor
+  > done
+
+This is essentially the same as Yu Zhao's proposed patch to zsmalloc,
+but moved to zswap, to cover compressor and zpool freeing.
+
+Fixes: f1c54846ee45 ("zswap: dynamic pool creation")
+Signed-off-by: Dan Streetman <ddstreet@ieee.org>
+Reported-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Dan Streetman <dan.streetman@canonical.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zswap.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -123,7 +123,7 @@ struct zswap_pool {
+       struct crypto_comp * __percpu *tfm;
+       struct kref kref;
+       struct list_head list;
+-      struct rcu_head rcu_head;
++      struct work_struct work;
+       struct notifier_block notifier;
+       char tfm_name[CRYPTO_MAX_ALG_NAME];
+ };
+@@ -667,9 +667,11 @@ static int __must_check zswap_pool_get(s
+       return kref_get_unless_zero(&pool->kref);
+ }
+-static void __zswap_pool_release(struct rcu_head *head)
++static void __zswap_pool_release(struct work_struct *work)
+ {
+-      struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
++      struct zswap_pool *pool = container_of(work, typeof(*pool), work);
++
++      synchronize_rcu();
+       /* nobody should have been able to get a kref... */
+       WARN_ON(kref_get_unless_zero(&pool->kref));
+@@ -689,7 +691,9 @@ static void __zswap_pool_empty(struct kr
+       WARN_ON(pool == zswap_pool_current());
+       list_del_rcu(&pool->list);
+-      call_rcu(&pool->rcu_head, __zswap_pool_release);
++
++      INIT_WORK(&pool->work, __zswap_pool_release);
++      schedule_work(&pool->work);
+       spin_unlock(&zswap_pools_lock);
+ }
index ff41e532c5c4bf0f9d28b99219ed85dec6cf7b20..9d86812776e7a7b47e9d00832c0d1a0dd8b850f1 100644 (file)
@@ -22,3 +22,11 @@ alsa-aloop-release-cable-upon-open-error-path.patch
 alsa-aloop-fix-inconsistent-format-due-to-incomplete-rule.patch
 alsa-aloop-fix-racy-hw-constraints-adjustment.patch
 x86-acpi-reduce-code-duplication-in-mp_override_legacy_irq.patch
+mm-compaction-fix-invalid-free_pfn-and-compact_cached_free_pfn.patch
+mm-compaction-pass-only-pageblock-aligned-range-to-pageblock_pfn_to_page.patch
+mm-page-writeback-fix-dirty_ratelimit-calculation.patch
+mm-zswap-use-workqueue-to-destroy-pool.patch
+zswap-don-t-param_set_charp-while-holding-spinlock.patch
+locks-don-t-check-for-race-with-close-when-setting-ofd-lock.patch
+futex-replace-barrier-in-unqueue_me-with-read_once.patch
+locking-mutex-allow-next-waiter-lockless-wakeup.patch
diff --git a/queue-4.4/zswap-don-t-param_set_charp-while-holding-spinlock.patch b/queue-4.4/zswap-don-t-param_set_charp-while-holding-spinlock.patch
new file mode 100644 (file)
index 0000000..0dc8e26
--- /dev/null
@@ -0,0 +1,63 @@
+From fd5bb66cd934987e49557455b6497fc006521940 Mon Sep 17 00:00:00 2001
+From: Dan Streetman <ddstreet@ieee.org>
+Date: Mon, 27 Feb 2017 14:26:53 -0800
+Subject: zswap: don't param_set_charp while holding spinlock
+
+From: Dan Streetman <ddstreet@ieee.org>
+
+commit fd5bb66cd934987e49557455b6497fc006521940 upstream.
+
+Change the zpool/compressor param callback function to release the
+zswap_pools_lock spinlock before calling param_set_charp, since that
+function may sleep when it calls kmalloc with GFP_KERNEL.
+
+While this problem has existed for a while, I wasn't able to trigger it
+using a tight loop changing either/both the zpool and compressor params; I
+think it's very unlikely to be an issue on the stable kernels, especially
+since most zswap users will change the compressor and/or zpool from sysfs
+only one time each boot - or zero times, if they add the params to the
+kernel boot.
+
+Fixes: c99b42c3529e ("zswap: use charp for zswap param strings")
+Link: http://lkml.kernel.org/r/20170126155821.4545-1-ddstreet@ieee.org
+Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
+Reported-by: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Minchan Kim <minchan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/zswap.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -752,18 +752,22 @@ static int __zswap_param_set(const char
+       pool = zswap_pool_find_get(type, compressor);
+       if (pool) {
+               zswap_pool_debug("using existing", pool);
++              WARN_ON(pool == zswap_pool_current());
+               list_del_rcu(&pool->list);
+-      } else {
+-              spin_unlock(&zswap_pools_lock);
+-              pool = zswap_pool_create(type, compressor);
+-              spin_lock(&zswap_pools_lock);
+       }
++      spin_unlock(&zswap_pools_lock);
++
++      if (!pool)
++              pool = zswap_pool_create(type, compressor);
++
+       if (pool)
+               ret = param_set_charp(s, kp);
+       else
+               ret = -EINVAL;
++      spin_lock(&zswap_pools_lock);
++
+       if (!ret) {
+               put_pool = zswap_pool_current();
+               list_add_rcu(&pool->list, &zswap_pools);