git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 8 Mar 2017 13:13:28 +0000 (14:13 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 8 Mar 2017 13:13:28 +0000 (14:13 +0100)
added patches:
dm-cache-fix-corruption-seen-when-using-cache-2tb.patch
dm-raid-fix-data-corruption-on-reshape-request.patch
dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch
dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch
fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch
ima-fix-ima_d_path-possible-race-with-rename.patch
ipc-shm-fix-shmat-mmap-nil-page-protection.patch
mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch
mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch
mm-do-not-access-page-mapping-directly-on-page_endio.patch
mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch
mm-vmpressure-fix-sending-wrong-events-on-underflow.patch
mm-vmscan-cleanup-lru-size-claculations.patch
mm-vmscan-consider-eligible-zones-in-get_scan_count.patch
pm-devfreq-fix-available_governor-sysfs.patch
pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch
power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch
scsi-aacraid-reorder-adapter-status-check.patch
scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch
scsi-storvsc-properly-set-residual-data-length-on-errors.patch
scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch
scsi-use-scsi_device_from_queue-for-scsi_dh.patch
sigaltstack-support-ss_autodisarm-for-config_compat.patch

24 files changed:
queue-4.9/dm-cache-fix-corruption-seen-when-using-cache-2tb.patch [new file with mode: 0644]
queue-4.9/dm-raid-fix-data-corruption-on-reshape-request.patch [new file with mode: 0644]
queue-4.9/dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch [new file with mode: 0644]
queue-4.9/dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch [new file with mode: 0644]
queue-4.9/fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch [new file with mode: 0644]
queue-4.9/ima-fix-ima_d_path-possible-race-with-rename.patch [new file with mode: 0644]
queue-4.9/ipc-shm-fix-shmat-mmap-nil-page-protection.patch [new file with mode: 0644]
queue-4.9/mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch [new file with mode: 0644]
queue-4.9/mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch [new file with mode: 0644]
queue-4.9/mm-do-not-access-page-mapping-directly-on-page_endio.patch [new file with mode: 0644]
queue-4.9/mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch [new file with mode: 0644]
queue-4.9/mm-vmpressure-fix-sending-wrong-events-on-underflow.patch [new file with mode: 0644]
queue-4.9/mm-vmscan-cleanup-lru-size-claculations.patch [new file with mode: 0644]
queue-4.9/mm-vmscan-consider-eligible-zones-in-get_scan_count.patch [new file with mode: 0644]
queue-4.9/pm-devfreq-fix-available_governor-sysfs.patch [new file with mode: 0644]
queue-4.9/pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch [new file with mode: 0644]
queue-4.9/power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch [new file with mode: 0644]
queue-4.9/scsi-aacraid-reorder-adapter-status-check.patch [new file with mode: 0644]
queue-4.9/scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch [new file with mode: 0644]
queue-4.9/scsi-storvsc-properly-set-residual-data-length-on-errors.patch [new file with mode: 0644]
queue-4.9/scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch [new file with mode: 0644]
queue-4.9/scsi-use-scsi_device_from_queue-for-scsi_dh.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/sigaltstack-support-ss_autodisarm-for-config_compat.patch [new file with mode: 0644]

diff --git a/queue-4.9/dm-cache-fix-corruption-seen-when-using-cache-2tb.patch b/queue-4.9/dm-cache-fix-corruption-seen-when-using-cache-2tb.patch
new file mode 100644 (file)
index 0000000..0fbc424
--- /dev/null
@@ -0,0 +1,47 @@
+From ca763d0a53b264a650342cee206512bc92ac7050 Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Thu, 9 Feb 2017 11:46:18 -0500
+Subject: dm cache: fix corruption seen when using cache > 2TB
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit ca763d0a53b264a650342cee206512bc92ac7050 upstream.
+
+A rounding bug due to compiler generated temporary being 32bit was found
+in remap_to_cache().  A localized cast in remap_to_cache() fixes the
+corruption but this preferred fix (changing from uint32_t to sector_t)
+eliminates potential for future rounding errors elsewhere.
+
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-cache-target.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -248,7 +248,7 @@ struct cache {
+       /*
+        * Fields for converting from sectors to blocks.
+        */
+-      uint32_t sectors_per_block;
++      sector_t sectors_per_block;
+       int sectors_per_block_shift;
+       spinlock_t lock;
+@@ -3546,11 +3546,11 @@ static void cache_status(struct dm_targe
+               residency = policy_residency(cache->policy);
+-              DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
++              DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
+                      (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
+                      (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
+                      (unsigned long long)nr_blocks_metadata,
+-                     cache->sectors_per_block,
++                     (unsigned long long)cache->sectors_per_block,
+                      (unsigned long long) from_cblock(residency),
+                      (unsigned long long) from_cblock(cache->cache_size),
+                      (unsigned) atomic_read(&cache->stats.read_hit),
diff --git a/queue-4.9/dm-raid-fix-data-corruption-on-reshape-request.patch b/queue-4.9/dm-raid-fix-data-corruption-on-reshape-request.patch
new file mode 100644 (file)
index 0000000..8944b0c
--- /dev/null
@@ -0,0 +1,70 @@
+From d36a19541fe8f392778ac137d60f9be8dfdd8f9d Mon Sep 17 00:00:00 2001
+From: Heinz Mauelshagen <heinzm@redhat.com>
+Date: Tue, 28 Feb 2017 19:17:49 +0100
+Subject: dm raid: fix data corruption on reshape request
+
+From: Heinz Mauelshagen <heinzm@redhat.com>
+
+commit d36a19541fe8f392778ac137d60f9be8dfdd8f9d upstream.
+
+The lvm2 sequence to manage dm-raid constructor flags that trigger a
+rebuild or a reshape is defined as:
+
+1) load table with flags (e.g. rebuild/delta_disks/data_offset)
+2) clear out the flags in lvm2 metadata
+3) store the lvm2 metadata, reload the table to reset the flags
+   previously established during the initial load (1) -- in order to
+   prevent repeatedly requesting a rebuild or a reshape on activation
+
+Currently, loading an inactive table with rebuild/reshape flags
+specified will cause dm-raid to rebuild/reshape on resume and thus start
+updating the raid metadata (about the progress).  When the second table
+reload, to reset the flags, occurs the constructor accesses the volatile
+progress state kept in the raid superblocks.  Because the active mapping
+is still processing the rebuild/reshape, that position will be stale by
+the time the device is resumed.
+
+In the reshape case, this causes data corruption by processing already
+reshaped stripes again.  In the rebuild case, it does _not_ cause data
+corruption but instead involves superfluous rebuilds.
+
+Fix by keeping the raid set frozen during the first resume and then
+allow the rebuild/reshape during the second resume.
+
+Fixes: 9dbd1aa3a ("dm raid: add reshaping support to the target")
+Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-raid.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3621,6 +3621,8 @@ static int raid_preresume(struct dm_targ
+       return r;
+ }
++#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET)
++
+ static void raid_resume(struct dm_target *ti)
+ {
+       struct raid_set *rs = ti->private;
+@@ -3638,7 +3640,15 @@ static void raid_resume(struct dm_target
+       mddev->ro = 0;
+       mddev->in_sync = 0;
+-      clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
++      /*
++       * Keep the RAID set frozen if reshape/rebuild flags are set.
++       * The RAID set is unfrozen once the next table load/resume,
++       * which clears the reshape/rebuild flags, occurs.
++       * This ensures that the constructor for the inactive table
++       * retrieves an up-to-date reshape_position.
++       */
++      if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
++              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       if (mddev->suspended)
+               mddev_resume(mddev);
diff --git a/queue-4.9/dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch b/queue-4.9/dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch
new file mode 100644 (file)
index 0000000..e110270
--- /dev/null
@@ -0,0 +1,156 @@
+From 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@redhat.com>
+Date: Thu, 16 Feb 2017 23:57:17 -0500
+Subject: dm round robin: revert "use percpu 'repeat_count' and 'current_path'"
+
+From: Mike Snitzer <snitzer@redhat.com>
+
+commit 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 upstream.
+
+The sloppy nature of lockless access to percpu pointers
+(s->current_path) in rr_select_path(), from multiple threads, is
+causing some paths to be used more than others -- which results in less
+IO performance being observed.
+
+Revert these upstream commits to restore truly symmetric round-robin
+IO submission in DM multipath:
+
+b0b477c dm round robin: use percpu 'repeat_count' and 'current_path'
+802934b dm round robin: do not use this_cpu_ptr() without having preemption disabled
+
+There is no benefit to all this complexity if repeat_count = 1 (which is
+the recommended default).
+
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-round-robin.c |   67 +++++++++-----------------------------------
+ 1 file changed, 14 insertions(+), 53 deletions(-)
+
+--- a/drivers/md/dm-round-robin.c
++++ b/drivers/md/dm-round-robin.c
+@@ -17,8 +17,8 @@
+ #include <linux/module.h>
+ #define DM_MSG_PREFIX "multipath round-robin"
+-#define RR_MIN_IO     1000
+-#define RR_VERSION    "1.1.0"
++#define RR_MIN_IO     1
++#define RR_VERSION    "1.2.0"
+ /*-----------------------------------------------------------------
+  * Path-handling code, paths are held in lists
+@@ -47,44 +47,19 @@ struct selector {
+       struct list_head valid_paths;
+       struct list_head invalid_paths;
+       spinlock_t lock;
+-      struct dm_path * __percpu *current_path;
+-      struct percpu_counter repeat_count;
+ };
+-static void set_percpu_current_path(struct selector *s, struct dm_path *path)
+-{
+-      int cpu;
+-
+-      for_each_possible_cpu(cpu)
+-              *per_cpu_ptr(s->current_path, cpu) = path;
+-}
+-
+ static struct selector *alloc_selector(void)
+ {
+       struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
+-      if (!s)
+-              return NULL;
+-
+-      INIT_LIST_HEAD(&s->valid_paths);
+-      INIT_LIST_HEAD(&s->invalid_paths);
+-      spin_lock_init(&s->lock);
+-
+-      s->current_path = alloc_percpu(struct dm_path *);
+-      if (!s->current_path)
+-              goto out_current_path;
+-      set_percpu_current_path(s, NULL);
+-
+-      if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL))
+-              goto out_repeat_count;
++      if (s) {
++              INIT_LIST_HEAD(&s->valid_paths);
++              INIT_LIST_HEAD(&s->invalid_paths);
++              spin_lock_init(&s->lock);
++      }
+       return s;
+-
+-out_repeat_count:
+-      free_percpu(s->current_path);
+-out_current_path:
+-      kfree(s);
+-      return NULL;;
+ }
+ static int rr_create(struct path_selector *ps, unsigned argc, char **argv)
+@@ -105,8 +80,6 @@ static void rr_destroy(struct path_selec
+       free_paths(&s->valid_paths);
+       free_paths(&s->invalid_paths);
+-      free_percpu(s->current_path);
+-      percpu_counter_destroy(&s->repeat_count);
+       kfree(s);
+       ps->context = NULL;
+ }
+@@ -157,6 +130,11 @@ static int rr_add_path(struct path_selec
+               return -EINVAL;
+       }
++      if (repeat_count > 1) {
++              DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
++              repeat_count = 1;
++      }
++
+       /* allocate the path */
+       pi = kmalloc(sizeof(*pi), GFP_KERNEL);
+       if (!pi) {
+@@ -183,9 +161,6 @@ static void rr_fail_path(struct path_sel
+       struct path_info *pi = p->pscontext;
+       spin_lock_irqsave(&s->lock, flags);
+-      if (p == *this_cpu_ptr(s->current_path))
+-              set_percpu_current_path(s, NULL);
+-
+       list_move(&pi->list, &s->invalid_paths);
+       spin_unlock_irqrestore(&s->lock, flags);
+ }
+@@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(st
+       unsigned long flags;
+       struct selector *s = ps->context;
+       struct path_info *pi = NULL;
+-      struct dm_path *current_path = NULL;
+-      local_irq_save(flags);
+-      current_path = *this_cpu_ptr(s->current_path);
+-      if (current_path) {
+-              percpu_counter_dec(&s->repeat_count);
+-              if (percpu_counter_read_positive(&s->repeat_count) > 0) {
+-                      local_irq_restore(flags);
+-                      return current_path;
+-              }
+-      }
+-
+-      spin_lock(&s->lock);
++      spin_lock_irqsave(&s->lock, flags);
+       if (!list_empty(&s->valid_paths)) {
+               pi = list_entry(s->valid_paths.next, struct path_info, list);
+               list_move_tail(&pi->list, &s->valid_paths);
+-              percpu_counter_set(&s->repeat_count, pi->repeat_count);
+-              set_percpu_current_path(s, pi->path);
+-              current_path = pi->path;
+       }
+       spin_unlock_irqrestore(&s->lock, flags);
+-      return current_path;
++      return pi ? pi->path : NULL;
+ }
+ static struct path_selector_type rr_ps = {
diff --git a/queue-4.9/dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch b/queue-4.9/dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch
new file mode 100644 (file)
index 0000000..a9c8dcb
--- /dev/null
@@ -0,0 +1,28 @@
+From 6085831883c25860264721df15f05bbded45e2a2 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Wed, 15 Feb 2017 12:06:19 -0500
+Subject: dm stats: fix a leaked s->histogram_boundaries array
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 6085831883c25860264721df15f05bbded45e2a2 upstream.
+
+Fixes: dfcfac3e4cd9 ("dm stats: collect and report histogram of IO latencies")
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-stats.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-stats.c
++++ b/drivers/md/dm-stats.c
+@@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head
+       int cpu;
+       struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);
++      kfree(s->histogram_boundaries);
+       kfree(s->program_id);
+       kfree(s->aux_data);
+       for_each_possible_cpu(cpu) {
diff --git a/queue-4.9/fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch b/queue-4.9/fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch
new file mode 100644 (file)
index 0000000..b90cf7f
--- /dev/null
@@ -0,0 +1,53 @@
+From 907565337ebf998a68cb5c5b2174ce5e5da065eb Mon Sep 17 00:00:00 2001
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Date: Thu, 3 Nov 2016 10:29:28 -0600
+Subject: Fix: Disable sys_membarrier when nohz_full is enabled
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+commit 907565337ebf998a68cb5c5b2174ce5e5da065eb upstream.
+
+Userspace applications should be allowed to expect the membarrier system
+call with MEMBARRIER_CMD_SHARED command to issue memory barriers on
+nohz_full CPUs, but synchronize_sched() does not take those into
+account.
+
+Given that we do not want unrelated processes to be able to affect
+real-time sensitive nohz_full CPUs, simply return ENOSYS when membarrier
+is invoked on a kernel with enabled nohz_full CPUs.
+
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+CC: Josh Triplett <josh@joshtriplett.org>
+CC: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Rik van Riel <riel@redhat.com>
+Acked-by: Lai Jiangshan <jiangshanlai@gmail.com>
+Reviewed-by: Josh Triplett <josh@joshtriplett.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/membarrier.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/membarrier.c
++++ b/kernel/membarrier.c
+@@ -16,6 +16,7 @@
+ #include <linux/syscalls.h>
+ #include <linux/membarrier.h>
++#include <linux/tick.h>
+ /*
+  * Bitmask made from a "or" of all commands within enum membarrier_cmd,
+@@ -51,6 +52,9 @@
+  */
+ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+ {
++      /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
++      if (tick_nohz_full_enabled())
++              return -ENOSYS;
+       if (unlikely(flags))
+               return -EINVAL;
+       switch (cmd) {
diff --git a/queue-4.9/ima-fix-ima_d_path-possible-race-with-rename.patch b/queue-4.9/ima-fix-ima_d_path-possible-race-with-rename.patch
new file mode 100644 (file)
index 0000000..d30f918
--- /dev/null
@@ -0,0 +1,110 @@
+From bc15ed663e7e53ee4dc3e60f8d09c93a0528c694 Mon Sep 17 00:00:00 2001
+From: Mimi Zohar <zohar@linux.vnet.ibm.com>
+Date: Tue, 17 Jan 2017 06:45:41 -0500
+Subject: ima: fix ima_d_path() possible race with rename
+
+From: Mimi Zohar <zohar@linux.vnet.ibm.com>
+
+commit bc15ed663e7e53ee4dc3e60f8d09c93a0528c694 upstream.
+
+On failure to return a pathname from ima_d_path(), a pointer to
+dname is returned, which is subsequently used in the IMA measurement
+list, the IMA audit records, and other audit logging.  Saving the
+pointer to dname for later use has the potential to race with rename.
+
+Instead of returning a pointer to dname on failure, this patch returns
+a pointer to a copy of the filename.
+
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ security/integrity/ima/ima.h      |    2 +-
+ security/integrity/ima/ima_api.c  |   20 ++++++++++++++++++--
+ security/integrity/ima/ima_main.c |    8 +++++---
+ 3 files changed, 24 insertions(+), 6 deletions(-)
+
+--- a/security/integrity/ima/ima.h
++++ b/security/integrity/ima/ima.h
+@@ -173,7 +173,7 @@ int ima_store_template(struct ima_templa
+                      struct inode *inode,
+                      const unsigned char *filename, int pcr);
+ void ima_free_template_entry(struct ima_template_entry *entry);
+-const char *ima_d_path(const struct path *path, char **pathbuf);
++const char *ima_d_path(const struct path *path, char **pathbuf, char *filename);
+ /* IMA policy related functions */
+ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
+--- a/security/integrity/ima/ima_api.c
++++ b/security/integrity/ima/ima_api.c
+@@ -318,7 +318,17 @@ void ima_audit_measurement(struct integr
+       iint->flags |= IMA_AUDITED;
+ }
+-const char *ima_d_path(const struct path *path, char **pathbuf)
++/*
++ * ima_d_path - return a pointer to the full pathname
++ *
++ * Attempt to return a pointer to the full pathname for use in the
++ * IMA measurement list, IMA audit records, and auditing logs.
++ *
++ * On failure, return a pointer to a copy of the filename, not dname.
++ * Returning a pointer to dname, could result in using the pointer
++ * after the memory has been freed.
++ */
++const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf)
+ {
+       char *pathname = NULL;
+@@ -331,5 +341,11 @@ const char *ima_d_path(const struct path
+                       pathname = NULL;
+               }
+       }
+-      return pathname ?: (const char *)path->dentry->d_name.name;
++
++      if (!pathname) {
++              strlcpy(namebuf, path->dentry->d_name.name, NAME_MAX);
++              pathname = namebuf;
++      }
++
++      return pathname;
+ }
+--- a/security/integrity/ima/ima_main.c
++++ b/security/integrity/ima/ima_main.c
+@@ -83,6 +83,7 @@ static void ima_rdwr_violation_check(str
+                                    const char **pathname)
+ {
+       struct inode *inode = file_inode(file);
++      char filename[NAME_MAX];
+       fmode_t mode = file->f_mode;
+       bool send_tomtou = false, send_writers = false;
+@@ -102,7 +103,7 @@ static void ima_rdwr_violation_check(str
+       if (!send_tomtou && !send_writers)
+               return;
+-      *pathname = ima_d_path(&file->f_path, pathbuf);
++      *pathname = ima_d_path(&file->f_path, pathbuf, filename);
+       if (send_tomtou)
+               ima_add_violation(file, *pathname, iint,
+@@ -161,6 +162,7 @@ static int process_measurement(struct fi
+       struct integrity_iint_cache *iint = NULL;
+       struct ima_template_desc *template_desc;
+       char *pathbuf = NULL;
++      char filename[NAME_MAX];
+       const char *pathname = NULL;
+       int rc = -ENOMEM, action, must_appraise;
+       int pcr = CONFIG_IMA_MEASURE_PCR_IDX;
+@@ -239,8 +241,8 @@ static int process_measurement(struct fi
+               goto out_digsig;
+       }
+-      if (!pathname)  /* ima_rdwr_violation possibly pre-fetched */
+-              pathname = ima_d_path(&file->f_path, &pathbuf);
++      if (!pathbuf)   /* ima_rdwr_violation possibly pre-fetched */
++              pathname = ima_d_path(&file->f_path, &pathbuf, filename);
+       if (action & IMA_MEASURE)
+               ima_store_measurement(iint, file, pathname,
diff --git a/queue-4.9/ipc-shm-fix-shmat-mmap-nil-page-protection.patch b/queue-4.9/ipc-shm-fix-shmat-mmap-nil-page-protection.patch
new file mode 100644 (file)
index 0000000..c7e3176
--- /dev/null
@@ -0,0 +1,75 @@
+From 95e91b831f87ac8e1f8ed50c14d709089b4e01b8 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <dave@stgolabs.net>
+Date: Mon, 27 Feb 2017 14:28:24 -0800
+Subject: ipc/shm: Fix shmat mmap nil-page protection
+
+From: Davidlohr Bueso <dave@stgolabs.net>
+
+commit 95e91b831f87ac8e1f8ed50c14d709089b4e01b8 upstream.
+
+The issue is described here, with a nice testcase:
+
+    https://bugzilla.kernel.org/show_bug.cgi?id=192931
+
+The problem is that shmat() calls do_mmap_pgoff() with MAP_FIXED, and
+the address rounded down to 0.  For the regular mmap case, the
+protection mentioned above is that the kernel gets to generate the
+address -- arch_get_unmapped_area() will always check for MAP_FIXED and
+return that address.  So by the time we do security_mmap_addr(0) things
+get funky for shmat().
+
+The testcase itself shows that while a regular user crashes, root will
+not have a problem attaching a nil-page.  There are two possible fixes
+to this.  The first, and which this patch does, is to simply allow root
+to crash as well -- this is also regular mmap behavior, ie when hacking
+up the testcase and adding mmap(...  |MAP_FIXED).  While this approach
+is the safer option, the second alternative is to ignore SHM_RND if the
+rounded address is 0, thus only having MAP_SHARED flags.  This makes the
+behavior of shmat() identical to the mmap() case.  The downside of this
+is obviously user visible, but does make sense in that it maintains
+semantics after the round-down wrt 0 address and mmap.
+
+Passes shm related ltp tests.
+
+Link: http://lkml.kernel.org/r/1486050195-18629-1-git-send-email-dave@stgolabs.net
+Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
+Reported-by: Gareth Evans <gareth.evans@contextis.co.uk>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/shm.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -1085,8 +1085,8 @@ out_unlock1:
+  * "raddr" thing points to kernel space, and there has to be a wrapper around
+  * this.
+  */
+-long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
+-            unsigned long shmlba)
++long do_shmat(int shmid, char __user *shmaddr, int shmflg,
++            ulong *raddr, unsigned long shmlba)
+ {
+       struct shmid_kernel *shp;
+       unsigned long addr;
+@@ -1107,8 +1107,13 @@ long do_shmat(int shmid, char __user *sh
+               goto out;
+       else if ((addr = (ulong)shmaddr)) {
+               if (addr & (shmlba - 1)) {
+-                      if (shmflg & SHM_RND)
+-                              addr &= ~(shmlba - 1);     /* round down */
++                      /*
++                       * Round down to the nearest multiple of shmlba.
++                       * For sane do_mmap_pgoff() parameters, avoid
++                       * round downs that trigger nil-page and MAP_FIXED.
++                       */
++                      if ((shmflg & SHM_RND) && addr >= shmlba)
++                              addr &= ~(shmlba - 1);
+                       else
+ #ifndef __ARCH_FORCE_SHMLBA
+                               if (addr & ~PAGE_MASK)
diff --git a/queue-4.9/mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch b/queue-4.9/mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch
new file mode 100644 (file)
index 0000000..148001c
--- /dev/null
@@ -0,0 +1,49 @@
+From 9c57b5808c625f4fc93da330b932647eaff321f7 Mon Sep 17 00:00:00 2001
+From: Yisheng Xie <xieyisheng1@huawei.com>
+Date: Fri, 24 Feb 2017 15:00:40 -0800
+Subject: mm balloon: umount balloon_mnt when removing vb device
+
+From: Yisheng Xie <xieyisheng1@huawei.com>
+
+commit 9c57b5808c625f4fc93da330b932647eaff321f7 upstream.
+
+With CONFIG_BALLOON_COMPACTION=y the kernel will mount balloon_mnt for
+balloon page migration when we probe a virtio_balloon device.  However
+we do not unmount it when removing the device.  Fix this.
+
+Fixes: b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature")
+Link: http://lkml.kernel.org/r/1486531318-35189-1-git-send-email-xieyisheng1@huawei.com
+Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Cc: Rafael Aquini <aquini@redhat.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Gioh Kim <gi-oh.kim@profitbricks.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: Hanjun Guo <guohanjun@huawei.com>
+Cc: Xishi Qiu <qiuxishi@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/virtio/virtio_balloon.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/virtio/virtio_balloon.c
++++ b/drivers/virtio/virtio_balloon.c
+@@ -615,8 +615,12 @@ static void virtballoon_remove(struct vi
+       cancel_work_sync(&vb->update_balloon_stats_work);
+       remove_common(vb);
++#ifdef CONFIG_BALLOON_COMPACTION
+       if (vb->vb_dev_info.inode)
+               iput(vb->vb_dev_info.inode);
++
++      kern_unmount(balloon_mnt);
++#endif
+       kfree(vb);
+ }
diff --git a/queue-4.9/mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch b/queue-4.9/mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch
new file mode 100644 (file)
index 0000000..43f5552
--- /dev/null
@@ -0,0 +1,63 @@
+From b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 24 Feb 2017 14:55:45 -0800
+Subject: mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done}
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 upstream.
+
+The mem_hotplug_{begin,done} lock coordinates with {get,put}_online_mems()
+to hold off "readers" of the current state of memory from new hotplug
+actions.  mem_hotplug_begin() expects exclusive access, via the
+device_hotplug lock, to set mem_hotplug.active_writer.  Calling
+mem_hotplug_begin() without locking device_hotplug can lead to
+corrupting mem_hotplug.refcount and missed wakeups / soft lockups.
+
+[dan.j.williams@intel.com: v2]
+  Link: http://lkml.kernel.org/r/148728203365.38457.17804568297887708345.stgit@dwillia2-desk3.amr.corp.intel.com
+Link: http://lkml.kernel.org/r/148693885680.16345.17802627926777862337.stgit@dwillia2-desk3.amr.corp.intel.com
+Fixes: f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}")
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Reported-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Toshi Kani <toshi.kani@hpe.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/memremap.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/kernel/memremap.c
++++ b/kernel/memremap.c
+@@ -246,9 +246,13 @@ static void devm_memremap_pages_release(
+       /* pages are dead and unused, undo the arch mapping */
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(resource_size(res), SECTION_SIZE);
++
++      lock_device_hotplug();
+       mem_hotplug_begin();
+       arch_remove_memory(align_start, align_size);
+       mem_hotplug_done();
++      unlock_device_hotplug();
++
+       untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+       pgmap_radix_release(res);
+       dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+@@ -360,9 +364,11 @@ void *devm_memremap_pages(struct device
+       if (error)
+               goto err_pfn_remap;
++      lock_device_hotplug();
+       mem_hotplug_begin();
+       error = arch_add_memory(nid, align_start, align_size, true);
+       mem_hotplug_done();
++      unlock_device_hotplug();
+       if (error)
+               goto err_add_memory;
diff --git a/queue-4.9/mm-do-not-access-page-mapping-directly-on-page_endio.patch b/queue-4.9/mm-do-not-access-page-mapping-directly-on-page_endio.patch
new file mode 100644 (file)
index 0000000..4fdd199
--- /dev/null
@@ -0,0 +1,57 @@
+From dd8416c47715cf324c9a16f13273f9fda87acfed Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Fri, 24 Feb 2017 14:59:59 -0800
+Subject: mm: do not access page->mapping directly on page_endio
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit dd8416c47715cf324c9a16f13273f9fda87acfed upstream.
+
+With rw_page, page_endio is used for completing IO on a page and it
+propagates write error to the address space if the IO fails.  The
+problem is it accesses page->mapping directly which might be okay for
+file-backed pages but not for anonymous pages.  Otherwise, it
+can corrupt one of the fields of anon_vma under us and the system
+panics randomly.
+
+swap_writepage
+  bdev_writepage
+    ops->rw_page
+
+I encountered the BUG while developing a new zram feature and it was
+really hard to figure out because it caused random crashes, sometimes
+in mmap_sem lockdep, sometimes in other places never related to
+zram/zsmalloc, and it was not reproducible with some configurations.
+
+Considering how subtle this bug is and that people do fast-swap tests
+with brd, I think it is worth adding a stable mark.
+
+Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()")
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/filemap.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -910,9 +910,12 @@ void page_endio(struct page *page, bool
+               unlock_page(page);
+       } else {
+               if (err) {
++                      struct address_space *mapping;
++
+                       SetPageError(page);
+-                      if (page->mapping)
+-                              mapping_set_error(page->mapping, err);
++                      mapping = page_mapping(page);
++                      if (mapping)
++                              mapping_set_error(mapping, err);
+               }
+               end_page_writeback(page);
+       }
diff --git a/queue-4.9/mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch b/queue-4.9/mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch
new file mode 100644 (file)
index 0000000..8685b7f
--- /dev/null
@@ -0,0 +1,103 @@
+From e02dc017c3032dcdce1b993af0db135462e1b4b7 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Fri, 24 Feb 2017 14:59:33 -0800
+Subject: mm/page_alloc: fix nodes for reclaim in fast path
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit e02dc017c3032dcdce1b993af0db135462e1b4b7 upstream.
+
+When @node_reclaim_node isn't 0, the page allocator tries to reclaim
+pages if the amount of free memory in the zones are below the low
+watermark.  On Power platform, none of NUMA nodes are scanned for page
+reclaim because no nodes match the condition in zone_allows_reclaim().
+On Power platform, RECLAIM_DISTANCE is set to 10 which is the distance
+of Node-A to Node-A.  So the preferred node even won't be scanned for
+page reclaim.
+
+   __alloc_pages_nodemask()
+   get_page_from_freelist()
+      zone_allows_reclaim()
+
+Anton proposed the test code as below:
+
+   # cat alloc.c
+      :
+   int main(int argc, char *argv[])
+   {
+       void *p;
+       unsigned long size;
+       unsigned long start, end;
+
+       start = time(NULL);
+       size = strtoul(argv[1], NULL, 0);
+       printf("To allocate %ldGB memory\n", size);
+
+       size <<= 30;
+       p = malloc(size);
+       assert(p);
+       memset(p, 0, size);
+
+       end = time(NULL);
+       printf("Used time: %ld seconds\n", end - start);
+       sleep(3600);
+       return 0;
+   }
+
+The system I use for testing has two NUMA nodes.  Both have 128GB
+memory.  In the scenario below, the page caches on node#0 should be reclaimed
+when it encounters pressure to accommodate request of allocation.
+
+   # echo 2 > /proc/sys/vm/zone_reclaim_mode; \
+     sync; \
+     echo 3 > /proc/sys/vm/drop_caches; \
+   # taskset -c 0 cat file.32G > /dev/null; \
+     grep FilePages /sys/devices/system/node/node0/meminfo
+     Node 0 FilePages:       33619712 kB
+   # taskset -c 0 ./alloc 128
+   # grep FilePages /sys/devices/system/node/node0/meminfo
+     Node 0 FilePages:       33619840 kB
+   # grep MemFree /sys/devices/system/node/node0/meminfo
+     Node 0 MemFree:          186816 kB
+
+With the patch applied, the pagecache on node-0 is reclaimed when its
+free memory is running out.  It's the expected behaviour.
+
+   # echo 2 > /proc/sys/vm/zone_reclaim_mode; \
+     sync; \
+     echo 3 > /proc/sys/vm/drop_caches
+   # taskset -c 0 cat file.32G > /dev/null; \
+     grep FilePages /sys/devices/system/node/node0/meminfo
+     Node 0 FilePages:       33605568 kB
+   # taskset -c 0 ./alloc 128
+   # grep FilePages /sys/devices/system/node/node0/meminfo
+     Node 0 FilePages:        1379520 kB
+   # grep MemFree /sys/devices/system/node/node0/meminfo
+     Node 0 MemFree:           317120 kB
+
+Fixes: 5f7a75acdb24 ("mm: page_alloc: do not cache reclaim distances")
+Link: http://lkml.kernel.org/r/1486532455-29613-1-git-send-email-gwshan@linux.vnet.ibm.com
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Anton Blanchard <anton@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2858,7 +2858,7 @@ bool zone_watermark_ok_safe(struct zone
+ #ifdef CONFIG_NUMA
+ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+ {
+-      return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
++      return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
+                               RECLAIM_DISTANCE;
+ }
+ #else /* CONFIG_NUMA */
diff --git a/queue-4.9/mm-vmpressure-fix-sending-wrong-events-on-underflow.patch b/queue-4.9/mm-vmpressure-fix-sending-wrong-events-on-underflow.patch
new file mode 100644 (file)
index 0000000..217396e
--- /dev/null
@@ -0,0 +1,65 @@
+From e1587a4945408faa58d0485002c110eb2454740c Mon Sep 17 00:00:00 2001
+From: Vinayak Menon <vinmenon@codeaurora.org>
+Date: Fri, 24 Feb 2017 14:59:39 -0800
+Subject: mm: vmpressure: fix sending wrong events on underflow
+
+From: Vinayak Menon <vinmenon@codeaurora.org>
+
+commit e1587a4945408faa58d0485002c110eb2454740c upstream.
+
+At the end of a window period, if the reclaimed pages is greater than
+scanned, an unsigned underflow can result in a huge pressure value and
+thus a critical event.  Reclaimed pages is found to go higher than
+scanned because of the addition of reclaimed slab pages to reclaimed in
+shrink_node without a corresponding increment to scanned pages.
+
+Minchan Kim mentioned that this can also happen in the case of a THP
+page where the scanned is 1 and reclaimed could be 512.
+
+Link: http://lkml.kernel.org/r/1486641577-11685-1-git-send-email-vinmenon@codeaurora.org
+Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
+Cc: Shiraz Hashim <shashim@codeaurora.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmpressure.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/mm/vmpressure.c
++++ b/mm/vmpressure.c
+@@ -112,9 +112,16 @@ static enum vmpressure_levels vmpressure
+                                                   unsigned long reclaimed)
+ {
+       unsigned long scale = scanned + reclaimed;
+-      unsigned long pressure;
++      unsigned long pressure = 0;
+       /*
++       * reclaimed can be greater than scanned in cases
++       * like THP, where the scanned is 1 and reclaimed
++       * could be 512
++       */
++      if (reclaimed >= scanned)
++              goto out;
++      /*
+        * We calculate the ratio (in percents) of how many pages were
+        * scanned vs. reclaimed in a given time frame (window). Note that
+        * time is in VM reclaimer's "ticks", i.e. number of pages
+@@ -124,6 +131,7 @@ static enum vmpressure_levels vmpressure
+       pressure = scale - (reclaimed * scale / scanned);
+       pressure = pressure * 100 / scale;
++out:
+       pr_debug("%s: %3lu  (s: %lu  r: %lu)\n", __func__, pressure,
+                scanned, reclaimed);
diff --git a/queue-4.9/mm-vmscan-cleanup-lru-size-claculations.patch b/queue-4.9/mm-vmscan-cleanup-lru-size-claculations.patch
new file mode 100644 (file)
index 0000000..8f54304
--- /dev/null
@@ -0,0 +1,185 @@
+From fd538803731e50367b7c59ce4ad3454426a3d671 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 22 Feb 2017 15:45:58 -0800
+Subject: mm, vmscan: cleanup lru size claculations
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit fd538803731e50367b7c59ce4ad3454426a3d671 upstream.
+
+lruvec_lru_size returns the full size of the LRU list while we sometimes
+need a value reduced only to eligible zones (e.g.  for lowmem requests).
+inactive_list_is_low is one such user.  Later patches will add more of
+them.  Add a new parameter to lruvec_lru_size and allow it to filter out
+zones which are not eligible for the given context.
+
+Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ include/linux/mmzone.h |    2 -
+ mm/vmscan.c            |   81 +++++++++++++++++++++++--------------------------
+ mm/workingset.c        |    2 -
+ 3 files changed, 41 insertions(+), 44 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec
+ #endif
+ }
+-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru);
++extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
+ #ifdef CONFIG_HAVE_MEMORY_PRESENT
+ void memory_present(int nid, unsigned long start, unsigned long end);
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_dat
+               pgdat_reclaimable_pages(pgdat) * 6;
+ }
+-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
++/**
++ * lruvec_lru_size -  Returns the number of pages on the given LRU list.
++ * @lruvec: lru vector
++ * @lru: lru to use
++ * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list)
++ */
++unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
+ {
++      unsigned long lru_size;
++      int zid;
++
+       if (!mem_cgroup_disabled())
+-              return mem_cgroup_get_lru_size(lruvec, lru);
++              lru_size = mem_cgroup_get_lru_size(lruvec, lru);
++      else
++              lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
+-      return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
+-}
++      for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
++              struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid];
++              unsigned long size;
+-unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
+-                                 int zone_idx)
+-{
+-      if (!mem_cgroup_disabled())
+-              return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
++              if (!managed_zone(zone))
++                      continue;
++
++              if (!mem_cgroup_disabled())
++                      size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
++              else
++                      size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid],
++                                     NR_ZONE_LRU_BASE + lru);
++              lru_size -= min(size, lru_size);
++      }
++
++      return lru_size;
+-      return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
+-                             NR_ZONE_LRU_BASE + lru);
+ }
+ /*
+@@ -2028,11 +2045,10 @@ static bool inactive_list_is_low(struct
+                                               struct scan_control *sc)
+ {
+       unsigned long inactive_ratio;
+-      unsigned long inactive;
+-      unsigned long active;
++      unsigned long inactive, active;
++      enum lru_list inactive_lru = file * LRU_FILE;
++      enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
+       unsigned long gb;
+-      struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+-      int zid;
+       /*
+        * If we don't have swap space, anonymous page deactivation
+@@ -2041,27 +2057,8 @@ static bool inactive_list_is_low(struct
+       if (!file && !total_swap_pages)
+               return false;
+-      inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
+-      active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
+-
+-      /*
+-       * For zone-constrained allocations, it is necessary to check if
+-       * deactivations are required for lowmem to be reclaimed. This
+-       * calculates the inactive/active pages available in eligible zones.
+-       */
+-      for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
+-              struct zone *zone = &pgdat->node_zones[zid];
+-              unsigned long inactive_zone, active_zone;
+-
+-              if (!managed_zone(zone))
+-                      continue;
+-
+-              inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
+-              active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
+-
+-              inactive -= min(inactive, inactive_zone);
+-              active -= min(active, active_zone);
+-      }
++      inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
++      active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
+       gb = (inactive + active) >> (30 - PAGE_SHIFT);
+       if (gb)
+@@ -2208,7 +2205,7 @@ static void get_scan_count(struct lruvec
+        * system is under heavy pressure.
+        */
+       if (!inactive_list_is_low(lruvec, true, sc) &&
+-          lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
++          lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) {
+               scan_balance = SCAN_FILE;
+               goto out;
+       }
+@@ -2234,10 +2231,10 @@ static void get_scan_count(struct lruvec
+        * anon in [0], file in [1]
+        */
+-      anon  = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) +
+-              lruvec_lru_size(lruvec, LRU_INACTIVE_ANON);
+-      file  = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
+-              lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
++      anon  = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
++              lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
++      file  = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
++              lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
+       spin_lock_irq(&pgdat->lru_lock);
+       if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
+@@ -2275,7 +2272,7 @@ out:
+                       unsigned long size;
+                       unsigned long scan;
+-                      size = lruvec_lru_size(lruvec, lru);
++                      size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES);
+                       scan = size >> sc->priority;
+                       if (!scan && pass && force_scan)
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -266,7 +266,7 @@ bool workingset_refault(void *shadow)
+       }
+       lruvec = mem_cgroup_lruvec(pgdat, memcg);
+       refault = atomic_long_read(&lruvec->inactive_age);
+-      active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
++      active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
+       rcu_read_unlock();
+       /*
diff --git a/queue-4.9/mm-vmscan-consider-eligible-zones-in-get_scan_count.patch b/queue-4.9/mm-vmscan-consider-eligible-zones-in-get_scan_count.patch
new file mode 100644 (file)
index 0000000..32d8063
--- /dev/null
@@ -0,0 +1,75 @@
+From 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 22 Feb 2017 15:46:01 -0800
+Subject: mm, vmscan: consider eligible zones in get_scan_count
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 upstream.
+
+get_scan_count() considers the whole node LRU size when
+
+ - doing SCAN_FILE due to many page cache inactive pages
+ - calculating the number of pages to scan
+
+In both cases this might lead to unexpected behavior especially on 32b
+systems where we can expect lowmem memory pressure very often.
+
+A large highmem zone can easily distort SCAN_FILE heuristic because
+there might be only few file pages from the eligible zones on the node
+lru and we would still enforce file lru scanning which can lead to
+thrashing while we could still scan anonymous pages.
+
+The later use of lruvec_lru_size can be problematic as well.  Especially
+when there are not many pages from the eligible zones.  We would have to
+skip over many pages to find anything to reclaim but shrink_node_memcg
+would only reduce the remaining number to scan by SWAP_CLUSTER_MAX at
+maximum.  Therefore we can end up going over a large LRU many times
+without actually having chance to reclaim much if anything at all.  The
+closer we are out of memory on lowmem zone the worse the problem will
+be.
+
+Fix this by filtering out all the ineligible zones when calculating the
+lru size for both paths and consider only sc->reclaim_idx zones.
+
+The patch would need to be tweaked a bit to apply to 4.10 and older but
+I will do that as soon as it hits the Linus tree in the next merge
+window.
+
+Link: http://lkml.kernel.org/r/20170117103702.28542-3-mhocko@kernel.org
+Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis")
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Tested-by: Trevor Cordes <trevor@tecnopolis.ca>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ mm/vmscan.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2205,7 +2205,7 @@ static void get_scan_count(struct lruvec
+        * system is under heavy pressure.
+        */
+       if (!inactive_list_is_low(lruvec, true, sc) &&
+-          lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) {
++          lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
+               scan_balance = SCAN_FILE;
+               goto out;
+       }
+@@ -2272,7 +2272,7 @@ out:
+                       unsigned long size;
+                       unsigned long scan;
+-                      size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES);
++                      size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
+                       scan = size >> sc->priority;
+                       if (!scan && pass && force_scan)
diff --git a/queue-4.9/pm-devfreq-fix-available_governor-sysfs.patch b/queue-4.9/pm-devfreq-fix-available_governor-sysfs.patch
new file mode 100644 (file)
index 0000000..7018927
--- /dev/null
@@ -0,0 +1,104 @@
+From bcf23c79c4e46130701370af4383b61a3cba755c Mon Sep 17 00:00:00 2001
+From: Chanwoo Choi <cw00.choi@samsung.com>
+Date: Tue, 31 Jan 2017 15:38:16 +0900
+Subject: PM / devfreq: Fix available_governor sysfs
+
+From: Chanwoo Choi <cw00.choi@samsung.com>
+
+commit bcf23c79c4e46130701370af4383b61a3cba755c upstream.
+
+The devfreq using passive governor is not able to change the governor.
+So, the user can not change the governor through 'available_governor' sysfs
+entry. Also, the devfreq which doesn't use the passive governor is not able to
+change to 'passive' governor on the fly.
+
+Fixes: 996133119f57 ("PM / devfreq: Add new passive governor")
+Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
+Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/devfreq/devfreq.c          |   31 +++++++++++++++++++++++++++----
+ drivers/devfreq/governor_passive.c |    1 +
+ include/linux/devfreq.h            |    3 +++
+ 3 files changed, 31 insertions(+), 4 deletions(-)
+
+--- a/drivers/devfreq/devfreq.c
++++ b/drivers/devfreq/devfreq.c
+@@ -939,6 +939,9 @@ static ssize_t governor_store(struct dev
+       if (df->governor == governor) {
+               ret = 0;
+               goto out;
++      } else if (df->governor->immutable || governor->immutable) {
++              ret = -EINVAL;
++              goto out;
+       }
+       if (df->governor) {
+@@ -968,13 +971,33 @@ static ssize_t available_governors_show(
+                                       struct device_attribute *attr,
+                                       char *buf)
+ {
+-      struct devfreq_governor *tmp_governor;
++      struct devfreq *df = to_devfreq(d);
+       ssize_t count = 0;
+       mutex_lock(&devfreq_list_lock);
+-      list_for_each_entry(tmp_governor, &devfreq_governor_list, node)
+-              count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
+-                                 "%s ", tmp_governor->name);
++
++      /*
++       * The devfreq with immutable governor (e.g., passive) shows
++       * only own governor.
++       */
++      if (df->governor->immutable) {
++              count = scnprintf(&buf[count], DEVFREQ_NAME_LEN,
++                                 "%s ", df->governor_name);
++      /*
++       * The devfreq device shows the registered governor except for
++       * immutable governors such as passive governor .
++       */
++      } else {
++              struct devfreq_governor *governor;
++
++              list_for_each_entry(governor, &devfreq_governor_list, node) {
++                      if (governor->immutable)
++                              continue;
++                      count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
++                                         "%s ", governor->name);
++              }
++      }
++
+       mutex_unlock(&devfreq_list_lock);
+       /* Truncate the trailing space */
+--- a/drivers/devfreq/governor_passive.c
++++ b/drivers/devfreq/governor_passive.c
+@@ -179,6 +179,7 @@ static int devfreq_passive_event_handler
+ static struct devfreq_governor devfreq_passive = {
+       .name = "passive",
++      .immutable = 1,
+       .get_target_freq = devfreq_passive_get_target_freq,
+       .event_handler = devfreq_passive_event_handler,
+ };
+--- a/include/linux/devfreq.h
++++ b/include/linux/devfreq.h
+@@ -104,6 +104,8 @@ struct devfreq_dev_profile {
+  * struct devfreq_governor - Devfreq policy governor
+  * @node:             list node - contains registered devfreq governors
+  * @name:             Governor's name
++ * @immutable:                Immutable flag for governor. If the value is 1,
++ *                    this govenror is never changeable to other governor.
+  * @get_target_freq:  Returns desired operating frequency for the device.
+  *                    Basically, get_target_freq will run
+  *                    devfreq_dev_profile.get_dev_status() to get the
+@@ -121,6 +123,7 @@ struct devfreq_governor {
+       struct list_head node;
+       const char name[DEVFREQ_NAME_LEN];
++      const unsigned int immutable;
+       int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+       int (*event_handler)(struct devfreq *devfreq,
+                               unsigned int event, void *data);
diff --git a/queue-4.9/pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch b/queue-4.9/pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch
new file mode 100644 (file)
index 0000000..0493442
--- /dev/null
@@ -0,0 +1,66 @@
+From 30582c25a4b4e0a5e456a309fde79b845e9473b2 Mon Sep 17 00:00:00 2001
+From: Chanwoo Choi <cw00.choi@samsung.com>
+Date: Tue, 31 Jan 2017 15:38:17 +0900
+Subject: PM / devfreq: Fix wrong trans_stat of passive devfreq device
+
+From: Chanwoo Choi <cw00.choi@samsung.com>
+
+commit 30582c25a4b4e0a5e456a309fde79b845e9473b2 upstream.
+
+Until now, the trans_stat information of passive devfreq is not updated.
+This patch updates the trans_stat information after setting the target
+frequency of passive devfreq device.
+
+Fixes: 996133119f57 ("PM / devfreq: Add new passive governor")
+Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
+Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/devfreq/devfreq.c          |    3 ++-
+ drivers/devfreq/governor.h         |    2 ++
+ drivers/devfreq/governor_passive.c |    5 +++++
+ 3 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/devfreq/devfreq.c
++++ b/drivers/devfreq/devfreq.c
+@@ -130,7 +130,7 @@ static void devfreq_set_freq_table(struc
+  * @devfreq:  the devfreq instance
+  * @freq:     the update target frequency
+  */
+-static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
++int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
+ {
+       int lev, prev_lev, ret = 0;
+       unsigned long cur_time;
+@@ -166,6 +166,7 @@ out:
+       devfreq->last_stat_updated = cur_time;
+       return ret;
+ }
++EXPORT_SYMBOL(devfreq_update_status);
+ /**
+  * find_devfreq_governor() - find devfreq governor from name
+--- a/drivers/devfreq/governor.h
++++ b/drivers/devfreq/governor.h
+@@ -38,4 +38,6 @@ extern void devfreq_interval_update(stru
+ extern int devfreq_add_governor(struct devfreq_governor *governor);
+ extern int devfreq_remove_governor(struct devfreq_governor *governor);
++extern int devfreq_update_status(struct devfreq *devfreq, unsigned long freq);
++
+ #endif /* _GOVERNOR_H */
+--- a/drivers/devfreq/governor_passive.c
++++ b/drivers/devfreq/governor_passive.c
+@@ -112,6 +112,11 @@ static int update_devfreq_passive(struct
+       if (ret < 0)
+               goto out;
++      if (devfreq->profile->freq_table
++              && (devfreq_update_status(devfreq, freq)))
++              dev_err(&devfreq->dev,
++                      "Couldn't update frequency transition information.\n");
++
+       devfreq->previous_freq = freq;
+ out:
diff --git a/queue-4.9/power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch b/queue-4.9/power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch
new file mode 100644 (file)
index 0000000..0785b60
--- /dev/null
@@ -0,0 +1,235 @@
+From 0b0408745e7ff24757cbfd571d69026c0ddb803c Mon Sep 17 00:00:00 2001
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Date: Tue, 25 Oct 2016 11:37:59 +0200
+Subject: power: reset: at91-poweroff: timely shutdown LPDDR memories
+
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+
+commit 0b0408745e7ff24757cbfd571d69026c0ddb803c upstream.
+
+LPDDR memories can only handle up to 400 uncontrolled power off. Ensure the
+proper power off sequence is used before shutting down the platform.
+
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Sebastian Reichel <sre@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/power/reset/Kconfig              |    2 -
+ drivers/power/reset/at91-poweroff.c      |   54 ++++++++++++++++++++++++++++++-
+ drivers/power/reset/at91-sama5d2_shdwc.c |   49 +++++++++++++++++++++++++++-
+ 3 files changed, 102 insertions(+), 3 deletions(-)
+
+--- a/drivers/power/reset/Kconfig
++++ b/drivers/power/reset/Kconfig
+@@ -32,7 +32,7 @@ config POWER_RESET_AT91_RESET
+ config POWER_RESET_AT91_SAMA5D2_SHDWC
+       tristate "Atmel AT91 SAMA5D2-Compatible shutdown controller driver"
+-      depends on ARCH_AT91 || COMPILE_TEST
++      depends on ARCH_AT91
+       default SOC_SAMA5
+       help
+         This driver supports the alternate shutdown controller for some Atmel
+--- a/drivers/power/reset/at91-poweroff.c
++++ b/drivers/power/reset/at91-poweroff.c
+@@ -14,9 +14,12 @@
+ #include <linux/io.h>
+ #include <linux/module.h>
+ #include <linux/of.h>
++#include <linux/of_address.h>
+ #include <linux/platform_device.h>
+ #include <linux/printk.h>
++#include <soc/at91/at91sam9_ddrsdr.h>
++
+ #define AT91_SHDW_CR  0x00            /* Shut Down Control Register */
+ #define AT91_SHDW_SHDW                BIT(0)                  /* Shut Down command */
+ #define AT91_SHDW_KEY         (0xa5 << 24)            /* KEY Password */
+@@ -50,6 +53,7 @@ static const char *shdwc_wakeup_modes[]
+ static void __iomem *at91_shdwc_base;
+ static struct clk *sclk;
++static void __iomem *mpddrc_base;
+ static void __init at91_wakeup_status(void)
+ {
+@@ -73,6 +77,29 @@ static void at91_poweroff(void)
+       writel(AT91_SHDW_KEY | AT91_SHDW_SHDW, at91_shdwc_base + AT91_SHDW_CR);
+ }
++static void at91_lpddr_poweroff(void)
++{
++      asm volatile(
++              /* Align to cache lines */
++              ".balign 32\n\t"
++
++              /* Ensure AT91_SHDW_CR is in the TLB by reading it */
++              "       ldr     r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t"
++
++              /* Power down SDRAM0 */
++              "       str     %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t"
++              /* Shutdown CPU */
++              "       str     %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t"
++
++              "       b       .\n\t"
++              :
++              : "r" (mpddrc_base),
++                "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF),
++                "r" (at91_shdwc_base),
++                "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW)
++              : "r6");
++}
++
+ static int at91_poweroff_get_wakeup_mode(struct device_node *np)
+ {
+       const char *pm;
+@@ -124,6 +151,8 @@ static void at91_poweroff_dt_set_wakeup_
+ static int __init at91_poweroff_probe(struct platform_device *pdev)
+ {
+       struct resource *res;
++      struct device_node *np;
++      u32 ddr_type;
+       int ret;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+@@ -150,12 +179,30 @@ static int __init at91_poweroff_probe(st
+       pm_power_off = at91_poweroff;
++      np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc");
++      if (!np)
++              return 0;
++
++      mpddrc_base = of_iomap(np, 0);
++      of_node_put(np);
++
++      if (!mpddrc_base)
++              return 0;
++
++      ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD;
++      if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) ||
++          (ddr_type == AT91_DDRSDRC_MD_LPDDR3))
++              pm_power_off = at91_lpddr_poweroff;
++      else
++              iounmap(mpddrc_base);
++
+       return 0;
+ }
+ static int __exit at91_poweroff_remove(struct platform_device *pdev)
+ {
+-      if (pm_power_off == at91_poweroff)
++      if (pm_power_off == at91_poweroff ||
++          pm_power_off == at91_lpddr_poweroff)
+               pm_power_off = NULL;
+       clk_disable_unprepare(sclk);
+@@ -163,6 +210,11 @@ static int __exit at91_poweroff_remove(s
+       return 0;
+ }
++static const struct of_device_id at91_ramc_of_match[] = {
++      { .compatible = "atmel,sama5d3-ddramc", },
++      { /* sentinel */ }
++};
++
+ static const struct of_device_id at91_poweroff_of_match[] = {
+       { .compatible = "atmel,at91sam9260-shdwc", },
+       { .compatible = "atmel,at91sam9rl-shdwc", },
+--- a/drivers/power/reset/at91-sama5d2_shdwc.c
++++ b/drivers/power/reset/at91-sama5d2_shdwc.c
+@@ -22,9 +22,12 @@
+ #include <linux/io.h>
+ #include <linux/module.h>
+ #include <linux/of.h>
++#include <linux/of_address.h>
+ #include <linux/platform_device.h>
+ #include <linux/printk.h>
++#include <soc/at91/at91sam9_ddrsdr.h>
++
+ #define SLOW_CLOCK_FREQ       32768
+ #define AT91_SHDW_CR  0x00            /* Shut Down Control Register */
+@@ -75,6 +78,7 @@ struct shdwc {
+  */
+ static struct shdwc *at91_shdwc;
+ static struct clk *sclk;
++static void __iomem *mpddrc_base;
+ static const unsigned long long sdwc_dbc_period[] = {
+       0, 3, 32, 512, 4096, 32768,
+@@ -108,6 +112,29 @@ static void at91_poweroff(void)
+              at91_shdwc->at91_shdwc_base + AT91_SHDW_CR);
+ }
++static void at91_lpddr_poweroff(void)
++{
++      asm volatile(
++              /* Align to cache lines */
++              ".balign 32\n\t"
++
++              /* Ensure AT91_SHDW_CR is in the TLB by reading it */
++              "       ldr     r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t"
++
++              /* Power down SDRAM0 */
++              "       str     %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t"
++              /* Shutdown CPU */
++              "       str     %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t"
++
++              "       b       .\n\t"
++              :
++              : "r" (mpddrc_base),
++                "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF),
++                "r" (at91_shdwc->at91_shdwc_base),
++                "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW)
++              : "r6");
++}
++
+ static u32 at91_shdwc_debouncer_value(struct platform_device *pdev,
+                                     u32 in_period_us)
+ {
+@@ -212,6 +239,8 @@ static int __init at91_shdwc_probe(struc
+ {
+       struct resource *res;
+       const struct of_device_id *match;
++      struct device_node *np;
++      u32 ddr_type;
+       int ret;
+       if (!pdev->dev.of_node)
+@@ -249,6 +278,23 @@ static int __init at91_shdwc_probe(struc
+       pm_power_off = at91_poweroff;
++      np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc");
++      if (!np)
++              return 0;
++
++      mpddrc_base = of_iomap(np, 0);
++      of_node_put(np);
++
++      if (!mpddrc_base)
++              return 0;
++
++      ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD;
++      if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) ||
++          (ddr_type == AT91_DDRSDRC_MD_LPDDR3))
++              pm_power_off = at91_lpddr_poweroff;
++      else
++              iounmap(mpddrc_base);
++
+       return 0;
+ }
+@@ -256,7 +302,8 @@ static int __exit at91_shdwc_remove(stru
+ {
+       struct shdwc *shdw = platform_get_drvdata(pdev);
+-      if (pm_power_off == at91_poweroff)
++      if (pm_power_off == at91_poweroff ||
++          pm_power_off == at91_lpddr_poweroff)
+               pm_power_off = NULL;
+       /* Reset values to disable wake-up features  */
diff --git a/queue-4.9/scsi-aacraid-reorder-adapter-status-check.patch b/queue-4.9/scsi-aacraid-reorder-adapter-status-check.patch
new file mode 100644 (file)
index 0000000..89bf340
--- /dev/null
@@ -0,0 +1,72 @@
+From c421530bf848604e97d0785a03b3fe2c62775083 Mon Sep 17 00:00:00 2001
+From: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
+Date: Thu, 16 Feb 2017 12:51:21 -0800
+Subject: scsi: aacraid: Reorder Adapter status check
+
+From: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
+
+commit c421530bf848604e97d0785a03b3fe2c62775083 upstream.
+
+The driver currently checks the SELF_TEST_FAILED first and then
+KERNEL_PANIC next. Under error conditions(boot code failure) both
+SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time.
+
+The driver has the capability to reset the controller on an KERNEL_PANIC,
+but not on SELF_TEST_FAILED.
+
+Fixed by first checking KERNEL_PANIC and then the others.
+
+Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family)
+Signed-off-by: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
+Reviewed-by: David Carroll <David.Carroll@microsemi.com>
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/aacraid/src.c |   21 +++++++++++++++++----
+ 1 file changed, 17 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/aacraid/src.c
++++ b/drivers/scsi/aacraid/src.c
+@@ -414,16 +414,23 @@ static int aac_src_check_health(struct a
+       u32 status = src_readl(dev, MUnit.OMR);
+       /*
++       *      Check to see if the board panic'd.
++       */
++      if (unlikely(status & KERNEL_PANIC))
++              goto err_blink;
++
++      /*
+        *      Check to see if the board failed any self tests.
+        */
+       if (unlikely(status & SELF_TEST_FAILED))
+-              return -1;
++              goto err_out;
+       /*
+-       *      Check to see if the board panic'd.
++       *      Check to see if the monitor panic'd.
+        */
+-      if (unlikely(status & KERNEL_PANIC))
+-              return (status >> 16) & 0xFF;
++      if (unlikely(status & MONITOR_PANIC))
++              goto err_out;
++
+       /*
+        *      Wait for the adapter to be up and running.
+        */
+@@ -433,6 +440,12 @@ static int aac_src_check_health(struct a
+        *      Everything is OK
+        */
+       return 0;
++
++err_out:
++      return -1;
++
++err_blink:
++      return (status >> 16) & 0xFF;
+ }
+ /**
diff --git a/queue-4.9/scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch b/queue-4.9/scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch
new file mode 100644 (file)
index 0000000..6981ffc
--- /dev/null
@@ -0,0 +1,39 @@
+From bba5dc332ec2d3a685cb4dae668c793f6a3713a3 Mon Sep 17 00:00:00 2001
+From: Long Li <longli@microsoft.com>
+Date: Wed, 14 Dec 2016 18:46:02 -0800
+Subject: scsi: storvsc: properly handle SRB_ERROR when sense message is present
+
+From: Long Li <longli@microsoft.com>
+
+commit bba5dc332ec2d3a685cb4dae668c793f6a3713a3 upstream.
+
+When sense message is present on error, we should pass along to the upper
+layer to decide how to deal with the error.
+This patch fixes connectivity issues with Fiber Channel devices.
+
+Signed-off-by: Long Li <longli@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/storvsc_drv.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -891,6 +891,13 @@ static void storvsc_handle_error(struct
+       switch (SRB_STATUS(vm_srb->srb_status)) {
+       case SRB_STATUS_ERROR:
+               /*
++               * Let upper layer deal with error when
++               * sense message is present.
++               */
++
++              if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
++                      break;
++              /*
+                * If there is an error; offline the device since all
+                * error recovery strategies would have already been
+                * deployed on the host side. However, if the command
diff --git a/queue-4.9/scsi-storvsc-properly-set-residual-data-length-on-errors.patch b/queue-4.9/scsi-storvsc-properly-set-residual-data-length-on-errors.patch
new file mode 100644 (file)
index 0000000..9192d7c
--- /dev/null
@@ -0,0 +1,76 @@
+From 40630f462824ee24bc00d692865c86c3828094e0 Mon Sep 17 00:00:00 2001
+From: Long Li <longli@microsoft.com>
+Date: Wed, 14 Dec 2016 18:46:03 -0800
+Subject: scsi: storvsc: properly set residual data length on errors
+
+From: Long Li <longli@microsoft.com>
+
+commit 40630f462824ee24bc00d692865c86c3828094e0 upstream.
+
+On I/O errors, the Windows driver doesn't set data_transfer_length
+on error conditions other than SRB_STATUS_DATA_OVERRUN.
+In these cases we need to set data_transfer_length to 0,
+indicating there is no data transferred. On SRB_STATUS_DATA_OVERRUN,
+data_transfer_length is set by the Windows driver to the actual data transferred.
+
+Reported-by: Shiva Krishna <Shiva.Krishna@nimblestorage.com>
+Signed-off-by: Long Li <longli@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/storvsc_drv.c |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -377,6 +377,7 @@ enum storvsc_request_type {
+ #define SRB_STATUS_SUCCESS    0x01
+ #define SRB_STATUS_ABORTED    0x02
+ #define SRB_STATUS_ERROR      0x04
++#define SRB_STATUS_DATA_OVERRUN       0x12
+ #define SRB_STATUS(status) \
+       (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
+@@ -962,6 +963,7 @@ static void storvsc_command_completion(s
+       struct scsi_cmnd *scmnd = cmd_request->cmd;
+       struct scsi_sense_hdr sense_hdr;
+       struct vmscsi_request *vm_srb;
++      u32 data_transfer_length;
+       struct Scsi_Host *host;
+       u32 payload_sz = cmd_request->payload_sz;
+       void *payload = cmd_request->payload;
+@@ -969,6 +971,7 @@ static void storvsc_command_completion(s
+       host = stor_dev->host;
+       vm_srb = &cmd_request->vstor_packet.vm_srb;
++      data_transfer_length = vm_srb->data_transfer_length;
+       scmnd->result = vm_srb->scsi_status;
+@@ -982,13 +985,20 @@ static void storvsc_command_completion(s
+                                            &sense_hdr);
+       }
+-      if (vm_srb->srb_status != SRB_STATUS_SUCCESS)
++      if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
+               storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc,
+                                        sense_hdr.ascq);
++              /*
++               * The Windows driver set data_transfer_length on
++               * SRB_STATUS_DATA_OVERRUN. On other errors, this value
++               * is untouched.  In these cases we set it to 0.
++               */
++              if (vm_srb->srb_status != SRB_STATUS_DATA_OVERRUN)
++                      data_transfer_length = 0;
++      }
+       scsi_set_resid(scmnd,
+-              cmd_request->payload->range.len -
+-              vm_srb->data_transfer_length);
++              cmd_request->payload->range.len - data_transfer_length);
+       scmnd->scsi_done(scmnd);
diff --git a/queue-4.9/scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch b/queue-4.9/scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch
new file mode 100644 (file)
index 0000000..0a519f8
--- /dev/null
@@ -0,0 +1,47 @@
+From 3cd6d3d9b1abab8dcdf0800224ce26daac24eea2 Mon Sep 17 00:00:00 2001
+From: Long Li <longli@microsoft.com>
+Date: Wed, 14 Dec 2016 18:46:01 -0800
+Subject: scsi: storvsc: use tagged SRB requests if supported by the device
+
+From: Long Li <longli@microsoft.com>
+
+commit 3cd6d3d9b1abab8dcdf0800224ce26daac24eea2 upstream.
+
+Properly set SRB flags when hosting device supports tagged queuing.
+This patch improves the performance on Fiber Channel disks.
+
+Signed-off-by: Long Li <longli@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/storvsc_drv.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -136,6 +136,8 @@ struct hv_fc_wwn_packet {
+ #define SRB_FLAGS_PORT_DRIVER_RESERVED                0x0F000000
+ #define SRB_FLAGS_CLASS_DRIVER_RESERVED               0xF0000000
++#define SP_UNTAGGED                   ((unsigned char) ~0)
++#define SRB_SIMPLE_TAG_REQUEST                0x20
+ /*
+  * Platform neutral description of a scsi request -
+@@ -1451,6 +1453,13 @@ static int storvsc_queuecommand(struct S
+       vm_srb->win8_extension.srb_flags |=
+               SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
++      if (scmnd->device->tagged_supported) {
++              vm_srb->win8_extension.srb_flags |=
++              (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE);
++              vm_srb->win8_extension.queue_tag = SP_UNTAGGED;
++              vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST;
++      }
++
+       /* Build the SRB */
+       switch (scmnd->sc_data_direction) {
+       case DMA_TO_DEVICE:
diff --git a/queue-4.9/scsi-use-scsi_device_from_queue-for-scsi_dh.patch b/queue-4.9/scsi-use-scsi_device_from_queue-for-scsi_dh.patch
new file mode 100644 (file)
index 0000000..c15d172
--- /dev/null
@@ -0,0 +1,126 @@
+From 857de6e00778738dc3d61f75acbac35bdc48e533 Mon Sep 17 00:00:00 2001
+From: Hannes Reinecke <hare@suse.de>
+Date: Fri, 17 Feb 2017 09:02:45 +0100
+Subject: scsi: use 'scsi_device_from_queue()' for scsi_dh
+
+From: Hannes Reinecke <hare@suse.de>
+
+commit 857de6e00778738dc3d61f75acbac35bdc48e533 upstream.
+
+The device handler needs to check if a given queue belongs to a scsi
+device; only then does it make sense to attach a device handler.
+
+[mkp: dropped flags]
+
+Signed-off-by: Hannes Reinecke <hare@suse.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/scsi_dh.c     |   22 ++++------------------
+ drivers/scsi/scsi_lib.c    |   23 +++++++++++++++++++++++
+ include/scsi/scsi_device.h |    1 +
+ 3 files changed, 28 insertions(+), 18 deletions(-)
+
+--- a/drivers/scsi/scsi_dh.c
++++ b/drivers/scsi/scsi_dh.c
+@@ -219,20 +219,6 @@ int scsi_unregister_device_handler(struc
+ }
+ EXPORT_SYMBOL_GPL(scsi_unregister_device_handler);
+-static struct scsi_device *get_sdev_from_queue(struct request_queue *q)
+-{
+-      struct scsi_device *sdev;
+-      unsigned long flags;
+-
+-      spin_lock_irqsave(q->queue_lock, flags);
+-      sdev = q->queuedata;
+-      if (!sdev || !get_device(&sdev->sdev_gendev))
+-              sdev = NULL;
+-      spin_unlock_irqrestore(q->queue_lock, flags);
+-
+-      return sdev;
+-}
+-
+ /*
+  * scsi_dh_activate - activate the path associated with the scsi_device
+  *      corresponding to the given request queue.
+@@ -251,7 +237,7 @@ int scsi_dh_activate(struct request_queu
+       struct scsi_device *sdev;
+       int err = SCSI_DH_NOSYS;
+-      sdev = get_sdev_from_queue(q);
++      sdev = scsi_device_from_queue(q);
+       if (!sdev) {
+               if (fn)
+                       fn(data, err);
+@@ -298,7 +284,7 @@ int scsi_dh_set_params(struct request_qu
+       struct scsi_device *sdev;
+       int err = -SCSI_DH_NOSYS;
+-      sdev = get_sdev_from_queue(q);
++      sdev = scsi_device_from_queue(q);
+       if (!sdev)
+               return err;
+@@ -321,7 +307,7 @@ int scsi_dh_attach(struct request_queue
+       struct scsi_device_handler *scsi_dh;
+       int err = 0;
+-      sdev = get_sdev_from_queue(q);
++      sdev = scsi_device_from_queue(q);
+       if (!sdev)
+               return -ENODEV;
+@@ -359,7 +345,7 @@ const char *scsi_dh_attached_handler_nam
+       struct scsi_device *sdev;
+       const char *handler_name = NULL;
+-      sdev = get_sdev_from_queue(q);
++      sdev = scsi_device_from_queue(q);
+       if (!sdev)
+               return NULL;
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -2127,6 +2127,29 @@ void scsi_mq_destroy_tags(struct Scsi_Ho
+       blk_mq_free_tag_set(&shost->tag_set);
+ }
++/**
++ * scsi_device_from_queue - return sdev associated with a request_queue
++ * @q: The request queue to return the sdev from
++ *
++ * Return the sdev associated with a request queue or NULL if the
++ * request_queue does not reference a SCSI device.
++ */
++struct scsi_device *scsi_device_from_queue(struct request_queue *q)
++{
++      struct scsi_device *sdev = NULL;
++
++      if (q->mq_ops) {
++              if (q->mq_ops == &scsi_mq_ops)
++                      sdev = q->queuedata;
++      } else if (q->request_fn == scsi_request_fn)
++              sdev = q->queuedata;
++      if (!sdev || !get_device(&sdev->sdev_gendev))
++              sdev = NULL;
++
++      return sdev;
++}
++EXPORT_SYMBOL_GPL(scsi_device_from_queue);
++
+ /*
+  * Function:    scsi_block_requests()
+  *
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -315,6 +315,7 @@ extern void scsi_remove_device(struct sc
+ extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh);
+ void scsi_attach_vpd(struct scsi_device *sdev);
++extern struct scsi_device *scsi_device_from_queue(struct request_queue *q);
+ extern int scsi_device_get(struct scsi_device *);
+ extern void scsi_device_put(struct scsi_device *);
+ extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *,
index 368534a235fa619648726bb563dc2fdfd33c1065..b4b6967aeaa4fccdece8eb846a28db45e89ee392 100644 (file)
@@ -39,3 +39,26 @@ tpm_tis-fix-the-error-handling-of-init_tis.patch
 iommu-vt-d-fix-some-macros-that-are-incorrectly-specified-in-intel-iommu.patch
 iommu-vt-d-tylersburg-isoch-identity-map-check-is-done-too-late.patch
 cifs-fix-splice-read-for-non-cached-files.patch
+mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch
+mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch
+mm-vmpressure-fix-sending-wrong-events-on-underflow.patch
+mm-do-not-access-page-mapping-directly-on-page_endio.patch
+mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch
+mm-vmscan-cleanup-lru-size-claculations.patch
+mm-vmscan-consider-eligible-zones-in-get_scan_count.patch
+sigaltstack-support-ss_autodisarm-for-config_compat.patch
+ipc-shm-fix-shmat-mmap-nil-page-protection.patch
+ima-fix-ima_d_path-possible-race-with-rename.patch
+pm-devfreq-fix-available_governor-sysfs.patch
+pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch
+dm-cache-fix-corruption-seen-when-using-cache-2tb.patch
+dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch
+dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch
+dm-raid-fix-data-corruption-on-reshape-request.patch
+scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch
+scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch
+scsi-storvsc-properly-set-residual-data-length-on-errors.patch
+scsi-aacraid-reorder-adapter-status-check.patch
+scsi-use-scsi_device_from_queue-for-scsi_dh.patch
+power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch
+fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch
diff --git a/queue-4.9/sigaltstack-support-ss_autodisarm-for-config_compat.patch b/queue-4.9/sigaltstack-support-ss_autodisarm-for-config_compat.patch
new file mode 100644 (file)
index 0000000..2d2900c
--- /dev/null
@@ -0,0 +1,76 @@
+From 441398d378f29a5ad6d0fcda07918e54e4961800 Mon Sep 17 00:00:00 2001
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 27 Feb 2017 14:27:25 -0800
+Subject: sigaltstack: support SS_AUTODISARM for CONFIG_COMPAT
+
+From: Stas Sergeev <stsp@list.ru>
+
+commit 441398d378f29a5ad6d0fcda07918e54e4961800 upstream.
+
+Currently SS_AUTODISARM is not supported in compatibility mode, but does
+not return -EINVAL either.  This makes dosemu built with -m32 on x86_64
+to crash.  Also the kernel's sigaltstack selftest fails if compiled with
+-m32.
+
+This patch adds the needed support.
+
+Link: http://lkml.kernel.org/r/20170205101213.8163-2-stsp@list.ru
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+Cc: Milosz Tanski <milosz@adfin.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
+Cc: Waiman Long <Waiman.Long@hpe.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
+Cc: Wang Xiaoqiang <wangxq10@lzu.edu.cn>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/compat.h |    4 +++-
+ kernel/signal.c        |   11 +++++++++--
+ 2 files changed, 12 insertions(+), 3 deletions(-)
+
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_
+       compat_stack_t __user *__uss = uss; \
+       struct task_struct *t = current; \
+       put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \
+-      put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
++      put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
+       put_user_ex(t->sas_ss_size, &__uss->ss_size); \
++      if (t->sas_ss_flags & SS_AUTODISARM) \
++              sas_ss_reset(t); \
+ } while (0);
+ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -3226,10 +3226,17 @@ int compat_restore_altstack(const compat
+ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
+ {
++      int err;
+       struct task_struct *t = current;
+-      return  __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
+-              __put_user(sas_ss_flags(sp), &uss->ss_flags) |
++      err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp),
++                       &uss->ss_sp) |
++              __put_user(t->sas_ss_flags, &uss->ss_flags) |
+               __put_user(t->sas_ss_size, &uss->ss_size);
++      if (err)
++              return err;
++      if (t->sas_ss_flags & SS_AUTODISARM)
++              sas_ss_reset(t);
++      return 0;
+ }
+ #endif