From: Greg Kroah-Hartman Date: Wed, 8 Mar 2017 13:13:28 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.4.53~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1c17fe0605e85b37cb5c86f5da6cfb3cd23be74b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: dm-cache-fix-corruption-seen-when-using-cache-2tb.patch dm-raid-fix-data-corruption-on-reshape-request.patch dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch ima-fix-ima_d_path-possible-race-with-rename.patch ipc-shm-fix-shmat-mmap-nil-page-protection.patch mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch mm-do-not-access-page-mapping-directly-on-page_endio.patch mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch mm-vmpressure-fix-sending-wrong-events-on-underflow.patch mm-vmscan-cleanup-lru-size-claculations.patch mm-vmscan-consider-eligible-zones-in-get_scan_count.patch pm-devfreq-fix-available_governor-sysfs.patch pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch scsi-aacraid-reorder-adapter-status-check.patch scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch scsi-storvsc-properly-set-residual-data-length-on-errors.patch scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch scsi-use-scsi_device_from_queue-for-scsi_dh.patch sigaltstack-support-ss_autodisarm-for-config_compat.patch --- diff --git a/queue-4.9/dm-cache-fix-corruption-seen-when-using-cache-2tb.patch b/queue-4.9/dm-cache-fix-corruption-seen-when-using-cache-2tb.patch new file mode 100644 index 00000000000..0fbc42493b2 --- /dev/null +++ b/queue-4.9/dm-cache-fix-corruption-seen-when-using-cache-2tb.patch @@ -0,0 +1,47 @@ +From ca763d0a53b264a650342cee206512bc92ac7050 Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Thu, 9 Feb 2017 11:46:18 -0500 +Subject: dm cache: fix corruption seen when using cache > 2TB + +From: Joe Thornber + +commit ca763d0a53b264a650342cee206512bc92ac7050 upstream. + +A rounding bug due to compiler generated temporary being 32bit was found +in remap_to_cache(). A localized cast in remap_to_cache() fixes the +corruption but this preferred fix (changing from uint32_t to sector_t) +eliminates potential for future rounding errors elsewhere. + +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-cache-target.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/md/dm-cache-target.c ++++ b/drivers/md/dm-cache-target.c +@@ -248,7 +248,7 @@ struct cache { + /* + * Fields for converting from sectors to blocks. 
+ */ +- uint32_t sectors_per_block; ++ sector_t sectors_per_block; + int sectors_per_block_shift; + + spinlock_t lock; +@@ -3546,11 +3546,11 @@ static void cache_status(struct dm_targe + + residency = policy_residency(cache->policy); + +- DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", ++ DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ", + (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, + (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), + (unsigned long long)nr_blocks_metadata, +- cache->sectors_per_block, ++ (unsigned long long)cache->sectors_per_block, + (unsigned long long) from_cblock(residency), + (unsigned long long) from_cblock(cache->cache_size), + (unsigned) atomic_read(&cache->stats.read_hit), diff --git a/queue-4.9/dm-raid-fix-data-corruption-on-reshape-request.patch b/queue-4.9/dm-raid-fix-data-corruption-on-reshape-request.patch new file mode 100644 index 00000000000..8944b0c0086 --- /dev/null +++ b/queue-4.9/dm-raid-fix-data-corruption-on-reshape-request.patch @@ -0,0 +1,70 @@ +From d36a19541fe8f392778ac137d60f9be8dfdd8f9d Mon Sep 17 00:00:00 2001 +From: Heinz Mauelshagen +Date: Tue, 28 Feb 2017 19:17:49 +0100 +Subject: dm raid: fix data corruption on reshape request + +From: Heinz Mauelshagen + +commit d36a19541fe8f392778ac137d60f9be8dfdd8f9d upstream. + +The lvm2 sequence to manage dm-raid constructor flags that trigger a +rebuild or a reshape is defined as: + +1) load table with flags (e.g. rebuild/delta_disks/data_offset) +2) clear out the flags in lvm2 metadata +3) store the lvm2 metadata, reload the table to reset the flags + previously established during the initial load (1) -- in order to + prevent repeatedly requesting a rebuild or a reshape on activation + +Currently, loading an inactive table with rebuild/reshape flags +specified will cause dm-raid to rebuild/reshape on resume and thus start +updating the raid metadata (about the progress). When the second table +reload, to reset the flags, occurs the constructor accesses the volatile +progress state kept in the raid superblocks. Because the active mapping +is still processing the rebuild/reshape, that position will be stale by +the time the device is resumed. + +In the reshape case, this causes data corruption by processing already +reshaped stripes again. In the rebuild case, it does _not_ cause data +corruption but instead involves superfluous rebuilds. + +Fix by keeping the raid set frozen during the first resume and then +allow the rebuild/reshape during the second resume. + +Fixes: 9dbd1aa3a ("dm raid: add reshaping support to the target") +Signed-off-by: Heinz Mauelshagen +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-raid.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -3621,6 +3621,8 @@ static int raid_preresume(struct dm_targ + return r; + } + ++#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) ++ + static void raid_resume(struct dm_target *ti) + { + struct raid_set *rs = ti->private; +@@ -3638,7 +3640,15 @@ static void raid_resume(struct dm_target + mddev->ro = 0; + mddev->in_sync = 0; + +- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); ++ /* ++ * Keep the RAID set frozen if reshape/rebuild flags are set. ++ * The RAID set is unfrozen once the next table load/resume, ++ * which clears the reshape/rebuild flags, occurs. 
++ * This ensures that the constructor for the inactive table ++ * retrieves an up-to-date reshape_position. ++ */ ++ if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) ++ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (mddev->suspended) + mddev_resume(mddev); diff --git a/queue-4.9/dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch b/queue-4.9/dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch new file mode 100644 index 00000000000..e1102706e08 --- /dev/null +++ b/queue-4.9/dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch @@ -0,0 +1,156 @@ +From 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Thu, 16 Feb 2017 23:57:17 -0500 +Subject: dm round robin: revert "use percpu 'repeat_count' and 'current_path'" + +From: Mike Snitzer + +commit 37a098e9d10db6e2efc05fe61e3a6ff2e9802c53 upstream. + +The sloppy nature of lockless access to percpu pointers +(s->current_path) in rr_select_path(), from multiple threads, is +causing some paths to used more than others -- which results in less +IO performance being observed. + +Revert these upstream commits to restore truly symmetric round-robin +IO submission in DM multipath: + +b0b477c dm round robin: use percpu 'repeat_count' and 'current_path' +802934b dm round robin: do not use this_cpu_ptr() without having preemption disabled + +There is no benefit to all this complexity if repeat_count = 1 (which is +the recommended default). + +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-round-robin.c | 67 +++++++++----------------------------------- + 1 file changed, 14 insertions(+), 53 deletions(-) + +--- a/drivers/md/dm-round-robin.c ++++ b/drivers/md/dm-round-robin.c +@@ -17,8 +17,8 @@ + #include + + #define DM_MSG_PREFIX "multipath round-robin" +-#define RR_MIN_IO 1000 +-#define RR_VERSION "1.1.0" ++#define RR_MIN_IO 1 ++#define RR_VERSION "1.2.0" + + /*----------------------------------------------------------------- + * Path-handling code, paths are held in lists +@@ -47,44 +47,19 @@ struct selector { + struct list_head valid_paths; + struct list_head invalid_paths; + spinlock_t lock; +- struct dm_path * __percpu *current_path; +- struct percpu_counter repeat_count; + }; + +-static void set_percpu_current_path(struct selector *s, struct dm_path *path) +-{ +- int cpu; +- +- for_each_possible_cpu(cpu) +- *per_cpu_ptr(s->current_path, cpu) = path; +-} +- + static struct selector *alloc_selector(void) + { + struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); + +- if (!s) +- return NULL; +- +- INIT_LIST_HEAD(&s->valid_paths); +- INIT_LIST_HEAD(&s->invalid_paths); +- spin_lock_init(&s->lock); +- +- s->current_path = alloc_percpu(struct dm_path *); +- if (!s->current_path) +- goto out_current_path; +- set_percpu_current_path(s, NULL); +- +- if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL)) +- goto out_repeat_count; ++ if (s) { ++ INIT_LIST_HEAD(&s->valid_paths); ++ INIT_LIST_HEAD(&s->invalid_paths); ++ spin_lock_init(&s->lock); ++ } + + return s; +- +-out_repeat_count: +- free_percpu(s->current_path); +-out_current_path: +- kfree(s); +- return NULL;; + } + + static int rr_create(struct path_selector *ps, unsigned argc, char **argv) +@@ -105,8 +80,6 @@ static void rr_destroy(struct path_selec + + free_paths(&s->valid_paths); + free_paths(&s->invalid_paths); +- free_percpu(s->current_path); +- percpu_counter_destroy(&s->repeat_count); + kfree(s); + ps->context = NULL; + } +@@ -157,6 +130,11 @@ static int 
rr_add_path(struct path_selec + return -EINVAL; + } + ++ if (repeat_count > 1) { ++ DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); ++ repeat_count = 1; ++ } ++ + /* allocate the path */ + pi = kmalloc(sizeof(*pi), GFP_KERNEL); + if (!pi) { +@@ -183,9 +161,6 @@ static void rr_fail_path(struct path_sel + struct path_info *pi = p->pscontext; + + spin_lock_irqsave(&s->lock, flags); +- if (p == *this_cpu_ptr(s->current_path)) +- set_percpu_current_path(s, NULL); +- + list_move(&pi->list, &s->invalid_paths); + spin_unlock_irqrestore(&s->lock, flags); + } +@@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(st + unsigned long flags; + struct selector *s = ps->context; + struct path_info *pi = NULL; +- struct dm_path *current_path = NULL; + +- local_irq_save(flags); +- current_path = *this_cpu_ptr(s->current_path); +- if (current_path) { +- percpu_counter_dec(&s->repeat_count); +- if (percpu_counter_read_positive(&s->repeat_count) > 0) { +- local_irq_restore(flags); +- return current_path; +- } +- } +- +- spin_lock(&s->lock); ++ spin_lock_irqsave(&s->lock, flags); + if (!list_empty(&s->valid_paths)) { + pi = list_entry(s->valid_paths.next, struct path_info, list); + list_move_tail(&pi->list, &s->valid_paths); +- percpu_counter_set(&s->repeat_count, pi->repeat_count); +- set_percpu_current_path(s, pi->path); +- current_path = pi->path; + } + spin_unlock_irqrestore(&s->lock, flags); + +- return current_path; ++ return pi ? pi->path : NULL; + } + + static struct path_selector_type rr_ps = { diff --git a/queue-4.9/dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch b/queue-4.9/dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch new file mode 100644 index 00000000000..a9c8dcb6704 --- /dev/null +++ b/queue-4.9/dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch @@ -0,0 +1,28 @@ +From 6085831883c25860264721df15f05bbded45e2a2 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Wed, 15 Feb 2017 12:06:19 -0500 +Subject: dm stats: fix a leaked s->histogram_boundaries array + +From: Mikulas Patocka + +commit 6085831883c25860264721df15f05bbded45e2a2 upstream. + +Fixes: dfcfac3e4cd9 ("dm stats: collect and report histogram of IO latencies") +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-stats.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/md/dm-stats.c ++++ b/drivers/md/dm-stats.c +@@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head + int cpu; + struct dm_stat *s = container_of(head, struct dm_stat, rcu_head); + ++ kfree(s->histogram_boundaries); + kfree(s->program_id); + kfree(s->aux_data); + for_each_possible_cpu(cpu) { diff --git a/queue-4.9/fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch b/queue-4.9/fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch new file mode 100644 index 00000000000..b90cf7f1cc0 --- /dev/null +++ b/queue-4.9/fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch @@ -0,0 +1,53 @@ +From 907565337ebf998a68cb5c5b2174ce5e5da065eb Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers +Date: Thu, 3 Nov 2016 10:29:28 -0600 +Subject: Fix: Disable sys_membarrier when nohz_full is enabled + +From: Mathieu Desnoyers + +commit 907565337ebf998a68cb5c5b2174ce5e5da065eb upstream. + +Userspace applications should be allowed to expect the membarrier system +call with MEMBARRIER_CMD_SHARED command to issue memory barriers on +nohz_full CPUs, but synchronize_sched() does not take those into +account. 
+ +Given that we do not want unrelated processes to be able to affect +real-time sensitive nohz_full CPUs, simply return ENOSYS when membarrier +is invoked on a kernel with enabled nohz_full CPUs. + +Signed-off-by: Mathieu Desnoyers +CC: Josh Triplett +CC: Steven Rostedt +Signed-off-by: Paul E. McKenney +Cc: Frederic Weisbecker +Cc: Chris Metcalf +Cc: Rik van Riel +Acked-by: Lai Jiangshan +Reviewed-by: Josh Triplett +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/membarrier.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/membarrier.c ++++ b/kernel/membarrier.c +@@ -16,6 +16,7 @@ + + #include + #include ++#include + + /* + * Bitmask made from a "or" of all commands within enum membarrier_cmd, +@@ -51,6 +52,9 @@ + */ + SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) + { ++ /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ ++ if (tick_nohz_full_enabled()) ++ return -ENOSYS; + if (unlikely(flags)) + return -EINVAL; + switch (cmd) { diff --git a/queue-4.9/ima-fix-ima_d_path-possible-race-with-rename.patch b/queue-4.9/ima-fix-ima_d_path-possible-race-with-rename.patch new file mode 100644 index 00000000000..d30f918cc92 --- /dev/null +++ b/queue-4.9/ima-fix-ima_d_path-possible-race-with-rename.patch @@ -0,0 +1,110 @@ +From bc15ed663e7e53ee4dc3e60f8d09c93a0528c694 Mon Sep 17 00:00:00 2001 +From: Mimi Zohar +Date: Tue, 17 Jan 2017 06:45:41 -0500 +Subject: ima: fix ima_d_path() possible race with rename + +From: Mimi Zohar + +commit bc15ed663e7e53ee4dc3e60f8d09c93a0528c694 upstream. + +On failure to return a pathname from ima_d_path(), a pointer to +dname is returned, which is subsequently used in the IMA measurement +list, the IMA audit records, and other audit logging. Saving the +pointer to dname for later use has the potential to race with rename. + +Intead of returning a pointer to dname on failure, this patch returns +a pointer to a copy of the filename. + +Reported-by: Al Viro +Signed-off-by: Mimi Zohar +Signed-off-by: Greg Kroah-Hartman + +--- + security/integrity/ima/ima.h | 2 +- + security/integrity/ima/ima_api.c | 20 ++++++++++++++++++-- + security/integrity/ima/ima_main.c | 8 +++++--- + 3 files changed, 24 insertions(+), 6 deletions(-) + +--- a/security/integrity/ima/ima.h ++++ b/security/integrity/ima/ima.h +@@ -173,7 +173,7 @@ int ima_store_template(struct ima_templa + struct inode *inode, + const unsigned char *filename, int pcr); + void ima_free_template_entry(struct ima_template_entry *entry); +-const char *ima_d_path(const struct path *path, char **pathbuf); ++const char *ima_d_path(const struct path *path, char **pathbuf, char *filename); + + /* IMA policy related functions */ + int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask, +--- a/security/integrity/ima/ima_api.c ++++ b/security/integrity/ima/ima_api.c +@@ -318,7 +318,17 @@ void ima_audit_measurement(struct integr + iint->flags |= IMA_AUDITED; + } + +-const char *ima_d_path(const struct path *path, char **pathbuf) ++/* ++ * ima_d_path - return a pointer to the full pathname ++ * ++ * Attempt to return a pointer to the full pathname for use in the ++ * IMA measurement list, IMA audit records, and auditing logs. ++ * ++ * On failure, return a pointer to a copy of the filename, not dname. ++ * Returning a pointer to dname, could result in using the pointer ++ * after the memory has been freed. 
++ */ ++const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) + { + char *pathname = NULL; + +@@ -331,5 +341,11 @@ const char *ima_d_path(const struct path + pathname = NULL; + } + } +- return pathname ?: (const char *)path->dentry->d_name.name; ++ ++ if (!pathname) { ++ strlcpy(namebuf, path->dentry->d_name.name, NAME_MAX); ++ pathname = namebuf; ++ } ++ ++ return pathname; + } +--- a/security/integrity/ima/ima_main.c ++++ b/security/integrity/ima/ima_main.c +@@ -83,6 +83,7 @@ static void ima_rdwr_violation_check(str + const char **pathname) + { + struct inode *inode = file_inode(file); ++ char filename[NAME_MAX]; + fmode_t mode = file->f_mode; + bool send_tomtou = false, send_writers = false; + +@@ -102,7 +103,7 @@ static void ima_rdwr_violation_check(str + if (!send_tomtou && !send_writers) + return; + +- *pathname = ima_d_path(&file->f_path, pathbuf); ++ *pathname = ima_d_path(&file->f_path, pathbuf, filename); + + if (send_tomtou) + ima_add_violation(file, *pathname, iint, +@@ -161,6 +162,7 @@ static int process_measurement(struct fi + struct integrity_iint_cache *iint = NULL; + struct ima_template_desc *template_desc; + char *pathbuf = NULL; ++ char filename[NAME_MAX]; + const char *pathname = NULL; + int rc = -ENOMEM, action, must_appraise; + int pcr = CONFIG_IMA_MEASURE_PCR_IDX; +@@ -239,8 +241,8 @@ static int process_measurement(struct fi + goto out_digsig; + } + +- if (!pathname) /* ima_rdwr_violation possibly pre-fetched */ +- pathname = ima_d_path(&file->f_path, &pathbuf); ++ if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ ++ pathname = ima_d_path(&file->f_path, &pathbuf, filename); + + if (action & IMA_MEASURE) + ima_store_measurement(iint, file, pathname, diff --git a/queue-4.9/ipc-shm-fix-shmat-mmap-nil-page-protection.patch b/queue-4.9/ipc-shm-fix-shmat-mmap-nil-page-protection.patch new file mode 100644 index 00000000000..c7e31769b6e --- /dev/null +++ b/queue-4.9/ipc-shm-fix-shmat-mmap-nil-page-protection.patch @@ -0,0 +1,75 @@ +From 95e91b831f87ac8e1f8ed50c14d709089b4e01b8 Mon Sep 17 00:00:00 2001 +From: Davidlohr Bueso +Date: Mon, 27 Feb 2017 14:28:24 -0800 +Subject: ipc/shm: Fix shmat mmap nil-page protection + +From: Davidlohr Bueso + +commit 95e91b831f87ac8e1f8ed50c14d709089b4e01b8 upstream. + +The issue is described here, with a nice testcase: + + https://bugzilla.kernel.org/show_bug.cgi?id=192931 + +The problem is that shmat() calls do_mmap_pgoff() with MAP_FIXED, and +the address rounded down to 0. For the regular mmap case, the +protection mentioned above is that the kernel gets to generate the +address -- arch_get_unmapped_area() will always check for MAP_FIXED and +return that address. So by the time we do security_mmap_addr(0) things +get funky for shmat(). + +The testcase itself shows that while a regular user crashes, root will +not have a problem attaching a nil-page. There are two possible fixes +to this. The first, and which this patch does, is to simply allow root +to crash as well -- this is also regular mmap behavior, ie when hacking +up the testcase and adding mmap(... |MAP_FIXED). While this approach +is the safer option, the second alternative is to ignore SHM_RND if the +rounded address is 0, thus only having MAP_SHARED flags. This makes the +behavior of shmat() identical to the mmap() case. The downside of this +is obviously user visible, but does make sense in that it maintains +semantics after the round-down wrt 0 address and mmap. + +Passes shm related ltp tests. 
+ +Link: http://lkml.kernel.org/r/1486050195-18629-1-git-send-email-dave@stgolabs.net +Signed-off-by: Davidlohr Bueso +Reported-by: Gareth Evans +Cc: Manfred Spraul +Cc: Michael Kerrisk +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/shm.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/ipc/shm.c ++++ b/ipc/shm.c +@@ -1085,8 +1085,8 @@ out_unlock1: + * "raddr" thing points to kernel space, and there has to be a wrapper around + * this. + */ +-long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, +- unsigned long shmlba) ++long do_shmat(int shmid, char __user *shmaddr, int shmflg, ++ ulong *raddr, unsigned long shmlba) + { + struct shmid_kernel *shp; + unsigned long addr; +@@ -1107,8 +1107,13 @@ long do_shmat(int shmid, char __user *sh + goto out; + else if ((addr = (ulong)shmaddr)) { + if (addr & (shmlba - 1)) { +- if (shmflg & SHM_RND) +- addr &= ~(shmlba - 1); /* round down */ ++ /* ++ * Round down to the nearest multiple of shmlba. ++ * For sane do_mmap_pgoff() parameters, avoid ++ * round downs that trigger nil-page and MAP_FIXED. ++ */ ++ if ((shmflg & SHM_RND) && addr >= shmlba) ++ addr &= ~(shmlba - 1); + else + #ifndef __ARCH_FORCE_SHMLBA + if (addr & ~PAGE_MASK) diff --git a/queue-4.9/mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch b/queue-4.9/mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch new file mode 100644 index 00000000000..148001cee62 --- /dev/null +++ b/queue-4.9/mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch @@ -0,0 +1,49 @@ +From 9c57b5808c625f4fc93da330b932647eaff321f7 Mon Sep 17 00:00:00 2001 +From: Yisheng Xie +Date: Fri, 24 Feb 2017 15:00:40 -0800 +Subject: mm balloon: umount balloon_mnt when removing vb device + +From: Yisheng Xie + +commit 9c57b5808c625f4fc93da330b932647eaff321f7 upstream. + +With CONFIG_BALLOON_COMPACTION=y the kernel will mount balloon_mnt for +balloon page migration when we probe a virtio_balloon device. However +we do not unmount it when removing the device. Fix this. + +Fixes: b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature") +Link: http://lkml.kernel.org/r/1486531318-35189-1-git-send-email-xieyisheng1@huawei.com +Signed-off-by: Yisheng Xie +Acked-by: Minchan Kim +Cc: Rafael Aquini +Cc: Konstantin Khlebnikov +Cc: Gioh Kim +Cc: Vlastimil Babka +Cc: Michal Hocko +Cc: Michael S. 
Tsirkin +Cc: Jason Wang +Cc: Hanjun Guo +Cc: Xishi Qiu +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/virtio/virtio_balloon.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/virtio/virtio_balloon.c ++++ b/drivers/virtio/virtio_balloon.c +@@ -615,8 +615,12 @@ static void virtballoon_remove(struct vi + cancel_work_sync(&vb->update_balloon_stats_work); + + remove_common(vb); ++#ifdef CONFIG_BALLOON_COMPACTION + if (vb->vb_dev_info.inode) + iput(vb->vb_dev_info.inode); ++ ++ kern_unmount(balloon_mnt); ++#endif + kfree(vb); + } + diff --git a/queue-4.9/mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch b/queue-4.9/mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch new file mode 100644 index 00000000000..43f5552ccc6 --- /dev/null +++ b/queue-4.9/mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch @@ -0,0 +1,63 @@ +From b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 24 Feb 2017 14:55:45 -0800 +Subject: mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done} + +From: Dan Williams + +commit b5d24fda9c3dce51fcb4eee459550a458eaaf1e2 upstream. + +The mem_hotplug_{begin,done} lock coordinates with {get,put}_online_mems() +to hold off "readers" of the current state of memory from new hotplug +actions. mem_hotplug_begin() expects exclusive access, via the +device_hotplug lock, to set mem_hotplug.active_writer. Calling +mem_hotplug_begin() without locking device_hotplug can lead to +corrupting mem_hotplug.refcount and missed wakeups / soft lockups. + +[dan.j.williams@intel.com: v2] + Link: http://lkml.kernel.org/r/148728203365.38457.17804568297887708345.stgit@dwillia2-desk3.amr.corp.intel.com +Link: http://lkml.kernel.org/r/148693885680.16345.17802627926777862337.stgit@dwillia2-desk3.amr.corp.intel.com +Fixes: f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}") +Signed-off-by: Dan Williams +Reported-by: Ben Hutchings +Cc: Michal Hocko +Cc: Toshi Kani +Cc: Vlastimil Babka +Cc: Logan Gunthorpe +Cc: Masayoshi Mizuma +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/memremap.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/kernel/memremap.c ++++ b/kernel/memremap.c +@@ -246,9 +246,13 @@ static void devm_memremap_pages_release( + /* pages are dead and unused, undo the arch mapping */ + align_start = res->start & ~(SECTION_SIZE - 1); + align_size = ALIGN(resource_size(res), SECTION_SIZE); ++ ++ lock_device_hotplug(); + mem_hotplug_begin(); + arch_remove_memory(align_start, align_size); + mem_hotplug_done(); ++ unlock_device_hotplug(); ++ + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + pgmap_radix_release(res); + dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, +@@ -360,9 +364,11 @@ void *devm_memremap_pages(struct device + if (error) + goto err_pfn_remap; + ++ lock_device_hotplug(); + mem_hotplug_begin(); + error = arch_add_memory(nid, align_start, align_size, true); + mem_hotplug_done(); ++ unlock_device_hotplug(); + if (error) + goto err_add_memory; + diff --git a/queue-4.9/mm-do-not-access-page-mapping-directly-on-page_endio.patch b/queue-4.9/mm-do-not-access-page-mapping-directly-on-page_endio.patch new file mode 100644 index 00000000000..4fdd1991178 --- /dev/null +++ b/queue-4.9/mm-do-not-access-page-mapping-directly-on-page_endio.patch @@ -0,0 
+1,57 @@ +From dd8416c47715cf324c9a16f13273f9fda87acfed Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Fri, 24 Feb 2017 14:59:59 -0800 +Subject: mm: do not access page->mapping directly on page_endio + +From: Minchan Kim + +commit dd8416c47715cf324c9a16f13273f9fda87acfed upstream. + +With rw_page, page_endio is used for completing IO on a page and it +propagates write error to the address space if the IO fails. The +problem is it accesses page->mapping directly which might be okay for +file-backed pages but it shouldn't for anonymous page. Otherwise, it +can corrupt one of field from anon_vma under us and system goes panic +randomly. + +swap_writepage + bdev_writepage + ops->rw_page + +I encountered the BUG during developing new zram feature and it was +really hard to figure it out because it made random crash, somtime +mmap_sem lockdep, sometime other places where places never related to +zram/zsmalloc, and not reproducible with some configuration. + +When I consider how that bug is subtle and people do fast-swap test with +brd, it's worth to add stable mark, I think. + +Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") +Signed-off-by: Minchan Kim +Acked-by: Michal Hocko +Cc: Matthew Wilcox +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/filemap.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -910,9 +910,12 @@ void page_endio(struct page *page, bool + unlock_page(page); + } else { + if (err) { ++ struct address_space *mapping; ++ + SetPageError(page); +- if (page->mapping) +- mapping_set_error(page->mapping, err); ++ mapping = page_mapping(page); ++ if (mapping) ++ mapping_set_error(mapping, err); + } + end_page_writeback(page); + } diff --git a/queue-4.9/mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch b/queue-4.9/mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch new file mode 100644 index 00000000000..8685b7f0aba --- /dev/null +++ b/queue-4.9/mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch @@ -0,0 +1,103 @@ +From e02dc017c3032dcdce1b993af0db135462e1b4b7 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Fri, 24 Feb 2017 14:59:33 -0800 +Subject: mm/page_alloc: fix nodes for reclaim in fast path + +From: Gavin Shan + +commit e02dc017c3032dcdce1b993af0db135462e1b4b7 upstream. + +When @node_reclaim_node isn't 0, the page allocator tries to reclaim +pages if the amount of free memory in the zones are below the low +watermark. On Power platform, none of NUMA nodes are scanned for page +reclaim because no nodes match the condition in zone_allows_reclaim(). +On Power platform, RECLAIM_DISTANCE is set to 10 which is the distance +of Node-A to Node-A. So the preferred node even won't be scanned for +page reclaim. + + __alloc_pages_nodemask() + get_page_from_freelist() + zone_allows_reclaim() + +Anton proposed the test code as below: + + # cat alloc.c + : + int main(int argc, char *argv[]) + { + void *p; + unsigned long size; + unsigned long start, end; + + start = time(NULL); + size = strtoul(argv[1], NULL, 0); + printf("To allocate %ldGB memory\n", size); + + size <<= 30; + p = malloc(size); + assert(p); + memset(p, 0, size); + + end = time(NULL); + printf("Used time: %ld seconds\n", end - start); + sleep(3600); + return 0; + } + +The system I use for testing has two NUMA nodes. Both have 128GB +memory. In below scnario, the page caches on node#0 should be reclaimed +when it encounters pressure to accommodate request of allocation. 
+ + # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ + sync; \ + echo 3 > /proc/sys/vm/drop_caches; \ + # taskset -c 0 cat file.32G > /dev/null; \ + grep FilePages /sys/devices/system/node/node0/meminfo + Node 0 FilePages: 33619712 kB + # taskset -c 0 ./alloc 128 + # grep FilePages /sys/devices/system/node/node0/meminfo + Node 0 FilePages: 33619840 kB + # grep MemFree /sys/devices/system/node/node0/meminfo + Node 0 MemFree: 186816 kB + +With the patch applied, the pagecache on node-0 is reclaimed when its +free memory is running out. It's the expected behaviour. + + # echo 2 > /proc/sys/vm/zone_reclaim_mode; \ + sync; \ + echo 3 > /proc/sys/vm/drop_caches + # taskset -c 0 cat file.32G > /dev/null; \ + grep FilePages /sys/devices/system/node/node0/meminfo + Node 0 FilePages: 33605568 kB + # taskset -c 0 ./alloc 128 + # grep FilePages /sys/devices/system/node/node0/meminfo + Node 0 FilePages: 1379520 kB + # grep MemFree /sys/devices/system/node/node0/meminfo + Node 0 MemFree: 317120 kB + +Fixes: 5f7a75acdb24 ("mm: page_alloc: do not cache reclaim distances") +Link: http://lkml.kernel.org/r/1486532455-29613-1-git-send-email-gwshan@linux.vnet.ibm.com +Signed-off-by: Gavin Shan +Acked-by: Mel Gorman +Acked-by: Michal Hocko +Cc: Anton Blanchard +Cc: Michael Ellerman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2858,7 +2858,7 @@ bool zone_watermark_ok_safe(struct zone + #ifdef CONFIG_NUMA + static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) + { +- return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < ++ return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= + RECLAIM_DISTANCE; + } + #else /* CONFIG_NUMA */ diff --git a/queue-4.9/mm-vmpressure-fix-sending-wrong-events-on-underflow.patch b/queue-4.9/mm-vmpressure-fix-sending-wrong-events-on-underflow.patch new file mode 100644 index 00000000000..217396ea78c --- /dev/null +++ b/queue-4.9/mm-vmpressure-fix-sending-wrong-events-on-underflow.patch @@ -0,0 +1,65 @@ +From e1587a4945408faa58d0485002c110eb2454740c Mon Sep 17 00:00:00 2001 +From: Vinayak Menon +Date: Fri, 24 Feb 2017 14:59:39 -0800 +Subject: mm: vmpressure: fix sending wrong events on underflow + +From: Vinayak Menon + +commit e1587a4945408faa58d0485002c110eb2454740c upstream. + +At the end of a window period, if the reclaimed pages is greater than +scanned, an unsigned underflow can result in a huge pressure value and +thus a critical event. Reclaimed pages is found to go higher than +scanned because of the addition of reclaimed slab pages to reclaimed in +shrink_node without a corresponding increment to scanned pages. + +Minchan Kim mentioned that this can also happen in the case of a THP +page where the scanned is 1 and reclaimed could be 512. 
+ +Link: http://lkml.kernel.org/r/1486641577-11685-1-git-send-email-vinmenon@codeaurora.org +Signed-off-by: Vinayak Menon +Acked-by: Minchan Kim +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: Mel Gorman +Cc: Vlastimil Babka +Cc: Rik van Riel +Cc: Vladimir Davydov +Cc: Anton Vorontsov +Cc: Shiraz Hashim +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmpressure.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/mm/vmpressure.c ++++ b/mm/vmpressure.c +@@ -112,9 +112,16 @@ static enum vmpressure_levels vmpressure + unsigned long reclaimed) + { + unsigned long scale = scanned + reclaimed; +- unsigned long pressure; ++ unsigned long pressure = 0; + + /* ++ * reclaimed can be greater than scanned in cases ++ * like THP, where the scanned is 1 and reclaimed ++ * could be 512 ++ */ ++ if (reclaimed >= scanned) ++ goto out; ++ /* + * We calculate the ratio (in percents) of how many pages were + * scanned vs. reclaimed in a given time frame (window). Note that + * time is in VM reclaimer's "ticks", i.e. number of pages +@@ -124,6 +131,7 @@ static enum vmpressure_levels vmpressure + pressure = scale - (reclaimed * scale / scanned); + pressure = pressure * 100 / scale; + ++out: + pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, + scanned, reclaimed); + diff --git a/queue-4.9/mm-vmscan-cleanup-lru-size-claculations.patch b/queue-4.9/mm-vmscan-cleanup-lru-size-claculations.patch new file mode 100644 index 00000000000..8f543045dec --- /dev/null +++ b/queue-4.9/mm-vmscan-cleanup-lru-size-claculations.patch @@ -0,0 +1,185 @@ +From fd538803731e50367b7c59ce4ad3454426a3d671 Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Wed, 22 Feb 2017 15:45:58 -0800 +Subject: mm, vmscan: cleanup lru size claculations + +From: Michal Hocko + +commit fd538803731e50367b7c59ce4ad3454426a3d671 upstream. + +lruvec_lru_size returns the full size of the LRU list while we sometimes +need a value reduced only to eligible zones (e.g. for lowmem requests). +inactive_list_is_low is one such user. Later patches will add more of +them. Add a new parameter to lruvec_lru_size and allow it filter out +zones which are not eligible for the given context. + +Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org +Signed-off-by: Michal Hocko +Acked-by: Johannes Weiner +Acked-by: Hillf Danton +Acked-by: Minchan Kim +Acked-by: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + + +--- + include/linux/mmzone.h | 2 - + mm/vmscan.c | 81 +++++++++++++++++++++++-------------------------- + mm/workingset.c | 2 - + 3 files changed, 41 insertions(+), 44 deletions(-) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec + #endif + } + +-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); ++extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); + + #ifdef CONFIG_HAVE_MEMORY_PRESENT + void memory_present(int nid, unsigned long start, unsigned long end); +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_dat + pgdat_reclaimable_pages(pgdat) * 6; + } + +-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) ++/** ++ * lruvec_lru_size - Returns the number of pages on the given LRU list. 
++ * @lruvec: lru vector ++ * @lru: lru to use ++ * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list) ++ */ ++unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) + { ++ unsigned long lru_size; ++ int zid; ++ + if (!mem_cgroup_disabled()) +- return mem_cgroup_get_lru_size(lruvec, lru); ++ lru_size = mem_cgroup_get_lru_size(lruvec, lru); ++ else ++ lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); + +- return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); +-} ++ for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) { ++ struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; ++ unsigned long size; + +-unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, +- int zone_idx) +-{ +- if (!mem_cgroup_disabled()) +- return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx); ++ if (!managed_zone(zone)) ++ continue; ++ ++ if (!mem_cgroup_disabled()) ++ size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid); ++ else ++ size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid], ++ NR_ZONE_LRU_BASE + lru); ++ lru_size -= min(size, lru_size); ++ } ++ ++ return lru_size; + +- return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx], +- NR_ZONE_LRU_BASE + lru); + } + + /* +@@ -2028,11 +2045,10 @@ static bool inactive_list_is_low(struct + struct scan_control *sc) + { + unsigned long inactive_ratio; +- unsigned long inactive; +- unsigned long active; ++ unsigned long inactive, active; ++ enum lru_list inactive_lru = file * LRU_FILE; ++ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; + unsigned long gb; +- struct pglist_data *pgdat = lruvec_pgdat(lruvec); +- int zid; + + /* + * If we don't have swap space, anonymous page deactivation +@@ -2041,27 +2057,8 @@ static bool inactive_list_is_low(struct + if (!file && !total_swap_pages) + return false; + +- inactive = lruvec_lru_size(lruvec, file * LRU_FILE); +- active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE); +- +- /* +- * For zone-constrained allocations, it is necessary to check if +- * deactivations are required for lowmem to be reclaimed. This +- * calculates the inactive/active pages available in eligible zones. +- */ +- for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) { +- struct zone *zone = &pgdat->node_zones[zid]; +- unsigned long inactive_zone, active_zone; +- +- if (!managed_zone(zone)) +- continue; +- +- inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid); +- active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid); +- +- inactive -= min(inactive, inactive_zone); +- active -= min(active, active_zone); +- } ++ inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); ++ active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); + + gb = (inactive + active) >> (30 - PAGE_SHIFT); + if (gb) +@@ -2208,7 +2205,7 @@ static void get_scan_count(struct lruvec + * system is under heavy pressure. 
+ */ + if (!inactive_list_is_low(lruvec, true, sc) && +- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { ++ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { + scan_balance = SCAN_FILE; + goto out; + } +@@ -2234,10 +2231,10 @@ static void get_scan_count(struct lruvec + * anon in [0], file in [1] + */ + +- anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + +- lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); +- file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + +- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); ++ anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + ++ lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); ++ file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) + ++ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); + + spin_lock_irq(&pgdat->lru_lock); + if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { +@@ -2275,7 +2272,7 @@ out: + unsigned long size; + unsigned long scan; + +- size = lruvec_lru_size(lruvec, lru); ++ size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); + scan = size >> sc->priority; + + if (!scan && pass && force_scan) +--- a/mm/workingset.c ++++ b/mm/workingset.c +@@ -266,7 +266,7 @@ bool workingset_refault(void *shadow) + } + lruvec = mem_cgroup_lruvec(pgdat, memcg); + refault = atomic_long_read(&lruvec->inactive_age); +- active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); ++ active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); + rcu_read_unlock(); + + /* diff --git a/queue-4.9/mm-vmscan-consider-eligible-zones-in-get_scan_count.patch b/queue-4.9/mm-vmscan-consider-eligible-zones-in-get_scan_count.patch new file mode 100644 index 00000000000..32d8063f6c4 --- /dev/null +++ b/queue-4.9/mm-vmscan-consider-eligible-zones-in-get_scan_count.patch @@ -0,0 +1,75 @@ +From 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Wed, 22 Feb 2017 15:46:01 -0800 +Subject: mm, vmscan: consider eligible zones in get_scan_count + +From: Michal Hocko + +commit 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 upstream. + +get_scan_count() considers the whole node LRU size when + + - doing SCAN_FILE due to many page cache inactive pages + - calculating the number of pages to scan + +In both cases this might lead to unexpected behavior especially on 32b +systems where we can expect lowmem memory pressure very often. + +A large highmem zone can easily distort SCAN_FILE heuristic because +there might be only few file pages from the eligible zones on the node +lru and we would still enforce file lru scanning which can lead to +trashing while we could still scan anonymous pages. + +The later use of lruvec_lru_size can be problematic as well. Especially +when there are not many pages from the eligible zones. We would have to +skip over many pages to find anything to reclaim but shrink_node_memcg +would only reduce the remaining number to scan by SWAP_CLUSTER_MAX at +maximum. Therefore we can end up going over a large LRU many times +without actually having chance to reclaim much if anything at all. The +closer we are out of memory on lowmem zone the worse the problem will +be. + +Fix this by filtering out all the ineligible zones when calculating the +lru size for both paths and consider only sc->reclaim_idx zones. + +The patch would need to be tweaked a bit to apply to 4.10 and older but +I will do that as soon as it hits the Linus tree in the next merge +window. 
+ +Link: http://lkml.kernel.org/r/20170117103702.28542-3-mhocko@kernel.org +Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis") +Signed-off-by: Michal Hocko +Tested-by: Trevor Cordes +Acked-by: Minchan Kim +Acked-by: Hillf Danton +Acked-by: Mel Gorman +Acked-by: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + + +--- + mm/vmscan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2205,7 +2205,7 @@ static void get_scan_count(struct lruvec + * system is under heavy pressure. + */ + if (!inactive_list_is_low(lruvec, true, sc) && +- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) { ++ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { + scan_balance = SCAN_FILE; + goto out; + } +@@ -2272,7 +2272,7 @@ out: + unsigned long size; + unsigned long scan; + +- size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES); ++ size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); + scan = size >> sc->priority; + + if (!scan && pass && force_scan) diff --git a/queue-4.9/pm-devfreq-fix-available_governor-sysfs.patch b/queue-4.9/pm-devfreq-fix-available_governor-sysfs.patch new file mode 100644 index 00000000000..7018927838c --- /dev/null +++ b/queue-4.9/pm-devfreq-fix-available_governor-sysfs.patch @@ -0,0 +1,104 @@ +From bcf23c79c4e46130701370af4383b61a3cba755c Mon Sep 17 00:00:00 2001 +From: Chanwoo Choi +Date: Tue, 31 Jan 2017 15:38:16 +0900 +Subject: PM / devfreq: Fix available_governor sysfs + +From: Chanwoo Choi + +commit bcf23c79c4e46130701370af4383b61a3cba755c upstream. + +The devfreq using passive governor is not able to change the governor. +So, the user can not change the governor through 'available_governor' sysfs +entry. Also, the devfreq which don't use the passive governor is not able to +change to 'passive' governor on the fly. + +Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") +Signed-off-by: Chanwoo Choi +Signed-off-by: MyungJoo Ham +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/devfreq/devfreq.c | 31 +++++++++++++++++++++++++++---- + drivers/devfreq/governor_passive.c | 1 + + include/linux/devfreq.h | 3 +++ + 3 files changed, 31 insertions(+), 4 deletions(-) + +--- a/drivers/devfreq/devfreq.c ++++ b/drivers/devfreq/devfreq.c +@@ -939,6 +939,9 @@ static ssize_t governor_store(struct dev + if (df->governor == governor) { + ret = 0; + goto out; ++ } else if (df->governor->immutable || governor->immutable) { ++ ret = -EINVAL; ++ goto out; + } + + if (df->governor) { +@@ -968,13 +971,33 @@ static ssize_t available_governors_show( + struct device_attribute *attr, + char *buf) + { +- struct devfreq_governor *tmp_governor; ++ struct devfreq *df = to_devfreq(d); + ssize_t count = 0; + + mutex_lock(&devfreq_list_lock); +- list_for_each_entry(tmp_governor, &devfreq_governor_list, node) +- count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), +- "%s ", tmp_governor->name); ++ ++ /* ++ * The devfreq with immutable governor (e.g., passive) shows ++ * only own governor. ++ */ ++ if (df->governor->immutable) { ++ count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, ++ "%s ", df->governor_name); ++ /* ++ * The devfreq device shows the registered governor except for ++ * immutable governors such as passive governor . 
++ */ ++ } else { ++ struct devfreq_governor *governor; ++ ++ list_for_each_entry(governor, &devfreq_governor_list, node) { ++ if (governor->immutable) ++ continue; ++ count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), ++ "%s ", governor->name); ++ } ++ } ++ + mutex_unlock(&devfreq_list_lock); + + /* Truncate the trailing space */ +--- a/drivers/devfreq/governor_passive.c ++++ b/drivers/devfreq/governor_passive.c +@@ -179,6 +179,7 @@ static int devfreq_passive_event_handler + + static struct devfreq_governor devfreq_passive = { + .name = "passive", ++ .immutable = 1, + .get_target_freq = devfreq_passive_get_target_freq, + .event_handler = devfreq_passive_event_handler, + }; +--- a/include/linux/devfreq.h ++++ b/include/linux/devfreq.h +@@ -104,6 +104,8 @@ struct devfreq_dev_profile { + * struct devfreq_governor - Devfreq policy governor + * @node: list node - contains registered devfreq governors + * @name: Governor's name ++ * @immutable: Immutable flag for governor. If the value is 1, ++ * this govenror is never changeable to other governor. + * @get_target_freq: Returns desired operating frequency for the device. + * Basically, get_target_freq will run + * devfreq_dev_profile.get_dev_status() to get the +@@ -121,6 +123,7 @@ struct devfreq_governor { + struct list_head node; + + const char name[DEVFREQ_NAME_LEN]; ++ const unsigned int immutable; + int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + int (*event_handler)(struct devfreq *devfreq, + unsigned int event, void *data); diff --git a/queue-4.9/pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch b/queue-4.9/pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch new file mode 100644 index 00000000000..04934425cea --- /dev/null +++ b/queue-4.9/pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch @@ -0,0 +1,66 @@ +From 30582c25a4b4e0a5e456a309fde79b845e9473b2 Mon Sep 17 00:00:00 2001 +From: Chanwoo Choi +Date: Tue, 31 Jan 2017 15:38:17 +0900 +Subject: PM / devfreq: Fix wrong trans_stat of passive devfreq device + +From: Chanwoo Choi + +commit 30582c25a4b4e0a5e456a309fde79b845e9473b2 upstream. + +Until now, the trans_stat information of passive devfreq is not updated. +This patch updates the trans_stat information after setting the target +frequency of passive devfreq device. 
+ +Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") +Signed-off-by: Chanwoo Choi +Signed-off-by: MyungJoo Ham +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/devfreq/devfreq.c | 3 ++- + drivers/devfreq/governor.h | 2 ++ + drivers/devfreq/governor_passive.c | 5 +++++ + 3 files changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/devfreq/devfreq.c ++++ b/drivers/devfreq/devfreq.c +@@ -130,7 +130,7 @@ static void devfreq_set_freq_table(struc + * @devfreq: the devfreq instance + * @freq: the update target frequency + */ +-static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) ++int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) + { + int lev, prev_lev, ret = 0; + unsigned long cur_time; +@@ -166,6 +166,7 @@ out: + devfreq->last_stat_updated = cur_time; + return ret; + } ++EXPORT_SYMBOL(devfreq_update_status); + + /** + * find_devfreq_governor() - find devfreq governor from name +--- a/drivers/devfreq/governor.h ++++ b/drivers/devfreq/governor.h +@@ -38,4 +38,6 @@ extern void devfreq_interval_update(stru + extern int devfreq_add_governor(struct devfreq_governor *governor); + extern int devfreq_remove_governor(struct devfreq_governor *governor); + ++extern int devfreq_update_status(struct devfreq *devfreq, unsigned long freq); ++ + #endif /* _GOVERNOR_H */ +--- a/drivers/devfreq/governor_passive.c ++++ b/drivers/devfreq/governor_passive.c +@@ -112,6 +112,11 @@ static int update_devfreq_passive(struct + if (ret < 0) + goto out; + ++ if (devfreq->profile->freq_table ++ && (devfreq_update_status(devfreq, freq))) ++ dev_err(&devfreq->dev, ++ "Couldn't update frequency transition information.\n"); ++ + devfreq->previous_freq = freq; + + out: diff --git a/queue-4.9/power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch b/queue-4.9/power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch new file mode 100644 index 00000000000..0785b60671e --- /dev/null +++ b/queue-4.9/power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch @@ -0,0 +1,235 @@ +From 0b0408745e7ff24757cbfd571d69026c0ddb803c Mon Sep 17 00:00:00 2001 +From: Alexandre Belloni +Date: Tue, 25 Oct 2016 11:37:59 +0200 +Subject: power: reset: at91-poweroff: timely shutdown LPDDR memories + +From: Alexandre Belloni + +commit 0b0408745e7ff24757cbfd571d69026c0ddb803c upstream. + +LPDDR memories can only handle up to 400 uncontrolled power off. Ensure the +proper power off sequence is used before shutting down the platform. 
+ +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Reichel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/power/reset/Kconfig | 2 - + drivers/power/reset/at91-poweroff.c | 54 ++++++++++++++++++++++++++++++- + drivers/power/reset/at91-sama5d2_shdwc.c | 49 +++++++++++++++++++++++++++- + 3 files changed, 102 insertions(+), 3 deletions(-) + +--- a/drivers/power/reset/Kconfig ++++ b/drivers/power/reset/Kconfig +@@ -32,7 +32,7 @@ config POWER_RESET_AT91_RESET + + config POWER_RESET_AT91_SAMA5D2_SHDWC + tristate "Atmel AT91 SAMA5D2-Compatible shutdown controller driver" +- depends on ARCH_AT91 || COMPILE_TEST ++ depends on ARCH_AT91 + default SOC_SAMA5 + help + This driver supports the alternate shutdown controller for some Atmel +--- a/drivers/power/reset/at91-poweroff.c ++++ b/drivers/power/reset/at91-poweroff.c +@@ -14,9 +14,12 @@ + #include + #include + #include ++#include + #include + #include + ++#include ++ + #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ + #define AT91_SHDW_SHDW BIT(0) /* Shut Down command */ + #define AT91_SHDW_KEY (0xa5 << 24) /* KEY Password */ +@@ -50,6 +53,7 @@ static const char *shdwc_wakeup_modes[] + + static void __iomem *at91_shdwc_base; + static struct clk *sclk; ++static void __iomem *mpddrc_base; + + static void __init at91_wakeup_status(void) + { +@@ -73,6 +77,29 @@ static void at91_poweroff(void) + writel(AT91_SHDW_KEY | AT91_SHDW_SHDW, at91_shdwc_base + AT91_SHDW_CR); + } + ++static void at91_lpddr_poweroff(void) ++{ ++ asm volatile( ++ /* Align to cache lines */ ++ ".balign 32\n\t" ++ ++ /* Ensure AT91_SHDW_CR is in the TLB by reading it */ ++ " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" ++ ++ /* Power down SDRAM0 */ ++ " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" ++ /* Shutdown CPU */ ++ " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" ++ ++ " b .\n\t" ++ : ++ : "r" (mpddrc_base), ++ "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), ++ "r" (at91_shdwc_base), ++ "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) ++ : "r0"); ++} ++ + static int at91_poweroff_get_wakeup_mode(struct device_node *np) + { + const char *pm; +@@ -124,6 +151,8 @@ static void at91_poweroff_dt_set_wakeup_ + static int __init at91_poweroff_probe(struct platform_device *pdev) + { + struct resource *res; ++ struct device_node *np; ++ u32 ddr_type; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +@@ -150,12 +179,30 @@ static int __init at91_poweroff_probe(st + + pm_power_off = at91_poweroff; + ++ np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); ++ if (!np) ++ return 0; ++ ++ mpddrc_base = of_iomap(np, 0); ++ of_node_put(np); ++ ++ if (!mpddrc_base) ++ return 0; ++ ++ ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; ++ if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || ++ (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) ++ pm_power_off = at91_lpddr_poweroff; ++ else ++ iounmap(mpddrc_base); ++ + return 0; + } + + static int __exit at91_poweroff_remove(struct platform_device *pdev) + { +- if (pm_power_off == at91_poweroff) ++ if (pm_power_off == at91_poweroff || ++ pm_power_off == at91_lpddr_poweroff) + pm_power_off = NULL; + + clk_disable_unprepare(sclk); +@@ -163,6 +210,11 @@ static int __exit at91_poweroff_remove(s + return 0; + } + ++static const struct of_device_id at91_ramc_of_match[] = { ++ { .compatible = "atmel,sama5d3-ddramc", }, ++ { /* sentinel */ } ++}; ++ + static const struct of_device_id at91_poweroff_of_match[] = { + { .compatible = "atmel,at91sam9260-shdwc", }, + { .compatible = 
"atmel,at91sam9rl-shdwc", }, +--- a/drivers/power/reset/at91-sama5d2_shdwc.c ++++ b/drivers/power/reset/at91-sama5d2_shdwc.c +@@ -22,9 +22,12 @@ + #include + #include + #include ++#include + #include + #include + ++#include ++ + #define SLOW_CLOCK_FREQ 32768 + + #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ +@@ -75,6 +78,7 @@ struct shdwc { + */ + static struct shdwc *at91_shdwc; + static struct clk *sclk; ++static void __iomem *mpddrc_base; + + static const unsigned long long sdwc_dbc_period[] = { + 0, 3, 32, 512, 4096, 32768, +@@ -108,6 +112,29 @@ static void at91_poweroff(void) + at91_shdwc->at91_shdwc_base + AT91_SHDW_CR); + } + ++static void at91_lpddr_poweroff(void) ++{ ++ asm volatile( ++ /* Align to cache lines */ ++ ".balign 32\n\t" ++ ++ /* Ensure AT91_SHDW_CR is in the TLB by reading it */ ++ " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" ++ ++ /* Power down SDRAM0 */ ++ " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" ++ /* Shutdown CPU */ ++ " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" ++ ++ " b .\n\t" ++ : ++ : "r" (mpddrc_base), ++ "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), ++ "r" (at91_shdwc->at91_shdwc_base), ++ "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) ++ : "r0"); ++} ++ + static u32 at91_shdwc_debouncer_value(struct platform_device *pdev, + u32 in_period_us) + { +@@ -212,6 +239,8 @@ static int __init at91_shdwc_probe(struc + { + struct resource *res; + const struct of_device_id *match; ++ struct device_node *np; ++ u32 ddr_type; + int ret; + + if (!pdev->dev.of_node) +@@ -249,6 +278,23 @@ static int __init at91_shdwc_probe(struc + + pm_power_off = at91_poweroff; + ++ np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); ++ if (!np) ++ return 0; ++ ++ mpddrc_base = of_iomap(np, 0); ++ of_node_put(np); ++ ++ if (!mpddrc_base) ++ return 0; ++ ++ ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; ++ if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || ++ (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) ++ pm_power_off = at91_lpddr_poweroff; ++ else ++ iounmap(mpddrc_base); ++ + return 0; + } + +@@ -256,7 +302,8 @@ static int __exit at91_shdwc_remove(stru + { + struct shdwc *shdw = platform_get_drvdata(pdev); + +- if (pm_power_off == at91_poweroff) ++ if (pm_power_off == at91_poweroff || ++ pm_power_off == at91_lpddr_poweroff) + pm_power_off = NULL; + + /* Reset values to disable wake-up features */ diff --git a/queue-4.9/scsi-aacraid-reorder-adapter-status-check.patch b/queue-4.9/scsi-aacraid-reorder-adapter-status-check.patch new file mode 100644 index 00000000000..89bf340f870 --- /dev/null +++ b/queue-4.9/scsi-aacraid-reorder-adapter-status-check.patch @@ -0,0 +1,72 @@ +From c421530bf848604e97d0785a03b3fe2c62775083 Mon Sep 17 00:00:00 2001 +From: Raghava Aditya Renukunta +Date: Thu, 16 Feb 2017 12:51:21 -0800 +Subject: scsi: aacraid: Reorder Adapter status check + +From: Raghava Aditya Renukunta + +commit c421530bf848604e97d0785a03b3fe2c62775083 upstream. + +The driver currently checks the SELF_TEST_FAILED first and then +KERNEL_PANIC next. Under error conditions(boot code failure) both +SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time. + +The driver has the capability to reset the controller on an KERNEL_PANIC, +but not on SELF_TEST_FAILED. + +Fixed by first checking KERNEL_PANIC and then the others. 
+ +Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family) +Signed-off-by: Raghava Aditya Renukunta +Reviewed-by: David Carroll +Reviewed-by: Johannes Thumshirn +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/aacraid/src.c | 21 +++++++++++++++++---- + 1 file changed, 17 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/aacraid/src.c ++++ b/drivers/scsi/aacraid/src.c +@@ -414,16 +414,23 @@ static int aac_src_check_health(struct a + u32 status = src_readl(dev, MUnit.OMR); + + /* ++ * Check to see if the board panic'd. ++ */ ++ if (unlikely(status & KERNEL_PANIC)) ++ goto err_blink; ++ ++ /* + * Check to see if the board failed any self tests. + */ + if (unlikely(status & SELF_TEST_FAILED)) +- return -1; ++ goto err_out; + + /* +- * Check to see if the board panic'd. ++ * Check to see if the board failed any self tests. + */ +- if (unlikely(status & KERNEL_PANIC)) +- return (status >> 16) & 0xFF; ++ if (unlikely(status & MONITOR_PANIC)) ++ goto err_out; ++ + /* + * Wait for the adapter to be up and running. + */ +@@ -433,6 +440,12 @@ static int aac_src_check_health(struct a + * Everything is OK + */ + return 0; ++ ++err_out: ++ return -1; ++ ++err_blink: ++ return (status > 16) & 0xFF; + } + + /** diff --git a/queue-4.9/scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch b/queue-4.9/scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch new file mode 100644 index 00000000000..6981ffc7c5a --- /dev/null +++ b/queue-4.9/scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch @@ -0,0 +1,39 @@ +From bba5dc332ec2d3a685cb4dae668c793f6a3713a3 Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Wed, 14 Dec 2016 18:46:02 -0800 +Subject: scsi: storvsc: properly handle SRB_ERROR when sense message is present + +From: Long Li + +commit bba5dc332ec2d3a685cb4dae668c793f6a3713a3 upstream. + +When sense message is present on error, we should pass along to the upper +layer to decide how to deal with the error. +This patch fixes connectivity issues with Fiber Channel devices. + +Signed-off-by: Long Li +Reviewed-by: K. Y. Srinivasan +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/storvsc_drv.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -891,6 +891,13 @@ static void storvsc_handle_error(struct + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + /* ++ * Let upper layer deal with error when ++ * sense message is present. ++ */ ++ ++ if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) ++ break; ++ /* + * If there is an error; offline the device since all + * error recovery strategies would have already been + * deployed on the host side. However, if the command diff --git a/queue-4.9/scsi-storvsc-properly-set-residual-data-length-on-errors.patch b/queue-4.9/scsi-storvsc-properly-set-residual-data-length-on-errors.patch new file mode 100644 index 00000000000..9192d7c434d --- /dev/null +++ b/queue-4.9/scsi-storvsc-properly-set-residual-data-length-on-errors.patch @@ -0,0 +1,76 @@ +From 40630f462824ee24bc00d692865c86c3828094e0 Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Wed, 14 Dec 2016 18:46:03 -0800 +Subject: scsi: storvsc: properly set residual data length on errors + +From: Long Li + +commit 40630f462824ee24bc00d692865c86c3828094e0 upstream. 
+ +On I/O errors, the Windows driver doesn't set data_transfer_length +on error conditions other than SRB_STATUS_DATA_OVERRUN. +In these cases we need to set data_transfer_length to 0, +indicating there is no data transferred. On SRB_STATUS_DATA_OVERRUN, +data_transfer_length is set by the Windows driver to the actual data transferred. + +Reported-by: Shiva Krishna +Signed-off-by: Long Li +Reviewed-by: K. Y. Srinivasan +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/storvsc_drv.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -377,6 +377,7 @@ enum storvsc_request_type { + #define SRB_STATUS_SUCCESS 0x01 + #define SRB_STATUS_ABORTED 0x02 + #define SRB_STATUS_ERROR 0x04 ++#define SRB_STATUS_DATA_OVERRUN 0x12 + + #define SRB_STATUS(status) \ + (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) +@@ -962,6 +963,7 @@ static void storvsc_command_completion(s + struct scsi_cmnd *scmnd = cmd_request->cmd; + struct scsi_sense_hdr sense_hdr; + struct vmscsi_request *vm_srb; ++ u32 data_transfer_length; + struct Scsi_Host *host; + u32 payload_sz = cmd_request->payload_sz; + void *payload = cmd_request->payload; +@@ -969,6 +971,7 @@ static void storvsc_command_completion(s + host = stor_dev->host; + + vm_srb = &cmd_request->vstor_packet.vm_srb; ++ data_transfer_length = vm_srb->data_transfer_length; + + scmnd->result = vm_srb->scsi_status; + +@@ -982,13 +985,20 @@ static void storvsc_command_completion(s + &sense_hdr); + } + +- if (vm_srb->srb_status != SRB_STATUS_SUCCESS) ++ if (vm_srb->srb_status != SRB_STATUS_SUCCESS) { + storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, + sense_hdr.ascq); ++ /* ++ * The Windows driver set data_transfer_length on ++ * SRB_STATUS_DATA_OVERRUN. On other errors, this value ++ * is untouched. In these cases we set it to 0. ++ */ ++ if (vm_srb->srb_status != SRB_STATUS_DATA_OVERRUN) ++ data_transfer_length = 0; ++ } + + scsi_set_resid(scmnd, +- cmd_request->payload->range.len - +- vm_srb->data_transfer_length); ++ cmd_request->payload->range.len - data_transfer_length); + + scmnd->scsi_done(scmnd); + diff --git a/queue-4.9/scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch b/queue-4.9/scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch new file mode 100644 index 00000000000..0a519f8564e --- /dev/null +++ b/queue-4.9/scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch @@ -0,0 +1,47 @@ +From 3cd6d3d9b1abab8dcdf0800224ce26daac24eea2 Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Wed, 14 Dec 2016 18:46:01 -0800 +Subject: scsi: storvsc: use tagged SRB requests if supported by the device + +From: Long Li + +commit 3cd6d3d9b1abab8dcdf0800224ce26daac24eea2 upstream. + +Properly set SRB flags when hosting device supports tagged queuing. +This patch improves the performance on Fiber Channel disks. + +Signed-off-by: Long Li +Reviewed-by: K. Y. Srinivasan +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/storvsc_drv.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -136,6 +136,8 @@ struct hv_fc_wwn_packet { + #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F000000 + #define SRB_FLAGS_CLASS_DRIVER_RESERVED 0xF0000000 + ++#define SP_UNTAGGED ((unsigned char) ~0) ++#define SRB_SIMPLE_TAG_REQUEST 0x20 + + /* + * Platform neutral description of a scsi request - +@@ -1451,6 +1453,13 @@ static int storvsc_queuecommand(struct S + vm_srb->win8_extension.srb_flags |= + SRB_FLAGS_DISABLE_SYNCH_TRANSFER; + ++ if (scmnd->device->tagged_supported) { ++ vm_srb->win8_extension.srb_flags |= ++ (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE); ++ vm_srb->win8_extension.queue_tag = SP_UNTAGGED; ++ vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST; ++ } ++ + /* Build the SRB */ + switch (scmnd->sc_data_direction) { + case DMA_TO_DEVICE: diff --git a/queue-4.9/scsi-use-scsi_device_from_queue-for-scsi_dh.patch b/queue-4.9/scsi-use-scsi_device_from_queue-for-scsi_dh.patch new file mode 100644 index 00000000000..c15d1724f2e --- /dev/null +++ b/queue-4.9/scsi-use-scsi_device_from_queue-for-scsi_dh.patch @@ -0,0 +1,126 @@ +From 857de6e00778738dc3d61f75acbac35bdc48e533 Mon Sep 17 00:00:00 2001 +From: Hannes Reinecke +Date: Fri, 17 Feb 2017 09:02:45 +0100 +Subject: scsi: use 'scsi_device_from_queue()' for scsi_dh + +From: Hannes Reinecke + +commit 857de6e00778738dc3d61f75acbac35bdc48e533 upstream. + +The device handler needs to check if a given queue belongs to a scsi +device; only then does it make sense to attach a device handler. + +[mkp: dropped flags] + +Signed-off-by: Hannes Reinecke +Reviewed-by: Christoph Hellwig +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/scsi_dh.c | 22 ++++------------------ + drivers/scsi/scsi_lib.c | 23 +++++++++++++++++++++++ + include/scsi/scsi_device.h | 1 + + 3 files changed, 28 insertions(+), 18 deletions(-) + +--- a/drivers/scsi/scsi_dh.c ++++ b/drivers/scsi/scsi_dh.c +@@ -219,20 +219,6 @@ int scsi_unregister_device_handler(struc + } + EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); + +-static struct scsi_device *get_sdev_from_queue(struct request_queue *q) +-{ +- struct scsi_device *sdev; +- unsigned long flags; +- +- spin_lock_irqsave(q->queue_lock, flags); +- sdev = q->queuedata; +- if (!sdev || !get_device(&sdev->sdev_gendev)) +- sdev = NULL; +- spin_unlock_irqrestore(q->queue_lock, flags); +- +- return sdev; +-} +- + /* + * scsi_dh_activate - activate the path associated with the scsi_device + * corresponding to the given request queue. 
+@@ -251,7 +237,7 @@ int scsi_dh_activate(struct request_queu + struct scsi_device *sdev; + int err = SCSI_DH_NOSYS; + +- sdev = get_sdev_from_queue(q); ++ sdev = scsi_device_from_queue(q); + if (!sdev) { + if (fn) + fn(data, err); +@@ -298,7 +284,7 @@ int scsi_dh_set_params(struct request_qu + struct scsi_device *sdev; + int err = -SCSI_DH_NOSYS; + +- sdev = get_sdev_from_queue(q); ++ sdev = scsi_device_from_queue(q); + if (!sdev) + return err; + +@@ -321,7 +307,7 @@ int scsi_dh_attach(struct request_queue + struct scsi_device_handler *scsi_dh; + int err = 0; + +- sdev = get_sdev_from_queue(q); ++ sdev = scsi_device_from_queue(q); + if (!sdev) + return -ENODEV; + +@@ -359,7 +345,7 @@ const char *scsi_dh_attached_handler_nam + struct scsi_device *sdev; + const char *handler_name = NULL; + +- sdev = get_sdev_from_queue(q); ++ sdev = scsi_device_from_queue(q); + if (!sdev) + return NULL; + +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -2127,6 +2127,29 @@ void scsi_mq_destroy_tags(struct Scsi_Ho + blk_mq_free_tag_set(&shost->tag_set); + } + ++/** ++ * scsi_device_from_queue - return sdev associated with a request_queue ++ * @q: The request queue to return the sdev from ++ * ++ * Return the sdev associated with a request queue or NULL if the ++ * request_queue does not reference a SCSI device. ++ */ ++struct scsi_device *scsi_device_from_queue(struct request_queue *q) ++{ ++ struct scsi_device *sdev = NULL; ++ ++ if (q->mq_ops) { ++ if (q->mq_ops == &scsi_mq_ops) ++ sdev = q->queuedata; ++ } else if (q->request_fn == scsi_request_fn) ++ sdev = q->queuedata; ++ if (!sdev || !get_device(&sdev->sdev_gendev)) ++ sdev = NULL; ++ ++ return sdev; ++} ++EXPORT_SYMBOL_GPL(scsi_device_from_queue); ++ + /* + * Function: scsi_block_requests() + * +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -315,6 +315,7 @@ extern void scsi_remove_device(struct sc + extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); + void scsi_attach_vpd(struct scsi_device *sdev); + ++extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); + extern int scsi_device_get(struct scsi_device *); + extern void scsi_device_put(struct scsi_device *); + extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, diff --git a/queue-4.9/series b/queue-4.9/series index 368534a235f..b4b6967aeaa 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -39,3 +39,26 @@ tpm_tis-fix-the-error-handling-of-init_tis.patch iommu-vt-d-fix-some-macros-that-are-incorrectly-specified-in-intel-iommu.patch iommu-vt-d-tylersburg-isoch-identity-map-check-is-done-too-late.patch cifs-fix-splice-read-for-non-cached-files.patch +mm-devm_memremap_pages-hold-device_hotplug-lock-over-mem_hotplug_-begin-done.patch +mm-page_alloc-fix-nodes-for-reclaim-in-fast-path.patch +mm-vmpressure-fix-sending-wrong-events-on-underflow.patch +mm-do-not-access-page-mapping-directly-on-page_endio.patch +mm-balloon-umount-balloon_mnt-when-removing-vb-device.patch +mm-vmscan-cleanup-lru-size-claculations.patch +mm-vmscan-consider-eligible-zones-in-get_scan_count.patch +sigaltstack-support-ss_autodisarm-for-config_compat.patch +ipc-shm-fix-shmat-mmap-nil-page-protection.patch +ima-fix-ima_d_path-possible-race-with-rename.patch +pm-devfreq-fix-available_governor-sysfs.patch +pm-devfreq-fix-wrong-trans_stat-of-passive-devfreq-device.patch +dm-cache-fix-corruption-seen-when-using-cache-2tb.patch +dm-stats-fix-a-leaked-s-histogram_boundaries-array.patch 
+dm-round-robin-revert-use-percpu-repeat_count-and-current_path.patch +dm-raid-fix-data-corruption-on-reshape-request.patch +scsi-storvsc-use-tagged-srb-requests-if-supported-by-the-device.patch +scsi-storvsc-properly-handle-srb_error-when-sense-message-is-present.patch +scsi-storvsc-properly-set-residual-data-length-on-errors.patch +scsi-aacraid-reorder-adapter-status-check.patch +scsi-use-scsi_device_from_queue-for-scsi_dh.patch +power-reset-at91-poweroff-timely-shutdown-lpddr-memories.patch +fix-disable-sys_membarrier-when-nohz_full-is-enabled.patch diff --git a/queue-4.9/sigaltstack-support-ss_autodisarm-for-config_compat.patch b/queue-4.9/sigaltstack-support-ss_autodisarm-for-config_compat.patch new file mode 100644 index 00000000000..2d2900c05da --- /dev/null +++ b/queue-4.9/sigaltstack-support-ss_autodisarm-for-config_compat.patch @@ -0,0 +1,76 @@ +From 441398d378f29a5ad6d0fcda07918e54e4961800 Mon Sep 17 00:00:00 2001 +From: Stas Sergeev +Date: Mon, 27 Feb 2017 14:27:25 -0800 +Subject: sigaltstack: support SS_AUTODISARM for CONFIG_COMPAT + +From: Stas Sergeev + +commit 441398d378f29a5ad6d0fcda07918e54e4961800 upstream. + +Currently SS_AUTODISARM is not supported in compatibility mode, but does +not return -EINVAL either. This makes dosemu built with -m32 on x86_64 +to crash. Also the kernel's sigaltstack selftest fails if compiled with +-m32. + +This patch adds the needed support. + +Link: http://lkml.kernel.org/r/20170205101213.8163-2-stsp@list.ru +Signed-off-by: Stas Sergeev +Cc: Milosz Tanski +Cc: Andy Lutomirski +Cc: Al Viro +Cc: Arnd Bergmann +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Oleg Nesterov +Cc: Nicolas Pitre +Cc: Waiman Long +Cc: Dave Hansen +Cc: Dmitry Safonov +Cc: Wang Xiaoqiang +Cc: Oleg Nesterov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/compat.h | 4 +++- + kernel/signal.c | 11 +++++++++-- + 2 files changed, 12 insertions(+), 3 deletions(-) + +--- a/include/linux/compat.h ++++ b/include/linux/compat.h +@@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_ + compat_stack_t __user *__uss = uss; \ + struct task_struct *t = current; \ + put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ +- put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ ++ put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ + put_user_ex(t->sas_ss_size, &__uss->ss_size); \ ++ if (t->sas_ss_flags & SS_AUTODISARM) \ ++ sas_ss_reset(t); \ + } while (0); + + asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -3226,10 +3226,17 @@ int compat_restore_altstack(const compat + + int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) + { ++ int err; + struct task_struct *t = current; +- return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | +- __put_user(sas_ss_flags(sp), &uss->ss_flags) | ++ err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), ++ &uss->ss_sp) | ++ __put_user(t->sas_ss_flags, &uss->ss_flags) | + __put_user(t->sas_ss_size, &uss->ss_size); ++ if (err) ++ return err; ++ if (t->sas_ss_flags & SS_AUTODISARM) ++ sas_ss_reset(t); ++ return 0; + } + #endif +
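
For context on the sigaltstack change above, a minimal userspace sketch (illustrative only, not part of this queue) of how SS_AUTODISARM is typically used. It assumes a kernel >= 4.7, where the flag was introduced; the fallback define mirrors include/uapi/linux/signal.h, and the function and variable names below are made up for the example. Building it with -m32 on x86_64 exercises the compat sigaltstack path addressed by the patch: without the fix, the alternate stack is not disarmed while the handler runs.

#include <signal.h>
#include <stdio.h>

#ifndef SS_AUTODISARM
#define SS_AUTODISARM (1U << 31)        /* from include/uapi/linux/signal.h */
#endif

static volatile sig_atomic_t handler_ss_flags = -1;

static void on_sigusr1(int sig)
{
        stack_t cur;

        (void)sig;
        /*
         * With SS_AUTODISARM the kernel disarms the alternate stack for the
         * duration of the handler, so this query is expected to report
         * SS_DISABLE.  Without the compat fix, a -m32 build on x86_64 would
         * still see the stack armed here.
         */
        if (sigaltstack(NULL, &cur) == 0)
                handler_ss_flags = cur.ss_flags;
}

int main(void)
{
        static char stack_mem[64 * 1024];
        stack_t ss = {
                .ss_sp = stack_mem,
                .ss_size = sizeof(stack_mem),
                .ss_flags = SS_AUTODISARM,
        };
        struct sigaction sa = {
                .sa_handler = on_sigusr1,
                .sa_flags = SA_ONSTACK,         /* run handler on the alt stack */
        };

        if (sigaltstack(&ss, NULL) == -1) {
                perror("sigaltstack");          /* kernels < 4.7 reject SS_AUTODISARM */
                return 1;
        }
        if (sigaction(SIGUSR1, &sa, NULL) == -1) {
                perror("sigaction");
                return 1;
        }

        raise(SIGUSR1);
        printf("ss_flags inside handler: %d (SS_DISABLE is %d)\n",
               (int)handler_ss_flags, SS_DISABLE);
        return 0;
}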