From: Greg Kroah-Hartman Date: Fri, 2 Aug 2019 08:49:10 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v5.2.6~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c35cde8279bfae3d528fc529b037438c52aab8ac;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: block-scsi-change-the-preempt-only-flag-into-a-counter.patch ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch drivers-pps-pps.c-clear-offset-flags-in-pps_setparams-ioctl.patch fix-allyesconfig-output.patch proc-pid-cmdline-add-back-the-setproctitle-special-case.patch proc-pid-cmdline-remove-all-the-special-cases.patch sched-fair-don-t-free-p-numa_faults-with-concurrent-readers.patch sched-fair-use-rcu-accessors-consistently-for-numa_group.patch scsi-core-avoid-that-a-kernel-warning-appears-during-system-resume.patch --- diff --git a/queue-4.19/block-scsi-change-the-preempt-only-flag-into-a-counter.patch b/queue-4.19/block-scsi-change-the-preempt-only-flag-into-a-counter.patch new file mode 100644 index 00000000000..409074895ef --- /dev/null +++ b/queue-4.19/block-scsi-change-the-preempt-only-flag-into-a-counter.patch @@ -0,0 +1,223 @@ +From cd84a62e0078dce09f4ed349bec84f86c9d54b30 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Wed, 26 Sep 2018 14:01:04 -0700 +Subject: block, scsi: Change the preempt-only flag into a counter + +From: Bart Van Assche + +commit cd84a62e0078dce09f4ed349bec84f86c9d54b30 upstream. + +The RQF_PREEMPT flag is used for three purposes: +- In the SCSI core, for making sure that power management requests + are executed even if a device is in the "quiesced" state. +- For domain validation by SCSI drivers that use the parallel port. +- In the IDE driver, for IDE preempt requests. +Rename "preempt-only" into "pm-only" because the primary purpose of +this mode is power management. 
Since the power management core may +but does not have to resume a runtime suspended device before +performing system-wide suspend and since a later patch will set +"pm-only" mode as long as a block device is runtime suspended, make +it possible to set "pm-only" mode from more than one context. Since +with this change scsi_device_quiesce() is no longer idempotent, make +that function return early if it is called for a quiesced queue. + +Signed-off-by: Bart Van Assche +Acked-by: Martin K. Petersen +Reviewed-by: Hannes Reinecke +Reviewed-by: Christoph Hellwig +Reviewed-by: Ming Lei +Cc: Jianchao Wang +Cc: Johannes Thumshirn +Cc: Alan Stern +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-core.c | 35 ++++++++++++++++++----------------- + block/blk-mq-debugfs.c | 10 +++++++++- + drivers/scsi/scsi_lib.c | 11 +++++++---- + include/linux/blkdev.h | 14 +++++++++----- + 4 files changed, 43 insertions(+), 27 deletions(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -421,24 +421,25 @@ void blk_sync_queue(struct request_queue + EXPORT_SYMBOL(blk_sync_queue); + + /** +- * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY ++ * blk_set_pm_only - increment pm_only counter + * @q: request queue pointer +- * +- * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not +- * set and 1 if the flag was already set. 
+ */ +-int blk_set_preempt_only(struct request_queue *q) ++void blk_set_pm_only(struct request_queue *q) + { +- return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); ++ atomic_inc(&q->pm_only); + } +-EXPORT_SYMBOL_GPL(blk_set_preempt_only); ++EXPORT_SYMBOL_GPL(blk_set_pm_only); + +-void blk_clear_preempt_only(struct request_queue *q) ++void blk_clear_pm_only(struct request_queue *q) + { +- blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); +- wake_up_all(&q->mq_freeze_wq); ++ int pm_only; ++ ++ pm_only = atomic_dec_return(&q->pm_only); ++ WARN_ON_ONCE(pm_only < 0); ++ if (pm_only == 0) ++ wake_up_all(&q->mq_freeze_wq); + } +-EXPORT_SYMBOL_GPL(blk_clear_preempt_only); ++EXPORT_SYMBOL_GPL(blk_clear_pm_only); + + /** + * __blk_run_queue_uncond - run a queue whether or not it has been stopped +@@ -916,7 +917,7 @@ EXPORT_SYMBOL(blk_alloc_queue); + */ + int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) + { +- const bool preempt = flags & BLK_MQ_REQ_PREEMPT; ++ const bool pm = flags & BLK_MQ_REQ_PREEMPT; + + while (true) { + bool success = false; +@@ -924,11 +925,11 @@ int blk_queue_enter(struct request_queue + rcu_read_lock(); + if (percpu_ref_tryget_live(&q->q_usage_counter)) { + /* +- * The code that sets the PREEMPT_ONLY flag is +- * responsible for ensuring that that flag is globally +- * visible before the queue is unfrozen. ++ * The code that increments the pm_only counter is ++ * responsible for ensuring that that counter is ++ * globally visible before the queue is unfrozen. 
+ */ +- if (preempt || !blk_queue_preempt_only(q)) { ++ if (pm || !blk_queue_pm_only(q)) { + success = true; + } else { + percpu_ref_put(&q->q_usage_counter); +@@ -953,7 +954,7 @@ int blk_queue_enter(struct request_queue + + wait_event(q->mq_freeze_wq, + (atomic_read(&q->mq_freeze_depth) == 0 && +- (preempt || !blk_queue_preempt_only(q))) || ++ (pm || !blk_queue_pm_only(q))) || + blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_fil + return 0; + } + ++static int queue_pm_only_show(void *data, struct seq_file *m) ++{ ++ struct request_queue *q = data; ++ ++ seq_printf(m, "%d\n", atomic_read(&q->pm_only)); ++ return 0; ++} ++ + #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name + static const char *const blk_queue_flag_name[] = { + QUEUE_FLAG_NAME(QUEUED), +@@ -132,7 +140,6 @@ static const char *const blk_queue_flag_ + QUEUE_FLAG_NAME(REGISTERED), + QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), + QUEUE_FLAG_NAME(QUIESCED), +- QUEUE_FLAG_NAME(PREEMPT_ONLY), + }; + #undef QUEUE_FLAG_NAME + +@@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(vo + static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { + { "poll_stat", 0400, queue_poll_stat_show }, + { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, ++ { "pm_only", 0600, queue_pm_only_show, NULL }, + { "state", 0600, queue_state_show, queue_state_write }, + { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, + { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -3059,11 +3059,14 @@ scsi_device_quiesce(struct scsi_device * + */ + WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); + +- blk_set_preempt_only(q); ++ if (sdev->quiesced_by == current) ++ return 0; ++ ++ blk_set_pm_only(q); + + blk_mq_freeze_queue(q); + /* +- * Ensure that the effect of 
blk_set_preempt_only() will be visible ++ * Ensure that the effect of blk_set_pm_only() will be visible + * for percpu_ref_tryget() callers that occur after the queue + * unfreeze even if the queue was already frozen before this function + * was called. See also https://lwn.net/Articles/573497/. +@@ -3076,7 +3079,7 @@ scsi_device_quiesce(struct scsi_device * + if (err == 0) + sdev->quiesced_by = current; + else +- blk_clear_preempt_only(q); ++ blk_clear_pm_only(q); + mutex_unlock(&sdev->state_mutex); + + return err; +@@ -3100,7 +3103,7 @@ void scsi_device_resume(struct scsi_devi + */ + mutex_lock(&sdev->state_mutex); + sdev->quiesced_by = NULL; +- blk_clear_preempt_only(sdev->request_queue); ++ blk_clear_pm_only(sdev->request_queue); + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); + mutex_unlock(&sdev->state_mutex); +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -504,6 +504,12 @@ struct request_queue { + * various queue flags, see QUEUE_* below + */ + unsigned long queue_flags; ++ /* ++ * Number of contexts that have called blk_set_pm_only(). If this ++ * counter is above zero then only RQF_PM and RQF_PREEMPT requests are ++ * processed. ++ */ ++ atomic_t pm_only; + + /* + * ida allocated id for this queue. 
Used to index queues from +@@ -698,7 +704,6 @@ struct request_queue { + #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ + #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ + #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ +-#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ + + #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ + (1 << QUEUE_FLAG_SAME_COMP) | \ +@@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsig + ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ + REQ_FAILFAST_DRIVER)) + #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) +-#define blk_queue_preempt_only(q) \ +- test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) ++#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) + #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) + +-extern int blk_set_preempt_only(struct request_queue *q); +-extern void blk_clear_preempt_only(struct request_queue *q); ++extern void blk_set_pm_only(struct request_queue *q); ++extern void blk_clear_pm_only(struct request_queue *q); + + static inline int queue_in_flight(struct request_queue *q) + { diff --git a/queue-4.19/ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch b/queue-4.19/ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch new file mode 100644 index 00000000000..a12fcc330a6 --- /dev/null +++ b/queue-4.19/ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch @@ -0,0 +1,49 @@ +From d6e47819721ae2d9d090058ad5570a66f3c42e39 Mon Sep 17 00:00:00 2001 +From: "Yan, Zheng" +Date: Thu, 23 May 2019 11:01:37 +0800 +Subject: ceph: hold i_ceph_lock when removing caps for freeing inode + +From: Yan, Zheng + +commit d6e47819721ae2d9d090058ad5570a66f3c42e39 upstream. + +ceph_d_revalidate(, LOOKUP_RCU) may call __ceph_caps_issued_mask() +on a freeing inode. 
+ +Signed-off-by: "Yan, Zheng" +Reviewed-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ceph/caps.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/ceph/caps.c ++++ b/fs/ceph/caps.c +@@ -1237,20 +1237,23 @@ static int send_cap_msg(struct cap_msg_a + } + + /* +- * Queue cap releases when an inode is dropped from our cache. Since +- * inode is about to be destroyed, there is no need for i_ceph_lock. ++ * Queue cap releases when an inode is dropped from our cache. + */ + void ceph_queue_caps_release(struct inode *inode) + { + struct ceph_inode_info *ci = ceph_inode(inode); + struct rb_node *p; + ++ /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) ++ * may call __ceph_caps_issued_mask() on a freeing inode. */ ++ spin_lock(&ci->i_ceph_lock); + p = rb_first(&ci->i_caps); + while (p) { + struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); + p = rb_next(p); + __ceph_remove_cap(cap, true); + } ++ spin_unlock(&ci->i_ceph_lock); + } + + /* diff --git a/queue-4.19/drivers-pps-pps.c-clear-offset-flags-in-pps_setparams-ioctl.patch b/queue-4.19/drivers-pps-pps.c-clear-offset-flags-in-pps_setparams-ioctl.patch new file mode 100644 index 00000000000..12fe7600345 --- /dev/null +++ b/queue-4.19/drivers-pps-pps.c-clear-offset-flags-in-pps_setparams-ioctl.patch @@ -0,0 +1,50 @@ +From 5515e9a6273b8c02034466bcbd717ac9f53dab99 Mon Sep 17 00:00:00 2001 +From: Miroslav Lichvar +Date: Tue, 16 Jul 2019 16:30:09 -0700 +Subject: drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl + +From: Miroslav Lichvar + +commit 5515e9a6273b8c02034466bcbd717ac9f53dab99 upstream. + +The PPS assert/clear offset corrections are set by the PPS_SETPARAMS +ioctl in the pps_ktime structs, which also contain flags. The flags are +not initialized by applications (using the timepps.h header) and they +are not used by the kernel for anything except returning them back in +the PPS_GETPARAMS ioctl. 
+ +Set the flags to zero to make it clear they are unused and avoid leaking +uninitialized data of the PPS_SETPARAMS caller to other applications +that have a read access to the PPS device. + +Link: http://lkml.kernel.org/r/20190702092251.24303-1-mlichvar@redhat.com +Signed-off-by: Miroslav Lichvar +Reviewed-by: Thomas Gleixner +Acked-by: Rodolfo Giometti +Cc: Greg KH +Cc: Dan Carpenter +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pps/pps.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/pps/pps.c ++++ b/drivers/pps/pps.c +@@ -166,6 +166,14 @@ static long pps_cdev_ioctl(struct file * + pps->params.mode |= PPS_CANWAIT; + pps->params.api_version = PPS_API_VERS; + ++ /* ++ * Clear unused fields of pps_kparams to avoid leaking ++ * uninitialized data of the PPS_SETPARAMS caller via ++ * PPS_GETPARAMS ++ */ ++ pps->params.assert_off_tu.flags = 0; ++ pps->params.clear_off_tu.flags = 0; ++ + spin_unlock_irq(&pps->lock); + + break; diff --git a/queue-4.19/fix-allyesconfig-output.patch b/queue-4.19/fix-allyesconfig-output.patch new file mode 100644 index 00000000000..d896011046c --- /dev/null +++ b/queue-4.19/fix-allyesconfig-output.patch @@ -0,0 +1,51 @@ +From 1b496469d0c020e09124e03e66a81421c21272a7 Mon Sep 17 00:00:00 2001 +From: Yoshinori Sato +Date: Sun, 21 Apr 2019 22:53:58 +0900 +Subject: Fix allyesconfig output. + +From: Yoshinori Sato + +commit 1b496469d0c020e09124e03e66a81421c21272a7 upstream. + +Conflict JCore-SoC and SolutionEngine 7619. 
+ +Signed-off-by: Yoshinori Sato +Signed-off-by: Greg Kroah-Hartman + +--- + arch/sh/boards/Kconfig | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +--- a/arch/sh/boards/Kconfig ++++ b/arch/sh/boards/Kconfig +@@ -8,27 +8,19 @@ config SH_ALPHA_BOARD + bool + + config SH_DEVICE_TREE +- bool "Board Described by Device Tree" ++ bool + select OF + select OF_EARLY_FLATTREE + select TIMER_OF + select COMMON_CLK + select GENERIC_CALIBRATE_DELAY +- help +- Select Board Described by Device Tree to build a kernel that +- does not hard-code any board-specific knowledge but instead uses +- a device tree blob provided by the boot-loader. You must enable +- drivers for any hardware you want to use separately. At this +- time, only boards based on the open-hardware J-Core processors +- have sufficient driver coverage to use this option; do not +- select it if you are using original SuperH hardware. + + config SH_JCORE_SOC + bool "J-Core SoC" +- depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2) ++ select SH_DEVICE_TREE + select CLKSRC_JCORE_PIT + select JCORE_AIC +- default y if CPU_J2 ++ depends on CPU_J2 + help + Select this option to include drivers core components of the + J-Core SoC, including interrupt controllers and timers. diff --git a/queue-4.19/proc-pid-cmdline-add-back-the-setproctitle-special-case.patch b/queue-4.19/proc-pid-cmdline-add-back-the-setproctitle-special-case.patch new file mode 100644 index 00000000000..0b506c24d08 --- /dev/null +++ b/queue-4.19/proc-pid-cmdline-add-back-the-setproctitle-special-case.patch @@ -0,0 +1,147 @@ +From d26d0cd97c88eb1a5704b42e41ab443406807810 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 13 Jul 2019 14:27:14 -0700 +Subject: /proc/<pid>/cmdline: add back the setproctitle() special case + +From: Linus Torvalds + +commit d26d0cd97c88eb1a5704b42e41ab443406807810 upstream. + +This makes the setproctitle() special case very explicit indeed, and +handles it with a separate helper function entirely.
In the process, it +re-instates the original semantics of simply stopping at the first NUL +character when the original last NUL character is no longer there. + +[ The original semantics can still be seen in mm/util.c: get_cmdline() + that is limited to a fixed-size buffer ] + +This makes the logic about when we use the string lengths etc much more +obvious, and makes it easier to see what we do and what the two very +different cases are. + +Note that even when we allow walking past the end of the argument array +(because the setproctitle() might have overwritten and overflowed the +original argv[] strings), we only allow it when it overflows into the +environment region if it is immediately adjacent. + +[ Fixed for missing 'count' checks noted by Alexey Izbyshev ] + +Link: https://lore.kernel.org/lkml/alpine.LNX.2.21.1904052326230.3249@kich.toxcorp.com/ +Fixes: 5ab827189965 ("fs/proc: simplify and clarify get_mm_cmdline() function") +Cc: Jakub Jankowski +Cc: Alexey Dobriyan +Cc: Alexey Izbyshev +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 77 insertions(+), 4 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -205,12 +205,53 @@ static int proc_root_link(struct dentry + return result; + } + ++/* ++ * If the user used setproctitle(), we just get the string from ++ * user space at arg_start, and limit it to a maximum of one page. 
++ */ ++static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, ++ size_t count, unsigned long pos, ++ unsigned long arg_start) ++{ ++ char *page; ++ int ret, got; ++ ++ if (pos >= PAGE_SIZE) ++ return 0; ++ ++ page = (char *)__get_free_page(GFP_KERNEL); ++ if (!page) ++ return -ENOMEM; ++ ++ ret = 0; ++ got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); ++ if (got > 0) { ++ int len = strnlen(page, got); ++ ++ /* Include the NUL character if it was found */ ++ if (len < got) ++ len++; ++ ++ if (len > pos) { ++ len -= pos; ++ if (len > count) ++ len = count; ++ len -= copy_to_user(buf, page+pos, len); ++ if (!len) ++ len = -EFAULT; ++ ret = len; ++ } ++ } ++ free_page((unsigned long)page); ++ return ret; ++} ++ + static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, + size_t count, loff_t *ppos) + { +- unsigned long arg_start, arg_end; ++ unsigned long arg_start, arg_end, env_start, env_end; + unsigned long pos, len; +- char *page; ++ char *page, c; + + /* Check if process spawned far enough to have cmdline. */ + if (!mm->env_end) +@@ -219,14 +260,46 @@ static ssize_t get_mm_cmdline(struct mm_ + spin_lock(&mm->arg_lock); + arg_start = mm->arg_start; + arg_end = mm->arg_end; ++ env_start = mm->env_start; ++ env_end = mm->env_end; + spin_unlock(&mm->arg_lock); + + if (arg_start >= arg_end) + return 0; + ++ /* ++ * We allow setproctitle() to overwrite the argument ++ * strings, and overflow past the original end. But ++ * only when it overflows into the environment area. ++ */ ++ if (env_start != arg_end || env_end < env_start) ++ env_start = env_end = arg_end; ++ len = env_end - arg_start; ++ + /* We're not going to care if "*ppos" has high bits set */ +- /* .. 
but we do check the result is in the proper range */ +- pos = arg_start + *ppos; ++ pos = *ppos; ++ if (pos >= len) ++ return 0; ++ if (count > len - pos) ++ count = len - pos; ++ if (!count) ++ return 0; ++ ++ /* ++ * Magical special case: if the argv[] end byte is not ++ * zero, the user has overwritten it with setproctitle(3). ++ * ++ * Possible future enhancement: do this only once when ++ * pos is 0, and set a flag in the 'struct file'. ++ */ ++ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) ++ return get_mm_proctitle(mm, buf, count, pos, arg_start); ++ ++ /* ++ * For the non-setproctitle() case we limit things strictly ++ * to the [arg_start, arg_end[ range. ++ */ ++ pos += arg_start; + if (pos < arg_start || pos >= arg_end) + return 0; + if (count > arg_end - pos) diff --git a/queue-4.19/proc-pid-cmdline-remove-all-the-special-cases.patch b/queue-4.19/proc-pid-cmdline-remove-all-the-special-cases.patch new file mode 100644 index 00000000000..86b9088603c --- /dev/null +++ b/queue-4.19/proc-pid-cmdline-remove-all-the-special-cases.patch @@ -0,0 +1,135 @@ +From 3d712546d8ba9f25cdf080d79f90482aa4231ed4 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 13 Jul 2019 13:40:13 -0700 +Subject: /proc/<pid>/cmdline: remove all the special cases + +From: Linus Torvalds + +commit 3d712546d8ba9f25cdf080d79f90482aa4231ed4 upstream. + +Start off with a clean slate that only reads exactly from arg_start to +arg_end, without any oddities. This simplifies the code and in the +process removes the case that caused us to potentially leak an +uninitialized byte from the temporary kernel buffer. + +Note that in order to start from scratch with an understandable base, +this simplifies things _too_ much, and removes all the legacy logic to +handle setproctitle() having changed the argument strings. + +We'll add back those special cases very differently in the next commit.
+ +Link: https://lore.kernel.org/lkml/20190712160913.17727-1-izbyshev@ispras.ru/ +Fixes: f5b65348fd77 ("proc: fix missing final NUL in get_mm_cmdline() rewrite") +Cc: Alexey Izbyshev +Cc: Alexey Dobriyan +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 71 ++++++--------------------------------------------------- + 1 file changed, 8 insertions(+), 63 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -208,7 +208,7 @@ static int proc_root_link(struct dentry + static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, + size_t count, loff_t *ppos) + { +- unsigned long arg_start, arg_end, env_start, env_end; ++ unsigned long arg_start, arg_end; + unsigned long pos, len; + char *page; + +@@ -219,36 +219,18 @@ static ssize_t get_mm_cmdline(struct mm_ + spin_lock(&mm->arg_lock); + arg_start = mm->arg_start; + arg_end = mm->arg_end; +- env_start = mm->env_start; +- env_end = mm->env_end; + spin_unlock(&mm->arg_lock); + + if (arg_start >= arg_end) + return 0; + +- /* +- * We have traditionally allowed the user to re-write +- * the argument strings and overflow the end result +- * into the environment section. But only do that if +- * the environment area is contiguous to the arguments. +- */ +- if (env_start != arg_end || env_start >= env_end) +- env_start = env_end = arg_end; +- +- /* .. and limit it to a maximum of one page of slop */ +- if (env_end >= arg_end + PAGE_SIZE) +- env_end = arg_end + PAGE_SIZE - 1; +- + /* We're not going to care if "*ppos" has high bits set */ +- pos = arg_start + *ppos; +- + /* .. but we do check the result is in the proper range */ +- if (pos < arg_start || pos >= env_end) ++ pos = arg_start + *ppos; ++ if (pos < arg_start || pos >= arg_end) + return 0; +- +- /* .. 
and we never go past env_end */ +- if (env_end - pos < count) +- count = env_end - pos; ++ if (count > arg_end - pos) ++ count = arg_end - pos; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) +@@ -258,48 +240,11 @@ static ssize_t get_mm_cmdline(struct mm_ + while (count) { + int got; + size_t size = min_t(size_t, PAGE_SIZE, count); +- long offset; +- +- /* +- * Are we already starting past the official end? +- * We always include the last byte that is *supposed* +- * to be NUL +- */ +- offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; + +- got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); +- if (got <= offset) ++ got = access_remote_vm(mm, pos, page, size, FOLL_ANON); ++ if (got <= 0) + break; +- got -= offset; +- +- /* Don't walk past a NUL character once you hit arg_end */ +- if (pos + got >= arg_end) { +- int n = 0; +- +- /* +- * If we started before 'arg_end' but ended up +- * at or after it, we start the NUL character +- * check at arg_end-1 (where we expect the normal +- * EOF to be). +- * +- * NOTE! 
This is smaller than 'got', because +- * pos + got >= arg_end +- */ +- if (pos < arg_end) +- n = arg_end - pos - 1; +- +- /* Cut off at first NUL after 'n' */ +- got = n + strnlen(page+n, offset+got-n); +- if (got < offset) +- break; +- got -= offset; +- +- /* Include the NUL if it existed */ +- if (got < size) +- got++; +- } +- +- got -= copy_to_user(buf, page+offset, got); ++ got -= copy_to_user(buf, page, got); + if (unlikely(!got)) { + if (!len) + len = -EFAULT; diff --git a/queue-4.19/sched-fair-don-t-free-p-numa_faults-with-concurrent-readers.patch b/queue-4.19/sched-fair-don-t-free-p-numa_faults-with-concurrent-readers.patch new file mode 100644 index 00000000000..b3e53b751ce --- /dev/null +++ b/queue-4.19/sched-fair-don-t-free-p-numa_faults-with-concurrent-readers.patch @@ -0,0 +1,131 @@ +From 16d51a590a8ce3befb1308e0e7ab77f3b661af33 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Tue, 16 Jul 2019 17:20:45 +0200 +Subject: sched/fair: Don't free p->numa_faults with concurrent readers + +From: Jann Horn + +commit 16d51a590a8ce3befb1308e0e7ab77f3b661af33 upstream. + +When going through execve(), zero out the NUMA fault statistics instead of +freeing them. + +During execve, the task is reachable through procfs and the scheduler. A +concurrent /proc/*/sched reader can read data from a freed ->numa_faults +allocation (confirmed by KASAN) and write it back to userspace. +I believe that it would also be possible for a use-after-free read to occur +through a race between a NUMA fault and execve(): task_numa_fault() can +lead to task_numa_compare(), which invokes task_weight() on the currently +running task of a different CPU. + +Another way to fix this would be to make ->numa_faults RCU-managed or add +extra locking, but it seems easier to wipe the NUMA fault statistics on +execve. 
+ +Signed-off-by: Jann Horn +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Petr Mladek +Cc: Sergey Senozhatsky +Cc: Thomas Gleixner +Cc: Will Deacon +Fixes: 82727018b0d3 ("sched/numa: Call task_numa_free() from do_execve()") +Link: https://lkml.kernel.org/r/20190716152047.14424-1-jannh@google.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 2 +- + include/linux/sched/numa_balancing.h | 4 ++-- + kernel/fork.c | 2 +- + kernel/sched/fair.c | 24 ++++++++++++++++++++---- + 4 files changed, 24 insertions(+), 8 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1826,7 +1826,7 @@ static int __do_execve_file(int fd, stru + membarrier_execve(current); + rseq_execve(current); + acct_update_integrals(current); +- task_numa_free(current); ++ task_numa_free(current, false); + free_bprm(bprm); + kfree(pathbuf); + if (filename) +--- a/include/linux/sched/numa_balancing.h ++++ b/include/linux/sched/numa_balancing.h +@@ -19,7 +19,7 @@ + extern void task_numa_fault(int last_node, int node, int pages, int flags); + extern pid_t task_numa_group_id(struct task_struct *p); + extern void set_numabalancing_state(bool enabled); +-extern void task_numa_free(struct task_struct *p); ++extern void task_numa_free(struct task_struct *p, bool final); + extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); + #else +@@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(s + static inline void set_numabalancing_state(bool enabled) + { + } +-static inline void task_numa_free(struct task_struct *p) ++static inline void task_numa_free(struct task_struct *p, bool final) + { + } + static inline bool should_numa_migrate_memory(struct task_struct *p, +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -679,7 +679,7 @@ void __put_task_struct(struct task_struc + WARN_ON(tsk == current); + + cgroup_free(tsk); +- task_numa_free(tsk); ++ task_numa_free(tsk, true); + 
security_task_free(tsk); + exit_creds(tsk); + delayacct_tsk_free(tsk); +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2345,13 +2345,23 @@ no_join: + return; + } + +-void task_numa_free(struct task_struct *p) ++/* ++ * Get rid of NUMA staticstics associated with a task (either current or dead). ++ * If @final is set, the task is dead and has reached refcount zero, so we can ++ * safely free all relevant data structures. Otherwise, there might be ++ * concurrent reads from places like load balancing and procfs, and we should ++ * reset the data back to default state without freeing ->numa_faults. ++ */ ++void task_numa_free(struct task_struct *p, bool final) + { + struct numa_group *grp = p->numa_group; +- void *numa_faults = p->numa_faults; ++ unsigned long *numa_faults = p->numa_faults; + unsigned long flags; + int i; + ++ if (!numa_faults) ++ return; ++ + if (grp) { + spin_lock_irqsave(&grp->lock, flags); + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) +@@ -2364,8 +2374,14 @@ void task_numa_free(struct task_struct * + put_numa_group(grp); + } + +- p->numa_faults = NULL; +- kfree(numa_faults); ++ if (final) { ++ p->numa_faults = NULL; ++ kfree(numa_faults); ++ } else { ++ p->total_numa_faults = 0; ++ for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) ++ numa_faults[i] = 0; ++ } + } + + /* diff --git a/queue-4.19/sched-fair-use-rcu-accessors-consistently-for-numa_group.patch b/queue-4.19/sched-fair-use-rcu-accessors-consistently-for-numa_group.patch new file mode 100644 index 00000000000..462aa41e657 --- /dev/null +++ b/queue-4.19/sched-fair-use-rcu-accessors-consistently-for-numa_group.patch @@ -0,0 +1,386 @@ +From cb361d8cdef69990f6b4504dc1fd9a594d983c97 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Tue, 16 Jul 2019 17:20:47 +0200 +Subject: sched/fair: Use RCU accessors consistently for ->numa_group + +From: Jann Horn + +commit cb361d8cdef69990f6b4504dc1fd9a594d983c97 upstream. 
+ +The old code used RCU annotations and accessors inconsistently for +->numa_group, which can lead to use-after-frees and NULL dereferences. + +Let all accesses to ->numa_group use proper RCU helpers to prevent such +issues. + +Signed-off-by: Jann Horn +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Petr Mladek +Cc: Sergey Senozhatsky +Cc: Thomas Gleixner +Cc: Will Deacon +Fixes: 8c8a743c5087 ("sched/numa: Use {cpu, pid} to create task groups for shared faults") +Link: https://lkml.kernel.org/r/20190716152047.14424-3-jannh@google.com +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/sched.h | 10 +++- + kernel/sched/fair.c | 120 +++++++++++++++++++++++++++++++++----------------- + 2 files changed, 90 insertions(+), 40 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1023,7 +1023,15 @@ struct task_struct { + u64 last_sum_exec_runtime; + struct callback_head numa_work; + +- struct numa_group *numa_group; ++ /* ++ * This pointer is only modified for current in syscall and ++ * pagefault context (and for tasks being destroyed), so it can be read ++ * from any of the following contexts: ++ * - RCU read-side critical section ++ * - current->numa_group from everywhere ++ * - task's runqueue locked, task not running ++ */ ++ struct numa_group __rcu *numa_group; + + /* + * numa_faults is an array split into four regions: +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1053,6 +1053,21 @@ struct numa_group { + unsigned long faults[0]; + }; + ++/* ++ * For functions that can be called in multiple contexts that permit reading ++ * ->numa_group (see struct task_struct for locking rules). 
++ */ ++static struct numa_group *deref_task_numa_group(struct task_struct *p) ++{ ++ return rcu_dereference_check(p->numa_group, p == current || ++ (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu))); ++} ++ ++static struct numa_group *deref_curr_numa_group(struct task_struct *p) ++{ ++ return rcu_dereference_protected(p->numa_group, p == current); ++} ++ + static inline unsigned long group_faults_priv(struct numa_group *ng); + static inline unsigned long group_faults_shared(struct numa_group *ng); + +@@ -1096,10 +1111,12 @@ static unsigned int task_scan_start(stru + { + unsigned long smin = task_scan_min(p); + unsigned long period = smin; ++ struct numa_group *ng; + + /* Scale the maximum scan period with the amount of shared memory. */ +- if (p->numa_group) { +- struct numa_group *ng = p->numa_group; ++ rcu_read_lock(); ++ ng = rcu_dereference(p->numa_group); ++ if (ng) { + unsigned long shared = group_faults_shared(ng); + unsigned long private = group_faults_priv(ng); + +@@ -1107,6 +1124,7 @@ static unsigned int task_scan_start(stru + period *= shared + 1; + period /= private + shared + 1; + } ++ rcu_read_unlock(); + + return max(smin, period); + } +@@ -1115,13 +1133,14 @@ static unsigned int task_scan_max(struct + { + unsigned long smin = task_scan_min(p); + unsigned long smax; ++ struct numa_group *ng; + + /* Watch for min being lower than max due to floor calculations */ + smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p); + + /* Scale the maximum scan period with the amount of shared memory. 
*/ +- if (p->numa_group) { +- struct numa_group *ng = p->numa_group; ++ ng = deref_curr_numa_group(p); ++ if (ng) { + unsigned long shared = group_faults_shared(ng); + unsigned long private = group_faults_priv(ng); + unsigned long period = smax; +@@ -1153,7 +1172,7 @@ void init_numa_balancing(unsigned long c + p->numa_scan_period = sysctl_numa_balancing_scan_delay; + p->numa_work.next = &p->numa_work; + p->numa_faults = NULL; +- p->numa_group = NULL; ++ RCU_INIT_POINTER(p->numa_group, NULL); + p->last_task_numa_placement = 0; + p->last_sum_exec_runtime = 0; + +@@ -1200,7 +1219,16 @@ static void account_numa_dequeue(struct + + pid_t task_numa_group_id(struct task_struct *p) + { +- return p->numa_group ? p->numa_group->gid : 0; ++ struct numa_group *ng; ++ pid_t gid = 0; ++ ++ rcu_read_lock(); ++ ng = rcu_dereference(p->numa_group); ++ if (ng) ++ gid = ng->gid; ++ rcu_read_unlock(); ++ ++ return gid; + } + + /* +@@ -1225,11 +1253,13 @@ static inline unsigned long task_faults( + + static inline unsigned long group_faults(struct task_struct *p, int nid) + { +- if (!p->numa_group) ++ struct numa_group *ng = deref_task_numa_group(p); ++ ++ if (!ng) + return 0; + +- return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] + +- p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)]; ++ return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] + ++ ng->faults[task_faults_idx(NUMA_MEM, nid, 1)]; + } + + static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) +@@ -1367,12 +1397,13 @@ static inline unsigned long task_weight( + static inline unsigned long group_weight(struct task_struct *p, int nid, + int dist) + { ++ struct numa_group *ng = deref_task_numa_group(p); + unsigned long faults, total_faults; + +- if (!p->numa_group) ++ if (!ng) + return 0; + +- total_faults = p->numa_group->total_faults; ++ total_faults = ng->total_faults; + + if (!total_faults) + return 0; +@@ -1386,7 +1417,7 @@ static inline unsigned long group_weight + bool 
should_numa_migrate_memory(struct task_struct *p, struct page * page, + int src_nid, int dst_cpu) + { +- struct numa_group *ng = p->numa_group; ++ struct numa_group *ng = deref_curr_numa_group(p); + int dst_nid = cpu_to_node(dst_cpu); + int last_cpupid, this_cpupid; + +@@ -1592,13 +1623,14 @@ static bool load_too_imbalanced(long src + static void task_numa_compare(struct task_numa_env *env, + long taskimp, long groupimp, bool maymove) + { ++ struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p); + struct rq *dst_rq = cpu_rq(env->dst_cpu); ++ long imp = p_ng ? groupimp : taskimp; + struct task_struct *cur; + long src_load, dst_load; +- long load; +- long imp = env->p->numa_group ? groupimp : taskimp; +- long moveimp = imp; + int dist = env->dist; ++ long moveimp = imp; ++ long load; + + if (READ_ONCE(dst_rq->numa_migrate_on)) + return; +@@ -1637,21 +1669,22 @@ static void task_numa_compare(struct tas + * If dst and source tasks are in the same NUMA group, or not + * in any group then look only at task weights. + */ +- if (cur->numa_group == env->p->numa_group) { ++ cur_ng = rcu_dereference(cur->numa_group); ++ if (cur_ng == p_ng) { + imp = taskimp + task_weight(cur, env->src_nid, dist) - + task_weight(cur, env->dst_nid, dist); + /* + * Add some hysteresis to prevent swapping the + * tasks within a group over tiny differences. + */ +- if (cur->numa_group) ++ if (cur_ng) + imp -= imp / 16; + } else { + /* + * Compare the group weights. If a task is all by itself + * (not part of a group), use the task weight instead. 
+ */ +- if (cur->numa_group && env->p->numa_group) ++ if (cur_ng && p_ng) + imp += group_weight(cur, env->src_nid, dist) - + group_weight(cur, env->dst_nid, dist); + else +@@ -1749,11 +1782,12 @@ static int task_numa_migrate(struct task + .best_imp = 0, + .best_cpu = -1, + }; ++ unsigned long taskweight, groupweight; + struct sched_domain *sd; ++ long taskimp, groupimp; ++ struct numa_group *ng; + struct rq *best_rq; +- unsigned long taskweight, groupweight; + int nid, ret, dist; +- long taskimp, groupimp; + + /* + * Pick the lowest SD_NUMA domain, as that would have the smallest +@@ -1799,7 +1833,8 @@ static int task_numa_migrate(struct task + * multiple NUMA nodes; in order to better consolidate the group, + * we need to check other locations. + */ +- if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) { ++ ng = deref_curr_numa_group(p); ++ if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) { + for_each_online_node(nid) { + if (nid == env.src_nid || nid == p->numa_preferred_nid) + continue; +@@ -1832,7 +1867,7 @@ static int task_numa_migrate(struct task + * A task that migrated to a second choice node will be better off + * trying for a better one later. Do not set the preferred node here. 
+ */ +- if (p->numa_group) { ++ if (ng) { + if (env.best_cpu == -1) + nid = env.src_nid; + else +@@ -2127,6 +2162,7 @@ static void task_numa_placement(struct t + unsigned long total_faults; + u64 runtime, period; + spinlock_t *group_lock = NULL; ++ struct numa_group *ng; + + /* + * The p->mm->numa_scan_seq field gets updated without +@@ -2144,8 +2180,9 @@ static void task_numa_placement(struct t + runtime = numa_get_avg_runtime(p, &period); + + /* If the task is part of a group prevent parallel updates to group stats */ +- if (p->numa_group) { +- group_lock = &p->numa_group->lock; ++ ng = deref_curr_numa_group(p); ++ if (ng) { ++ group_lock = &ng->lock; + spin_lock_irq(group_lock); + } + +@@ -2186,7 +2223,7 @@ static void task_numa_placement(struct t + p->numa_faults[cpu_idx] += f_diff; + faults += p->numa_faults[mem_idx]; + p->total_numa_faults += diff; +- if (p->numa_group) { ++ if (ng) { + /* + * safe because we can only change our own group + * +@@ -2194,14 +2231,14 @@ static void task_numa_placement(struct t + * nid and priv in a specific region because it + * is at the beginning of the numa_faults array. 
+ */ +- p->numa_group->faults[mem_idx] += diff; +- p->numa_group->faults_cpu[mem_idx] += f_diff; +- p->numa_group->total_faults += diff; +- group_faults += p->numa_group->faults[mem_idx]; ++ ng->faults[mem_idx] += diff; ++ ng->faults_cpu[mem_idx] += f_diff; ++ ng->total_faults += diff; ++ group_faults += ng->faults[mem_idx]; + } + } + +- if (!p->numa_group) { ++ if (!ng) { + if (faults > max_faults) { + max_faults = faults; + max_nid = nid; +@@ -2212,8 +2249,8 @@ static void task_numa_placement(struct t + } + } + +- if (p->numa_group) { +- numa_group_count_active_nodes(p->numa_group); ++ if (ng) { ++ numa_group_count_active_nodes(ng); + spin_unlock_irq(group_lock); + max_nid = preferred_group_nid(p, max_nid); + } +@@ -2247,7 +2284,7 @@ static void task_numa_group(struct task_ + int cpu = cpupid_to_cpu(cpupid); + int i; + +- if (unlikely(!p->numa_group)) { ++ if (unlikely(!deref_curr_numa_group(p))) { + unsigned int size = sizeof(struct numa_group) + + 4*nr_node_ids*sizeof(unsigned long); + +@@ -2283,7 +2320,7 @@ static void task_numa_group(struct task_ + if (!grp) + goto no_join; + +- my_grp = p->numa_group; ++ my_grp = deref_curr_numa_group(p); + if (grp == my_grp) + goto no_join; + +@@ -2354,7 +2391,8 @@ no_join: + */ + void task_numa_free(struct task_struct *p, bool final) + { +- struct numa_group *grp = p->numa_group; ++ /* safe: p either is current or is being freed by current */ ++ struct numa_group *grp = rcu_dereference_raw(p->numa_group); + unsigned long *numa_faults = p->numa_faults; + unsigned long flags; + int i; +@@ -2434,7 +2472,7 @@ void task_numa_fault(int last_cpupid, in + * actively using should be counted as local. This allows the + * scan rate to slow down when a workload has settled down. 
+ */ +- ng = p->numa_group; ++ ng = deref_curr_numa_group(p); + if (!priv && !local && ng && ng->active_nodes > 1 && + numa_is_active_node(cpu_node, ng) && + numa_is_active_node(mem_node, ng)) +@@ -10234,18 +10272,22 @@ void show_numa_stats(struct task_struct + { + int node; + unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; ++ struct numa_group *ng; + ++ rcu_read_lock(); ++ ng = rcu_dereference(p->numa_group); + for_each_online_node(node) { + if (p->numa_faults) { + tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; + tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)]; + } +- if (p->numa_group) { +- gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)], +- gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)]; ++ if (ng) { ++ gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)], ++ gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)]; + } + print_numa_stats(m, node, tsf, tpf, gsf, gpf); + } ++ rcu_read_unlock(); + } + #endif /* CONFIG_NUMA_BALANCING */ + #endif /* CONFIG_SCHED_DEBUG */ diff --git a/queue-4.19/scsi-core-avoid-that-a-kernel-warning-appears-during-system-resume.patch b/queue-4.19/scsi-core-avoid-that-a-kernel-warning-appears-during-system-resume.patch new file mode 100644 index 00000000000..8a20d8ef51d --- /dev/null +++ b/queue-4.19/scsi-core-avoid-that-a-kernel-warning-appears-during-system-resume.patch @@ -0,0 +1,63 @@ +From 17605afaae825b0291f80c62a7f6565879edaa8a Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Fri, 15 Mar 2019 16:27:58 -0700 +Subject: scsi: core: Avoid that a kernel warning appears during system resume + +From: Bart Van Assche + +commit 17605afaae825b0291f80c62a7f6565879edaa8a upstream. + +Since scsi_device_quiesce() skips SCSI devices that have another state than +RUNNING, OFFLINE or TRANSPORT_OFFLINE, scsi_device_resume() should not +complain about SCSI devices that have been skipped. Hence this patch. 
This +patch avoids that the following warning appears during resume: + +WARNING: CPU: 3 PID: 1039 at blk_clear_pm_only+0x2a/0x30 +CPU: 3 PID: 1039 Comm: kworker/u8:49 Not tainted 5.0.0+ #1 +Hardware name: LENOVO 4180F42/4180F42, BIOS 83ET75WW (1.45 ) 05/10/2013 +Workqueue: events_unbound async_run_entry_fn +RIP: 0010:blk_clear_pm_only+0x2a/0x30 +Call Trace: + ? scsi_device_resume+0x28/0x50 + ? scsi_dev_type_resume+0x2b/0x80 + ? async_run_entry_fn+0x2c/0xd0 + ? process_one_work+0x1f0/0x3f0 + ? worker_thread+0x28/0x3c0 + ? process_one_work+0x3f0/0x3f0 + ? kthread+0x10c/0x130 + ? __kthread_create_on_node+0x150/0x150 + ? ret_from_fork+0x1f/0x30 + +Cc: Christoph Hellwig +Cc: Hannes Reinecke +Cc: Ming Lei +Cc: Johannes Thumshirn +Cc: Oleksandr Natalenko +Cc: Martin Steigerwald +Cc: +Reported-by: Jisheng Zhang +Tested-by: Jisheng Zhang +Fixes: 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably") # v4.15 +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/scsi_lib.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -3102,8 +3102,10 @@ void scsi_device_resume(struct scsi_devi + * device deleted during suspend) + */ + mutex_lock(&sdev->state_mutex); +- sdev->quiesced_by = NULL; +- blk_clear_pm_only(sdev->request_queue); ++ if (sdev->quiesced_by) { ++ sdev->quiesced_by = NULL; ++ blk_clear_pm_only(sdev->request_queue); ++ } + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); + mutex_unlock(&sdev->state_mutex); diff --git a/queue-4.19/series b/queue-4.19/series index 6320fb4d9d0..b8450eae0e9 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -21,3 +21,12 @@ vhost-introduce-vhost_exceeds_weight.patch vhost_net-fix-possible-infinite-loop.patch vhost-vsock-add-weight-support.patch vhost-scsi-add-weight-support.patch 
+sched-fair-don-t-free-p-numa_faults-with-concurrent-readers.patch +sched-fair-use-rcu-accessors-consistently-for-numa_group.patch +proc-pid-cmdline-remove-all-the-special-cases.patch +proc-pid-cmdline-add-back-the-setproctitle-special-case.patch +drivers-pps-pps.c-clear-offset-flags-in-pps_setparams-ioctl.patch +fix-allyesconfig-output.patch +ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch +block-scsi-change-the-preempt-only-flag-into-a-counter.patch +scsi-core-avoid-that-a-kernel-warning-appears-during-system-resume.patch