From: Greg Kroah-Hartman Date: Mon, 9 Oct 2017 12:49:01 +0000 (+0200) Subject: 4.13-stable patches X-Git-Tag: v3.18.75~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=21bb299577eeed13eba1665ca5e1bc429c4b4245;p=thirdparty%2Fkernel%2Fstable-queue.git 4.13-stable patches added patches: alsa-compress-remove-unused-variable.patch alsa-usx2y-suppress-kernel-warning-at-page-allocation-failures.patch lib-ratelimit.c-use-deferred-printk-version.patch lsm-fix-smack_inode_removexattr-and-xattr_getsecurity-memleak.patch mm-avoid-marking-swap-cached-page-as-lazyfree.patch mm-fix-data-corruption-caused-by-lazyfree-page.patch mm-fix-rodata_test-failure-rodata_test-test-data-was-not-read-only.patch mm-hugetlb-soft_offline-save-compound-page-order-before-page-migration.patch mm-oom_reaper-skip-mm-structs-with-mmu-notifiers.patch revert-alsa-echoaudio-purge-contradictions-between-dimension-matrix-members-and-total-number-of-members.patch userfaultfd-non-cooperative-fix-fork-use-after-free.patch --- diff --git a/queue-4.13/alsa-compress-remove-unused-variable.patch b/queue-4.13/alsa-compress-remove-unused-variable.patch new file mode 100644 index 00000000000..182a2fb9d23 --- /dev/null +++ b/queue-4.13/alsa-compress-remove-unused-variable.patch @@ -0,0 +1,43 @@ +From a931b9ce93841a5b66b709ba5a244276e345e63b Mon Sep 17 00:00:00 2001 +From: Guneshwor Singh +Date: Thu, 14 Sep 2017 17:49:40 +0530 +Subject: ALSA: compress: Remove unused variable + +From: Guneshwor Singh + +commit a931b9ce93841a5b66b709ba5a244276e345e63b upstream. + +Commit 04c5d5a430fc ("ALSA: compress: Embed struct device") removed +the statement that used 'str' but didn't remove the variable itself. +So remove it. 
+ +[Adding stable to Cc since pr_debug() may refer to the uninitialized + buffer -- tiwai] + +Fixes: 04c5d5a430fc ("ALSA: compress: Embed struct device") +Signed-off-by: Guneshwor Singh +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/core/compress_offload.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/sound/core/compress_offload.c ++++ b/sound/core/compress_offload.c +@@ -948,14 +948,13 @@ static const struct file_operations snd_ + static int snd_compress_dev_register(struct snd_device *device) + { + int ret = -EINVAL; +- char str[16]; + struct snd_compr *compr; + + if (snd_BUG_ON(!device || !device->device_data)) + return -EBADFD; + compr = device->device_data; + +- pr_debug("reg %s for device %s, direction %d\n", str, compr->name, ++ pr_debug("reg device %s, direction %d\n", compr->name, + compr->direction); + /* register compressed device */ + ret = snd_register_device(SNDRV_DEVICE_TYPE_COMPRESS, diff --git a/queue-4.13/alsa-usx2y-suppress-kernel-warning-at-page-allocation-failures.patch b/queue-4.13/alsa-usx2y-suppress-kernel-warning-at-page-allocation-failures.patch new file mode 100644 index 00000000000..14dea9fc521 --- /dev/null +++ b/queue-4.13/alsa-usx2y-suppress-kernel-warning-at-page-allocation-failures.patch @@ -0,0 +1,58 @@ +From 7682e399485fe19622b6fd82510b1f4551e48a25 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Mon, 2 Oct 2017 14:06:43 +0200 +Subject: ALSA: usx2y: Suppress kernel warning at page allocation failures + +From: Takashi Iwai + +commit 7682e399485fe19622b6fd82510b1f4551e48a25 upstream. + +The usx2y driver allocates the stream read/write buffers in continuous +pages depending on the stream setup, and this may spew the kernel +warning messages with a stack trace like: + WARNING: CPU: 1 PID: 1846 at mm/page_alloc.c:3883 + __alloc_pages_slowpath+0x1ef2/0x2d70 + Modules linked in: + CPU: 1 PID: 1846 Comm: kworker/1:2 Not tainted + .... 
+ +It may confuse users into thinking it is a serious error, although this is +not a fatal error and the driver handles the error case gracefully. +Since the driver already has some sanity checks of the given size (128 +and 256 pages), it can't pass any crazy value. So it's merely page +fragmentation. + +This patch adds __GFP_NOWARN to each caller for suppressing such +kernel warnings. The original issue was spotted by syzkaller. + +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/usb/usx2y/usb_stream.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/sound/usb/usx2y/usb_stream.c ++++ b/sound/usb/usx2y/usb_stream.c +@@ -191,7 +191,8 @@ struct usb_stream *usb_stream_new(struct + } + + pg = get_order(read_size); +- sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); ++ sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| ++ __GFP_NOWARN, pg); + if (!sk->s) { + snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); + goto out; +@@ -211,7 +212,8 @@ struct usb_stream *usb_stream_new(struct + pg = get_order(write_size); + + sk->write_page = +- (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); ++ (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| ++ __GFP_NOWARN, pg); + if (!sk->write_page) { + snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); + usb_stream_free(sk); diff --git a/queue-4.13/lib-ratelimit.c-use-deferred-printk-version.patch b/queue-4.13/lib-ratelimit.c-use-deferred-printk-version.patch new file mode 100644 index 00000000000..e06e2e78e55 --- /dev/null +++ b/queue-4.13/lib-ratelimit.c-use-deferred-printk-version.patch @@ -0,0 +1,260 @@ +From 656d61ce9666209c4c4a13c71902d3ee70d1ff6f Mon Sep 17 00:00:00 2001 +From: Sergey Senozhatsky +Date: Tue, 3 Oct 2017 16:16:45 -0700 +Subject: lib/ratelimit.c: use deferred printk() version + +From: Sergey Senozhatsky + +commit 
656d61ce9666209c4c4a13c71902d3ee70d1ff6f upstream. + +printk_ratelimit() invokes ___ratelimit() which may invoke a normal +printk() (pr_warn() in this particular case) to warn about suppressed +output. Given that printk_ratelimit() may be called from anywhere, that +pr_warn() is dangerous - it may end up deadlocking the system. Fix +___ratelimit() by using deferred printk(). + +Sasha reported the following lockdep error: + + : Unregister pv shared memory for cpu 8 + : select_fallback_rq: 3 callbacks suppressed + : process 8583 (trinity-c78) no longer affine to cpu8 + : + : ====================================================== + : WARNING: possible circular locking dependency detected + : 4.14.0-rc2-next-20170927+ #252 Not tainted + : ------------------------------------------------------ + : migration/8/62 is trying to acquire lock: + : (&port_lock_key){-.-.}, at: serial8250_console_write() + : + : but task is already holding lock: + : (&rq->lock){-.-.}, at: sched_cpu_dying() + : + : which lock already depends on the new lock. 
+ : + : + : the existing dependency chain (in reverse order) is: + : + : -> #3 (&rq->lock){-.-.}: + : __lock_acquire() + : lock_acquire() + : _raw_spin_lock() + : task_fork_fair() + : sched_fork() + : copy_process.part.31() + : _do_fork() + : kernel_thread() + : rest_init() + : start_kernel() + : x86_64_start_reservations() + : x86_64_start_kernel() + : verify_cpu() + : + : -> #2 (&p->pi_lock){-.-.}: + : __lock_acquire() + : lock_acquire() + : _raw_spin_lock_irqsave() + : try_to_wake_up() + : default_wake_function() + : woken_wake_function() + : __wake_up_common() + : __wake_up_common_lock() + : __wake_up() + : tty_wakeup() + : tty_port_default_wakeup() + : tty_port_tty_wakeup() + : uart_write_wakeup() + : serial8250_tx_chars() + : serial8250_handle_irq.part.25() + : serial8250_default_handle_irq() + : serial8250_interrupt() + : __handle_irq_event_percpu() + : handle_irq_event_percpu() + : handle_irq_event() + : handle_level_irq() + : handle_irq() + : do_IRQ() + : ret_from_intr() + : native_safe_halt() + : default_idle() + : arch_cpu_idle() + : default_idle_call() + : do_idle() + : cpu_startup_entry() + : rest_init() + : start_kernel() + : x86_64_start_reservations() + : x86_64_start_kernel() + : verify_cpu() + : + : -> #1 (&tty->write_wait){-.-.}: + : __lock_acquire() + : lock_acquire() + : _raw_spin_lock_irqsave() + : __wake_up_common_lock() + : __wake_up() + : tty_wakeup() + : tty_port_default_wakeup() + : tty_port_tty_wakeup() + : uart_write_wakeup() + : serial8250_tx_chars() + : serial8250_handle_irq.part.25() + : serial8250_default_handle_irq() + : serial8250_interrupt() + : __handle_irq_event_percpu() + : handle_irq_event_percpu() + : handle_irq_event() + : handle_level_irq() + : handle_irq() + : do_IRQ() + : ret_from_intr() + : native_safe_halt() + : default_idle() + : arch_cpu_idle() + : default_idle_call() + : do_idle() + : cpu_startup_entry() + : rest_init() + : start_kernel() + : x86_64_start_reservations() + : x86_64_start_kernel() + : verify_cpu() + : 
+ : -> #0 (&port_lock_key){-.-.}: + : check_prev_add() + : __lock_acquire() + : lock_acquire() + : _raw_spin_lock_irqsave() + : serial8250_console_write() + : univ8250_console_write() + : console_unlock() + : vprintk_emit() + : vprintk_default() + : vprintk_func() + : printk() + : ___ratelimit() + : __printk_ratelimit() + : select_fallback_rq() + : sched_cpu_dying() + : cpuhp_invoke_callback() + : take_cpu_down() + : multi_cpu_stop() + : cpu_stopper_thread() + : smpboot_thread_fn() + : kthread() + : ret_from_fork() + : + : other info that might help us debug this: + : + : Chain exists of: + : &port_lock_key --> &p->pi_lock --> &rq->lock + : + : Possible unsafe locking scenario: + : + : CPU0 CPU1 + : ---- ---- + : lock(&rq->lock); + : lock(&p->pi_lock); + : lock(&rq->lock); + : lock(&port_lock_key); + : + : *** DEADLOCK *** + : + : 4 locks held by migration/8/62: + : #0: (&p->pi_lock){-.-.}, at: sched_cpu_dying() + : #1: (&rq->lock){-.-.}, at: sched_cpu_dying() + : #2: (printk_ratelimit_state.lock){....}, at: ___ratelimit() + : #3: (console_lock){+.+.}, at: vprintk_emit() + : + : stack backtrace: + : CPU: 8 PID: 62 Comm: migration/8 Not tainted 4.14.0-rc2-next-20170927+ #252 + : Call Trace: + : dump_stack() + : print_circular_bug() + : check_prev_add() + : ? add_lock_to_list.isra.26() + : ? check_usage() + : ? kvm_clock_read() + : ? kvm_sched_clock_read() + : ? sched_clock() + : ? check_preemption_disabled() + : __lock_acquire() + : ? __lock_acquire() + : ? add_lock_to_list.isra.26() + : ? debug_check_no_locks_freed() + : ? memcpy() + : lock_acquire() + : ? serial8250_console_write() + : _raw_spin_lock_irqsave() + : ? serial8250_console_write() + : serial8250_console_write() + : ? serial8250_start_tx() + : ? lock_acquire() + : ? memcpy() + : univ8250_console_write() + : console_unlock() + : ? __down_trylock_console_sem() + : vprintk_emit() + : vprintk_default() + : vprintk_func() + : printk() + : ? show_regs_print_info() + : ? 
lock_acquire() + : ___ratelimit() + : __printk_ratelimit() + : select_fallback_rq() + : sched_cpu_dying() + : ? sched_cpu_starting() + : ? rcutree_dying_cpu() + : ? sched_cpu_starting() + : cpuhp_invoke_callback() + : ? cpu_disable_common() + : take_cpu_down() + : ? trace_hardirqs_off_caller() + : ? cpuhp_invoke_callback() + : multi_cpu_stop() + : ? __this_cpu_preempt_check() + : ? cpu_stop_queue_work() + : cpu_stopper_thread() + : ? cpu_stop_create() + : smpboot_thread_fn() + : ? sort_range() + : ? schedule() + : ? __kthread_parkme() + : kthread() + : ? sort_range() + : ? kthread_create_on_node() + : ret_from_fork() + : process 9121 (trinity-c78) no longer affine to cpu8 + : smpboot: CPU 8 is now offline + +Link: http://lkml.kernel.org/r/20170928120405.18273-1-sergey.senozhatsky@gmail.com +Fixes: 6b1d174b0c27b ("ratelimit: extend to print suppressed messages on release") +Signed-off-by: Sergey Senozhatsky +Reported-by: Sasha Levin +Reviewed-by: Petr Mladek +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: Steven Rostedt +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + lib/ratelimit.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/lib/ratelimit.c ++++ b/lib/ratelimit.c +@@ -48,7 +48,9 @@ int ___ratelimit(struct ratelimit_state + if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (rs->missed) { + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { +- pr_warn("%s: %d callbacks suppressed\n", func, rs->missed); ++ printk_deferred(KERN_WARNING ++ "%s: %d callbacks suppressed\n", ++ func, rs->missed); + rs->missed = 0; + } + } diff --git a/queue-4.13/lsm-fix-smack_inode_removexattr-and-xattr_getsecurity-memleak.patch b/queue-4.13/lsm-fix-smack_inode_removexattr-and-xattr_getsecurity-memleak.patch new file mode 100644 index 00000000000..0afc0516af0 --- /dev/null +++ b/queue-4.13/lsm-fix-smack_inode_removexattr-and-xattr_getsecurity-memleak.patch @@ -0,0 
+1,129 @@ +From 57e7ba04d422c3d41c8426380303ec9b7533ded9 Mon Sep 17 00:00:00 2001 +From: Casey Schaufler +Date: Tue, 19 Sep 2017 09:39:08 -0700 +Subject: lsm: fix smack_inode_removexattr and xattr_getsecurity memleak + +From: Casey Schaufler + +commit 57e7ba04d422c3d41c8426380303ec9b7533ded9 upstream. + +security_inode_getsecurity() provides the text string value +of a security attribute. It does not provide a "secctx". +The code in xattr_getsecurity() that calls security_inode_getsecurity() +and then calls security_release_secctx() happened to work because +SElinux and Smack treat the attribute and the secctx the same way. +It fails for cap_inode_getsecurity(), because that module has no +secctx that ever needs releasing. It turns out that Smack is the +one that's doing things wrong by not allocating memory when instructed +to do so by the "alloc" parameter. + +The fix is simple enough. Change the security_release_secctx() to +kfree() because it isn't a secctx being returned by +security_inode_getsecurity(). Change Smack to allocate the string when +told to do so. + +Note: this also fixes memory leaks for LSMs which implement +inode_getsecurity but not release_secctx, such as capabilities. 
+ +Signed-off-by: Casey Schaufler +Reported-by: Konstantin Khlebnikov +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xattr.c | 2 - + security/smack/smack_lsm.c | 59 ++++++++++++++++++++------------------------- + 2 files changed, 28 insertions(+), 33 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -249,7 +249,7 @@ xattr_getsecurity(struct inode *inode, c + } + memcpy(value, buffer, len); + out: +- security_release_secctx(buffer, len); ++ kfree(buffer); + out_noalloc: + return len; + } +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -1499,7 +1499,7 @@ static int smack_inode_removexattr(struc + * @inode: the object + * @name: attribute name + * @buffer: where to put the result +- * @alloc: unused ++ * @alloc: duplicate memory + * + * Returns the size of the attribute or an error code + */ +@@ -1512,43 +1512,38 @@ static int smack_inode_getsecurity(struc + struct super_block *sbp; + struct inode *ip = (struct inode *)inode; + struct smack_known *isp; +- int ilen; +- int rc = 0; + +- if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { ++ if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) + isp = smk_of_inode(inode); +- ilen = strlen(isp->smk_known); +- *buffer = isp->smk_known; +- return ilen; ++ else { ++ /* ++ * The rest of the Smack xattrs are only on sockets. ++ */ ++ sbp = ip->i_sb; ++ if (sbp->s_magic != SOCKFS_MAGIC) ++ return -EOPNOTSUPP; ++ ++ sock = SOCKET_I(ip); ++ if (sock == NULL || sock->sk == NULL) ++ return -EOPNOTSUPP; ++ ++ ssp = sock->sk->sk_security; ++ ++ if (strcmp(name, XATTR_SMACK_IPIN) == 0) ++ isp = ssp->smk_in; ++ else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) ++ isp = ssp->smk_out; ++ else ++ return -EOPNOTSUPP; + } + +- /* +- * The rest of the Smack xattrs are only on sockets. 
+- */ +- sbp = ip->i_sb; +- if (sbp->s_magic != SOCKFS_MAGIC) +- return -EOPNOTSUPP; +- +- sock = SOCKET_I(ip); +- if (sock == NULL || sock->sk == NULL) +- return -EOPNOTSUPP; +- +- ssp = sock->sk->sk_security; +- +- if (strcmp(name, XATTR_SMACK_IPIN) == 0) +- isp = ssp->smk_in; +- else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) +- isp = ssp->smk_out; +- else +- return -EOPNOTSUPP; +- +- ilen = strlen(isp->smk_known); +- if (rc == 0) { +- *buffer = isp->smk_known; +- rc = ilen; ++ if (alloc) { ++ *buffer = kstrdup(isp->smk_known, GFP_KERNEL); ++ if (*buffer == NULL) ++ return -ENOMEM; + } + +- return rc; ++ return strlen(isp->smk_known); + } + + diff --git a/queue-4.13/mm-avoid-marking-swap-cached-page-as-lazyfree.patch b/queue-4.13/mm-avoid-marking-swap-cached-page-as-lazyfree.patch new file mode 100644 index 00000000000..6aba8ab2f7f --- /dev/null +++ b/queue-4.13/mm-avoid-marking-swap-cached-page-as-lazyfree.patch @@ -0,0 +1,61 @@ +From 24c92eb7dce0a299b8e1a8c5fa585844a53bf7f0 Mon Sep 17 00:00:00 2001 +From: Shaohua Li +Date: Tue, 3 Oct 2017 16:15:29 -0700 +Subject: mm: avoid marking swap cached page as lazyfree + +From: Shaohua Li + +commit 24c92eb7dce0a299b8e1a8c5fa585844a53bf7f0 upstream. + +MADV_FREE clears pte dirty bit and then marks the page lazyfree (clear +SwapBacked). There is no lock to prevent the page is added to swap +cache between these two steps by page reclaim. Page reclaim could add +the page to swap cache and unmap the page. After page reclaim, the page +is added back to lru. At that time, we probably start draining per-cpu +pagevec and mark the page lazyfree. So the page could be in a state +with SwapBacked cleared and PG_swapcache set. Next time there is a +refault in the virtual address, do_swap_page can find the page from swap +cache but the page has PageSwapCache false because SwapBacked isn't set, +so do_swap_page will bail out and do nothing. The task will keep +running into fault handler. 
+ +Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages") +Link: http://lkml.kernel.org/r/6537ef3814398c0073630b03f176263bc81f0902.1506446061.git.shli@fb.com +Signed-off-by: Shaohua Li +Reported-by: Artem Savkov +Tested-by: Artem Savkov +Reviewed-by: Rik van Riel +Acked-by: Johannes Weiner +Acked-by: Michal Hocko +Acked-by: Minchan Kim +Cc: Hillf Danton +Cc: Hugh Dickins +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/swap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page + void *arg) + { + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && +- !PageUnevictable(page)) { ++ !PageSwapCache(page) && !PageUnevictable(page)) { + bool active = PageActive(page); + + del_page_from_lru_list(page, lruvec, +@@ -665,7 +665,7 @@ void deactivate_file_page(struct page *p + void mark_page_lazyfree(struct page *page) + { + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && +- !PageUnevictable(page)) { ++ !PageSwapCache(page) && !PageUnevictable(page)) { + struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); + + get_page(page); diff --git a/queue-4.13/mm-fix-data-corruption-caused-by-lazyfree-page.patch b/queue-4.13/mm-fix-data-corruption-caused-by-lazyfree-page.patch new file mode 100644 index 00000000000..d6e1f0e2d5b --- /dev/null +++ b/queue-4.13/mm-fix-data-corruption-caused-by-lazyfree-page.patch @@ -0,0 +1,63 @@ +From 9625456cc76391b7f3f2809579126542a8ed4d39 Mon Sep 17 00:00:00 2001 +From: Shaohua Li +Date: Tue, 3 Oct 2017 16:15:32 -0700 +Subject: mm: fix data corruption caused by lazyfree page + +From: Shaohua Li + +commit 9625456cc76391b7f3f2809579126542a8ed4d39 upstream. + +MADV_FREE clears pte dirty bit and then marks the page lazyfree (clear +SwapBacked). There is no lock to prevent the page is added to swap +cache between these two steps by page reclaim. 
If page reclaim finds +such page, it will simply add the page to swap cache without pageout the +page to swap because the page is marked as clean. Next time, page fault +will read data from the swap slot which doesn't have the original data, +so we have a data corruption. To fix issue, we mark the page dirty and +pageout the page. + +However, we shouldn't dirty all pages which is clean and in swap cache. +swapin page is swap cache and clean too. So we only dirty page which is +added into swap cache in page reclaim, which shouldn't be swapin page. +As Minchan suggested, simply dirty the page in add_to_swap can do the +job. + +Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages") +Link: http://lkml.kernel.org/r/08c84256b007bf3f63c91d94383bd9eb6fee2daa.1506446061.git.shli@fb.com +Signed-off-by: Shaohua Li +Reported-by: Artem Savkov +Acked-by: Michal Hocko +Acked-by: Minchan Kim +Cc: Johannes Weiner +Cc: Hillf Danton +Cc: Hugh Dickins +Cc: Rik van Riel +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/swap_state.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/mm/swap_state.c ++++ b/mm/swap_state.c +@@ -219,6 +219,17 @@ int add_to_swap(struct page *page) + * clear SWAP_HAS_CACHE flag. + */ + goto fail; ++ /* ++ * Normally the page will be dirtied in unmap because its pte should be ++ * dirty. A special case is MADV_FREE page. The page'e pte could have ++ * dirty bit cleared but the page's SwapBacked bit is still set because ++ * clearing the dirty bit and SwapBacked bit has no lock protected. For ++ * such page, unmap will not set dirty bit for it, so page reclaim will ++ * not write the page out. This can cause data corruption when the page ++ * is swap in later. Always setting the dirty bit for the page solves ++ * the problem. 
++ */ ++ set_page_dirty(page); + + return 1; + diff --git a/queue-4.13/mm-fix-rodata_test-failure-rodata_test-test-data-was-not-read-only.patch b/queue-4.13/mm-fix-rodata_test-failure-rodata_test-test-data-was-not-read-only.patch new file mode 100644 index 00000000000..9ae26ecb6c9 --- /dev/null +++ b/queue-4.13/mm-fix-rodata_test-failure-rodata_test-test-data-was-not-read-only.patch @@ -0,0 +1,51 @@ +From a872eb2131e91ce7c89a8888974a5e22a272b12f Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Tue, 3 Oct 2017 16:15:16 -0700 +Subject: mm: fix RODATA_TEST failure "rodata_test: test data was not read only" + +From: Christophe Leroy + +commit a872eb2131e91ce7c89a8888974a5e22a272b12f upstream. + +On powerpc, RODATA_TEST fails with the following messages: + + Freeing unused kernel memory: 528K + rodata_test: test data was not read only + +This is because GCC allocates it to .data section: + + c0695034 g O .data 00000004 rodata_test_data + +Since commit 056b9d8a7692 ("mm: remove rodata_test_data export, add +pr_fmt"), rodata_test_data is used only inside rodata_test.c. By +declaring it static, it gets properly allocated into .rodata section +instead of .data: + + c04df710 l O .rodata 00000004 rodata_test_data + +Fixes: 056b9d8a7692 ("mm: remove rodata_test_data export, add pr_fmt") +Link: http://lkml.kernel.org/r/20170921093729.1080368AC1@po15668-vm-win7.idsi0.si.c-s.fr +Signed-off-by: Christophe Leroy +Cc: Kees Cook +Cc: Jinbum Park +Cc: Segher Boessenkool +Cc: David Laight +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/rodata_test.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/rodata_test.c ++++ b/mm/rodata_test.c +@@ -14,7 +14,7 @@ + #include + #include + +-const int rodata_test_data = 0xC3; ++static const int rodata_test_data = 0xC3; + + void rodata_test(void) + { diff --git a/queue-4.13/mm-hugetlb-soft_offline-save-compound-page-order-before-page-migration.patch 
b/queue-4.13/mm-hugetlb-soft_offline-save-compound-page-order-before-page-migration.patch new file mode 100644 index 00000000000..600fee974a4 --- /dev/null +++ b/queue-4.13/mm-hugetlb-soft_offline-save-compound-page-order-before-page-migration.patch @@ -0,0 +1,72 @@ +From 19bfbe22f59a207417b2679e7e83c180419c9ec5 Mon Sep 17 00:00:00 2001 +From: Alexandru Moise <00moses.alexander00@gmail.com> +Date: Tue, 3 Oct 2017 16:14:31 -0700 +Subject: mm, hugetlb, soft_offline: save compound page order before page migration + +From: Alexandru Moise <00moses.alexander00@gmail.com> + +commit 19bfbe22f59a207417b2679e7e83c180419c9ec5 upstream. + +This fixes a bug in madvise() where if you'd try to soft offline a +hugepage via madvise(), while walking the address range you'd end up, +using the wrong page offset due to attempting to get the compound order +of a former but presently not compound page, due to dissolving the huge +page (since commit c3114a84f7f9: "mm: hugetlb: soft-offline: dissolve +source hugepage after successful migration"). + +As a result I ended up with all my free pages except one being offlined. + +Link: http://lkml.kernel.org/r/20170912204306.GA12053@gmail.com +Fixes: c3114a84f7f9 ("mm: hugetlb: soft-offline: dissolve source hugepage after successful migration") +Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com> +Cc: Anshuman Khandual +Cc: Michal Hocko +Cc: Andrea Arcangeli +Cc: Minchan Kim +Cc: Hillf Danton +Cc: Shaohua Li +Cc: Mike Rapoport +Cc: "Kirill A. 
Shutemov" +Cc: Mel Gorman +Cc: David Rientjes +Cc: Rik van Riel +Cc: Naoya Horiguchi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/madvise.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -614,18 +614,26 @@ static int madvise_inject_error(int beha + { + struct page *page; + struct zone *zone; ++ unsigned int order; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +- for (; start < end; start += PAGE_SIZE << +- compound_order(compound_head(page))) { ++ ++ for (; start < end; start += PAGE_SIZE << order) { + int ret; + + ret = get_user_pages_fast(start, 1, 0, &page); + if (ret != 1) + return ret; + ++ /* ++ * When soft offlining hugepages, after migrating the page ++ * we dissolve it, therefore in the second loop "page" will ++ * no longer be a compound page, and order will be 0. ++ */ ++ order = compound_order(compound_head(page)); ++ + if (PageHWPoison(page)) { + put_page(page); + continue; diff --git a/queue-4.13/mm-oom_reaper-skip-mm-structs-with-mmu-notifiers.patch b/queue-4.13/mm-oom_reaper-skip-mm-structs-with-mmu-notifiers.patch new file mode 100644 index 00000000000..a072864c484 --- /dev/null +++ b/queue-4.13/mm-oom_reaper-skip-mm-structs-with-mmu-notifiers.patch @@ -0,0 +1,97 @@ +From 4d4bbd8526a8fbeb2c090ea360211fceff952383 Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Tue, 3 Oct 2017 16:14:50 -0700 +Subject: mm, oom_reaper: skip mm structs with mmu notifiers + +From: Michal Hocko + +commit 4d4bbd8526a8fbeb2c090ea360211fceff952383 upstream. + +Andrea has noticed that the oom_reaper doesn't invalidate the range via +mmu notifiers (mmu_notifier_invalidate_range_start/end) and that can +corrupt the memory of the kvm guest for example. 
+ +tlb_flush_mmu_tlbonly already invokes mmu notifiers but that is not +sufficient as per Andrea: + + "mmu_notifier_invalidate_range cannot be used in replacement of + mmu_notifier_invalidate_range_start/end. For KVM + mmu_notifier_invalidate_range is a noop and rightfully so. A MMU + notifier implementation has to implement either ->invalidate_range + method or the invalidate_range_start/end methods, not both. And if you + implement invalidate_range_start/end like KVM is forced to do, calling + mmu_notifier_invalidate_range in common code is a noop for KVM. + + For those MMU notifiers that can get away only implementing + ->invalidate_range, the ->invalidate_range is implicitly called by + mmu_notifier_invalidate_range_end(). And only those secondary MMUs + that share the same pagetable with the primary MMU (like AMD iommuv2) + can get away only implementing ->invalidate_range" + +As the callback is allowed to sleep and the implementation is out of +hand of the MM it is safer to simply bail out if there is an mmu +notifier registered. In order to not fail too early make the +mm_has_notifiers check under the oom_lock and have a little nap before +failing to give the current oom victim some more time to exit. 
+ +[akpm@linux-foundation.org: coding-style fixes] +Link: http://lkml.kernel.org/r/20170913113427.2291-1-mhocko@kernel.org +Fixes: aac453635549 ("mm, oom: introduce oom reaper") +Signed-off-by: Michal Hocko +Reported-by: Andrea Arcangeli +Reviewed-by: Andrea Arcangeli +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mmu_notifier.h | 5 +++++ + mm/oom_kill.c | 16 ++++++++++++++++ + 2 files changed, 21 insertions(+) + +--- a/include/linux/mmu_notifier.h ++++ b/include/linux/mmu_notifier.h +@@ -400,6 +400,11 @@ extern void mmu_notifier_synchronize(voi + + #else /* CONFIG_MMU_NOTIFIER */ + ++static inline int mm_has_notifiers(struct mm_struct *mm) ++{ ++ return 0; ++} ++ + static inline void mmu_notifier_release(struct mm_struct *mm) + { + } +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + #include + #include "internal.h" +@@ -494,6 +495,21 @@ static bool __oom_reap_task_mm(struct ta + goto unlock_oom; + } + ++ /* ++ * If the mm has notifiers then we would need to invalidate them around ++ * unmap_page_range and that is risky because notifiers can sleep and ++ * what they do is basically undeterministic. So let's have a short ++ * sleep to give the oom victim some more time. 
++ * TODO: we really want to get rid of this ugly hack and make sure that ++ * notifiers cannot block for unbounded amount of time and add ++ * mmu_notifier_invalidate_range_{start,end} around unmap_page_range ++ */ ++ if (mm_has_notifiers(mm)) { ++ up_read(&mm->mmap_sem); ++ schedule_timeout_idle(HZ); ++ goto unlock_oom; ++ } ++ + /* + * increase mm_users only after we know we will reap something so + * that the mmput_async is called only when we have reaped something diff --git a/queue-4.13/revert-alsa-echoaudio-purge-contradictions-between-dimension-matrix-members-and-total-number-of-members.patch b/queue-4.13/revert-alsa-echoaudio-purge-contradictions-between-dimension-matrix-members-and-total-number-of-members.patch new file mode 100644 index 00000000000..711fcdb6774 --- /dev/null +++ b/queue-4.13/revert-alsa-echoaudio-purge-contradictions-between-dimension-matrix-members-and-total-number-of-members.patch @@ -0,0 +1,119 @@ +From 51db452df07bb4c5754b73789253ba21681d9dc2 Mon Sep 17 00:00:00 2001 +From: Takashi Sakamoto +Date: Tue, 26 Sep 2017 09:11:49 +0900 +Subject: Revert "ALSA: echoaudio: purge contradictions between dimension matrix members and total number of members" + +From: Takashi Sakamoto + +commit 51db452df07bb4c5754b73789253ba21681d9dc2 upstream. + +This reverts commit 275353bb684e to fix a regression which can abort +'alsactl' program in alsa-utils due to assertion in alsa-lib. + +alsactl: control.c:2513: snd_ctl_elem_value_get_integer: Assertion `idx < sizeof(obj->value.integer.value) / sizeof(obj->value.integer.value[0])' failed. + +alsactl: control.c:2976: snd_ctl_elem_value_get_integer: Assertion `idx < ARRAY_SIZE(obj->value.integer.value)' failed. + +This commit is a band-aid. In a point of usage of ALSA control interface, +the drivers still bring an issue that they prevent userspace applications +to have a consistent way to parse each levels of the dimension information +via ALSA control interface. + +Let me investigate this issue. 
Current implementation of the drivers +have three control element sets with dimension information: + * 'Monitor Mixer Volume' (type: integer) + * 'VMixer Volume' (type: integer) + * 'VU-meters' (type: boolean) + +Although the number of elements named as 'Monitor Mixer Volume' differs +depending on drivers in this group, it can be calculated by macros +defined by each driver (= (BX_NUM - BX_ANALOG_IN) * BX_ANALOG_IN). Each +of the elements has one member for value and has dimension information +with 2 levels (= BX_ANALOG_IN * (BX_NUM - BX_ANALOG_IN)). For these +elements, userspace applications are expected to handle the dimension +information so that all of the elements construct a matrix where the +number of rows and columns are represented by the dimension information. + +The same way is applied to elements named as 'VMixer Volume'. The number +of these elements can also be calculated by macros defined by each +drivers (= PX_ANALOG_IN * BX_ANALOG_IN). Each of the element has one +member for value and has dimension information with 2 levels +(= BX_ANALOG_IN * PX_ANALOG_IN). All of the elements construct a matrix +with the dimension information. + +An element named as 'VU-meters' gets a different way in a point of +dimension information. The element includes 96 members for value. The +element has dimension information with 3 levels (= 3 or 2 * 16 * 2). For +this element, userspace applications are expected to handle the dimension +information so that all of the members for value construct a matrix +where the number of rows and columns are represented by the dimension +information. This is different from the way for the former. + +As a summary, the drivers were not designed to produce a consistent way to +parse the dimension information. This makes it hard for general userspace +applications such as amixer to parse the information by a consistent way, +and actually no userspace applications except for 'echomixer' utilize the +dimension information. 
Additionally, no drivers excluding this group use +the information. + +The reverted commit was written based on the latter way. A commit +860c1994a70a ('ALSA: control: add dimension validator for userspace +elements') is written based on the latter way, too. That patch should be +reconsidered at the same time to re-define a consistent way to parse the +dimension information. + +Reported-by: Mark Hills +Reported-by: S. Christian Collins +Fixes: 275353bb684e ('ALSA: echoaudio: purge contradictions between dimension matrix members and total number of members') +Signed-off-by: Takashi Sakamoto +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/echoaudio/echoaudio.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/sound/pci/echoaudio/echoaudio.c ++++ b/sound/pci/echoaudio/echoaudio.c +@@ -1272,11 +1272,11 @@ static int snd_echo_mixer_info(struct sn + + chip = snd_kcontrol_chip(kcontrol); + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; ++ uinfo->count = 1; + uinfo->value.integer.min = ECHOGAIN_MINOUT; + uinfo->value.integer.max = ECHOGAIN_MAXOUT; + uinfo->dimen.d[0] = num_busses_out(chip); + uinfo->dimen.d[1] = num_busses_in(chip); +- uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; + return 0; + } + +@@ -1344,11 +1344,11 @@ static int snd_echo_vmixer_info(struct s + + chip = snd_kcontrol_chip(kcontrol); + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; ++ uinfo->count = 1; + uinfo->value.integer.min = ECHOGAIN_MINOUT; + uinfo->value.integer.max = ECHOGAIN_MAXOUT; + uinfo->dimen.d[0] = num_busses_out(chip); + uinfo->dimen.d[1] = num_pipes_out(chip); +- uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; + return 0; + } + +@@ -1728,6 +1728,7 @@ static int snd_echo_vumeters_info(struct + struct snd_ctl_elem_info *uinfo) + { + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; ++ uinfo->count = 96; + uinfo->value.integer.min = ECHOGAIN_MINOUT; + uinfo->value.integer.max = 0; + #ifdef ECHOCARD_HAS_VMIXER +@@ -1737,7 +1738,6 @@ 
static int snd_echo_vumeters_info(struct + #endif + uinfo->dimen.d[1] = 16; /* 16 channels */ + uinfo->dimen.d[2] = 2; /* 0=level, 1=peak */ +- uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1] * uinfo->dimen.d[2]; + return 0; + } + diff --git a/queue-4.13/series b/queue-4.13/series index 2cbf8dd2578..4655b415258 100644 --- a/queue-4.13/series +++ b/queue-4.13/series @@ -94,3 +94,14 @@ uwb-properly-check-kthread_run-return-value.patch uwb-ensure-that-endpoint-is-interrupt.patch staging-vchiq_2835_arm-fix-null-ptr-dereference-in-free_pagelist.patch ksm-fix-unlocked-iteration-over-vmas-in-cmp_and_merge_page.patch +mm-hugetlb-soft_offline-save-compound-page-order-before-page-migration.patch +mm-oom_reaper-skip-mm-structs-with-mmu-notifiers.patch +mm-fix-rodata_test-failure-rodata_test-test-data-was-not-read-only.patch +mm-avoid-marking-swap-cached-page-as-lazyfree.patch +mm-fix-data-corruption-caused-by-lazyfree-page.patch +userfaultfd-non-cooperative-fix-fork-use-after-free.patch +lib-ratelimit.c-use-deferred-printk-version.patch +lsm-fix-smack_inode_removexattr-and-xattr_getsecurity-memleak.patch +alsa-compress-remove-unused-variable.patch +revert-alsa-echoaudio-purge-contradictions-between-dimension-matrix-members-and-total-number-of-members.patch +alsa-usx2y-suppress-kernel-warning-at-page-allocation-failures.patch diff --git a/queue-4.13/userfaultfd-non-cooperative-fix-fork-use-after-free.patch b/queue-4.13/userfaultfd-non-cooperative-fix-fork-use-after-free.patch new file mode 100644 index 00000000000..95c97a2c538 --- /dev/null +++ b/queue-4.13/userfaultfd-non-cooperative-fix-fork-use-after-free.patch @@ -0,0 +1,149 @@ +From 384632e67e0829deb8015ee6ad916b180049d252 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Tue, 3 Oct 2017 16:15:38 -0700 +Subject: userfaultfd: non-cooperative: fix fork use after free + +From: Andrea Arcangeli + +commit 384632e67e0829deb8015ee6ad916b180049d252 upstream. 
+ +When reading the event from the uffd, we put it on a temporary +fork_event list to detect if we can still access it after releasing and +retaking the event_wqh.lock. + +If fork aborts and removes the event from the fork_event list, all is fine as +long as we're still in the userfault read context and the fork_event head is +still alive. + +We have to move the event, which is allocated on the fork kernel stack, back from +the fork_event list head to the event_wqh head before returning from +userfaultfd_ctx_read, because the fork_event head lifetime is limited to +the userfaultfd_ctx_read stack lifetime. + +Forgetting to move the event back to its event_wqh place then results in +__remove_wait_queue(&ctx->event_wqh, &ewq->wq); in +userfaultfd_event_wait_completion removing it from a head that has +already been freed from the reader stack. + +This could only happen if resolve_userfault_fork failed (for example if +there are no file descriptors available to allocate the fork uffd). If +it succeeded it was put back correctly. + +Furthermore, after find_userfault_evt receives a fork event, the forked +userfault context in fork_nctx and uwq->msg.arg.reserved.reserved1 can +be released by the fork thread as soon as the event_wqh.lock is +released. Taking a reference on the fork_nctx before dropping the lock +prevents a use-after-free in resolve_userfault_fork(). + +If the fork side aborted and it already released everything, we still +try to make resolve_userfault_fork() succeed, if possible. + +Fixes: 893e26e61d04eac9 ("userfaultfd: non-cooperative: Add fork() event") +Link: http://lkml.kernel.org/r/20170920180413.26713-1-aarcange@redhat.com +Signed-off-by: Andrea Arcangeli +Reported-by: Mark Rutland +Tested-by: Mark Rutland +Cc: Pavel Emelyanov +Cc: Mike Rapoport +Cc: "Dr. 
David Alan Gilbert" +Cc: Mike Kravetz +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/userfaultfd.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 56 insertions(+), 10 deletions(-) + +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -566,6 +566,12 @@ static void userfaultfd_event_wait_compl + break; + if (ACCESS_ONCE(ctx->released) || + fatal_signal_pending(current)) { ++ /* ++ * &ewq->wq may be queued in fork_event, but ++ * __remove_wait_queue ignores the head ++ * parameter. It would be a problem if it ++ * didn't. ++ */ + __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + if (ewq->msg.event == UFFD_EVENT_FORK) { + struct userfaultfd_ctx *new; +@@ -1039,6 +1045,12 @@ static ssize_t userfaultfd_ctx_read(stru + (unsigned long) + uwq->msg.arg.reserved.reserved1; + list_move(&uwq->wq.entry, &fork_event); ++ /* ++ * fork_nctx can be freed as soon as ++ * we drop the lock, unless we take a ++ * reference on it. ++ */ ++ userfaultfd_ctx_get(fork_nctx); + spin_unlock(&ctx->event_wqh.lock); + ret = 0; + break; +@@ -1069,19 +1081,53 @@ static ssize_t userfaultfd_ctx_read(stru + + if (!ret && msg->event == UFFD_EVENT_FORK) { + ret = resolve_userfault_fork(ctx, fork_nctx, msg); ++ spin_lock(&ctx->event_wqh.lock); ++ if (!list_empty(&fork_event)) { ++ /* ++ * The fork thread didn't abort, so we can ++ * drop the temporary refcount. ++ */ ++ userfaultfd_ctx_put(fork_nctx); ++ ++ uwq = list_first_entry(&fork_event, ++ typeof(*uwq), ++ wq.entry); ++ /* ++ * If fork_event list wasn't empty and in turn ++ * the event wasn't already released by fork ++ * (the event is allocated on fork kernel ++ * stack), put the event back to its place in ++ * the event_wq. fork_event head will be freed ++ * as soon as we return so the event cannot ++ * stay queued there no matter the current ++ * "ret" value. 
++ */ ++ list_del(&uwq->wq.entry); ++ __add_wait_queue(&ctx->event_wqh, &uwq->wq); + +- if (!ret) { +- spin_lock(&ctx->event_wqh.lock); +- if (!list_empty(&fork_event)) { +- uwq = list_first_entry(&fork_event, +- typeof(*uwq), +- wq.entry); +- list_del(&uwq->wq.entry); +- __add_wait_queue(&ctx->event_wqh, &uwq->wq); ++ /* ++ * Leave the event in the waitqueue and report ++ * error to userland if we failed to resolve ++ * the userfault fork. ++ */ ++ if (likely(!ret)) + userfaultfd_event_complete(ctx, uwq); +- } +- spin_unlock(&ctx->event_wqh.lock); ++ } else { ++ /* ++ * Here the fork thread aborted and the ++ * refcount from the fork thread on fork_nctx ++ * has already been released. We still hold ++ * the reference we took before releasing the ++ * lock above. If resolve_userfault_fork ++ * failed we've to drop it because the ++ * fork_nctx has to be freed in such case. If ++ * it succeeded we'll hold it because the new ++ * uffd references it. ++ */ ++ if (ret) ++ userfaultfd_ctx_put(fork_nctx); + } ++ spin_unlock(&ctx->event_wqh.lock); + } + + return ret;