--- /dev/null
+From c168870704bcde6bb63d05f7882b620dd3985a46 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Mon, 2 Jun 2014 11:47:23 +0100
+Subject: arm64: ptrace: change fs when passing kernel pointer to regset code
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit c168870704bcde6bb63d05f7882b620dd3985a46 upstream.
+
+Our compat PTRACE_POKEUSR implementation simply passes the user data to
+copy_regset_from_user after some simple range checking. Unfortunately,
+the data in question has already been copied to the kernel stack by this
+point, so the subsequent access_ok check fails and the ptrace request
+returns -EFAULT. This causes problems tracing fork() with older versions
+of strace.
+
+This patch briefly changes the fs to KERNEL_DS, so that the access_ok
+check passes even with a kernel address.
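+
+For illustration, the pattern looks roughly like this (a sketch; the
+real call sits in the hunks below):
+
+        mm_segment_t old_fs = get_fs();  /* remember the caller's limit */
+
+        set_fs(KERNEL_DS);  /* access_ok now accepts kernel addresses */
+        ret = copy_regset_from_user(tsk, &user_aarch32_view,
+                                    REGSET_COMPAT_GPR, off,
+                                    sizeof(compat_ulong_t), &val);
+        set_fs(old_fs);     /* always restore before returning */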
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/ptrace.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -872,6 +872,7 @@ static int compat_ptrace_write_user(stru
+ compat_ulong_t val)
+ {
+ int ret;
++ mm_segment_t old_fs = get_fs();
+
+ if (off & 3 || off >= COMPAT_USER_SZ)
+ return -EIO;
+@@ -879,10 +880,13 @@ static int compat_ptrace_write_user(stru
+ if (off >= sizeof(compat_elf_gregset_t))
+ return 0;
+
++ set_fs(KERNEL_DS);
+ ret = copy_regset_from_user(tsk, &user_aarch32_view,
+ REGSET_COMPAT_GPR, off,
+ sizeof(compat_ulong_t),
+ &val);
++ set_fs(old_fs);
++
+ return ret;
+ }
+
--- /dev/null
+From b5b60778558cafad17bbcbf63e0310bd3c68eb17 Mon Sep 17 00:00:00 2001
+From: Maurizio Lombardi <mlombard@redhat.com>
+Date: Tue, 27 May 2014 12:48:56 -0400
+Subject: ext4: fix wrong assert in ext4_mb_normalize_request()
+
+From: Maurizio Lombardi <mlombard@redhat.com>
+
+commit b5b60778558cafad17bbcbf63e0310bd3c68eb17 upstream.
+
+The variable "size" is expressed as a number of blocks and not as a
+number of clusters; this could trigger a kernel panic when using ext4
+with a cluster size different from the block size.
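+
+A sketch of the relationship, assuming a bigalloc filesystem: a cluster
+is 2^s_cluster_bits blocks, so for a given super_block sb the two
+per-group limits differ by exactly that ratio:
+
+        /* illustrative identity, not code from the patch */
+        EXT4_BLOCKS_PER_GROUP(sb) ==
+                EXT4_CLUSTERS_PER_GROUP(sb) << EXT4_SB(sb)->s_cluster_bits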
+
+Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3116,7 +3116,7 @@ ext4_mb_normalize_request(struct ext4_al
+ }
+ BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
+ start > ac->ac_o_ex.fe_logical);
+- BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
++ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+
+ /* now prepare goal request */
+
--- /dev/null
+From eeece469dedadf3918bad50ad80f4616a0064e90 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 27 May 2014 12:48:55 -0400
+Subject: ext4: fix zeroing of page during writeback
+
+From: Jan Kara <jack@suse.cz>
+
+commit eeece469dedadf3918bad50ad80f4616a0064e90 upstream.
+
+The tail of a page straddling the inode size must be zeroed when the
+page is written out, due to the POSIX requirement that modifications of
+an mmapped page beyond the inode size must not be written to the file.
+ext4_bio_write_page() did this only for blocks fully beyond the inode
+size but didn't properly zero blocks partially beyond it. Fix this.
+
+The problem has been uncovered by mmap_11-4 test in openposix test suite
+(part of LTP).
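+
+A minimal sketch of the fix (len is assumed to be the number of bytes of
+the page that lie within i_size, as computed by the caller):
+
+        /* zero the tail of a page that straddles i_size */
+        if (len < PAGE_CACHE_SIZE)
+                zero_user_segment(page, len, PAGE_CACHE_SIZE);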
+
+Reported-by: Xiaoguang Wang <wangxg.fnst@cn.fujitsu.com>
+Fixes: 5a0dc7365c240
+Fixes: bd2d0210cf22f
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/page-io.c | 24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -384,6 +384,17 @@ int ext4_bio_write_page(struct ext4_io_s
+ ClearPageError(page);
+
+ /*
++ * Comments copied from block_write_full_page_endio:
++ *
++ * The page straddles i_size. It must be zeroed out on each and every
++ * writepage invocation because it may be mmapped. "A file is mapped
++ * in multiples of the page size. For a file that is not a multiple of
++ * the page size, the remaining memory is zeroed when mapped, and
++ * writes to that region are not written out to the file."
++ */
++ if (len < PAGE_CACHE_SIZE)
++ zero_user_segment(page, len, PAGE_CACHE_SIZE);
++ /*
+ * In the first loop we prepare and mark buffers to submit. We have to
+ * mark all buffers in the page before submitting so that
+ * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
+@@ -394,19 +405,6 @@ int ext4_bio_write_page(struct ext4_io_s
+ do {
+ block_start = bh_offset(bh);
+ if (block_start >= len) {
+- /*
+- * Comments copied from block_write_full_page_endio:
+- *
+- * The page straddles i_size. It must be zeroed out on
+- * each and every writepage invocation because it may
+- * be mmapped. "A file is mapped in multiples of the
+- * page size. For a file that is not a multiple of
+- * the page size, the remaining memory is zeroed when
+- * mapped, and writes to that region are not written
+- * out to the file."
+- */
+- zero_user_segment(page, block_start,
+- block_start + blocksize);
+ clear_buffer_dirty(bh);
+ set_buffer_uptodate(bh);
+ continue;
--- /dev/null
+From 1b15d2e5b8077670b1e6a33250a0d9577efff4a5 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Thu, 17 Apr 2014 13:22:09 -0700
+Subject: HID: core: fix validation of report id 0
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 1b15d2e5b8077670b1e6a33250a0d9577efff4a5 upstream.
+
+Some drivers use the first HID report in the list instead of using an
+index. In these cases, validation uses ID 0, which was supposed to mean
+"first known report". This fixes the problem, which was causing at least
+the lgff family of devices to stop working since hid_validate_values
+was being called with ID 0, but the devices used single numbered IDs
+for their reports:
+
+0x05, 0x01, /* Usage Page (Desktop), */
+0x09, 0x05, /* Usage (Gamepad), */
+0xA1, 0x01, /* Collection (Application), */
+0xA1, 0x02, /* Collection (Logical), */
+0x85, 0x01, /* Report ID (1), */
+...
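+
+With id 0, the fix examines the head of the report list instead of the
+id hash; roughly equivalent to:
+
+        report = list_first_entry(&hid->report_enum[type].report_list,
+                                  struct hid_report, list);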
+
+Reported-by: Simon Wood <simon@mungewell.org>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/hid-core.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -796,7 +796,17 @@ struct hid_report *hid_validate_values(s
+ * ->numbered being checked, which may not always be the case when
+ * drivers go to access report values.
+ */
+- report = hid->report_enum[type].report_id_hash[id];
++ if (id == 0) {
++ /*
++ * Validating on id 0 means we should examine the first
++ * report in the list.
++ */
++ report = list_entry(
++ hid->report_enum[type].report_list.next,
++ struct hid_report, list);
++ } else {
++ report = hid->report_enum[type].report_id_hash[id];
++ }
+ if (!report) {
+ hid_err(hid, "missing %s %u\n", hid_report_names[type], id);
+ return NULL;
--- /dev/null
+From 3afb69cb5572b3c8c898c00880803cf1a49852c4 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+Date: Fri, 6 Jun 2014 14:37:10 -0700
+Subject: idr: fix overflow bug during maximum ID calculation at maximum height
+
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+
+commit 3afb69cb5572b3c8c898c00880803cf1a49852c4 upstream.
+
+idr_replace() open-codes the logic to calculate the maximum valid ID
+given the height of the idr tree; unfortunately, the open-coded logic
+doesn't account for the fact that the top layer may have unused slots
+and over-shifts the limit to zero when the tree is at its maximum
+height.
+
+The following test code shows it fails to replace the value for
+id=((1<<27)+42):
+
+ static void test5(void)
+ {
+ int id;
+ DEFINE_IDR(test_idr);
+ #define TEST5_START ((1<<27)+42) /* use the highest layer */
+
+ printk(KERN_INFO "Start test5\n");
+ id = idr_alloc(&test_idr, (void *)1, TEST5_START, 0, GFP_KERNEL);
+ BUG_ON(id != TEST5_START);
+ TEST_BUG_ON(idr_replace(&test_idr, (void *)2, TEST5_START) != (void *)1);
+ idr_destroy(&test_idr);
+ printk(KERN_INFO "End of test5\n");
+ }
+
+Fix the bug by using idr_max() which correctly takes into account the
+maximum allowed shift.
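+
+For reference, idr_max() computes the limit roughly as follows (a sketch
+of the lib/idr.c helper of that era):
+
+        static int idr_max(int layers)
+        {
+                int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);
+
+                return (1 << bits) - 1;
+        }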
+
+sub_alloc() shares the same problem and may incorrectly fail with
+-EAGAIN; however, this bug doesn't affect correct operation because
+idr_get_empty_slot(), which already uses idr_max(), retries with the
+increased @id in such cases.
+
+[tj@kernel.org: Updated patch description.]
+Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/idr.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/lib/idr.c
++++ b/lib/idr.c
+@@ -250,7 +250,7 @@ static int sub_alloc(struct idr *idp, in
+ id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
+
+ /* if already at the top layer, we need to grow */
+- if (id >= 1 << (idp->layers * IDR_BITS)) {
++ if (id > idr_max(idp->layers)) {
+ *starting_id = id;
+ return -EAGAIN;
+ }
+@@ -829,12 +829,10 @@ void *idr_replace(struct idr *idp, void
+ if (!p)
+ return ERR_PTR(-EINVAL);
+
+- n = (p->layer+1) * IDR_BITS;
+-
+- if (id >= (1 << n))
++ if (id > idr_max(p->layer + 1))
+ return ERR_PTR(-EINVAL);
+
+- n -= IDR_BITS;
++ n = p->layer * IDR_BITS;
+ while ((n > 0) && p) {
+ p = p->ary[(id >> n) & IDR_MASK];
+ n -= IDR_BITS;
--- /dev/null
+From 7f39dda9d86fb4f4f17af0de170decf125726f8c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 4 Jun 2014 16:05:33 -0700
+Subject: mm: fix sleeping function warning from __put_anon_vma
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 7f39dda9d86fb4f4f17af0de170decf125726f8c upstream.
+
+Trinity reports BUG:
+
+ sleeping function called from invalid context at kernel/locking/rwsem.c:47
+ in_atomic(): 0, irqs_disabled(): 0, pid: 5787, name: trinity-c27
+
+__might_sleep < down_write < __put_anon_vma < page_get_anon_vma <
+migrate_pages < compact_zone < compact_zone_order < try_to_compact_pages ..
+
+Right: since the conversion to a mutex and then to an rwsem, we should
+not put_anon_vma() from inside an rcu_read_lock()ed section. Fix the two
+places that did so, and add might_sleep() to anon_vma_free(), as
+suggested by Peter Zijlstra.
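+
+The corrected ordering, in sketch form: leave the RCU read-side section
+first, then drop the reference (which may now sleep safely):
+
+        if (!page_mapped(page)) {
+                rcu_read_unlock();
+                put_anon_vma(anon_vma); /* may take root->rwsem and sleep */
+                return NULL;
+        }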
+
+Fixes: 88c22088bf23 ("mm: optimize page_lock_anon_vma() fast-path")
+Reported-by: Dave Jones <davej@redhat.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/rmap.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -103,6 +103,7 @@ static inline void anon_vma_free(struct
+ * LOCK should suffice since the actual taking of the lock must
+ * happen _before_ what follows.
+ */
++ might_sleep();
+ if (rwsem_is_locked(&anon_vma->root->rwsem)) {
+ anon_vma_lock_write(anon_vma);
+ anon_vma_unlock_write(anon_vma);
+@@ -426,8 +427,9 @@ struct anon_vma *page_get_anon_vma(struc
+ * above cannot corrupt).
+ */
+ if (!page_mapped(page)) {
++ rcu_read_unlock();
+ put_anon_vma(anon_vma);
+- anon_vma = NULL;
++ return NULL;
+ }
+ out:
+ rcu_read_unlock();
+@@ -477,9 +479,9 @@ struct anon_vma *page_lock_anon_vma_read
+ }
+
+ if (!page_mapped(page)) {
++ rcu_read_unlock();
+ put_anon_vma(anon_vma);
+- anon_vma = NULL;
+- goto out;
++ return NULL;
+ }
+
+ /* we pinned the anon_vma, its safe to sleep */
--- /dev/null
+From 74614de17db6fb472370c426d4f934d8d616edf2 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 4 Jun 2014 16:11:01 -0700
+Subject: mm/memory-failure.c: don't let collect_procs() skip over processes for MF_ACTION_REQUIRED
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 74614de17db6fb472370c426d4f934d8d616edf2 upstream.
+
+When Linux sees an "action optional" machine check (where h/w has reported
+an error that is not in the current execution path) we generally do not
+want to signal a process, since most processes do not have a SIGBUS
+handler - we'd just prematurely terminate the process for a problem that
+it might never actually see.
+
+task_early_kill() decides whether to consider a process - and it checks
+whether this specific process has been marked for early signals with
+"prctl", or if the system administrator has requested early signals for
+all processes using /proc/sys/vm/memory_failure_early_kill.
+
+But in the MF_ACTION_REQUIRED case we must not defer. The error is in the
+execution path of the current thread so we must send the SIGBUS
+immediately.
+
+Fix by passing a flag argument through collect_procs*() to
+task_early_kill() so it knows whether we can defer or must take action.
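+
+In sketch form, the caller simply forwards the "must act now" bit:
+
+        /* defer only for action-optional errors */
+        collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);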
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Chen Gong <gong.chen@linux.jf.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 21 ++++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -382,10 +382,12 @@ static void kill_procs(struct list_head
+ }
+ }
+
+-static int task_early_kill(struct task_struct *tsk)
++static int task_early_kill(struct task_struct *tsk, int force_early)
+ {
+ if (!tsk->mm)
+ return 0;
++ if (force_early)
++ return 1;
+ if (tsk->flags & PF_MCE_PROCESS)
+ return !!(tsk->flags & PF_MCE_EARLY);
+ return sysctl_memory_failure_early_kill;
+@@ -395,7 +397,7 @@ static int task_early_kill(struct task_s
+ * Collect processes when the error hit an anonymous page.
+ */
+ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
+- struct to_kill **tkc)
++ struct to_kill **tkc, int force_early)
+ {
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+@@ -411,7 +413,7 @@ static void collect_procs_anon(struct pa
+ for_each_process (tsk) {
+ struct anon_vma_chain *vmac;
+
+- if (!task_early_kill(tsk))
++ if (!task_early_kill(tsk, force_early))
+ continue;
+ anon_vma_interval_tree_foreach(vmac, &av->rb_root,
+ pgoff, pgoff) {
+@@ -430,7 +432,7 @@ static void collect_procs_anon(struct pa
+ * Collect processes when the error hit a file mapped page.
+ */
+ static void collect_procs_file(struct page *page, struct list_head *to_kill,
+- struct to_kill **tkc)
++ struct to_kill **tkc, int force_early)
+ {
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+@@ -441,7 +443,7 @@ static void collect_procs_file(struct pa
+ for_each_process(tsk) {
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+- if (!task_early_kill(tsk))
++ if (!task_early_kill(tsk, force_early))
+ continue;
+
+ vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
+@@ -467,7 +469,8 @@ static void collect_procs_file(struct pa
+ * First preallocate one tokill structure outside the spin locks,
+ * so that we can kill at least one process reasonably reliable.
+ */
+-static void collect_procs(struct page *page, struct list_head *tokill)
++static void collect_procs(struct page *page, struct list_head *tokill,
++ int force_early)
+ {
+ struct to_kill *tk;
+
+@@ -478,9 +481,9 @@ static void collect_procs(struct page *p
+ if (!tk)
+ return;
+ if (PageAnon(page))
+- collect_procs_anon(page, tokill, &tk);
++ collect_procs_anon(page, tokill, &tk, force_early);
+ else
+- collect_procs_file(page, tokill, &tk);
++ collect_procs_file(page, tokill, &tk, force_early);
+ kfree(tk);
+ }
+
+@@ -965,7 +968,7 @@ static int hwpoison_user_mappings(struct
+ * there's nothing that can be done.
+ */
+ if (kill)
+- collect_procs(ppage, &tokill);
++ collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);
+
+ ret = try_to_unmap(ppage, ttu);
+ if (ret != SWAP_SUCCESS)
--- /dev/null
+From a70ffcac741d31a406c1d2b832ae43d658e7e1cf Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 4 Jun 2014 16:10:59 -0700
+Subject: mm/memory-failure.c-failure: send right signal code to correct thread
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit a70ffcac741d31a406c1d2b832ae43d658e7e1cf upstream.
+
+When a thread in a multi-threaded application hits a machine check because
+of an uncorrectable error in memory - we want to send the SIGBUS with
+si.si_code = BUS_MCEERR_AR to that thread. Currently we fail to do that
+if the active thread is not the primary thread in the process.
+collect_procs() just finds primary threads and this test:
+
+ if ((flags & MF_ACTION_REQUIRED) && t == current) {
+
+will see that the thread we found isn't the current thread and so send a
+si.si_code = BUS_MCEERR_AO to the primary (and nothing to the active
+thread at this time).
+
+We can fix this by checking whether "current" shares the same mm with the
+process that collect_procs() said owned the page. If so, we send the
+SIGBUS to current (with code BUS_MCEERR_AR).
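+
+The check, in sketch form (t is the thread collect_procs() found):
+
+        if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
+                si.si_code = BUS_MCEERR_AR;
+                ret = force_sig_info(SIGBUS, &si, current);
+        }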
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reported-by: Otto Bruggeman <otto.g.bruggeman@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Chen Gong <gong.chen@linux.jf.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -208,9 +208,9 @@ static int kill_proc(struct task_struct
+ #endif
+ si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
+
+- if ((flags & MF_ACTION_REQUIRED) && t == current) {
++ if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
+ si.si_code = BUS_MCEERR_AR;
+- ret = force_sig_info(SIGBUS, &si, t);
++ ret = force_sig_info(SIGBUS, &si, current);
+ } else {
+ /*
+ * Don't use force here, it's convenient if the signal
--- /dev/null
+From 71abdc15adf8c702a1dd535f8e30df50758848d2 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Fri, 6 Jun 2014 14:35:35 -0700
+Subject: mm: vmscan: clear kswapd's special reclaim powers before exiting
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 71abdc15adf8c702a1dd535f8e30df50758848d2 upstream.
+
+When kswapd exits, it can end up taking locks that were previously held
+by allocating tasks while they waited for reclaim. Lockdep currently
+warns about this:
+
+On Wed, May 28, 2014 at 06:06:34PM +0800, Gu Zheng wrote:
+> inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-R} usage.
+> kswapd2/1151 [HC0[0]:SC0[0]:HE1:SE1] takes:
+> (&sig->group_rwsem){+++++?}, at: exit_signals+0x24/0x130
+> {RECLAIM_FS-ON-W} state was registered at:
+> mark_held_locks+0xb9/0x140
+> lockdep_trace_alloc+0x7a/0xe0
+> kmem_cache_alloc_trace+0x37/0x240
+> flex_array_alloc+0x99/0x1a0
+> cgroup_attach_task+0x63/0x430
+> attach_task_by_pid+0x210/0x280
+> cgroup_procs_write+0x16/0x20
+> cgroup_file_write+0x120/0x2c0
+> vfs_write+0xc0/0x1f0
+> SyS_write+0x4c/0xa0
+> tracesys+0xdd/0xe2
+> irq event stamp: 49
+> hardirqs last enabled at (49): _raw_spin_unlock_irqrestore+0x36/0x70
+> hardirqs last disabled at (48): _raw_spin_lock_irqsave+0x2b/0xa0
+> softirqs last enabled at (0): copy_process.part.24+0x627/0x15f0
+> softirqs last disabled at (0): (null)
+>
+> other info that might help us debug this:
+> Possible unsafe locking scenario:
+>
+> CPU0
+> ----
+> lock(&sig->group_rwsem);
+> <Interrupt>
+> lock(&sig->group_rwsem);
+>
+> *** DEADLOCK ***
+>
+> no locks held by kswapd2/1151.
+>
+> stack backtrace:
+> CPU: 30 PID: 1151 Comm: kswapd2 Not tainted 3.10.39+ #4
+> Call Trace:
+> dump_stack+0x19/0x1b
+> print_usage_bug+0x1f7/0x208
+> mark_lock+0x21d/0x2a0
+> __lock_acquire+0x52a/0xb60
+> lock_acquire+0xa2/0x140
+> down_read+0x51/0xa0
+> exit_signals+0x24/0x130
+> do_exit+0xb5/0xa50
+> kthread+0xdb/0x100
+> ret_from_fork+0x7c/0xb0
+
+This is because the kswapd thread is still marked as a reclaimer at the
+time of exit. But because it is exiting, nobody is actually waiting on
+it to make reclaim progress anymore, and it's nothing but a regular
+thread at this point. Be tidy and strip it of all its powers
+(PF_MEMALLOC, PF_SWAPWRITE, PF_KSWAPD, and the lockdep reclaim state)
+before returning from the thread function.
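+
+In sketch form, on the thread function's exit path:
+
+        /* kswapd is exiting; make it an ordinary task again */
+        tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+        current->reclaim_state = NULL;
+        lockdep_clear_current_reclaim_state();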
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
+Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Cc: Tang Chen <tangchen@cn.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3090,7 +3090,10 @@ static int kswapd(void *p)
+ }
+ }
+
++ tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+ current->reclaim_state = NULL;
++ lockdep_clear_current_reclaim_state();
++
+ return 0;
+ }
+
--- /dev/null
+From 675becce15f320337499bc1a9356260409a5ba29 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Wed, 4 Jun 2014 16:07:35 -0700
+Subject: mm: vmscan: do not throttle based on pfmemalloc reserves if node has no ZONE_NORMAL
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 675becce15f320337499bc1a9356260409a5ba29 upstream.
+
+throttle_direct_reclaim() is meant to trigger during swap-over-network,
+during which the min watermark is treated as a pfmemalloc reserve. It
+throttles on the first node in the zonelist, but this is flawed.
+
+The user-visible impact is that a process running on a CPU whose local
+memory node has no ZONE_NORMAL will stall for prolonged periods of time,
+possibly indefinitely. This is due to throttle_direct_reclaim thinking the
+pfmemalloc reserves are depleted when in fact they don't exist on that
+node.
+
+On a NUMA machine running a 32-bit kernel (I know), allocation requests
+from CPUs on node 1 would detect no pfmemalloc reserves and the process
+would get throttled. This patch adjusts throttling of direct reclaim to
+throttle based on the first node in the zonelist that has a usable
+ZONE_NORMAL or lower zone.
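+
+The new node selection, in sketch form: walk the zonelist and throttle
+on the first zone at or below ZONE_NORMAL:
+
+        for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_mask, nodemask) {
+                if (zone_idx(zone) > ZONE_NORMAL)
+                        continue;       /* highmem cannot back GFP_KERNEL */
+                pgdat = zone->zone_pgdat;
+                break;
+        }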
+
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c | 43 +++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 37 insertions(+), 6 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2286,10 +2286,17 @@ static bool pfmemalloc_watermark_ok(pg_d
+
+ for (i = 0; i <= ZONE_NORMAL; i++) {
+ zone = &pgdat->node_zones[i];
++ if (!populated_zone(zone))
++ continue;
++
+ pfmemalloc_reserve += min_wmark_pages(zone);
+ free_pages += zone_page_state(zone, NR_FREE_PAGES);
+ }
+
++ /* If there are no reserves (unexpected config) then do not throttle */
++ if (!pfmemalloc_reserve)
++ return true;
++
+ wmark_ok = free_pages > pfmemalloc_reserve / 2;
+
+ /* kswapd must be awake if processes are being throttled */
+@@ -2314,9 +2321,9 @@ static bool pfmemalloc_watermark_ok(pg_d
+ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
+ nodemask_t *nodemask)
+ {
++ struct zoneref *z;
+ struct zone *zone;
+- int high_zoneidx = gfp_zone(gfp_mask);
+- pg_data_t *pgdat;
++ pg_data_t *pgdat = NULL;
+
+ /*
+ * Kernel threads should not be throttled as they may be indirectly
+@@ -2335,10 +2342,34 @@ static bool throttle_direct_reclaim(gfp_
+ if (fatal_signal_pending(current))
+ goto out;
+
+- /* Check if the pfmemalloc reserves are ok */
+- first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
+- pgdat = zone->zone_pgdat;
+- if (pfmemalloc_watermark_ok(pgdat))
++ /*
++ * Check if the pfmemalloc reserves are ok by finding the first node
++ * with a usable ZONE_NORMAL or lower zone. The expectation is that
++ * GFP_KERNEL will be required for allocating network buffers when
++ * swapping over the network so ZONE_HIGHMEM is unusable.
++ *
++ * Throttling is based on the first usable node and throttled processes
++ * wait on a queue until kswapd makes progress and wakes them. There
++ * is an affinity then between processes waking up and where reclaim
++ * progress has been made assuming the process wakes on the same node.
++ * More importantly, processes running on remote nodes will not compete
++ * for remote pfmemalloc reserves and processes on different nodes
++ * should make reasonable progress.
++ */
++ for_each_zone_zonelist_nodemask(zone, z, zonelist,
++ gfp_mask, nodemask) {
++ if (zone_idx(zone) > ZONE_NORMAL)
++ continue;
++
++ /* Throttle based on the first usable node */
++ pgdat = zone->zone_pgdat;
++ if (pfmemalloc_watermark_ok(pgdat))
++ goto out;
++ break;
++ }
++
++ /* If no zone was usable by the allocation flags then do not throttle */
++ if (!pgdat)
+ goto out;
+
+ /* Account for the throttling */
--- /dev/null
+From 4e52365f279564cef0ddd41db5237f0471381093 Mon Sep 17 00:00:00 2001
+From: Matthew Dempsky <mdempsky@chromium.org>
+Date: Fri, 6 Jun 2014 14:36:42 -0700
+Subject: ptrace: fix fork event messages across pid namespaces
+
+From: Matthew Dempsky <mdempsky@chromium.org>
+
+commit 4e52365f279564cef0ddd41db5237f0471381093 upstream.
+
+When tracing a process in another pid namespace, it's important for fork
+event messages to contain the child's pid as seen from the tracer's pid
+namespace, not the parent's. Otherwise, the tracer won't be able to
+correlate the fork event with later SIGTRAP signals it receives from the
+child.
+
+We still risk a race condition if a ptracer from a different pid
+namespace attaches after we compute the pid_t value. However, sending a
+bogus fork event message in this unlikely scenario is still a vast
+improvement over the status quo where we always send bogus fork event
+messages to debuggers in a different pid namespace than the forking
+process.
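+
+The translation, in sketch form: resolve the child's struct pid in the
+ptrace parent's namespace instead of the forking task's:
+
+        rcu_read_lock();
+        ns = task_active_pid_ns(rcu_dereference(current->parent));
+        if (ns)
+                message = pid_nr_ns(pid, ns);
+        rcu_read_unlock();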
+
+Signed-off-by: Matthew Dempsky <mdempsky@chromium.org>
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Julien Tinnes <jln@chromium.org>
+Cc: Roland McGrath <mcgrathr@chromium.org>
+Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ptrace.h | 32 ++++++++++++++++++++++++++++++++
+ kernel/fork.c | 10 +++++++---
+ 2 files changed, 39 insertions(+), 3 deletions(-)
+
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -5,6 +5,7 @@
+ #include <linux/sched.h> /* For struct task_struct. */
+ #include <linux/err.h> /* for IS_ERR_VALUE */
+ #include <linux/bug.h> /* For BUG_ON. */
++#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
+ #include <uapi/linux/ptrace.h>
+
+ /*
+@@ -129,6 +130,37 @@ static inline void ptrace_event(int even
+ }
+
+ /**
++ * ptrace_event_pid - possibly stop for a ptrace event notification
++ * @event: %PTRACE_EVENT_* value to report
++ * @pid: process identifier for %PTRACE_GETEVENTMSG to return
++ *
++ * Check whether @event is enabled and, if so, report @event and @pid
++ * to the ptrace parent. @pid is reported as the pid_t seen from the
++ * ptrace parent's pid namespace.
++ *
++ * Called without locks.
++ */
++static inline void ptrace_event_pid(int event, struct pid *pid)
++{
++ /*
++ * FIXME: There's a potential race if a ptracer in a different pid
++ * namespace than parent attaches between computing message below and
++ * when we acquire tasklist_lock in ptrace_stop(). If this happens,
++ * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
++ */
++ unsigned long message = 0;
++ struct pid_namespace *ns;
++
++ rcu_read_lock();
++ ns = task_active_pid_ns(rcu_dereference(current->parent));
++ if (ns)
++ message = pid_nr_ns(pid, ns);
++ rcu_read_unlock();
++
++ ptrace_event(event, message);
++}
++
++/**
+ * ptrace_init_task - initialize ptrace state for a new child
+ * @child: new child task
+ * @ptrace: true if child should be ptrace'd by parent's tracer
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1607,10 +1607,12 @@ long do_fork(unsigned long clone_flags,
+ */
+ if (!IS_ERR(p)) {
+ struct completion vfork;
++ struct pid *pid;
+
+ trace_sched_process_fork(current, p);
+
+- nr = task_pid_vnr(p);
++ pid = get_task_pid(p, PIDTYPE_PID);
++ nr = pid_vnr(pid);
+
+ if (clone_flags & CLONE_PARENT_SETTID)
+ put_user(nr, parent_tidptr);
+@@ -1625,12 +1627,14 @@ long do_fork(unsigned long clone_flags,
+
+ /* forking complete and child started to run, tell ptracer */
+ if (unlikely(trace))
+- ptrace_event(trace, nr);
++ ptrace_event_pid(trace, pid);
+
+ if (clone_flags & CLONE_VFORK) {
+ if (!wait_for_vfork_done(p, &vfork))
+- ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
++ ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
+ }
++
++ put_pid(pid);
+ } else {
+ nr = PTR_ERR(p);
+ }
--- /dev/null
+From 993072ee67aa179c48c85eb19869804e68887d86 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Mon, 26 May 2014 21:55:08 +0200
+Subject: s390/lowcore: reserve 96 bytes for IRB in lowcore
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit 993072ee67aa179c48c85eb19869804e68887d86 upstream.
+
+The IRB might be 96 bytes if the extended-I/O-measurement facility is
+used. This feature is currently not used by Linux, but struct irb
+already has the emw field defined. So let's make the irb in lowcore match
+the size of the internal data structure to be future-proof.
+We also have to add a pad to correctly align the paste array.
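+
+For illustration, the resulting 64-bit lowcore layout around this area
+(offsets as in the hunk below):
+
+        __u8  irb[96];                          /* 0x0400 - 0x045f */
+        __u8  pad_0x0460[0x0480-0x0460];        /* realign the paste array */
+        __u32 paste[16];                        /* 0x0480 - 0x04bf */
+        __u8  pad_0x04c0[0x0e00-0x04c0];        /* up to the IPL info block */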
+
+The bigger irb field also circumvents a bug in some QEMU versions that
+always write the emw field on test subchannel and therefore destroy the
+paste definitions of this CPU. Running under these QEMU version broke
+some timing functions in the VDSO and all users of these functions,
+e.g. some JREs.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
+Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/lowcore.h | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/s390/include/asm/lowcore.h
++++ b/arch/s390/include/asm/lowcore.h
+@@ -142,9 +142,9 @@ struct _lowcore {
+ __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */
+
+ /* Interrupt response block */
+- __u8 irb[64]; /* 0x0300 */
++ __u8 irb[96]; /* 0x0300 */
+
+- __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */
++ __u8 pad_0x0360[0x0e00-0x0360]; /* 0x0360 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
+@@ -288,12 +288,13 @@ struct _lowcore {
+ __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */
+
+ /* Interrupt response block. */
+- __u8 irb[64]; /* 0x0400 */
++ __u8 irb[96]; /* 0x0400 */
++ __u8 pad_0x0460[0x0480-0x0460]; /* 0x0460 */
+
+ /* Per cpu primary space access list */
+- __u32 paste[16]; /* 0x0440 */
++ __u32 paste[16]; /* 0x0480 */
+
+- __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */
++ __u8 pad_0x04c0[0x0e00-0x04c0]; /* 0x04c0 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
arm-omap-replace-checks-for-config_usb_gadget_omap.patch
usb-ehci-avoid-bios-handover-on-the-hasee-e200.patch
usb-option-fix-runtime-pm-handling.patch
+mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
+mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch
+mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch
+mm-fix-sleeping-function-warning-from-__put_anon_vma.patch
+hid-core-fix-validation-of-report-id-0.patch
+mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch
+ptrace-fix-fork-event-messages-across-pid-namespaces.patch
+arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch
+idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch
+s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch
+ext4-fix-zeroing-of-page-during-writeback.patch
+ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch