3.14-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 27 Jun 2014 22:19:49 +0000 (15:19 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 27 Jun 2014 22:19:49 +0000 (15:19 -0700)
added patches:
arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch
arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch
ext4-fix-data-integrity-sync-in-ordered-mode.patch
ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch
ext4-fix-zeroing-of-page-during-writeback.patch
hid-core-fix-validation-of-report-id-0.patch
hugetlb-restrict-hugepage_migration_support-to-x86_64.patch
idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch
kthread-fix-return-value-of-kthread_create-upon-sigkill.patch
matroxfb-perform-a-dummy-read-of-m_status.patch
mm-fix-sleeping-function-warning-from-__put_anon_vma.patch
mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch
mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch
mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch
mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch
mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch
mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
ptrace-fix-fork-event-messages-across-pid-namespaces.patch
s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch
s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch
usb-usb_wwan-fix-potential-blocked-i-o-after-resume.patch
usb-usb_wwan-fix-potential-null-deref-at-resume.patch
usb-usb_wwan-fix-race-between-write-and-resume.patch
usb-usb_wwan-fix-urb-leak-at-shutdown.patch
usb-usb_wwan-fix-urb-leak-in-write-error-path.patch
usb-usb_wwan-fix-write-and-suspend-race.patch

27 files changed:
queue-3.14/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch [new file with mode: 0644]
queue-3.14/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch [new file with mode: 0644]
queue-3.14/ext4-fix-data-integrity-sync-in-ordered-mode.patch [new file with mode: 0644]
queue-3.14/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch [new file with mode: 0644]
queue-3.14/ext4-fix-zeroing-of-page-during-writeback.patch [new file with mode: 0644]
queue-3.14/hid-core-fix-validation-of-report-id-0.patch [new file with mode: 0644]
queue-3.14/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch [new file with mode: 0644]
queue-3.14/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch [new file with mode: 0644]
queue-3.14/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch [new file with mode: 0644]
queue-3.14/matroxfb-perform-a-dummy-read-of-m_status.patch [new file with mode: 0644]
queue-3.14/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch [new file with mode: 0644]
queue-3.14/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch [new file with mode: 0644]
queue-3.14/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch [new file with mode: 0644]
queue-3.14/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch [new file with mode: 0644]
queue-3.14/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch [new file with mode: 0644]
queue-3.14/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch [new file with mode: 0644]
queue-3.14/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch [new file with mode: 0644]
queue-3.14/ptrace-fix-fork-event-messages-across-pid-namespaces.patch [new file with mode: 0644]
queue-3.14/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch [new file with mode: 0644]
queue-3.14/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch [new file with mode: 0644]
queue-3.14/series
queue-3.14/usb-usb_wwan-fix-potential-blocked-i-o-after-resume.patch [new file with mode: 0644]
queue-3.14/usb-usb_wwan-fix-potential-null-deref-at-resume.patch [new file with mode: 0644]
queue-3.14/usb-usb_wwan-fix-race-between-write-and-resume.patch [new file with mode: 0644]
queue-3.14/usb-usb_wwan-fix-urb-leak-at-shutdown.patch [new file with mode: 0644]
queue-3.14/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch [new file with mode: 0644]
queue-3.14/usb-usb_wwan-fix-write-and-suspend-race.patch [new file with mode: 0644]

diff --git a/queue-3.14/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch b/queue-3.14/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch
new file mode 100644 (file)
index 0000000..6615659
--- /dev/null
@@ -0,0 +1,51 @@
+From c168870704bcde6bb63d05f7882b620dd3985a46 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Mon, 2 Jun 2014 11:47:23 +0100
+Subject: arm64: ptrace: change fs when passing kernel pointer to regset code
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit c168870704bcde6bb63d05f7882b620dd3985a46 upstream.
+
+Our compat PTRACE_POKEUSR implementation simply passes the user data to
+copy_regset_from_user after some simple range checking. Unfortunately,
+the data in question has already been copied to the kernel stack by this
+point, so the subsequent access_ok check fails and the ptrace request
+returns -EFAULT. This causes problems tracing fork() with older versions
+of strace.
+
+This patch briefly changes the fs to KERNEL_DS, so that the access_ok
+check passes even with a kernel address.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/ptrace.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -821,6 +821,7 @@ static int compat_ptrace_write_user(stru
+                                   compat_ulong_t val)
+ {
+       int ret;
++      mm_segment_t old_fs = get_fs();
+       if (off & 3 || off >= COMPAT_USER_SZ)
+               return -EIO;
+@@ -828,10 +829,13 @@ static int compat_ptrace_write_user(stru
+       if (off >= sizeof(compat_elf_gregset_t))
+               return 0;
++      set_fs(KERNEL_DS);
+       ret = copy_regset_from_user(tsk, &user_aarch32_view,
+                                   REGSET_COMPAT_GPR, off,
+                                   sizeof(compat_ulong_t),
+                                   &val);
++      set_fs(old_fs);
++
+       return ret;
+ }
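
For readers unfamiliar with the idiom, the fix uses the classic (pre-v5.10)
address-limit pattern: save the current limit, widen it to KERNEL_DS around
the uaccess call, and always restore it. A minimal sketch under those
assumptions, with a hypothetical wrapper name:

        /* Sketch only: set_fs()/get_fs() existed in kernels of this era but
         * were removed from mainline in v5.10. do_regset_poke() is a
         * hypothetical name, not a function from the patch. */
        static int do_regset_poke(struct task_struct *tsk, compat_ulong_t *kval)
        {
                mm_segment_t old_fs = get_fs(); /* save current address limit */
                int ret;

                set_fs(KERNEL_DS);      /* access_ok() now accepts kernel pointers */
                ret = copy_regset_from_user(tsk, &user_aarch32_view,
                                            REGSET_COMPAT_GPR, 0,
                                            sizeof(*kval), kval);
                set_fs(old_fs);         /* always restore before returning */
                return ret;
        }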
diff --git a/queue-3.14/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch b/queue-3.14/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch
new file mode 100644 (file)
index 0000000..152d229
--- /dev/null
@@ -0,0 +1,75 @@
+From 2227901a0230d8fde81ba9c602d649839390f56b Mon Sep 17 00:00:00 2001
+From: Victor Kamensky <victor.kamensky@linaro.org>
+Date: Tue, 3 Jun 2014 19:21:30 +0100
+Subject: arm64: ptrace: fix empty registers set in prstatus of aarch32 process core
+
+From: Victor Kamensky <victor.kamensky@linaro.org>
+
+commit 2227901a0230d8fde81ba9c602d649839390f56b upstream.
+
+Currently the prstatus note in a core file of an aarch32 process has an
+empty registers set. As a result, aarch32 core files created by a v8
+kernel are not very useful.
+
+It happens because the compat_gpr_get and compat_gpr_set functions can
+copy register values to/from either kbuf or ubuf. The ELF core file
+collection function fill_thread_core_info calls compat_gpr_get with
+kbuf set and ubuf set to 0, but the current compat_gpr_get and
+compat_gpr_set functions only handle the copy to/from ubuf case.
+
+The fix is to handle kbuf and ubuf as two separate cases, in a similar
+way to other functions such as user_regset_copyout and
+user_regset_copyin.
+
+Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/ptrace.c |   26 ++++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -650,11 +650,16 @@ static int compat_gpr_get(struct task_st
+                       reg = task_pt_regs(target)->regs[idx];
+               }
+-              ret = copy_to_user(ubuf, &reg, sizeof(reg));
+-              if (ret)
+-                      break;
++              if (kbuf) {
++                      memcpy(kbuf, &reg, sizeof(reg));
++                      kbuf += sizeof(reg);
++              } else {
++                      ret = copy_to_user(ubuf, &reg, sizeof(reg));
++                      if (ret)
++                              break;
+-              ubuf += sizeof(reg);
++                      ubuf += sizeof(reg);
++              }
+       }
+       return ret;
+@@ -684,11 +689,16 @@ static int compat_gpr_set(struct task_st
+               unsigned int idx = start + i;
+               compat_ulong_t reg;
+-              ret = copy_from_user(&reg, ubuf, sizeof(reg));
+-              if (ret)
+-                      return ret;
++              if (kbuf) {
++                      memcpy(&reg, kbuf, sizeof(reg));
++                      kbuf += sizeof(reg);
++              } else {
++                      ret = copy_from_user(&reg, ubuf, sizeof(reg));
++                      if (ret)
++                              return ret;
+-              ubuf += sizeof(reg);
++                      ubuf += sizeof(reg);
++              }
+               switch (idx) {
+               case 15:
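
The kbuf/ubuf split above mirrors what the generic regset helpers already do;
a ->get() implementation without per-register fixups could lean on
user_regset_copyout(), which writes to kbuf when it is non-NULL and falls
back to copy_to_user(ubuf) otherwise. A simplified sketch (an assumption for
illustration, not the actual arm64 code, which needs the open-coded loop for
its per-register handling):

        static int simple_gpr_get(struct task_struct *target,
                                  const struct user_regset *regset,
                                  unsigned int pos, unsigned int count,
                                  void *kbuf, void __user *ubuf)
        {
                struct pt_regs *regs = task_pt_regs(target);

                /* Handles both destinations: memcpy() into kbuf, or
                 * copy_to_user() into ubuf, advancing pos/count as it goes. */
                return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                           regs, 0, sizeof(*regs));
        }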
diff --git a/queue-3.14/ext4-fix-data-integrity-sync-in-ordered-mode.patch b/queue-3.14/ext4-fix-data-integrity-sync-in-ordered-mode.patch
new file mode 100644 (file)
index 0000000..d3d56f9
--- /dev/null
@@ -0,0 +1,178 @@
+From 1c8349a17137b93f0a83f276c764a6df1b9a116e Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <namjae.jeon@samsung.com>
+Date: Mon, 12 May 2014 08:12:25 -0400
+Subject: ext4: fix data integrity sync in ordered mode
+
+From: Namjae Jeon <namjae.jeon@samsung.com>
+
+commit 1c8349a17137b93f0a83f276c764a6df1b9a116e upstream.
+
+When we perform a data integrity sync we tag all the dirty pages with
+PAGECACHE_TAG_TOWRITE at the start of ext4_da_writepages.  Later we
+check for this tag in write_cache_pages_da, create a struct
+mpage_da_data containing contiguously indexed pages tagged with this
+tag, and sync these pages with a call to mpage_da_map_and_submit.  This
+process is repeated in a while loop until all the PAGECACHE_TAG_TOWRITE
+pages are synced. We also do a journal start and stop in each
+iteration.  journal_stop could initiate a journal commit, which would
+call ext4_writepage, which in turn calls ext4_bio_write_page even for
+delayed or unwritten buffers. When ext4_bio_write_page is called for
+such buffers it does not sync them, but it does clear the
+PAGECACHE_TAG_TOWRITE tag of the corresponding page, so these pages
+are never synced by the currently running data integrity sync. We end
+up with dirty pages even though the sync has completed.
+
+This could cause a potential data loss when the sync call is followed
+by a truncate_pagecache call, which is exactly the case in
+collapse_range.  (It will cause generic/127 failure in xfstests)
+
+To avoid this issue, we can use set_page_writeback_keepwrite instead of
+set_page_writeback, which doesn't clear TOWRITE tag.
+
+Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
+Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h             |    3 ++-
+ fs/ext4/inode.c            |    6 ++++--
+ fs/ext4/page-io.c          |    8 ++++++--
+ include/linux/page-flags.h |   12 +++++++++++-
+ mm/page-writeback.c        |   11 ++++++-----
+ 5 files changed, 29 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -2764,7 +2764,8 @@ extern void ext4_io_submit(struct ext4_i
+ extern int ext4_bio_write_page(struct ext4_io_submit *io,
+                              struct page *page,
+                              int len,
+-                             struct writeback_control *wbc);
++                             struct writeback_control *wbc,
++                             bool keep_towrite);
+ /* mmp.c */
+ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1835,6 +1835,7 @@ static int ext4_writepage(struct page *p
+       struct buffer_head *page_bufs = NULL;
+       struct inode *inode = page->mapping->host;
+       struct ext4_io_submit io_submit;
++      bool keep_towrite = false;
+       trace_ext4_writepage(page);
+       size = i_size_read(inode);
+@@ -1865,6 +1866,7 @@ static int ext4_writepage(struct page *p
+                       unlock_page(page);
+                       return 0;
+               }
++              keep_towrite = true;
+       }
+       if (PageChecked(page) && ext4_should_journal_data(inode))
+@@ -1881,7 +1883,7 @@ static int ext4_writepage(struct page *p
+               unlock_page(page);
+               return -ENOMEM;
+       }
+-      ret = ext4_bio_write_page(&io_submit, page, len, wbc);
++      ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
+       ext4_io_submit(&io_submit);
+       /* Drop io_end reference we got from init */
+       ext4_put_io_end_defer(io_submit.io_end);
+@@ -1900,7 +1902,7 @@ static int mpage_submit_page(struct mpag
+       else
+               len = PAGE_CACHE_SIZE;
+       clear_page_dirty_for_io(page);
+-      err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
++      err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
+       if (!err)
+               mpd->wbc->nr_to_write--;
+       mpd->first_page++;
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -401,7 +401,8 @@ submit_and_retry:
+ int ext4_bio_write_page(struct ext4_io_submit *io,
+                       struct page *page,
+                       int len,
+-                      struct writeback_control *wbc)
++                      struct writeback_control *wbc,
++                      bool keep_towrite)
+ {
+       struct inode *inode = page->mapping->host;
+       unsigned block_start, blocksize;
+@@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_s
+       BUG_ON(!PageLocked(page));
+       BUG_ON(PageWriteback(page));
+-      set_page_writeback(page);
++      if (keep_towrite)
++              set_page_writeback_keepwrite(page);
++      else
++              set_page_writeback(page);
+       ClearPageError(page);
+       /*
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
+ extern void cancel_dirty_page(struct page *page, unsigned int account_size);
+ int test_clear_page_writeback(struct page *page);
+-int test_set_page_writeback(struct page *page);
++int __test_set_page_writeback(struct page *page, bool keep_write);
++
++#define test_set_page_writeback(page)                 \
++      __test_set_page_writeback(page, false)
++#define test_set_page_writeback_keepwrite(page)       \
++      __test_set_page_writeback(page, true)
+ static inline void set_page_writeback(struct page *page)
+ {
+       test_set_page_writeback(page);
+ }
++static inline void set_page_writeback_keepwrite(struct page *page)
++{
++      test_set_page_writeback_keepwrite(page);
++}
++
+ #ifdef CONFIG_PAGEFLAGS_EXTENDED
+ /*
+  * System with lots of page flags available. This allows separate
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct pag
+       return ret;
+ }
+-int test_set_page_writeback(struct page *page)
++int __test_set_page_writeback(struct page *page, bool keep_write)
+ {
+       struct address_space *mapping = page_mapping(page);
+       int ret;
+@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page
+                       radix_tree_tag_clear(&mapping->page_tree,
+                                               page_index(page),
+                                               PAGECACHE_TAG_DIRTY);
+-              radix_tree_tag_clear(&mapping->page_tree,
+-                                   page_index(page),
+-                                   PAGECACHE_TAG_TOWRITE);
++              if (!keep_write)
++                      radix_tree_tag_clear(&mapping->page_tree,
++                                              page_index(page),
++                                              PAGECACHE_TAG_TOWRITE);
+               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       } else {
+               ret = TestSetPageWriteback(page);
+@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page
+       return ret;
+ }
+-EXPORT_SYMBOL(test_set_page_writeback);
++EXPORT_SYMBOL(__test_set_page_writeback);
+ /*
+  * Return true if any of the pages in the mapping are marked with the
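
For context on the tag lifecycle the fix preserves: a data integrity sync
first converts the DIRTY tags over the sync range into TOWRITE tags, then
writes back only pages still carrying TOWRITE. The tagging side lives in
generic writeback code (shown as a sketch for orientation, not part of this
patch):

        /* In a write_cache_pages()-style loop: tag the range up front so
         * pages dirtied after this point are not waited on by this sync. */
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);

The keepwrite variant added above then lets a journal-commit-triggered
writeout put a page under writeback without consuming its TOWRITE tag, so
the integrity sync that set the tag still sees the page.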
diff --git a/queue-3.14/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch b/queue-3.14/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch
new file mode 100644 (file)
index 0000000..fcf7a02
--- /dev/null
@@ -0,0 +1,32 @@
+From b5b60778558cafad17bbcbf63e0310bd3c68eb17 Mon Sep 17 00:00:00 2001
+From: Maurizio Lombardi <mlombard@redhat.com>
+Date: Tue, 27 May 2014 12:48:56 -0400
+Subject: ext4: fix wrong assert in ext4_mb_normalize_request()
+
+From: Maurizio Lombardi <mlombard@redhat.com>
+
+commit b5b60778558cafad17bbcbf63e0310bd3c68eb17 upstream.
+
+The variable "size" is expressed as number of blocks and not as
+number of clusters, this could trigger a kernel panic when using
+ext4 with the size of a cluster different from the size of a block.
+
+Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3135,7 +3135,7 @@ ext4_mb_normalize_request(struct ext4_al
+       }
+       BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
+                       start > ac->ac_o_ex.fe_logical);
+-      BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
++      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+       /* now prepare goal request */
diff --git a/queue-3.14/ext4-fix-zeroing-of-page-during-writeback.patch b/queue-3.14/ext4-fix-zeroing-of-page-during-writeback.patch
new file mode 100644 (file)
index 0000000..125efe7
--- /dev/null
@@ -0,0 +1,70 @@
+From eeece469dedadf3918bad50ad80f4616a0064e90 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 27 May 2014 12:48:55 -0400
+Subject: ext4: fix zeroing of page during writeback
+
+From: Jan Kara <jack@suse.cz>
+
+commit eeece469dedadf3918bad50ad80f4616a0064e90 upstream.
+
+The tail of a page straddling the inode size must be zeroed when being
+written out, due to the POSIX requirement that modifications of an
+mmapped page beyond the inode size must not be written to the file.
+ext4_bio_write_page() did this only for blocks fully beyond the inode
+size but didn't properly zero blocks partially beyond it. Fix this.
+
+The problem has been uncovered by mmap_11-4 test in openposix test suite
+(part of LTP).
+
+Reported-by: Xiaoguang Wang <wangxg.fnst@cn.fujitsu.com>
+Fixes: 5a0dc7365c240
+Fixes: bd2d0210cf22f
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/page-io.c |   24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -422,6 +422,17 @@ int ext4_bio_write_page(struct ext4_io_s
+       ClearPageError(page);
+       /*
++       * Comments copied from block_write_full_page_endio:
++       *
++       * The page straddles i_size.  It must be zeroed out on each and every
++       * writepage invocation because it may be mmapped.  "A file is mapped
++       * in multiples of the page size.  For a file that is not a multiple of
++       * the page size, the remaining memory is zeroed when mapped, and
++       * writes to that region are not written out to the file."
++       */
++      if (len < PAGE_CACHE_SIZE)
++              zero_user_segment(page, len, PAGE_CACHE_SIZE);
++      /*
+        * In the first loop we prepare and mark buffers to submit. We have to
+        * mark all buffers in the page before submitting so that
+        * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
+@@ -432,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_s
+       do {
+               block_start = bh_offset(bh);
+               if (block_start >= len) {
+-                      /*
+-                       * Comments copied from block_write_full_page_endio:
+-                       *
+-                       * The page straddles i_size.  It must be zeroed out on
+-                       * each and every writepage invocation because it may
+-                       * be mmapped.  "A file is mapped in multiples of the
+-                       * page size.  For a file that is not a multiple of
+-                       * the  page size, the remaining memory is zeroed when
+-                       * mapped, and writes to that region are not written
+-                       * out to the file."
+-                       */
+-                      zero_user_segment(page, block_start,
+-                                        block_start + blocksize);
+                       clear_buffer_dirty(bh);
+                       set_buffer_uptodate(bh);
+                       continue;
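
A concrete illustration (numbers assumed for the example): with 1k blocks, a
4k page, and i_size ending at byte 2560 of the page, len == 2560. The old
per-block loop zeroed only buffers with block_start >= len, i.e. bytes
3072-4095, leaving bytes 2560-3071 of the straddling block stale; the new
up-front call clears everything past len:

        if (len < PAGE_CACHE_SIZE)                       /* len == 2560 here */
                zero_user_segment(page, len, PAGE_CACHE_SIZE); /* 2560..4095 */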
diff --git a/queue-3.14/hid-core-fix-validation-of-report-id-0.patch b/queue-3.14/hid-core-fix-validation-of-report-id-0.patch
new file mode 100644 (file)
index 0000000..cec48e2
--- /dev/null
@@ -0,0 +1,54 @@
+From 1b15d2e5b8077670b1e6a33250a0d9577efff4a5 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Thu, 17 Apr 2014 13:22:09 -0700
+Subject: HID: core: fix validation of report id 0
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 1b15d2e5b8077670b1e6a33250a0d9577efff4a5 upstream.
+
+Some drivers use the first HID report in the list instead of using an
+index. In these cases, validation uses ID 0, which was supposed to mean
+"first known report". This fixes the problem, which was causing at least
+the lgff family of devices to stop working since hid_validate_values
+was being called with ID 0, but the devices used single numbered IDs
+for their reports:
+
+0x05, 0x01,         /*  Usage Page (Desktop),                   */
+0x09, 0x05,         /*  Usage (Gamepad),                        */
+0xA1, 0x01,         /*  Collection (Application),               */
+0xA1, 0x02,         /*      Collection (Logical),               */
+0x85, 0x01,         /*          Report ID (1),                  */
+...
+
+Reported-by: Simon Wood <simon@mungewell.org>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/hid-core.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -842,7 +842,17 @@ struct hid_report *hid_validate_values(s
+        * ->numbered being checked, which may not always be the case when
+        * drivers go to access report values.
+        */
+-      report = hid->report_enum[type].report_id_hash[id];
++      if (id == 0) {
++              /*
++               * Validating on id 0 means we should examine the first
++               * report in the list.
++               */
++              report = list_entry(
++                              hid->report_enum[type].report_list.next,
++                              struct hid_report, list);
++      } else {
++              report = hid->report_enum[type].report_id_hash[id];
++      }
+       if (!report) {
+               hid_err(hid, "missing %s %u\n", hid_report_names[type], id);
+               return NULL;
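
The open-coded list_entry() on report_list.next is the same lookup the
list_first_entry() helper performs; an equivalent sketch of the branch (like
the original, it assumes the report list is non-empty):

        if (id == 0)
                /* "first known report": head of the per-type report list */
                report = list_first_entry(&hid->report_enum[type].report_list,
                                          struct hid_report, list);
        else
                report = hid->report_enum[type].report_id_hash[id];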
diff --git a/queue-3.14/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch b/queue-3.14/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch
new file mode 100644 (file)
index 0000000..1d9f821
--- /dev/null
@@ -0,0 +1,299 @@
+From c177c81e09e517bbf75b67762cdab1b83aba6976 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 4 Jun 2014 16:05:35 -0700
+Subject: hugetlb: restrict hugepage_migration_support() to x86_64
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit c177c81e09e517bbf75b67762cdab1b83aba6976 upstream.
+
+Currently hugepage migration is available for all archs which support
+pmd-level hugepages, but it has been tested only on x86_64 and there
+are bugs on other archs.  So to avoid breaking such archs, this patch
+limits the availability strictly to x86_64 until developers of other
+archs get interested in enabling this feature.
+
+Simply disabling hugepage migration on non-x86_64 archs is not enough to
+fix the reported problem where sys_move_pages() hits the BUG_ON() in
+follow_page(FOLL_GET), so let's fix this by checking if hugepage
+migration is supported in vma_migratable().
+
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reported-by: Michael Ellerman <mpe@ellerman.id.au>
+Tested-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Hugh Dickins <hughd@google.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Russell King <rmk@arm.linux.org.uk>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: James Hogan <james.hogan@imgtec.com>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mm/hugetlbpage.c     |    5 -----
+ arch/arm64/mm/hugetlbpage.c   |    5 -----
+ arch/ia64/mm/hugetlbpage.c    |    5 -----
+ arch/metag/mm/hugetlbpage.c   |    5 -----
+ arch/mips/mm/hugetlbpage.c    |    5 -----
+ arch/powerpc/mm/hugetlbpage.c |   10 ----------
+ arch/s390/mm/hugetlbpage.c    |    5 -----
+ arch/sh/mm/hugetlbpage.c      |    5 -----
+ arch/sparc/mm/hugetlbpage.c   |    5 -----
+ arch/tile/mm/hugetlbpage.c    |    5 -----
+ arch/x86/Kconfig              |    4 ++++
+ arch/x86/mm/hugetlbpage.c     |   10 ----------
+ include/linux/hugetlb.h       |   13 +++++--------
+ include/linux/mempolicy.h     |    6 ++++++
+ mm/Kconfig                    |    3 +++
+ 15 files changed, 18 insertions(+), 73 deletions(-)
+
+--- a/arch/arm/mm/hugetlbpage.c
++++ b/arch/arm/mm/hugetlbpage.c
+@@ -56,8 +56,3 @@ int pmd_huge(pmd_t pmd)
+ {
+       return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+ }
+-
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+--- a/arch/arm64/mm/hugetlbpage.c
++++ b/arch/arm64/mm/hugetlbpage.c
+@@ -58,11 +58,6 @@ int pud_huge(pud_t pud)
+ #endif
+ }
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+-
+ static __init int setup_hugepagesz(char *opt)
+ {
+       unsigned long ps = memparse(opt, &opt);
+--- a/arch/ia64/mm/hugetlbpage.c
++++ b/arch/ia64/mm/hugetlbpage.c
+@@ -114,11 +114,6 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-int pmd_huge_support(void)
+-{
+-      return 0;
+-}
+-
+ struct page *
+ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
+ {
+--- a/arch/metag/mm/hugetlbpage.c
++++ b/arch/metag/mm/hugetlbpage.c
+@@ -110,11 +110,6 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+-
+ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                            pmd_t *pmd, int write)
+ {
+--- a/arch/mips/mm/hugetlbpage.c
++++ b/arch/mips/mm/hugetlbpage.c
+@@ -84,11 +84,6 @@ int pud_huge(pud_t pud)
+       return (pud_val(pud) & _PAGE_HUGE) != 0;
+ }
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+-
+ struct page *
+ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+               pmd_t *pmd, int write)
+--- a/arch/powerpc/mm/hugetlbpage.c
++++ b/arch/powerpc/mm/hugetlbpage.c
+@@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd)
+        */
+       return ((pgd_val(pgd) & 0x3) != 0x0);
+ }
+-
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+ #else
+ int pmd_huge(pmd_t pmd)
+ {
+@@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd)
+ {
+       return 0;
+ }
+-
+-int pmd_huge_support(void)
+-{
+-      return 0;
+-}
+ #endif
+ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+--- a/arch/s390/mm/hugetlbpage.c
++++ b/arch/s390/mm/hugetlbpage.c
+@@ -223,11 +223,6 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+-
+ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                            pmd_t *pmdp, int write)
+ {
+--- a/arch/sh/mm/hugetlbpage.c
++++ b/arch/sh/mm/hugetlbpage.c
+@@ -83,11 +83,6 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-int pmd_huge_support(void)
+-{
+-      return 0;
+-}
+-
+ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                            pmd_t *pmd, int write)
+ {
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -231,11 +231,6 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-int pmd_huge_support(void)
+-{
+-      return 0;
+-}
+-
+ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                            pmd_t *pmd, int write)
+ {
+--- a/arch/tile/mm/hugetlbpage.c
++++ b/arch/tile/mm/hugetlbpage.c
+@@ -166,11 +166,6 @@ int pud_huge(pud_t pud)
+       return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
+ }
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+-
+ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                            pmd_t *pmd, int write)
+ {
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -1909,6 +1909,10 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+       def_bool y
+       depends on X86_64 || X86_PAE
++config ARCH_ENABLE_HUGEPAGE_MIGRATION
++      def_bool y
++      depends on X86_64 && HUGETLB_PAGE && MIGRATION
++
+ menu "Power management and ACPI options"
+ config ARCH_HIBERNATION_HEADER
+--- a/arch/x86/mm/hugetlbpage.c
++++ b/arch/x86/mm/hugetlbpage.c
+@@ -58,11 +58,6 @@ follow_huge_pmd(struct mm_struct *mm, un
+ {
+       return NULL;
+ }
+-
+-int pmd_huge_support(void)
+-{
+-      return 0;
+-}
+ #else
+ struct page *
+@@ -80,11 +75,6 @@ int pud_huge(pud_t pud)
+ {
+       return !!(pud_val(pud) & _PAGE_PSE);
+ }
+-
+-int pmd_huge_support(void)
+-{
+-      return 1;
+-}
+ #endif
+ #ifdef CONFIG_HUGETLB_PAGE
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -382,15 +382,13 @@ static inline pgoff_t basepage_index(str
+ extern void dissolve_free_huge_pages(unsigned long start_pfn,
+                                    unsigned long end_pfn);
+-int pmd_huge_support(void);
+-/*
+- * Currently hugepage migration is enabled only for pmd-based hugepage.
+- * This function will be updated when hugepage migration is more widely
+- * supported.
+- */
+ static inline int hugepage_migration_support(struct hstate *h)
+ {
+-      return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT);
++#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
++      return huge_page_shift(h) == PMD_SHIFT;
++#else
++      return 0;
++#endif
+ }
+ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+@@ -430,7 +428,6 @@ static inline pgoff_t basepage_index(str
+       return page->index;
+ }
+ #define dissolve_free_huge_pages(s, e)        do {} while (0)
+-#define pmd_huge_support()    0
+ #define hugepage_migration_support(h) 0
+ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
+--- a/include/linux/mempolicy.h
++++ b/include/linux/mempolicy.h
+@@ -176,6 +176,12 @@ static inline int vma_migratable(struct
+ {
+       if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+               return 0;
++
++#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
++      if (vma->vm_flags & VM_HUGETLB)
++              return 0;
++#endif
++
+       /*
+        * Migration allocates pages in the highest zone. If we cannot
+        * do so then migration (at least from node to node) is not
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -263,6 +263,9 @@ config MIGRATION
+         pages as migration can relocate pages to satisfy a huge page
+         allocation instead of reclaiming.
++config ARCH_ENABLE_HUGEPAGE_MIGRATION
++      boolean
++
+ config PHYS_ADDR_T_64BIT
+       def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
diff --git a/queue-3.14/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch b/queue-3.14/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch
new file mode 100644 (file)
index 0000000..937d8a4
--- /dev/null
@@ -0,0 +1,77 @@
+From 3afb69cb5572b3c8c898c00880803cf1a49852c4 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+Date: Fri, 6 Jun 2014 14:37:10 -0700
+Subject: idr: fix overflow bug during maximum ID calculation at maximum height
+
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+
+commit 3afb69cb5572b3c8c898c00880803cf1a49852c4 upstream.
+
+idr_replace() open-codes the logic to calculate the maximum valid ID
+given the height of the idr tree; unfortunately, the open-coded logic
+doesn't account for the fact that the top layer may have unused slots
+and over-shifts the limit to zero when the tree is at its maximum
+height.
+
+The following test code shows it fails to replace the value for
+id=((1<<27)+42):
+
+  static void test5(void)
+  {
+        int id;
+        DEFINE_IDR(test_idr);
+  #define TEST5_START ((1<<27)+42) /* use the highest layer */
+
+        printk(KERN_INFO "Start test5\n");
+        id = idr_alloc(&test_idr, (void *)1, TEST5_START, 0, GFP_KERNEL);
+        BUG_ON(id != TEST5_START);
+        TEST_BUG_ON(idr_replace(&test_idr, (void *)2, TEST5_START) != (void *)1);
+        idr_destroy(&test_idr);
+        printk(KERN_INFO "End of test5\n");
+  }
+
+Fix the bug by using idr_max() which correctly takes into account the
+maximum allowed shift.
+
+sub_alloc() shares the same problem and may incorrectly fail with
+-EAGAIN; however, this bug doesn't affect correct operation because
+idr_get_empty_slot(), which already uses idr_max(), retries with the
+increased @id in such cases.
+
+[tj@kernel.org: Updated patch description.]
+Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/idr.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/lib/idr.c
++++ b/lib/idr.c
+@@ -250,7 +250,7 @@ static int sub_alloc(struct idr *idp, in
+                       id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
+                       /* if already at the top layer, we need to grow */
+-                      if (id >= 1 << (idp->layers * IDR_BITS)) {
++                      if (id > idr_max(idp->layers)) {
+                               *starting_id = id;
+                               return -EAGAIN;
+                       }
+@@ -827,12 +827,10 @@ void *idr_replace(struct idr *idp, void
+       if (!p)
+               return ERR_PTR(-EINVAL);
+-      n = (p->layer+1) * IDR_BITS;
+-
+-      if (id >= (1 << n))
++      if (id > idr_max(p->layer + 1))
+               return ERR_PTR(-EINVAL);
+-      n -= IDR_BITS;
++      n = p->layer * IDR_BITS;
+       while ((n > 0) && p) {
+               p = p->ary[(id >> n) & IDR_MASK];
+               n -= IDR_BITS;
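
For reference, the idr_max() helper the fix switches to clamps the shift
before computing the limit, which is exactly what the open-coded version
failed to do at maximum height; as it appears in lib/idr.c of this era:

        static int idr_max(int layers)
        {
                /* Clamping to MAX_IDR_SHIFT keeps the shift from overflowing
                 * when the top layer has unused slots. */
                int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);

                return (1 << bits) - 1;
        }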
diff --git a/queue-3.14/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch b/queue-3.14/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch
new file mode 100644 (file)
index 0000000..2cc8f9c
--- /dev/null
@@ -0,0 +1,45 @@
+From 8fe6929cfd43c44834858a53e129ffdc7c166298 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Date: Wed, 4 Jun 2014 16:05:36 -0700
+Subject: kthread: fix return value of kthread_create() upon SIGKILL.
+
+From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+
+commit 8fe6929cfd43c44834858a53e129ffdc7c166298 upstream.
+
+Commit 786235eeba0e ("kthread: make kthread_create() killable") was
+meant to allow kthread_create() to abort as soon as it is killed by the
+OOM killer.  But returning -ENOMEM is wrong if killed by SIGKILL from
+userspace.  Change kthread_create() to return -EINTR upon SIGKILL.
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/kthread.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -262,7 +262,7 @@ static void create_kthread(struct kthrea
+  * kthread_stop() has been called).  The return value should be zero
+  * or a negative error number; it will be passed to kthread_stop().
+  *
+- * Returns a task_struct or ERR_PTR(-ENOMEM).
++ * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
+  */
+ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
+                                          void *data, int node,
+@@ -298,7 +298,7 @@ struct task_struct *kthread_create_on_no
+                * that thread.
+                */
+               if (xchg(&create->done, NULL))
+-                      return ERR_PTR(-ENOMEM);
++                      return ERR_PTR(-EINTR);
+               /*
+                * kthreadd (or new kernel thread) will call complete()
+                * shortly.
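
Callers observe the change only as a different errno; a hedged sketch of a
typical call site (driver and thread names assumed):

        struct task_struct *task;

        task = kthread_create(worker_fn, NULL, "mydrv-worker");
        if (IS_ERR(task)) {
                /* After this fix: -EINTR means the creator was SIGKILLed,
                 * while -ENOMEM still means allocation failure. */
                return PTR_ERR(task);
        }
        wake_up_process(task);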
diff --git a/queue-3.14/matroxfb-perform-a-dummy-read-of-m_status.patch b/queue-3.14/matroxfb-perform-a-dummy-read-of-m_status.patch
new file mode 100644 (file)
index 0000000..71410d5
--- /dev/null
@@ -0,0 +1,40 @@
+From 972754cfaee94d6e25acf94a497bc0a864d91b7e Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 15 May 2014 06:58:24 -0400
+Subject: matroxfb: perform a dummy read of M_STATUS
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 972754cfaee94d6e25acf94a497bc0a864d91b7e upstream.
+
+I had occasional screen corruption with the matrox framebuffer driver and
+I found out that the reason for the corruption is that the hardware
+blitter accesses the videoram while it is being written to.
+
+The matrox driver has a macro WaitTillIdle() that should wait until the
+blitter is idle, but it sometimes doesn't work. I added a dummy read
+mga_inl(M_STATUS) to WaitTillIdle() to fix the problem. The dummy read
+will flush the write buffer in the PCI chipset, and the next read of
+M_STATUS will return the hardware status.
+
+Since applying this patch, I had no screen corruption at all.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/matrox/matroxfb_base.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/video/matrox/matroxfb_base.h
++++ b/drivers/video/matrox/matroxfb_base.h
+@@ -698,7 +698,7 @@ void matroxfb_unregister_driver(struct m
+ #define mga_fifo(n)   do {} while ((mga_inl(M_FIFOSTATUS) & 0xFF) < (n))
+-#define WaitTillIdle()        do {} while (mga_inl(M_STATUS) & 0x10000)
++#define WaitTillIdle()        do { mga_inl(M_STATUS); do {} while (mga_inl(M_STATUS) & 0x10000); } while (0)
+ /* code speedup */
+ #ifdef CONFIG_FB_MATROX_MILLENIUM
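
Unrolled for readability, the new macro issues one throwaway read whose only
purpose is to flush posted writes in the PCI chipset, then polls the real
blitter-busy bit:

        do {
                mga_inl(M_STATUS);      /* dummy read: flush posted PCI writes */
                do {
                        /* spin until the blitter-busy bit (bit 16) clears */
                } while (mga_inl(M_STATUS) & 0x10000);
        } while (0);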
diff --git a/queue-3.14/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch b/queue-3.14/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch
new file mode 100644 (file)
index 0000000..d35c97a
--- /dev/null
@@ -0,0 +1,66 @@
+From 7f39dda9d86fb4f4f17af0de170decf125726f8c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 4 Jun 2014 16:05:33 -0700
+Subject: mm: fix sleeping function warning from __put_anon_vma
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 7f39dda9d86fb4f4f17af0de170decf125726f8c upstream.
+
+Trinity reports BUG:
+
+  sleeping function called from invalid context at kernel/locking/rwsem.c:47
+  in_atomic(): 0, irqs_disabled(): 0, pid: 5787, name: trinity-c27
+
+__might_sleep < down_write < __put_anon_vma < page_get_anon_vma <
+migrate_pages < compact_zone < compact_zone_order < try_to_compact_pages ..
+
+Right, since conversion to mutex then rwsem, we should not put_anon_vma()
+from inside an rcu_read_lock()ed section: fix the two places that did so.
+And add might_sleep() to anon_vma_free(), as suggested by Peter Zijlstra.
+
+Fixes: 88c22088bf23 ("mm: optimize page_lock_anon_vma() fast-path")
+Reported-by: Dave Jones <davej@redhat.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/rmap.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -103,6 +103,7 @@ static inline void anon_vma_free(struct
+        * LOCK should suffice since the actual taking of the lock must
+        * happen _before_ what follows.
+        */
++      might_sleep();
+       if (rwsem_is_locked(&anon_vma->root->rwsem)) {
+               anon_vma_lock_write(anon_vma);
+               anon_vma_unlock_write(anon_vma);
+@@ -426,8 +427,9 @@ struct anon_vma *page_get_anon_vma(struc
+        * above cannot corrupt).
+        */
+       if (!page_mapped(page)) {
++              rcu_read_unlock();
+               put_anon_vma(anon_vma);
+-              anon_vma = NULL;
++              return NULL;
+       }
+ out:
+       rcu_read_unlock();
+@@ -477,9 +479,9 @@ struct anon_vma *page_lock_anon_vma_read
+       }
+       if (!page_mapped(page)) {
++              rcu_read_unlock();
+               put_anon_vma(anon_vma);
+-              anon_vma = NULL;
+-              goto out;
++              return NULL;
+       }
+       /* we pinned the anon_vma, its safe to sleep */
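
The added might_sleep() is a general debugging pattern worth noting: with
CONFIG_DEBUG_ATOMIC_SLEEP it warns at the top of the function even when the
rwsem happens to be uncontended, catching callers that hold rcu_read_lock()
or a spinlock before the bug becomes intermittent. A sketch of the pattern
with hypothetical names:

        static void release_ctx(struct my_ctx *ctx)    /* hypothetical type */
        {
                might_sleep();  /* warn in atomic context even if we don't block */
                if (rwsem_is_locked(&ctx->rwsem)) {
                        down_write(&ctx->rwsem);       /* the real sleeping point */
                        up_write(&ctx->rwsem);
                }
                kfree(ctx);
        }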
diff --git a/queue-3.14/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch b/queue-3.14/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch
new file mode 100644 (file)
index 0000000..06efbef
--- /dev/null
@@ -0,0 +1,123 @@
+From 74614de17db6fb472370c426d4f934d8d616edf2 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 4 Jun 2014 16:11:01 -0700
+Subject: mm/memory-failure.c: don't let collect_procs() skip over processes for MF_ACTION_REQUIRED
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit 74614de17db6fb472370c426d4f934d8d616edf2 upstream.
+
+When Linux sees an "action optional" machine check (where h/w has reported
+an error that is not in the current execution path) we generally do not
+want to signal a process, since most processes do not have a SIGBUS
+handler; we'd just prematurely terminate the process for a problem that
+they might never actually see.
+
+task_early_kill() decides whether to consider a process - and it checks
+whether this specific process has been marked for early signals with
+"prctl", or if the system administrator has requested early signals for
+all processes using /proc/sys/vm/memory_failure_early_kill.
+
+But for the MF_ACTION_REQUIRED case we must not defer.  The error is in
+the execution path of the current thread, so we must send the SIGBUS
+immediately.
+
+Fix by passing a flag argument through collect_procs*() to
+task_early_kill() so it knows whether we can defer or must take action.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Chen Gong <gong.chen@linux.jf.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c |   21 ++++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -384,10 +384,12 @@ static void kill_procs(struct list_head
+       }
+ }
+-static int task_early_kill(struct task_struct *tsk)
++static int task_early_kill(struct task_struct *tsk, int force_early)
+ {
+       if (!tsk->mm)
+               return 0;
++      if (force_early)
++              return 1;
+       if (tsk->flags & PF_MCE_PROCESS)
+               return !!(tsk->flags & PF_MCE_EARLY);
+       return sysctl_memory_failure_early_kill;
+@@ -397,7 +399,7 @@ static int task_early_kill(struct task_s
+  * Collect processes when the error hit an anonymous page.
+  */
+ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
+-                            struct to_kill **tkc)
++                            struct to_kill **tkc, int force_early)
+ {
+       struct vm_area_struct *vma;
+       struct task_struct *tsk;
+@@ -413,7 +415,7 @@ static void collect_procs_anon(struct pa
+       for_each_process (tsk) {
+               struct anon_vma_chain *vmac;
+-              if (!task_early_kill(tsk))
++              if (!task_early_kill(tsk, force_early))
+                       continue;
+               anon_vma_interval_tree_foreach(vmac, &av->rb_root,
+                                              pgoff, pgoff) {
+@@ -432,7 +434,7 @@ static void collect_procs_anon(struct pa
+  * Collect processes when the error hit a file mapped page.
+  */
+ static void collect_procs_file(struct page *page, struct list_head *to_kill,
+-                            struct to_kill **tkc)
++                            struct to_kill **tkc, int force_early)
+ {
+       struct vm_area_struct *vma;
+       struct task_struct *tsk;
+@@ -443,7 +445,7 @@ static void collect_procs_file(struct pa
+       for_each_process(tsk) {
+               pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+-              if (!task_early_kill(tsk))
++              if (!task_early_kill(tsk, force_early))
+                       continue;
+               vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
+@@ -469,7 +471,8 @@ static void collect_procs_file(struct pa
+  * First preallocate one tokill structure outside the spin locks,
+  * so that we can kill at least one process reasonably reliable.
+  */
+-static void collect_procs(struct page *page, struct list_head *tokill)
++static void collect_procs(struct page *page, struct list_head *tokill,
++                              int force_early)
+ {
+       struct to_kill *tk;
+@@ -480,9 +483,9 @@ static void collect_procs(struct page *p
+       if (!tk)
+               return;
+       if (PageAnon(page))
+-              collect_procs_anon(page, tokill, &tk);
++              collect_procs_anon(page, tokill, &tk, force_early);
+       else
+-              collect_procs_file(page, tokill, &tk);
++              collect_procs_file(page, tokill, &tk, force_early);
+       kfree(tk);
+ }
+@@ -967,7 +970,7 @@ static int hwpoison_user_mappings(struct
+        * there's nothing that can be done.
+        */
+       if (kill)
+-              collect_procs(ppage, &tokill);
++              collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);
+       ret = try_to_unmap(ppage, ttu);
+       if (ret != SWAP_SUCCESS)
diff --git a/queue-3.14/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch b/queue-3.14/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch
new file mode 100644 (file)
index 0000000..6477d73
--- /dev/null
@@ -0,0 +1,53 @@
+From a70ffcac741d31a406c1d2b832ae43d658e7e1cf Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 4 Jun 2014 16:10:59 -0700
+Subject: mm/memory-failure.c-failure: send right signal code to correct thread
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit a70ffcac741d31a406c1d2b832ae43d658e7e1cf upstream.
+
+When a thread in a multi-threaded application hits a machine check
+because of an uncorrectable error in memory, we want to send the SIGBUS
+with si.si_code = BUS_MCEERR_AR to that thread.  Currently we fail to do
+that if the active thread is not the primary thread in the process.
+collect_procs() just finds primary threads and this test:
+
+       if ((flags & MF_ACTION_REQUIRED) && t == current) {
+
+will see that the thread we found isn't the current thread and so sends a
+si.si_code = BUS_MCEERR_AO to the primary (and nothing to the active
+thread at this time).
+
+We can fix this by checking whether "current" shares the same mm with the
+process that collect_procs() said owned the page.  If so, we send the
+SIGBUS to current (with code BUS_MCEERR_AR).
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reported-by: Otto Bruggeman <otto.g.bruggeman@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Chen Gong <gong.chen@linux.jf.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -208,9 +208,9 @@ static int kill_proc(struct task_struct
+ #endif
+       si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
+-      if ((flags & MF_ACTION_REQUIRED) && t == current) {
++      if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
+               si.si_code = BUS_MCEERR_AR;
+-              ret = force_sig_info(SIGBUS, &si, t);
++              ret = force_sig_info(SIGBUS, &si, current);
+       } else {
+               /*
+                * Don't use force here, it's convenient if the signal
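
From userspace the distinction is visible in siginfo: errors in the current
execution path arrive as BUS_MCEERR_AR in the consuming thread, while
advisory notifications use BUS_MCEERR_AO. A minimal sketch of a
recovery-aware handler (an illustration, not code from the patch; install it
with sigaction() and SA_SIGINFO):

        #include <signal.h>

        static void sigbus_handler(int sig, siginfo_t *si, void *ucontext)
        {
                if (si->si_code == BUS_MCEERR_AR) {
                        /* action required: this thread consumed poisoned memory
                         * at si->si_addr (granularity: 1 << si->si_addr_lsb) */
                } else if (si->si_code == BUS_MCEERR_AO) {
                        /* action optional: a mapped page went bad but has not
                         * been consumed yet; remap or drop cached data */
                }
        }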
diff --git a/queue-3.14/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch b/queue-3.14/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch
new file mode 100644 (file)
index 0000000..c24ee7e
--- /dev/null
@@ -0,0 +1,162 @@
+From 3ba08129e38437561df44c36b7ea9081185d5333 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 4 Jun 2014 16:11:02 -0700
+Subject: mm/memory-failure.c: support use of a dedicated thread to handle SIGBUS(BUS_MCEERR_AO)
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 3ba08129e38437561df44c36b7ea9081185d5333 upstream.
+
+Currently the memory error handler handles action optional errors in a
+deferred manner by default, and if a recovery-aware application wants
+to handle them immediately, it can do so by setting the PF_MCE_EARLY
+flag.  However, such a signal can be sent only to the main thread, so
+this is problematic if the application wants to have a dedicated
+thread to handle such signals.
+
+So this patch adds dedicated thread support to the memory error
+handler.  We have a PF_MCE_EARLY flag for each thread separately, so
+with this patch the AO signal is sent to the thread with the
+PF_MCE_EARLY flag set, not to the main thread.  If you want to
+implement a dedicated thread, you call prctl() to set PF_MCE_EARLY on
+that thread.
+
+The memory error handler collects processes to be killed, so this patch
+lets it check the PF_MCE_EARLY flag on each thread in the collecting
+routines.
+
+No behavioral change for all non-early kill cases.
+
+Tony said:
+
+: The old behavior was crazy - someone with a multithreaded process might
+: well expect that if they call prctl(PF_MCE_EARLY) in just one thread, then
+: that thread would see the SIGBUS with si_code = BUS_MCEERR_AO - even if
+: that thread wasn't the main thread for the process.
+
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Cc: Kamil Iskra <iskra@mcs.anl.gov>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Chen Gong <gong.chen@linux.jf.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/vm/hwpoison.txt |    5 +++
+ mm/memory-failure.c           |   56 ++++++++++++++++++++++++++++++++----------
+ 2 files changed, 48 insertions(+), 13 deletions(-)
+
+--- a/Documentation/vm/hwpoison.txt
++++ b/Documentation/vm/hwpoison.txt
+@@ -84,6 +84,11 @@ PR_MCE_KILL
+               PR_MCE_KILL_EARLY: Early kill
+               PR_MCE_KILL_LATE:  Late kill
+               PR_MCE_KILL_DEFAULT: Use system global default
++      Note that if you want to have a dedicated thread which handles
++      the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should
++      call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise,
++      the SIGBUS is sent to the main thread.
++
+ PR_MCE_KILL_GET
+       return current mode
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -384,15 +384,44 @@ static void kill_procs(struct list_head
+       }
+ }
+-static int task_early_kill(struct task_struct *tsk, int force_early)
++/*
++ * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)
++ * on behalf of the thread group. Return task_struct of the (first found)
++ * dedicated thread if found, and return NULL otherwise.
++ *
++ * We already hold read_lock(&tasklist_lock) in the caller, so we don't
++ * have to call rcu_read_lock/unlock() in this function.
++ */
++static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
+ {
++      struct task_struct *t;
++
++      for_each_thread(tsk, t)
++              if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
++                      return t;
++      return NULL;
++}
++
++/*
++ * Determine whether a given process is "early kill" process which expects
++ * to be signaled when some page under the process is hwpoisoned.
++ * Return task_struct of the dedicated thread (main thread unless explicitly
++ * specified) if the process is "early kill," and otherwise returns NULL.
++ */
++static struct task_struct *task_early_kill(struct task_struct *tsk,
++                                         int force_early)
++{
++      struct task_struct *t;
+       if (!tsk->mm)
+-              return 0;
++              return NULL;
+       if (force_early)
+-              return 1;
+-      if (tsk->flags & PF_MCE_PROCESS)
+-              return !!(tsk->flags & PF_MCE_EARLY);
+-      return sysctl_memory_failure_early_kill;
++              return tsk;
++      t = find_early_kill_thread(tsk);
++      if (t)
++              return t;
++      if (sysctl_memory_failure_early_kill)
++              return tsk;
++      return NULL;
+ }
+ /*
+@@ -414,16 +443,17 @@ static void collect_procs_anon(struct pa
+       read_lock(&tasklist_lock);
+       for_each_process (tsk) {
+               struct anon_vma_chain *vmac;
++              struct task_struct *t = task_early_kill(tsk, force_early);
+-              if (!task_early_kill(tsk, force_early))
++              if (!t)
+                       continue;
+               anon_vma_interval_tree_foreach(vmac, &av->rb_root,
+                                              pgoff, pgoff) {
+                       vma = vmac->vma;
+                       if (!page_mapped_in_vma(page, vma))
+                               continue;
+-                      if (vma->vm_mm == tsk->mm)
+-                              add_to_kill(tsk, page, vma, to_kill, tkc);
++                      if (vma->vm_mm == t->mm)
++                              add_to_kill(t, page, vma, to_kill, tkc);
+               }
+       }
+       read_unlock(&tasklist_lock);
+@@ -444,10 +474,10 @@ static void collect_procs_file(struct pa
+       read_lock(&tasklist_lock);
+       for_each_process(tsk) {
+               pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
++              struct task_struct *t = task_early_kill(tsk, force_early);
+-              if (!task_early_kill(tsk, force_early))
++              if (!t)
+                       continue;
+-
+               vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
+                                     pgoff) {
+                       /*
+@@ -457,8 +487,8 @@ static void collect_procs_file(struct pa
+                        * Assume applications who requested early kill want
+                        * to be informed of all such data corruptions.
+                        */
+-                      if (vma->vm_mm == tsk->mm)
+-                              add_to_kill(tsk, page, vma, to_kill, tkc);
++                      if (vma->vm_mm == t->mm)
++                              add_to_kill(t, page, vma, to_kill, tkc);
+               }
+       }
+       read_unlock(&tasklist_lock);
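As a hedged illustration of the userspace side of the hwpoison.txt addition above: a process can dedicate one thread to BUS_MCEERR_AO handling by calling prctl(PR_MCE_KILL) on that thread. Everything below (thread function, handler body) is an invented sketch, not part of the patch.

/*
 * Minimal sketch: dedicate one thread to SIGBUS(BUS_MCEERR_AO).
 * Illustrative only; build with -pthread.
 */
#include <pthread.h>
#include <signal.h>
#include <sys/prctl.h>
#include <unistd.h>

static void mce_handler(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)ctx;
	if (si->si_code == BUS_MCEERR_AO) {
		/* hwpoison notification: si->si_addr is the bad page */
	}
}

static void *mce_thread(void *arg)
{
	struct sigaction sa = { 0 };

	(void)arg;
	sa.sa_sigaction = mce_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	/* Mark only this thread "early kill"; with the patch above the
	 * kernel then routes BUS_MCEERR_AO here, not to the main thread. */
	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);

	for (;;)
		pause();
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, mce_thread, NULL);
	/* ... application work happens on other threads ... */
	pthread_join(tid, NULL);
	return 0;
}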
diff --git a/queue-3.14/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch b/queue-3.14/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch
new file mode 100644 (file)
index 0000000..e0eced1
--- /dev/null
@@ -0,0 +1,237 @@
+From e58469bafd0524e848c3733bc3918d854595e20f Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Wed, 4 Jun 2014 16:10:16 -0700
+Subject: mm: page_alloc: use word-based accesses for get/set pageblock bitmaps
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit e58469bafd0524e848c3733bc3918d854595e20f upstream.
+
+The test_bit operations in get/set pageblock flags are expensive.  This
+patch reads the bitmap on a word basis and uses shifts and masks to
+isolate the bits of interest.  Similarly, masks are used to set a local
+copy of the bitmap and cmpxchg is then used to update the bitmap if
+there have been no other changes made in parallel.
+
+In a test running dd onto tmpfs the overhead of the pageblock-related
+functions went from 1.27% in profiles to 0.5%.
+
+In addition to the performance benefits, this patch closes races that are
+possible between:
+
+a) get_ and set_pageblock_migratetype(), where get_pageblock_migratetype()
+   reads part of the bits before and the other part of the bits after
+   set_pageblock_migratetype() has updated them.
+
+b) set_pageblock_migratetype() and set_pageblock_skip(), where the non-atomic
+   read-modify-update set-bit operation in set_pageblock_skip() will cause
+   lost updates to some bits changed in set_pageblock_migratetype().
+
+Joonsoo Kim first reported case a) via code inspection.  Vlastimil
+Babka's testing with a debug patch showed that either a) or b) occurs
+roughly once per mmtests' stress-highalloc benchmark (although not
+necessarily in the same pageblock).  Furthermore, during development of
+unrelated compaction patches it was observed that with frequent calls to
+{start,undo}_isolate_page_range() the race occurs several thousand
+times and has resulted in NULL pointer dereferences in move_freepages()
+and free_one_page() in places where free_list[migratetype] is
+manipulated by e.g. list_move().  Further debugging confirmed that
+migratetype had the invalid value of 6, causing out-of-bounds access to
+the free_list array.
+
+That confirmed that the race exists, although it may be extremely rare,
+and is currently only fatal where page isolation is performed due to
+memory hot-remove.  Races on pageblocks being updated by
+set_pageblock_migratetype(), where both the old and the new migratetype
+are lower than MIGRATE_RESERVE, currently cannot result in an invalid
+value being observed, although theoretically they may still lead to
+unexpected creation or destruction of MIGRATE_RESERVE pageblocks.
+Furthermore, things could suddenly get worse when memory isolation is
+used more, or when new migratetypes are added.
+
+After this patch, the race is no longer observed in testing.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Reported-and-tested-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mmzone.h          |    6 +++-
+ include/linux/pageblock-flags.h |   37 +++++++++++++++++++++++-----
+ mm/page_alloc.c                 |   52 ++++++++++++++++++++++++----------------
+ 3 files changed, 68 insertions(+), 27 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -75,9 +75,13 @@ enum {
+ extern int page_group_by_mobility_disabled;
++#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
++#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
++
+ static inline int get_pageblock_migratetype(struct page *page)
+ {
+-      return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
++      BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
++      return get_pageblock_flags_mask(page, PB_migrate_end, MIGRATETYPE_MASK);
+ }
+ struct free_area {
+--- a/include/linux/pageblock-flags.h
++++ b/include/linux/pageblock-flags.h
+@@ -30,9 +30,12 @@ enum pageblock_bits {
+       PB_migrate,
+       PB_migrate_end = PB_migrate + 3 - 1,
+                       /* 3 bits required for migrate types */
+-#ifdef CONFIG_COMPACTION
+       PB_migrate_skip,/* If set the block is skipped by compaction */
+-#endif /* CONFIG_COMPACTION */
++
++      /*
++       * Assume the bits will always align on a word. If this assumption
++       * changes then get/set pageblock needs updating.
++       */
+       NR_PAGEBLOCK_BITS
+ };
+@@ -62,11 +65,33 @@ extern int pageblock_order;
+ /* Forward declaration */
+ struct page;
++unsigned long get_pageblock_flags_mask(struct page *page,
++                              unsigned long end_bitidx,
++                              unsigned long mask);
++void set_pageblock_flags_mask(struct page *page,
++                              unsigned long flags,
++                              unsigned long end_bitidx,
++                              unsigned long mask);
++
+ /* Declarations for getting and setting flags. See mm/page_alloc.c */
+-unsigned long get_pageblock_flags_group(struct page *page,
+-                                      int start_bitidx, int end_bitidx);
+-void set_pageblock_flags_group(struct page *page, unsigned long flags,
+-                                      int start_bitidx, int end_bitidx);
++static inline unsigned long get_pageblock_flags_group(struct page *page,
++                                      int start_bitidx, int end_bitidx)
++{
++      unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
++      unsigned long mask = (1 << nr_flag_bits) - 1;
++
++      return get_pageblock_flags_mask(page, end_bitidx, mask);
++}
++
++static inline void set_pageblock_flags_group(struct page *page,
++                                      unsigned long flags,
++                                      int start_bitidx, int end_bitidx)
++{
++      unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
++      unsigned long mask = (1 << nr_flag_bits) - 1;
++
++      set_pageblock_flags_mask(page, flags, end_bitidx, mask);
++}
+ #ifdef CONFIG_COMPACTION
+ #define get_pageblock_skip(page) \
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -6007,53 +6007,65 @@ static inline int pfn_to_bitidx(struct z
+  * @end_bitidx: The last bit of interest
+  * returns pageblock_bits flags
+  */
+-unsigned long get_pageblock_flags_group(struct page *page,
+-                                      int start_bitidx, int end_bitidx)
++unsigned long get_pageblock_flags_mask(struct page *page,
++                                      unsigned long end_bitidx,
++                                      unsigned long mask)
+ {
+       struct zone *zone;
+       unsigned long *bitmap;
+-      unsigned long pfn, bitidx;
+-      unsigned long flags = 0;
+-      unsigned long value = 1;
++      unsigned long pfn, bitidx, word_bitidx;
++      unsigned long word;
+       zone = page_zone(page);
+       pfn = page_to_pfn(page);
+       bitmap = get_pageblock_bitmap(zone, pfn);
+       bitidx = pfn_to_bitidx(zone, pfn);
++      word_bitidx = bitidx / BITS_PER_LONG;
++      bitidx &= (BITS_PER_LONG-1);
+-      for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
+-              if (test_bit(bitidx + start_bitidx, bitmap))
+-                      flags |= value;
+-
+-      return flags;
++      word = bitmap[word_bitidx];
++      bitidx += end_bitidx;
++      return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
+ }
+ /**
+- * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
++ * set_pageblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
+  * @page: The page within the block of interest
+  * @start_bitidx: The first bit of interest
+  * @end_bitidx: The last bit of interest
+  * @flags: The flags to set
+  */
+-void set_pageblock_flags_group(struct page *page, unsigned long flags,
+-                                      int start_bitidx, int end_bitidx)
++void set_pageblock_flags_mask(struct page *page, unsigned long flags,
++                                      unsigned long end_bitidx,
++                                      unsigned long mask)
+ {
+       struct zone *zone;
+       unsigned long *bitmap;
+-      unsigned long pfn, bitidx;
+-      unsigned long value = 1;
++      unsigned long pfn, bitidx, word_bitidx;
++      unsigned long old_word, word;
++
++      BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+       zone = page_zone(page);
+       pfn = page_to_pfn(page);
+       bitmap = get_pageblock_bitmap(zone, pfn);
+       bitidx = pfn_to_bitidx(zone, pfn);
++      word_bitidx = bitidx / BITS_PER_LONG;
++      bitidx &= (BITS_PER_LONG-1);
++
+       VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
+-      for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
+-              if (flags & value)
+-                      __set_bit(bitidx + start_bitidx, bitmap);
+-              else
+-                      __clear_bit(bitidx + start_bitidx, bitmap);
++      bitidx += end_bitidx;
++      mask <<= (BITS_PER_LONG - bitidx - 1);
++      flags <<= (BITS_PER_LONG - bitidx - 1);
++
++      word = ACCESS_ONCE(bitmap[word_bitidx]);
++      for (;;) {
++              old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
++              if (word == old_word)
++                      break;
++              word = old_word;
++      }
+ }
+ /*
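For readers unfamiliar with the word-based pattern above, here is a hedged standalone sketch of the same read/modify/cmpxchg idea in plain C (names invented; this is not the kernel implementation): read the word once, splice the new bits in with a mask, and retry the compare-and-swap until no concurrent writer intervened.

/*
 * Sketch of a word-based bit-group update with a cmpxchg retry loop.
 * get_bits()/set_bits() are invented names; GCC/Clang builtins stand
 * in for the kernel's ACCESS_ONCE() and cmpxchg().
 */
#include <stdio.h>

static unsigned long get_bits(const unsigned long *word,
			      unsigned shift, unsigned long mask)
{
	return (__atomic_load_n(word, __ATOMIC_RELAXED) >> shift) & mask;
}

static void set_bits(unsigned long *word, unsigned long flags,
		     unsigned shift, unsigned long mask)
{
	unsigned long old, cur = __atomic_load_n(word, __ATOMIC_RELAXED);

	for (;;) {
		old = __sync_val_compare_and_swap(word, cur,
				(cur & ~(mask << shift)) | (flags << shift));
		if (old == cur)
			break;		/* no concurrent change: done */
		cur = old;		/* lost a race: retry on new value */
	}
}

int main(void)
{
	unsigned long word = 0;

	set_bits(&word, 0x5, 4, 0x7);		/* 3-bit group at bit 4 */
	printf("%lu\n", get_bits(&word, 4, 0x7));	/* prints 5 */
	return 0;
}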
diff --git a/queue-3.14/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch b/queue-3.14/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch
new file mode 100644 (file)
index 0000000..ec04ea3
--- /dev/null
@@ -0,0 +1,94 @@
+From 71abdc15adf8c702a1dd535f8e30df50758848d2 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Fri, 6 Jun 2014 14:35:35 -0700
+Subject: mm: vmscan: clear kswapd's special reclaim powers before exiting
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 71abdc15adf8c702a1dd535f8e30df50758848d2 upstream.
+
+When kswapd exits, it can end up taking locks that were previously held
+by allocating tasks while they waited for reclaim.  Lockdep currently
+warns about this:
+
+On Wed, May 28, 2014 at 06:06:34PM +0800, Gu Zheng wrote:
+>  inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-R} usage.
+>  kswapd2/1151 [HC0[0]:SC0[0]:HE1:SE1] takes:
+>   (&sig->group_rwsem){+++++?}, at: exit_signals+0x24/0x130
+>  {RECLAIM_FS-ON-W} state was registered at:
+>     mark_held_locks+0xb9/0x140
+>     lockdep_trace_alloc+0x7a/0xe0
+>     kmem_cache_alloc_trace+0x37/0x240
+>     flex_array_alloc+0x99/0x1a0
+>     cgroup_attach_task+0x63/0x430
+>     attach_task_by_pid+0x210/0x280
+>     cgroup_procs_write+0x16/0x20
+>     cgroup_file_write+0x120/0x2c0
+>     vfs_write+0xc0/0x1f0
+>     SyS_write+0x4c/0xa0
+>     tracesys+0xdd/0xe2
+>  irq event stamp: 49
+>  hardirqs last  enabled at (49):  _raw_spin_unlock_irqrestore+0x36/0x70
+>  hardirqs last disabled at (48):  _raw_spin_lock_irqsave+0x2b/0xa0
+>  softirqs last  enabled at (0):  copy_process.part.24+0x627/0x15f0
+>  softirqs last disabled at (0):            (null)
+>
+>  other info that might help us debug this:
+>   Possible unsafe locking scenario:
+>
+>         CPU0
+>         ----
+>    lock(&sig->group_rwsem);
+>    <Interrupt>
+>      lock(&sig->group_rwsem);
+>
+>   *** DEADLOCK ***
+>
+>  no locks held by kswapd2/1151.
+>
+>  stack backtrace:
+>  CPU: 30 PID: 1151 Comm: kswapd2 Not tainted 3.10.39+ #4
+>  Call Trace:
+>    dump_stack+0x19/0x1b
+>    print_usage_bug+0x1f7/0x208
+>    mark_lock+0x21d/0x2a0
+>    __lock_acquire+0x52a/0xb60
+>    lock_acquire+0xa2/0x140
+>    down_read+0x51/0xa0
+>    exit_signals+0x24/0x130
+>    do_exit+0xb5/0xa50
+>    kthread+0xdb/0x100
+>    ret_from_fork+0x7c/0xb0
+
+This is because the kswapd thread is still marked as a reclaimer at the
+time of exit.  But because it is exiting, nobody is actually waiting on
+it to make reclaim progress anymore, and it's nothing but a regular
+thread at this point.  Be tidy and strip it of all its powers
+(PF_MEMALLOC, PF_SWAPWRITE, PF_KSWAPD, and the lockdep reclaim state)
+before returning from the thread function.
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
+Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Cc: Tang Chen <tangchen@cn.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3316,7 +3316,10 @@ static int kswapd(void *p)
+               }
+       }
++      tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+       current->reclaim_state = NULL;
++      lockdep_clear_current_reclaim_state();
++
+       return 0;
+ }
diff --git a/queue-3.14/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch b/queue-3.14/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
new file mode 100644 (file)
index 0000000..05dfef3
--- /dev/null
@@ -0,0 +1,106 @@
+From 675becce15f320337499bc1a9356260409a5ba29 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Wed, 4 Jun 2014 16:07:35 -0700
+Subject: mm: vmscan: do not throttle based on pfmemalloc reserves if node has no ZONE_NORMAL
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 675becce15f320337499bc1a9356260409a5ba29 upstream.
+
+throttle_direct_reclaim() is meant to trigger during swap-over-network,
+during which the min watermark is treated as a pfmemalloc reserve.  It
+throttles on the first node in the zonelist, but this is flawed.
+
+The user-visible impact is that a process running on a CPU whose local
+memory node has no ZONE_NORMAL will stall for prolonged periods of time,
+possibly indefinitely.  This is due to throttle_direct_reclaim() thinking
+the pfmemalloc reserves are depleted when in fact they do not exist on
+that node.
+
+On a NUMA machine running a 32-bit kernel (I know) allocation requests
+from CPUs on node 1 would detect no pfmemalloc reserves and the process
+gets throttled.  This patch adjusts throttling of direct reclaim to
+throttle based on the first node in the zonelist that has a usable
+ZONE_NORMAL or lower zone.
+
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |   43 +++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 37 insertions(+), 6 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2502,10 +2502,17 @@ static bool pfmemalloc_watermark_ok(pg_d
+       for (i = 0; i <= ZONE_NORMAL; i++) {
+               zone = &pgdat->node_zones[i];
++              if (!populated_zone(zone))
++                      continue;
++
+               pfmemalloc_reserve += min_wmark_pages(zone);
+               free_pages += zone_page_state(zone, NR_FREE_PAGES);
+       }
++      /* If there are no reserves (unexpected config) then do not throttle */
++      if (!pfmemalloc_reserve)
++              return true;
++
+       wmark_ok = free_pages > pfmemalloc_reserve / 2;
+       /* kswapd must be awake if processes are being throttled */
+@@ -2530,9 +2537,9 @@ static bool pfmemalloc_watermark_ok(pg_d
+ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
+                                       nodemask_t *nodemask)
+ {
++      struct zoneref *z;
+       struct zone *zone;
+-      int high_zoneidx = gfp_zone(gfp_mask);
+-      pg_data_t *pgdat;
++      pg_data_t *pgdat = NULL;
+       /*
+        * Kernel threads should not be throttled as they may be indirectly
+@@ -2551,10 +2558,34 @@ static bool throttle_direct_reclaim(gfp_
+       if (fatal_signal_pending(current))
+               goto out;
+-      /* Check if the pfmemalloc reserves are ok */
+-      first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
+-      pgdat = zone->zone_pgdat;
+-      if (pfmemalloc_watermark_ok(pgdat))
++      /*
++       * Check if the pfmemalloc reserves are ok by finding the first node
++       * with a usable ZONE_NORMAL or lower zone. The expectation is that
++       * GFP_KERNEL will be required for allocating network buffers when
++       * swapping over the network so ZONE_HIGHMEM is unusable.
++       *
++       * Throttling is based on the first usable node and throttled processes
++       * wait on a queue until kswapd makes progress and wakes them. There
++       * is an affinity then between processes waking up and where reclaim
++       * progress has been made assuming the process wakes on the same node.
++       * More importantly, processes running on remote nodes will not compete
++       * for remote pfmemalloc reserves and processes on different nodes
++       * should make reasonable progress.
++       */
++      for_each_zone_zonelist_nodemask(zone, z, zonelist,
++                                      gfp_mask, nodemask) {
++              if (zone_idx(zone) > ZONE_NORMAL)
++                      continue;
++
++              /* Throttle based on the first usable node */
++              pgdat = zone->zone_pgdat;
++              if (pfmemalloc_watermark_ok(pgdat))
++                      goto out;
++              break;
++      }
++
++      /* If no zone was usable by the allocation flags then do not throttle */
++      if (!pgdat)
+               goto out;
+       /* Account for the throttling */
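A hedged restatement of the throttling criterion in standalone form (struct and names are invented for illustration): only populated zones up to ZONE_NORMAL contribute, an empty reserve means "never throttle", and otherwise throttling kicks in once free pages fall to half the reserve or below.

/* Illustrative stand-alone version of pfmemalloc_watermark_ok(). */
#include <stdbool.h>
#include <stddef.h>

struct zone_info {
	bool populated;
	unsigned long min_wmark;	/* min watermark pages */
	unsigned long free_pages;	/* NR_FREE_PAGES equivalent */
};

/* zones[] covers the node's zones up to and including ZONE_NORMAL. */
static bool watermark_ok(const struct zone_info *zones, size_t n)
{
	unsigned long reserve = 0, free = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		if (!zones[i].populated)
			continue;
		reserve += zones[i].min_wmark;
		free += zones[i].free_pages;
	}

	/* No reserves (unexpected config): do not throttle. */
	if (!reserve)
		return true;

	return free > reserve / 2;
}

int main(void)
{
	struct zone_info zones[] = {
		{ true, 128, 32 },	/* DMA */
		{ true, 1024, 600 },	/* NORMAL */
	};

	return watermark_ok(zones, 2) ? 0 : 1;	/* 632 > 576: ok */
}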
diff --git a/queue-3.14/ptrace-fix-fork-event-messages-across-pid-namespaces.patch b/queue-3.14/ptrace-fix-fork-event-messages-across-pid-namespaces.patch
new file mode 100644 (file)
index 0000000..61511ec
--- /dev/null
@@ -0,0 +1,118 @@
+From 4e52365f279564cef0ddd41db5237f0471381093 Mon Sep 17 00:00:00 2001
+From: Matthew Dempsky <mdempsky@chromium.org>
+Date: Fri, 6 Jun 2014 14:36:42 -0700
+Subject: ptrace: fix fork event messages across pid namespaces
+
+From: Matthew Dempsky <mdempsky@chromium.org>
+
+commit 4e52365f279564cef0ddd41db5237f0471381093 upstream.
+
+When tracing a process in another pid namespace, it's important for fork
+event messages to contain the child's pid as seen from the tracer's pid
+namespace, not the parent's.  Otherwise, the tracer won't be able to
+correlate the fork event with later SIGTRAP signals it receives from the
+child.
+
+We still risk a race condition if a ptracer from a different pid
+namespace attaches after we compute the pid_t value.  However, sending a
+bogus fork event message in this unlikely scenario is still a vast
+improvement over the status quo where we always send bogus fork event
+messages to debuggers in a different pid namespace than the forking
+process.
+
+Signed-off-by: Matthew Dempsky <mdempsky@chromium.org>
+Acked-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Julien Tinnes <jln@chromium.org>
+Cc: Roland McGrath <mcgrathr@chromium.org>
+Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/ptrace.h |   32 ++++++++++++++++++++++++++++++++
+ kernel/fork.c          |   10 +++++++---
+ 2 files changed, 39 insertions(+), 3 deletions(-)
+
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -5,6 +5,7 @@
+ #include <linux/sched.h>              /* For struct task_struct.  */
+ #include <linux/err.h>                        /* for IS_ERR_VALUE */
+ #include <linux/bug.h>                        /* For BUG_ON.  */
++#include <linux/pid_namespace.h>      /* For task_active_pid_ns.  */
+ #include <uapi/linux/ptrace.h>
+ /*
+@@ -129,6 +130,37 @@ static inline void ptrace_event(int even
+ }
+ /**
++ * ptrace_event_pid - possibly stop for a ptrace event notification
++ * @event:    %PTRACE_EVENT_* value to report
++ * @pid:      process identifier for %PTRACE_GETEVENTMSG to return
++ *
++ * Check whether @event is enabled and, if so, report @event and @pid
++ * to the ptrace parent.  @pid is reported as the pid_t seen from the
++ * the ptrace parent's pid namespace.
++ *
++ * Called without locks.
++ */
++static inline void ptrace_event_pid(int event, struct pid *pid)
++{
++      /*
++       * FIXME: There's a potential race if a ptracer in a different pid
++       * namespace than parent attaches between computing message below and
++       * when we acquire tasklist_lock in ptrace_stop().  If this happens,
++       * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
++       */
++      unsigned long message = 0;
++      struct pid_namespace *ns;
++
++      rcu_read_lock();
++      ns = task_active_pid_ns(rcu_dereference(current->parent));
++      if (ns)
++              message = pid_nr_ns(pid, ns);
++      rcu_read_unlock();
++
++      ptrace_event(event, message);
++}
++
++/**
+  * ptrace_init_task - initialize ptrace state for a new child
+  * @child:            new child task
+  * @ptrace:           true if child should be ptrace'd by parent's tracer
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1604,10 +1604,12 @@ long do_fork(unsigned long clone_flags,
+        */
+       if (!IS_ERR(p)) {
+               struct completion vfork;
++              struct pid *pid;
+               trace_sched_process_fork(current, p);
+-              nr = task_pid_vnr(p);
++              pid = get_task_pid(p, PIDTYPE_PID);
++              nr = pid_vnr(pid);
+               if (clone_flags & CLONE_PARENT_SETTID)
+                       put_user(nr, parent_tidptr);
+@@ -1622,12 +1624,14 @@ long do_fork(unsigned long clone_flags,
+               /* forking complete and child started to run, tell ptracer */
+               if (unlikely(trace))
+-                      ptrace_event(trace, nr);
++                      ptrace_event_pid(trace, pid);
+               if (clone_flags & CLONE_VFORK) {
+                       if (!wait_for_vfork_done(p, &vfork))
+-                              ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
++                              ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
+               }
++
++              put_pid(pid);
+       } else {
+               nr = PTR_ERR(p);
+       }
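On the tracer side, the value being fixed is what PTRACE_GETEVENTMSG hands back at a fork stop. A hedged sketch of a tracer consuming it follows (error handling omitted; trace_forks() is an invented helper, and the PTRACE_O_*/PTRACE_EVENT_* constants are assumed to come from the libc ptrace header).

/*
 * Sketch: observe a fork event and read the reported child pid.
 * With this patch the pid is valid in the tracer's pid namespace.
 */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

static void trace_forks(pid_t pid)
{
	unsigned long msg;
	int status;

	ptrace(PTRACE_ATTACH, pid, 0, 0);
	waitpid(pid, &status, 0);
	ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACEFORK);
	ptrace(PTRACE_CONT, pid, 0, 0);

	waitpid(pid, &status, 0);
	if (WIFSTOPPED(status) &&
	    status >> 8 == (SIGTRAP | (PTRACE_EVENT_FORK << 8))) {
		ptrace(PTRACE_GETEVENTMSG, pid, 0, &msg);
		printf("fork event: child pid %lu\n", msg);
	}
	ptrace(PTRACE_DETACH, pid, 0, 0);
}

int main(int argc, char **argv)
{
	if (argc == 2)
		trace_forks((pid_t)atoi(argv[1]));
	return 0;
}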
diff --git a/queue-3.14/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch b/queue-3.14/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch
new file mode 100644 (file)
index 0000000..4b2f08c
--- /dev/null
@@ -0,0 +1,63 @@
+From 993072ee67aa179c48c85eb19869804e68887d86 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Mon, 26 May 2014 21:55:08 +0200
+Subject: s390/lowcore: reserve 96 bytes for IRB in lowcore
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit 993072ee67aa179c48c85eb19869804e68887d86 upstream.
+
+The IRB might be 96 bytes if the extended-I/O-measurement facility is
+used. This feature is currently not used by Linux, but struct irb
+already has the emw defined. So let's make the irb in lowcore match the
+size of the internal data structure to be future proof.
+We also have to add padding to correctly align the paste field.
+
+The bigger irb field also circumvents a bug in some QEMU versions that
+always write the emw field on test subchannel and therefore destroy the
+paste definitions of this CPU. Running under these QEMU versions broke
+some timing functions in the VDSO and all users of these functions,
+e.g. some JREs.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
+Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/lowcore.h |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/s390/include/asm/lowcore.h
++++ b/arch/s390/include/asm/lowcore.h
+@@ -142,9 +142,9 @@ struct _lowcore {
+       __u8    pad_0x02fc[0x0300-0x02fc];      /* 0x02fc */
+       /* Interrupt response block */
+-      __u8    irb[64];                        /* 0x0300 */
++      __u8    irb[96];                        /* 0x0300 */
+-      __u8    pad_0x0340[0x0e00-0x0340];      /* 0x0340 */
++      __u8    pad_0x0360[0x0e00-0x0360];      /* 0x0360 */
+       /*
+        * 0xe00 contains the address of the IPL Parameter Information
+@@ -288,12 +288,13 @@ struct _lowcore {
+       __u8    pad_0x03a0[0x0400-0x03a0];      /* 0x03a0 */
+       /* Interrupt response block. */
+-      __u8    irb[64];                        /* 0x0400 */
++      __u8    irb[96];                        /* 0x0400 */
++      __u8    pad_0x0460[0x0480-0x0460];      /* 0x0460 */
+       /* Per cpu primary space access list */
+-      __u32   paste[16];                      /* 0x0440 */
++      __u32   paste[16];                      /* 0x0480 */
+-      __u8    pad_0x0480[0x0e00-0x0480];      /* 0x0480 */
++      __u8    pad_0x04c0[0x0e00-0x04c0];      /* 0x04c0 */
+       /*
+        * 0xe00 contains the address of the IPL Parameter Information
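Layouts like this, with hand-maintained offset comments and pad_ fields, are easy to break silently; below is a hedged sketch of guarding such a fixed layout with compile-time asserts (the struct and offsets here are invented, not the s390 lowcore; requires C11).

/*
 * Sketch: assert fixed field offsets in a hand-padded structure at
 * compile time, so a resized field cannot silently shift its neighbors.
 */
#include <stddef.h>

struct fixed_layout {
	unsigned char head[0x300];		/* 0x0000 */
	unsigned char irb[96];			/* 0x0300 */
	unsigned char pad[0x0e00 - 0x360];	/* 0x0360 */
};

_Static_assert(offsetof(struct fixed_layout, irb) == 0x300,
	       "irb must stay at 0x300");
_Static_assert(sizeof(struct fixed_layout) == 0x0e00,
	       "layout must end at 0xe00");

int main(void) { return 0; }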
diff --git a/queue-3.14/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch b/queue-3.14/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch
new file mode 100644 (file)
index 0000000..1797a63
--- /dev/null
@@ -0,0 +1,32 @@
+From b6f4296279ab3ada554d993d12844272fd86b36a Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Tue, 20 May 2014 17:21:35 +0200
+Subject: s390/time: cast tv_nsec to u64 prior to shift in update_vsyscall
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit b6f4296279ab3ada554d993d12844272fd86b36a upstream.
+
+Analogous to git commit 28b92e09e25bdc0ae864b22eacf195a74f861389,
+first cast tk->wall_to_monotonic.tv_nsec to u64 before doing
+the shift with tk->shift to avoid losing relevant bits on a
+32-bit kernel.
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/time.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/time.c
++++ b/arch/s390/kernel/time.c
+@@ -226,7 +226,7 @@ void update_vsyscall(struct timekeeper *
+       vdso_data->wtom_clock_sec =
+               tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+       vdso_data->wtom_clock_nsec = tk->xtime_nsec +
+-              + (tk->wall_to_monotonic.tv_nsec << tk->shift);
++              + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
+       nsecps = (u64) NSEC_PER_SEC << tk->shift;
+       while (vdso_data->wtom_clock_nsec >= nsecps) {
+               vdso_data->wtom_clock_nsec -= nsecps;
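The underlying C pitfall can be shown in a few lines (a hedged standalone demo, not kernel code): a 32-bit left operand is shifted in 32-bit arithmetic, so the high bits are gone before the result is widened to 64 bits.

/* Demonstrates why the cast must come before the shift. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t nsec = 999999999;	/* ~30 significant bits */
	unsigned shift = 8;

	uint64_t wrong = nsec << shift;		  /* shifted in 32 bits */
	uint64_t right = (uint64_t)nsec << shift; /* all bits preserved */

	printf("wrong=%llu right=%llu\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}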
index 2059f0fb22b2d2385b90d52a17313c63b07128ce..8f6ad0d513b81d500d123599cfcdb5a147fc966a 100644 (file)
@@ -18,3 +18,29 @@ usb-dwc3-gadget-clear-stall-when-disabling-endpoint.patch
 arm-omap-replace-checks-for-config_usb_gadget_omap.patch
 usb-ehci-avoid-bios-handover-on-the-hasee-e200.patch
 usb-option-fix-runtime-pm-handling.patch
+hugetlb-restrict-hugepage_migration_support-to-x86_64.patch
+kthread-fix-return-value-of-kthread_create-upon-sigkill.patch
+mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
+mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch
+mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch
+mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch
+mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch
+mm-fix-sleeping-function-warning-from-__put_anon_vma.patch
+hid-core-fix-validation-of-report-id-0.patch
+mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch
+ptrace-fix-fork-event-messages-across-pid-namespaces.patch
+arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch
+arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch
+idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch
+s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch
+s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch
+ext4-fix-data-integrity-sync-in-ordered-mode.patch
+ext4-fix-zeroing-of-page-during-writeback.patch
+ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch
+matroxfb-perform-a-dummy-read-of-m_status.patch
+usb-usb_wwan-fix-urb-leak-in-write-error-path.patch
+usb-usb_wwan-fix-race-between-write-and-resume.patch
+usb-usb_wwan-fix-write-and-suspend-race.patch
+usb-usb_wwan-fix-urb-leak-at-shutdown.patch
+usb-usb_wwan-fix-potential-null-deref-at-resume.patch
+usb-usb_wwan-fix-potential-blocked-i-o-after-resume.patch
diff --git a/queue-3.14/usb-usb_wwan-fix-potential-blocked-i-o-after-resume.patch b/queue-3.14/usb-usb_wwan-fix-potential-blocked-i-o-after-resume.patch
new file mode 100644 (file)
index 0000000..54746d9
--- /dev/null
@@ -0,0 +1,107 @@
+From fb7ad4f93d9f0f7d49beda32f5e7becb94b29a4d Mon Sep 17 00:00:00 2001
+From: Johan Hovold <jhovold@gmail.com>
+Date: Mon, 26 May 2014 19:23:18 +0200
+Subject: USB: usb_wwan: fix potential blocked I/O after resume
+
+From: Johan Hovold <jhovold@gmail.com>
+
+commit fb7ad4f93d9f0f7d49beda32f5e7becb94b29a4d upstream.
+
+Keep trying to submit urbs rather than bailing out on the first
+read-urb submission error, which would also prevent I/O for any
+further ports from being resumed.
+
+Instead, keep an error count for all types of failed submissions and
+let USB core know that something went wrong.
+
+Also make sure to always clear the suspended flag. Currently a failed
+read-urb submission would prevent cached writes as well as any
+subsequent writes from being submitted until the next suspend-resume
+cycle, something which may never happen.
+
+Note that USB core currently only logs an error if an interface resume
+failed.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: Johan Hovold <jhovold@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/serial/usb_wwan.c |   25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -619,12 +619,12 @@ int usb_wwan_suspend(struct usb_serial *
+ }
+ EXPORT_SYMBOL(usb_wwan_suspend);
+-static void play_delayed(struct usb_serial_port *port)
++static int play_delayed(struct usb_serial_port *port)
+ {
+       struct usb_wwan_intf_private *data;
+       struct usb_wwan_port_private *portdata;
+       struct urb *urb;
+-      int err;
++      int err = 0;
+       portdata = usb_get_serial_port_data(port);
+       data = port->serial->private;
+@@ -641,6 +641,8 @@ static void play_delayed(struct usb_seri
+                       break;
+               }
+       }
++
++      return err;
+ }
+ int usb_wwan_resume(struct usb_serial *serial)
+@@ -650,7 +652,8 @@ int usb_wwan_resume(struct usb_serial *s
+       struct usb_wwan_intf_private *intfdata = serial->private;
+       struct usb_wwan_port_private *portdata;
+       struct urb *urb;
+-      int err = 0;
++      int err;
++      int err_count = 0;
+       spin_lock_irq(&intfdata->susp_lock);
+       for (i = 0; i < serial->num_ports; i++) {
+@@ -669,25 +672,31 @@ int usb_wwan_resume(struct usb_serial *s
+                               dev_err(&port->dev,
+                                       "%s: submit int urb failed: %d\n",
+                                       __func__, err);
++                              err_count++;
+                       }
+               }
++              err = play_delayed(port);
++              if (err)
++                      err_count++;
++
+               for (j = 0; j < N_IN_URB; j++) {
+                       urb = portdata->in_urbs[j];
+                       err = usb_submit_urb(urb, GFP_ATOMIC);
+                       if (err < 0) {
+                               dev_err(&port->dev, "%s: Error %d for bulk URB %d\n",
+                                       __func__, err, i);
+-                              spin_unlock_irq(&intfdata->susp_lock);
+-                              goto err_out;
++                              err_count++;
+                       }
+               }
+-              play_delayed(port);
+       }
+       intfdata->suspended = 0;
+       spin_unlock_irq(&intfdata->susp_lock);
+-err_out:
+-      return err;
++
++      if (err_count)
++              return -EIO;
++
++      return 0;
+ }
+ EXPORT_SYMBOL(usb_wwan_resume);
+ #endif
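The shape of the fix above generalizes: iterate over everything, count failures instead of returning on the first one, and report a single error afterwards. A hedged generic sketch follows (types and helpers invented).

#include <errno.h>
#include <stddef.h>

struct port { int id; };

static int resume_one(struct port *p) { (void)p; return 0; }

static int resume_all(struct port *ports, size_t n)
{
	size_t i;
	int err_count = 0;

	for (i = 0; i < n; i++)
		if (resume_one(&ports[i]) < 0)
			err_count++;	/* keep resuming remaining ports */

	return err_count ? -EIO : 0;	/* tell the caller something failed */
}

int main(void)
{
	struct port ports[2] = { { 0 }, { 1 } };

	return resume_all(ports, 2) ? 1 : 0;
}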
diff --git a/queue-3.14/usb-usb_wwan-fix-potential-null-deref-at-resume.patch b/queue-3.14/usb-usb_wwan-fix-potential-null-deref-at-resume.patch
new file mode 100644 (file)
index 0000000..e884e9d
--- /dev/null
@@ -0,0 +1,113 @@
+From 9096f1fbba916c2e052651e9de82fcfb98d4bea7 Mon Sep 17 00:00:00 2001
+From: Johan Hovold <jhovold@gmail.com>
+Date: Mon, 26 May 2014 19:23:17 +0200
+Subject: USB: usb_wwan: fix potential NULL-deref at resume
+
+From: Johan Hovold <jhovold@gmail.com>
+
+commit 9096f1fbba916c2e052651e9de82fcfb98d4bea7 upstream.
+
+The interrupt urb was submitted unconditionally at resume, something
+which could lead to a NULL-pointer dereference in the urb completion
+handler, as resume may be called after the port and its port data are
+gone.
+
+Fix this by making sure the interrupt urb is only submitted and active
+when the port is open.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: Johan Hovold <jhovold@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/serial/usb_wwan.c |   43 ++++++++++++++++++------------------------
+ 1 file changed, 19 insertions(+), 24 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -388,6 +388,14 @@ int usb_wwan_open(struct tty_struct *tty
+       portdata = usb_get_serial_port_data(port);
+       intfdata = serial->private;
++      if (port->interrupt_in_urb) {
++              err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
++              if (err) {
++                      dev_dbg(&port->dev, "%s: submit int urb failed: %d\n",
++                              __func__, err);
++              }
++      }
++
+       /* Start reading from the IN endpoint */
+       for (i = 0; i < N_IN_URB; i++) {
+               urb = portdata->in_urbs[i];
+@@ -454,6 +462,7 @@ void usb_wwan_close(struct usb_serial_po
+               usb_kill_urb(portdata->in_urbs[i]);
+       for (i = 0; i < N_OUT_URB; i++)
+               usb_kill_urb(portdata->out_urbs[i]);
++      usb_kill_urb(port->interrupt_in_urb);
+       /* balancing - important as an error cannot be handled*/
+       usb_autopm_get_interface_no_resume(serial->interface);
+@@ -487,7 +496,6 @@ int usb_wwan_port_probe(struct usb_seria
+       struct usb_wwan_port_private *portdata;
+       struct urb *urb;
+       u8 *buffer;
+-      int err;
+       int i;
+       if (!port->bulk_in_size || !port->bulk_out_size)
+@@ -527,13 +535,6 @@ int usb_wwan_port_probe(struct usb_seria
+       usb_set_serial_port_data(port, portdata);
+-      if (port->interrupt_in_urb) {
+-              err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
+-              if (err)
+-                      dev_dbg(&port->dev, "%s: submit irq_in urb failed %d\n",
+-                              __func__, err);
+-      }
+-
+       return 0;
+ bail_out_error2:
+@@ -651,22 +652,6 @@ int usb_wwan_resume(struct usb_serial *s
+       struct urb *urb;
+       int err = 0;
+-      /* get the interrupt URBs resubmitted unconditionally */
+-      for (i = 0; i < serial->num_ports; i++) {
+-              port = serial->port[i];
+-              if (!port->interrupt_in_urb) {
+-                      dev_dbg(&port->dev, "%s: No interrupt URB for port\n", __func__);
+-                      continue;
+-              }
+-              err = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO);
+-              dev_dbg(&port->dev, "Submitted interrupt URB for port (result %d)\n", err);
+-              if (err < 0) {
+-                      dev_err(&port->dev, "%s: Error %d for interrupt URB\n",
+-                              __func__, err);
+-                      goto err_out;
+-              }
+-      }
+-
+       spin_lock_irq(&intfdata->susp_lock);
+       for (i = 0; i < serial->num_ports; i++) {
+               /* walk all ports */
+@@ -677,6 +662,16 @@ int usb_wwan_resume(struct usb_serial *s
+               if (!portdata || !portdata->opened)
+                       continue;
++              if (port->interrupt_in_urb) {
++                      err = usb_submit_urb(port->interrupt_in_urb,
++                                      GFP_ATOMIC);
++                      if (err) {
++                              dev_err(&port->dev,
++                                      "%s: submit int urb failed: %d\n",
++                                      __func__, err);
++                      }
++              }
++
+               for (j = 0; j < N_IN_URB; j++) {
+                       urb = portdata->in_urbs[j];
+                       err = usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/queue-3.14/usb-usb_wwan-fix-race-between-write-and-resume.patch b/queue-3.14/usb-usb_wwan-fix-race-between-write-and-resume.patch
new file mode 100644 (file)
index 0000000..ed40379
--- /dev/null
@@ -0,0 +1,65 @@
+From d9e93c08d8d985e5ef89436ebc9f4aad7e31559f Mon Sep 17 00:00:00 2001
+From: xiao jin <jin.xiao@intel.com>
+Date: Mon, 26 May 2014 19:23:14 +0200
+Subject: USB: usb_wwan: fix race between write and resume
+
+From: xiao jin <jin.xiao@intel.com>
+
+commit d9e93c08d8d985e5ef89436ebc9f4aad7e31559f upstream.
+
+There is a race between write and resume: usb_wwan_resume() runs
+play_delayed() and drops the spinlock before intfdata->suspended has
+been set to zero. If usb_wwan_write() is called at this point, it
+anchors the urb on the delayed list, and while resume keeps running
+the delayed urb gets no chance to be submitted until the next resume.
+If the next resume is far away, the tty will block in
+tty_wait_until_sent() in the meantime. The race can also lead to
+writes being reordered.
+
+This patch puts play_delayed() and the clearing of intfdata->suspended
+inside the same spinlock-protected section to avoid the write race
+during resume.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: xiao jin <jin.xiao@intel.com>
+Signed-off-by: Zhang, Qi1 <qi1.zhang@intel.com>
+Reviewed-by: David Cohen <david.a.cohen@linux.intel.com>
+Signed-off-by: Johan Hovold <jhovold@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/serial/usb_wwan.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -660,17 +660,15 @@ int usb_wwan_resume(struct usb_serial *s
+               }
+       }
++      spin_lock_irq(&intfdata->susp_lock);
+       for (i = 0; i < serial->num_ports; i++) {
+               /* walk all ports */
+               port = serial->port[i];
+               portdata = usb_get_serial_port_data(port);
+               /* skip closed ports */
+-              spin_lock_irq(&intfdata->susp_lock);
+-              if (!portdata || !portdata->opened) {
+-                      spin_unlock_irq(&intfdata->susp_lock);
++              if (!portdata || !portdata->opened)
+                       continue;
+-              }
+               for (j = 0; j < N_IN_URB; j++) {
+                       urb = portdata->in_urbs[j];
+@@ -683,9 +681,7 @@ int usb_wwan_resume(struct usb_serial *s
+                       }
+               }
+               play_delayed(port);
+-              spin_unlock_irq(&intfdata->susp_lock);
+       }
+-      spin_lock_irq(&intfdata->susp_lock);
+       intfdata->suspended = 0;
+       spin_unlock_irq(&intfdata->susp_lock);
+ err_out:
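The invariant being restored is that a writer observing suspended == 0 must also observe an already-flushed delayed queue, so the flush and the flag clear have to sit in one critical section. A hedged, generic sketch of the pattern follows (all names invented; a pthread mutex stands in for susp_lock).

/*
 * Generic sketch of the pattern restored by this patch: flush the
 * delayed queue and clear `suspended` in one critical section, so a
 * writer can never defer behind a resume that already flushed.
 */
#include <pthread.h>
#include <stdbool.h>

struct dev_state {
	pthread_mutex_t lock;
	bool suspended;
	int delayed;		/* count of deferred writes */
};

static void submit_now(struct dev_state *d, int n) { (void)d; (void)n; }

static void dev_write(struct dev_state *d)
{
	pthread_mutex_lock(&d->lock);
	if (d->suspended)
		d->delayed++;		/* defer until resume */
	else
		submit_now(d, 1);
	pthread_mutex_unlock(&d->lock);
}

static void dev_resume(struct dev_state *d)
{
	pthread_mutex_lock(&d->lock);
	submit_now(d, d->delayed);	/* play_delayed() equivalent */
	d->delayed = 0;
	d->suspended = false;		/* cleared under the same lock */
	pthread_mutex_unlock(&d->lock);
}

int main(void)
{
	struct dev_state d = { PTHREAD_MUTEX_INITIALIZER, true, 0 };

	dev_write(&d);	/* deferred: device suspended */
	dev_resume(&d);	/* flush + clear, atomic w.r.t. writers */
	dev_write(&d);	/* submitted immediately */
	return 0;
}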
diff --git a/queue-3.14/usb-usb_wwan-fix-urb-leak-at-shutdown.patch b/queue-3.14/usb-usb_wwan-fix-urb-leak-at-shutdown.patch
new file mode 100644 (file)
index 0000000..1b45f06
--- /dev/null
@@ -0,0 +1,93 @@
+From 79eed03e77d481b55d85d1cfe5a1636a0d3897fd Mon Sep 17 00:00:00 2001
+From: Johan Hovold <jhovold@gmail.com>
+Date: Mon, 26 May 2014 19:23:16 +0200
+Subject: USB: usb_wwan: fix urb leak at shutdown
+
+From: Johan Hovold <jhovold@gmail.com>
+
+commit 79eed03e77d481b55d85d1cfe5a1636a0d3897fd upstream.
+
+The delayed-write queue was never emptied at shutdown (close), something
+which could lead to leaked urbs if the port is closed before being
+runtime resumed due to a write.
+
+When this happens the output buffer would not drain on close
+(closing_wait timeout), and after consecutive opens, writes could be
+corrupted with previously buffered data, transferred with reduced
+throughput or completely blocked.
+
+Note that unbusy_queued_urb() was simply moved out of CONFIG_PM.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: Johan Hovold <jhovold@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/serial/usb_wwan.c |   34 ++++++++++++++++++++++------------
+ 1 file changed, 22 insertions(+), 12 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -414,12 +414,26 @@ int usb_wwan_open(struct tty_struct *tty
+ }
+ EXPORT_SYMBOL(usb_wwan_open);
++static void unbusy_queued_urb(struct urb *urb,
++                                      struct usb_wwan_port_private *portdata)
++{
++      int i;
++
++      for (i = 0; i < N_OUT_URB; i++) {
++              if (urb == portdata->out_urbs[i]) {
++                      clear_bit(i, &portdata->out_busy);
++                      break;
++              }
++      }
++}
++
+ void usb_wwan_close(struct usb_serial_port *port)
+ {
+       int i;
+       struct usb_serial *serial = port->serial;
+       struct usb_wwan_port_private *portdata;
+       struct usb_wwan_intf_private *intfdata = port->serial->private;
++      struct urb *urb;
+       portdata = usb_get_serial_port_data(port);
+@@ -428,6 +442,14 @@ void usb_wwan_close(struct usb_serial_po
+       portdata->opened = 0;
+       spin_unlock_irq(&intfdata->susp_lock);
++      for (;;) {
++              urb = usb_get_from_anchor(&portdata->delayed);
++              if (!urb)
++                      break;
++              unbusy_queued_urb(urb, portdata);
++              usb_autopm_put_interface_async(serial->interface);
++      }
++
+       for (i = 0; i < N_IN_URB; i++)
+               usb_kill_urb(portdata->in_urbs[i]);
+       for (i = 0; i < N_OUT_URB; i++)
+@@ -596,18 +618,6 @@ int usb_wwan_suspend(struct usb_serial *
+ }
+ EXPORT_SYMBOL(usb_wwan_suspend);
+-static void unbusy_queued_urb(struct urb *urb, struct usb_wwan_port_private *portdata)
+-{
+-      int i;
+-
+-      for (i = 0; i < N_OUT_URB; i++) {
+-              if (urb == portdata->out_urbs[i]) {
+-                      clear_bit(i, &portdata->out_busy);
+-                      break;
+-              }
+-      }
+-}
+-
+ static void play_delayed(struct usb_serial_port *port)
+ {
+       struct usb_wwan_intf_private *data;
diff --git a/queue-3.14/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch b/queue-3.14/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch
new file mode 100644 (file)
index 0000000..37f60bf
--- /dev/null
@@ -0,0 +1,48 @@
+From db0904737947d509844e171c9863ecc5b4534005 Mon Sep 17 00:00:00 2001
+From: xiao jin <jin.xiao@intel.com>
+Date: Mon, 26 May 2014 19:23:13 +0200
+Subject: USB: usb_wwan: fix urb leak in write error path
+
+From: xiao jin <jin.xiao@intel.com>
+
+commit db0904737947d509844e171c9863ecc5b4534005 upstream.
+
+When USB serial is enabled for modem data, the tty sometimes blocks
+in tty_wait_until_sent() because portdata->out_busy is set and never
+gets a chance to be cleared.
+
+There is a bug in the write error path: usb_wwan_write() first sets
+portdata->out_busy and then fails the asynchronous autopm request.
+No out urb is submitted and usb_wwan_outdat_callback() is never
+invoked for this write, so portdata->out_busy can never be cleared.
+
+This patch clears portdata->out_busy if the asynchronous autopm
+request in usb_wwan_write() fails.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: xiao jin <jin.xiao@intel.com>
+Signed-off-by: Zhang, Qi1 <qi1.zhang@intel.com>
+Reviewed-by: David Cohen <david.a.cohen@linux.intel.com>
+Signed-off-by: Johan Hovold <jhovold@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/serial/usb_wwan.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -228,8 +228,10 @@ int usb_wwan_write(struct tty_struct *tt
+                       usb_pipeendpoint(this_urb->pipe), i);
+               err = usb_autopm_get_interface_async(port->serial->interface);
+-              if (err < 0)
++              if (err < 0) {
++                      clear_bit(i, &portdata->out_busy);
+                       break;
++              }
+               /* send the data */
+               memcpy(this_urb->transfer_buffer, buf, todo);
diff --git a/queue-3.14/usb-usb_wwan-fix-write-and-suspend-race.patch b/queue-3.14/usb-usb_wwan-fix-write-and-suspend-race.patch
new file mode 100644 (file)
index 0000000..c7f3a48
--- /dev/null
@@ -0,0 +1,55 @@
+From 170fad9e22df0063eba0701adb966786d7a4ec5a Mon Sep 17 00:00:00 2001
+From: Johan Hovold <jhovold@gmail.com>
+Date: Mon, 26 May 2014 19:23:15 +0200
+Subject: USB: usb_wwan: fix write and suspend race
+
+From: Johan Hovold <jhovold@gmail.com>
+
+commit 170fad9e22df0063eba0701adb966786d7a4ec5a upstream.
+
+Fix a race between write() and suspend() which could lead to writes
+being dropped (or I/O while suspended) if the device is runtime
+suspended while a write request is being processed.
+
+Specifically, suspend() releases the susp_lock after determining the
+device is idle but before setting the suspended flag, thus leaving a
+window where a concurrent write() can submit an urb.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: Johan Hovold <jhovold@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/serial/usb_wwan.c |   13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -579,20 +579,17 @@ static void stop_read_write_urbs(struct
+ int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message)
+ {
+       struct usb_wwan_intf_private *intfdata = serial->private;
+-      int b;
++      spin_lock_irq(&intfdata->susp_lock);
+       if (PMSG_IS_AUTO(message)) {
+-              spin_lock_irq(&intfdata->susp_lock);
+-              b = intfdata->in_flight;
+-              spin_unlock_irq(&intfdata->susp_lock);
+-
+-              if (b)
++              if (intfdata->in_flight) {
++                      spin_unlock_irq(&intfdata->susp_lock);
+                       return -EBUSY;
++              }
+       }
+-
+-      spin_lock_irq(&intfdata->susp_lock);
+       intfdata->suspended = 1;
+       spin_unlock_irq(&intfdata->susp_lock);
++
+       stop_read_write_urbs(serial);
+       return 0;