From 19b7a95795fabfa7b253991c25c2bcbe7d561cc5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 27 Jun 2014 15:02:34 -0700 Subject: [PATCH] 3.15-stable patches added patches: arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch ext4-fix-data-integrity-sync-in-ordered-mode.patch ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch ext4-fix-zero_range-test-failure-in-data-journalling.patch ext4-fix-zeroing-of-page-during-writeback.patch hid-core-fix-validation-of-report-id-0.patch hugetlb-restrict-hugepage_migration_support-to-x86_64.patch idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch kthread-fix-return-value-of-kthread_create-upon-sigkill.patch matroxfb-perform-a-dummy-read-of-m_status.patch memcg-do-not-hang-on-oom-when-killed-by-userspace-oom-access-to-memory-reserves.patch mm-fix-sleeping-function-warning-from-__put_anon_vma.patch mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch ptrace-fix-fork-event-messages-across-pid-namespaces.patch regulator-s2mpa01-fix-accidental-enable-of-buck4-ramp-delay.patch regulator-s2mpa01-use-correct-register-for-buck1-ramp-delay.patch regulator-s2mps11-fix-accidental-enable-of-buck6-ramp-delay.patch s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch tools-vm-page-types.c-catch-sigbus-if-raced-with-truncate.patch usb-usb_wwan-fix-race-between-write-and-resume.patch usb-usb_wwan-fix-urb-leak-at-shutdown.patch usb-usb_wwan-fix-urb-leak-in-write-error-path.patch usb-usb_wwan-fix-write-and-suspend-race.patch zram-correct-offset-usage-in-zram_bio_discard.patch --- ...assing-kernel-pointer-to-regset-code.patch | 51 +++ ...-in-prstatus-of-aarch32-process-core.patch | 75 +++++ ...-data-integrity-sync-in-ordered-mode.patch | 178 +++++++++++ ...-assert-in-ext4_mb_normalize_request.patch | 32 ++ ...nge-test-failure-in-data-journalling.patch | 39 +++ ...fix-zeroing-of-page-during-writeback.patch | 70 ++++ ...d-core-fix-validation-of-report-id-0.patch | 54 ++++ ...hugepage_migration_support-to-x86_64.patch | 299 ++++++++++++++++++ ...mum-id-calculation-at-maximum-height.patch | 77 +++++ ...value-of-kthread_create-upon-sigkill.patch | 45 +++ ...xfb-perform-a-dummy-read-of-m_status.patch | 40 +++ ...rspace-oom-access-to-memory-reserves.patch | 88 ++++++ ...function-warning-from-__put_anon_vma.patch | 66 ++++ ...ver-processes-for-mf_action_required.patch | 123 +++++++ ...-right-signal-code-to-correct-thread.patch | 53 ++++ ...hread-to-handle-sigbus-bus_mceerr_ao.patch | 162 ++++++++++ ...cesses-for-get-set-pageblock-bitmaps.patch | 237 ++++++++++++++ ...pecial-reclaim-powers-before-exiting.patch | 94 ++++++ ...-reserves-if-node-has-no-zone_normal.patch | 106 +++++++ ...event-messages-across-pid-namespaces.patch | 118 +++++++ ...ccidental-enable-of-buck4-ramp-delay.patch | 49 +++ ...orrect-register-for-buck1-ramp-delay.patch | 36 +++ ...ccidental-enable-of-buck6-ramp-delay.patch | 50 +++ 
...-reserve-96-bytes-for-irb-in-lowcore.patch | 63 ++++ ...64-prior-to-shift-in-update_vsyscall.patch | 32 ++ queue-3.15/series | 31 ++ ...-catch-sigbus-if-raced-with-truncate.patch | 113 +++++++ ...an-fix-race-between-write-and-resume.patch | 65 ++++ ...sb-usb_wwan-fix-urb-leak-at-shutdown.patch | 93 ++++++ ...wan-fix-urb-leak-in-write-error-path.patch | 48 +++ ...-usb_wwan-fix-write-and-suspend-race.patch | 55 ++++ ...ect-offset-usage-in-zram_bio_discard.patch | 54 ++++ 32 files changed, 2696 insertions(+) create mode 100644 queue-3.15/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch create mode 100644 queue-3.15/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch create mode 100644 queue-3.15/ext4-fix-data-integrity-sync-in-ordered-mode.patch create mode 100644 queue-3.15/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch create mode 100644 queue-3.15/ext4-fix-zero_range-test-failure-in-data-journalling.patch create mode 100644 queue-3.15/ext4-fix-zeroing-of-page-during-writeback.patch create mode 100644 queue-3.15/hid-core-fix-validation-of-report-id-0.patch create mode 100644 queue-3.15/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch create mode 100644 queue-3.15/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch create mode 100644 queue-3.15/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch create mode 100644 queue-3.15/matroxfb-perform-a-dummy-read-of-m_status.patch create mode 100644 queue-3.15/memcg-do-not-hang-on-oom-when-killed-by-userspace-oom-access-to-memory-reserves.patch create mode 100644 queue-3.15/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch create mode 100644 queue-3.15/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch create mode 100644 queue-3.15/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch create mode 100644 queue-3.15/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch create mode 100644 queue-3.15/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch create mode 100644 queue-3.15/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch create mode 100644 queue-3.15/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch create mode 100644 queue-3.15/ptrace-fix-fork-event-messages-across-pid-namespaces.patch create mode 100644 queue-3.15/regulator-s2mpa01-fix-accidental-enable-of-buck4-ramp-delay.patch create mode 100644 queue-3.15/regulator-s2mpa01-use-correct-register-for-buck1-ramp-delay.patch create mode 100644 queue-3.15/regulator-s2mps11-fix-accidental-enable-of-buck6-ramp-delay.patch create mode 100644 queue-3.15/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch create mode 100644 queue-3.15/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch create mode 100644 queue-3.15/tools-vm-page-types.c-catch-sigbus-if-raced-with-truncate.patch create mode 100644 queue-3.15/usb-usb_wwan-fix-race-between-write-and-resume.patch create mode 100644 queue-3.15/usb-usb_wwan-fix-urb-leak-at-shutdown.patch create mode 100644 queue-3.15/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch create mode 100644 queue-3.15/usb-usb_wwan-fix-write-and-suspend-race.patch create mode 100644 queue-3.15/zram-correct-offset-usage-in-zram_bio_discard.patch diff --git a/queue-3.15/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch 
b/queue-3.15/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch new file mode 100644 index 00000000000..66156594948 --- /dev/null +++ b/queue-3.15/arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch @@ -0,0 +1,51 @@ +From c168870704bcde6bb63d05f7882b620dd3985a46 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Mon, 2 Jun 2014 11:47:23 +0100 +Subject: arm64: ptrace: change fs when passing kernel pointer to regset code + +From: Will Deacon + +commit c168870704bcde6bb63d05f7882b620dd3985a46 upstream. + +Our compat PTRACE_POKEUSR implementation simply passes the user data to +regset_copy_from_user after some simple range checking. Unfortunately, +the data in question has already been copied to the kernel stack by this +point, so the subsequent access_ok check fails and the ptrace request +returns -EFAULT. This causes problems tracing fork() with older versions +of strace. + +This patch briefly changes the fs to KERNEL_DS, so that the access_ok +check passes even with a kernel address. + +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/ptrace.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/arm64/kernel/ptrace.c ++++ b/arch/arm64/kernel/ptrace.c +@@ -821,6 +821,7 @@ static int compat_ptrace_write_user(stru + compat_ulong_t val) + { + int ret; ++ mm_segment_t old_fs = get_fs(); + + if (off & 3 || off >= COMPAT_USER_SZ) + return -EIO; +@@ -828,10 +829,13 @@ static int compat_ptrace_write_user(stru + if (off >= sizeof(compat_elf_gregset_t)) + return 0; + ++ set_fs(KERNEL_DS); + ret = copy_regset_from_user(tsk, &user_aarch32_view, + REGSET_COMPAT_GPR, off, + sizeof(compat_ulong_t), + &val); ++ set_fs(old_fs); ++ + return ret; + } + diff --git a/queue-3.15/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch b/queue-3.15/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch new file mode 100644 index 00000000000..152d229028f --- /dev/null +++ b/queue-3.15/arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch @@ -0,0 +1,75 @@ +From 2227901a0230d8fde81ba9c602d649839390f56b Mon Sep 17 00:00:00 2001 +From: Victor Kamensky +Date: Tue, 3 Jun 2014 19:21:30 +0100 +Subject: arm64: ptrace: fix empty registers set in prstatus of aarch32 process core + +From: Victor Kamensky + +commit 2227901a0230d8fde81ba9c602d649839390f56b upstream. + +Currently core file of aarch32 process prstatus note has empty +registers set. As result aarch32 core files create by V8 kernel are +not very useful. + +It happens because compat_gpr_get and compat_gpr_set functions can +copy registers values to/from either kbuf or ubuf. ELF core file +collection function fill_thread_core_info calls compat_gpr_get +with kbuf set and ubuf set to 0. But current compat_gpr_get and +compat_gpr_set function handle copy to/from only ubuf case. + +Fix is to handle kbuf and ubuf as two separate cases in similar +way as other functions like user_regset_copyout, user_regset_copyin do. 
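+
+For illustration, the shape of that pattern (a simplified, hypothetical
+helper in the spirit of user_regset_copyout(), not the exact code this
+patch adds; copyout_one_reg() is an invented name):
+
+	static int copyout_one_reg(void **kbuf, void __user **ubuf,
+				   compat_ulong_t reg)
+	{
+		if (*kbuf) {
+			/* regset core supplied a kernel buffer */
+			memcpy(*kbuf, &reg, sizeof(reg));
+			*kbuf += sizeof(reg);
+		} else {
+			/* ptrace path: copy to userspace instead */
+			if (copy_to_user(*ubuf, &reg, sizeof(reg)))
+				return -EFAULT;
+			*ubuf += sizeof(reg);
+		}
+		return 0;
+	}
+
+Core dumping (fill_thread_core_info()) passes kbuf with ubuf == NULL,
+which is exactly the case the old code mishandled.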
+ +Signed-off-by: Victor Kamensky +Acked-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/ptrace.c | 26 ++++++++++++++++++-------- + 1 file changed, 18 insertions(+), 8 deletions(-) + +--- a/arch/arm64/kernel/ptrace.c ++++ b/arch/arm64/kernel/ptrace.c +@@ -650,11 +650,16 @@ static int compat_gpr_get(struct task_st + reg = task_pt_regs(target)->regs[idx]; + } + +- ret = copy_to_user(ubuf, ®, sizeof(reg)); +- if (ret) +- break; ++ if (kbuf) { ++ memcpy(kbuf, ®, sizeof(reg)); ++ kbuf += sizeof(reg); ++ } else { ++ ret = copy_to_user(ubuf, ®, sizeof(reg)); ++ if (ret) ++ break; + +- ubuf += sizeof(reg); ++ ubuf += sizeof(reg); ++ } + } + + return ret; +@@ -684,11 +689,16 @@ static int compat_gpr_set(struct task_st + unsigned int idx = start + i; + compat_ulong_t reg; + +- ret = copy_from_user(®, ubuf, sizeof(reg)); +- if (ret) +- return ret; ++ if (kbuf) { ++ memcpy(®, kbuf, sizeof(reg)); ++ kbuf += sizeof(reg); ++ } else { ++ ret = copy_from_user(®, ubuf, sizeof(reg)); ++ if (ret) ++ return ret; + +- ubuf += sizeof(reg); ++ ubuf += sizeof(reg); ++ } + + switch (idx) { + case 15: diff --git a/queue-3.15/ext4-fix-data-integrity-sync-in-ordered-mode.patch b/queue-3.15/ext4-fix-data-integrity-sync-in-ordered-mode.patch new file mode 100644 index 00000000000..b9bc646fe67 --- /dev/null +++ b/queue-3.15/ext4-fix-data-integrity-sync-in-ordered-mode.patch @@ -0,0 +1,178 @@ +From 1c8349a17137b93f0a83f276c764a6df1b9a116e Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Mon, 12 May 2014 08:12:25 -0400 +Subject: ext4: fix data integrity sync in ordered mode + +From: Namjae Jeon + +commit 1c8349a17137b93f0a83f276c764a6df1b9a116e upstream. + +When we perform a data integrity sync we tag all the dirty pages with +PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages. Later we check +for this tag in write_cache_pages_da and creates a struct +mpage_da_data containing contiguously indexed pages tagged with this +tag and sync these pages with a call to mpage_da_map_and_submit. This +process is done in while loop until all the PAGECACHE_TAG_TOWRITE +pages are synced. We also do journal start and stop in each iteration. +journal_stop could initiate journal commit which would call +ext4_writepage which in turn will call ext4_bio_write_page even for +delayed OR unwritten buffers. When ext4_bio_write_page is called for +such buffers, even though it does not sync them but it clears the +PAGECACHE_TAG_TOWRITE of the corresponding page and hence these pages +are also not synced by the currently running data integrity sync. We +will end up with dirty pages although sync is completed. + +This could cause a potential data loss when the sync call is followed +by a truncate_pagecache call, which is exactly the case in +collapse_range. (It will cause generic/127 failure in xfstests) + +To avoid this issue, we can use set_page_writeback_keepwrite instead of +set_page_writeback, which doesn't clear TOWRITE tag. 
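+
+To see why the cleared tag loses pages, here is a condensed sketch of
+the data integrity walk in write_cache_pages() of this era (simplified,
+locals elided; not a verbatim copy):
+
+	int tag = wbc->sync_mode == WB_SYNC_ALL ?
+			PAGECACHE_TAG_TOWRITE : PAGECACHE_TAG_DIRTY;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
+	while (pagevec_lookup_tag(&pvec, mapping, &index, tag,
+				  PAGEVEC_SIZE)) {
+		/* only pages still carrying the tag are written back;
+		 * a page whose TOWRITE tag was cleared by a concurrent
+		 * journal commit is never found here and stays dirty */
+	}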
+ +Signed-off-by: Namjae Jeon +Signed-off-by: Ashish Sangwan +Signed-off-by: "Theodore Ts'o" +Reviewed-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4.h | 3 ++- + fs/ext4/inode.c | 6 ++++-- + fs/ext4/page-io.c | 8 ++++++-- + include/linux/page-flags.h | 12 +++++++++++- + mm/page-writeback.c | 11 ++++++----- + 5 files changed, 29 insertions(+), 11 deletions(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -2771,7 +2771,8 @@ extern void ext4_io_submit(struct ext4_i + extern int ext4_bio_write_page(struct ext4_io_submit *io, + struct page *page, + int len, +- struct writeback_control *wbc); ++ struct writeback_control *wbc, ++ bool keep_towrite); + + /* mmp.c */ + extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *p + struct buffer_head *page_bufs = NULL; + struct inode *inode = page->mapping->host; + struct ext4_io_submit io_submit; ++ bool keep_towrite = false; + + trace_ext4_writepage(page); + size = i_size_read(inode); +@@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *p + unlock_page(page); + return 0; + } ++ keep_towrite = true; + } + + if (PageChecked(page) && ext4_should_journal_data(inode)) +@@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *p + unlock_page(page); + return -ENOMEM; + } +- ret = ext4_bio_write_page(&io_submit, page, len, wbc); ++ ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); + ext4_io_submit(&io_submit); + /* Drop io_end reference we got from init */ + ext4_put_io_end_defer(io_submit.io_end); +@@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpag + else + len = PAGE_CACHE_SIZE; + clear_page_dirty_for_io(page); +- err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); ++ err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); + if (!err) + mpd->wbc->nr_to_write--; + mpd->first_page++; +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -401,7 +401,8 @@ submit_and_retry: + int ext4_bio_write_page(struct ext4_io_submit *io, + struct page *page, + int len, +- struct writeback_control *wbc) ++ struct writeback_control *wbc, ++ bool keep_towrite) + { + struct inode *inode = page->mapping->host; + unsigned block_start, blocksize; +@@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_s + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + +- set_page_writeback(page); ++ if (keep_towrite) ++ set_page_writeback_keepwrite(page); ++ else ++ set_page_writeback(page); + ClearPageError(page); + + /* +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate) + extern void cancel_dirty_page(struct page *page, unsigned int account_size); + + int test_clear_page_writeback(struct page *page); +-int test_set_page_writeback(struct page *page); ++int __test_set_page_writeback(struct page *page, bool keep_write); ++ ++#define test_set_page_writeback(page) \ ++ __test_set_page_writeback(page, false) ++#define test_set_page_writeback_keepwrite(page) \ ++ __test_set_page_writeback(page, true) + + static inline void set_page_writeback(struct page *page) + { + test_set_page_writeback(page); + } + ++static inline void set_page_writeback_keepwrite(struct page *page) ++{ ++ test_set_page_writeback_keepwrite(page); ++} ++ + #ifdef CONFIG_PAGEFLAGS_EXTENDED + /* + * System with lots of page flags available. 
This allows separate +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct pag + return ret; + } + +-int test_set_page_writeback(struct page *page) ++int __test_set_page_writeback(struct page *page, bool keep_write) + { + struct address_space *mapping = page_mapping(page); + int ret; +@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); +- radix_tree_tag_clear(&mapping->page_tree, +- page_index(page), +- PAGECACHE_TAG_TOWRITE); ++ if (!keep_write) ++ radix_tree_tag_clear(&mapping->page_tree, ++ page_index(page), ++ PAGECACHE_TAG_TOWRITE); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + } else { + ret = TestSetPageWriteback(page); +@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page + return ret; + + } +-EXPORT_SYMBOL(test_set_page_writeback); ++EXPORT_SYMBOL(__test_set_page_writeback); + + /* + * Return true if any of the pages in the mapping are marked with the diff --git a/queue-3.15/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch b/queue-3.15/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch new file mode 100644 index 00000000000..6969ac8af88 --- /dev/null +++ b/queue-3.15/ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch @@ -0,0 +1,32 @@ +From b5b60778558cafad17bbcbf63e0310bd3c68eb17 Mon Sep 17 00:00:00 2001 +From: Maurizio Lombardi +Date: Tue, 27 May 2014 12:48:56 -0400 +Subject: ext4: fix wrong assert in ext4_mb_normalize_request() + +From: Maurizio Lombardi + +commit b5b60778558cafad17bbcbf63e0310bd3c68eb17 upstream. + +The variable "size" is expressed as number of blocks and not as +number of clusters, this could trigger a kernel panic when using +ext4 with the size of a cluster different from the size of a block. + +Signed-off-by: Maurizio Lombardi +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/mballoc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3145,7 +3145,7 @@ ext4_mb_normalize_request(struct ext4_al + } + BUG_ON(start + size <= ac->ac_o_ex.fe_logical && + start > ac->ac_o_ex.fe_logical); +- BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); ++ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); + + /* now prepare goal request */ + diff --git a/queue-3.15/ext4-fix-zero_range-test-failure-in-data-journalling.patch b/queue-3.15/ext4-fix-zero_range-test-failure-in-data-journalling.patch new file mode 100644 index 00000000000..557096494fc --- /dev/null +++ b/queue-3.15/ext4-fix-zero_range-test-failure-in-data-journalling.patch @@ -0,0 +1,39 @@ +From e1ee60fd89670da61b0a4bda59f8ffb2b8abea63 Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Tue, 27 May 2014 12:48:55 -0400 +Subject: ext4: fix ZERO_RANGE test failure in data journalling + +From: Namjae Jeon + +commit e1ee60fd89670da61b0a4bda59f8ffb2b8abea63 upstream. + +xfstests generic/091 is failing when mounting ext4 with data=journal. +I think that this regression is same problem that occurred prior to collapse +range issue. So ZERO RANGE also need to call ext4_force_commit as +collapse range. 
+ +Signed-off-by: Namjae Jeon +Signed-off-by: Ashish Sangwan +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/extents.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4744,6 +4744,13 @@ static long ext4_zero_range(struct file + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ++ /* Call ext4_force_commit to flush all data in case of data=journal. */ ++ if (ext4_should_journal_data(inode)) { ++ ret = ext4_force_commit(inode->i_sb); ++ if (ret) ++ return ret; ++ } ++ + /* + * Write out all dirty pages to avoid race conditions + * Then release them. diff --git a/queue-3.15/ext4-fix-zeroing-of-page-during-writeback.patch b/queue-3.15/ext4-fix-zeroing-of-page-during-writeback.patch new file mode 100644 index 00000000000..125efe7caf0 --- /dev/null +++ b/queue-3.15/ext4-fix-zeroing-of-page-during-writeback.patch @@ -0,0 +1,70 @@ +From eeece469dedadf3918bad50ad80f4616a0064e90 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Tue, 27 May 2014 12:48:55 -0400 +Subject: ext4: fix zeroing of page during writeback + +From: Jan Kara + +commit eeece469dedadf3918bad50ad80f4616a0064e90 upstream. + +Tail of a page straddling inode size must be zeroed when being written +out due to POSIX requirement that modifications of mmaped page beyond +inode size must not be written to the file. ext4_bio_write_page() did +this only for blocks fully beyond inode size but didn't properly zero +blocks partially beyond inode size. Fix this. + +The problem has been uncovered by mmap_11-4 test in openposix test suite +(part of LTP). + +Reported-by: Xiaoguang Wang +Fixes: 5a0dc7365c240 +Fixes: bd2d0210cf22f +CC: stable@vger.kernel.org +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/page-io.c | 24 +++++++++++------------- + 1 file changed, 11 insertions(+), 13 deletions(-) + +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -422,6 +422,17 @@ int ext4_bio_write_page(struct ext4_io_s + ClearPageError(page); + + /* ++ * Comments copied from block_write_full_page_endio: ++ * ++ * The page straddles i_size. It must be zeroed out on each and every ++ * writepage invocation because it may be mmapped. "A file is mapped ++ * in multiples of the page size. For a file that is not a multiple of ++ * the page size, the remaining memory is zeroed when mapped, and ++ * writes to that region are not written out to the file." ++ */ ++ if (len < PAGE_CACHE_SIZE) ++ zero_user_segment(page, len, PAGE_CACHE_SIZE); ++ /* + * In the first loop we prepare and mark buffers to submit. We have to + * mark all buffers in the page before submitting so that + * end_page_writeback() cannot be called from ext4_bio_end_io() when IO +@@ -432,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_s + do { + block_start = bh_offset(bh); + if (block_start >= len) { +- /* +- * Comments copied from block_write_full_page_endio: +- * +- * The page straddles i_size. It must be zeroed out on +- * each and every writepage invocation because it may +- * be mmapped. "A file is mapped in multiples of the +- * page size. For a file that is not a multiple of +- * the page size, the remaining memory is zeroed when +- * mapped, and writes to that region are not written +- * out to the file." 
+- */ +- zero_user_segment(page, block_start, +- block_start + blocksize); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + continue; diff --git a/queue-3.15/hid-core-fix-validation-of-report-id-0.patch b/queue-3.15/hid-core-fix-validation-of-report-id-0.patch new file mode 100644 index 00000000000..cec48e25b62 --- /dev/null +++ b/queue-3.15/hid-core-fix-validation-of-report-id-0.patch @@ -0,0 +1,54 @@ +From 1b15d2e5b8077670b1e6a33250a0d9577efff4a5 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 17 Apr 2014 13:22:09 -0700 +Subject: HID: core: fix validation of report id 0 + +From: Kees Cook + +commit 1b15d2e5b8077670b1e6a33250a0d9577efff4a5 upstream. + +Some drivers use the first HID report in the list instead of using an +index. In these cases, validation uses ID 0, which was supposed to mean +"first known report". This fixes the problem, which was causing at least +the lgff family of devices to stop working since hid_validate_values +was being called with ID 0, but the devices used single numbered IDs +for their reports: + +0x05, 0x01, /* Usage Page (Desktop), */ +0x09, 0x05, /* Usage (Gamepad), */ +0xA1, 0x01, /* Collection (Application), */ +0xA1, 0x02, /* Collection (Logical), */ +0x85, 0x01, /* Report ID (1), */ +... + +Reported-by: Simon Wood +Signed-off-by: Kees Cook +Reviewed-by: Benjamin Tissoires +Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hid/hid-core.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -842,7 +842,17 @@ struct hid_report *hid_validate_values(s + * ->numbered being checked, which may not always be the case when + * drivers go to access report values. + */ +- report = hid->report_enum[type].report_id_hash[id]; ++ if (id == 0) { ++ /* ++ * Validating on id 0 means we should examine the first ++ * report in the list. ++ */ ++ report = list_entry( ++ hid->report_enum[type].report_list.next, ++ struct hid_report, list); ++ } else { ++ report = hid->report_enum[type].report_id_hash[id]; ++ } + if (!report) { + hid_err(hid, "missing %s %u\n", hid_report_names[type], id); + return NULL; diff --git a/queue-3.15/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch b/queue-3.15/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch new file mode 100644 index 00000000000..2896018902e --- /dev/null +++ b/queue-3.15/hugetlb-restrict-hugepage_migration_support-to-x86_64.patch @@ -0,0 +1,299 @@ +From c177c81e09e517bbf75b67762cdab1b83aba6976 Mon Sep 17 00:00:00 2001 +From: Naoya Horiguchi +Date: Wed, 4 Jun 2014 16:05:35 -0700 +Subject: hugetlb: restrict hugepage_migration_support() to x86_64 + +From: Naoya Horiguchi + +commit c177c81e09e517bbf75b67762cdab1b83aba6976 upstream. + +Currently hugepage migration is available for all archs which support +pmd-level hugepage, but testing is done only for x86_64 and there're +bugs for other archs. So to avoid breaking such archs, this patch +limits the availability strictly to x86_64 until developers of other +archs get interested in enabling this feature. + +Simply disabling hugepage migration on non-x86_64 archs is not enough to +fix the reported problem where sys_move_pages() hits the BUG_ON() in +follow_page(FOLL_GET), so let's fix this by checking if hugepage +migration is supported in vma_migratable(). 
+ +Signed-off-by: Naoya Horiguchi +Reported-by: Michael Ellerman +Tested-by: Michael Ellerman +Acked-by: Hugh Dickins +Cc: Benjamin Herrenschmidt +Cc: Tony Luck +Cc: Russell King +Cc: Martin Schwidefsky +Cc: James Hogan +Cc: Ralf Baechle +Cc: David Miller +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/mm/hugetlbpage.c | 5 ----- + arch/arm64/mm/hugetlbpage.c | 5 ----- + arch/ia64/mm/hugetlbpage.c | 5 ----- + arch/metag/mm/hugetlbpage.c | 5 ----- + arch/mips/mm/hugetlbpage.c | 5 ----- + arch/powerpc/mm/hugetlbpage.c | 10 ---------- + arch/s390/mm/hugetlbpage.c | 5 ----- + arch/sh/mm/hugetlbpage.c | 5 ----- + arch/sparc/mm/hugetlbpage.c | 5 ----- + arch/tile/mm/hugetlbpage.c | 5 ----- + arch/x86/Kconfig | 4 ++++ + arch/x86/mm/hugetlbpage.c | 10 ---------- + include/linux/hugetlb.h | 13 +++++-------- + include/linux/mempolicy.h | 6 ++++++ + mm/Kconfig | 3 +++ + 15 files changed, 18 insertions(+), 73 deletions(-) + +--- a/arch/arm/mm/hugetlbpage.c ++++ b/arch/arm/mm/hugetlbpage.c +@@ -56,8 +56,3 @@ int pmd_huge(pmd_t pmd) + { + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); + } +- +-int pmd_huge_support(void) +-{ +- return 1; +-} +--- a/arch/arm64/mm/hugetlbpage.c ++++ b/arch/arm64/mm/hugetlbpage.c +@@ -58,11 +58,6 @@ int pud_huge(pud_t pud) + #endif + } + +-int pmd_huge_support(void) +-{ +- return 1; +-} +- + static __init int setup_hugepagesz(char *opt) + { + unsigned long ps = memparse(opt, &opt); +--- a/arch/ia64/mm/hugetlbpage.c ++++ b/arch/ia64/mm/hugetlbpage.c +@@ -114,11 +114,6 @@ int pud_huge(pud_t pud) + return 0; + } + +-int pmd_huge_support(void) +-{ +- return 0; +-} +- + struct page * + follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) + { +--- a/arch/metag/mm/hugetlbpage.c ++++ b/arch/metag/mm/hugetlbpage.c +@@ -110,11 +110,6 @@ int pud_huge(pud_t pud) + return 0; + } + +-int pmd_huge_support(void) +-{ +- return 1; +-} +- + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) + { +--- a/arch/mips/mm/hugetlbpage.c ++++ b/arch/mips/mm/hugetlbpage.c +@@ -84,11 +84,6 @@ int pud_huge(pud_t pud) + return (pud_val(pud) & _PAGE_HUGE) != 0; + } + +-int pmd_huge_support(void) +-{ +- return 1; +-} +- + struct page * + follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +--- a/arch/powerpc/mm/hugetlbpage.c ++++ b/arch/powerpc/mm/hugetlbpage.c +@@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd) + */ + return ((pgd_val(pgd) & 0x3) != 0x0); + } +- +-int pmd_huge_support(void) +-{ +- return 1; +-} + #else + int pmd_huge(pmd_t pmd) + { +@@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd) + { + return 0; + } +- +-int pmd_huge_support(void) +-{ +- return 0; +-} + #endif + + pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -220,11 +220,6 @@ int pud_huge(pud_t pud) + return 0; + } + +-int pmd_huge_support(void) +-{ +- return 1; +-} +- + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmdp, int write) + { +--- a/arch/sh/mm/hugetlbpage.c ++++ b/arch/sh/mm/hugetlbpage.c +@@ -83,11 +83,6 @@ int pud_huge(pud_t pud) + return 0; + } + +-int pmd_huge_support(void) +-{ +- return 0; +-} +- + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) + { +--- a/arch/sparc/mm/hugetlbpage.c ++++ b/arch/sparc/mm/hugetlbpage.c +@@ -231,11 +231,6 @@ int pud_huge(pud_t pud) + return 0; 
+ } + +-int pmd_huge_support(void) +-{ +- return 0; +-} +- + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) + { +--- a/arch/tile/mm/hugetlbpage.c ++++ b/arch/tile/mm/hugetlbpage.c +@@ -166,11 +166,6 @@ int pud_huge(pud_t pud) + return !!(pud_val(pud) & _PAGE_HUGE_PAGE); + } + +-int pmd_huge_support(void) +-{ +- return 1; +-} +- + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) + { +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1871,6 +1871,10 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y + depends on X86_64 || X86_PAE + ++config ARCH_ENABLE_HUGEPAGE_MIGRATION ++ def_bool y ++ depends on X86_64 && HUGETLB_PAGE && MIGRATION ++ + menu "Power management and ACPI options" + + config ARCH_HIBERNATION_HEADER +--- a/arch/x86/mm/hugetlbpage.c ++++ b/arch/x86/mm/hugetlbpage.c +@@ -58,11 +58,6 @@ follow_huge_pmd(struct mm_struct *mm, un + { + return NULL; + } +- +-int pmd_huge_support(void) +-{ +- return 0; +-} + #else + + struct page * +@@ -80,11 +75,6 @@ int pud_huge(pud_t pud) + { + return !!(pud_val(pud) & _PAGE_PSE); + } +- +-int pmd_huge_support(void) +-{ +- return 1; +-} + #endif + + #ifdef CONFIG_HUGETLB_PAGE +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -392,15 +392,13 @@ static inline pgoff_t basepage_index(str + + extern void dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn); +-int pmd_huge_support(void); +-/* +- * Currently hugepage migration is enabled only for pmd-based hugepage. +- * This function will be updated when hugepage migration is more widely +- * supported. +- */ + static inline int hugepage_migration_support(struct hstate *h) + { +- return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); ++#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION ++ return huge_page_shift(h) == PMD_SHIFT; ++#else ++ return 0; ++#endif + } + + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, +@@ -450,7 +448,6 @@ static inline pgoff_t basepage_index(str + return page->index; + } + #define dissolve_free_huge_pages(s, e) do {} while (0) +-#define pmd_huge_support() 0 + #define hugepage_migration_support(h) 0 + + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, +--- a/include/linux/mempolicy.h ++++ b/include/linux/mempolicy.h +@@ -175,6 +175,12 @@ static inline int vma_migratable(struct + { + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + return 0; ++ ++#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION ++ if (vma->vm_flags & VM_HUGETLB) ++ return 0; ++#endif ++ + /* + * Migration allocates pages in the highest zone. If we cannot + * do so then migration (at least from node to node) is not +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -264,6 +264,9 @@ config MIGRATION + pages as migration can relocate pages to satisfy a huge page + allocation instead of reclaiming. 
+ ++config ARCH_ENABLE_HUGEPAGE_MIGRATION ++ boolean ++ + config PHYS_ADDR_T_64BIT + def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT + diff --git a/queue-3.15/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch b/queue-3.15/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch new file mode 100644 index 00000000000..46b24614397 --- /dev/null +++ b/queue-3.15/idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch @@ -0,0 +1,77 @@ +From 3afb69cb5572b3c8c898c00880803cf1a49852c4 Mon Sep 17 00:00:00 2001 +From: Lai Jiangshan +Date: Fri, 6 Jun 2014 14:37:10 -0700 +Subject: idr: fix overflow bug during maximum ID calculation at maximum height + +From: Lai Jiangshan + +commit 3afb69cb5572b3c8c898c00880803cf1a49852c4 upstream. + +idr_replace() open-codes the logic to calculate the maximum valid ID +given the height of the idr tree; unfortunately, the open-coded logic +doesn't account for the fact that the top layer may have unused slots +and over-shifts the limit to zero when the tree is at its maximum +height. + +The following test code shows it fails to replace the value for +id=((1<<27)+42): + + static void test5(void) + { + int id; + DEFINE_IDR(test_idr); + #define TEST5_START ((1<<27)+42) /* use the highest layer */ + + printk(KERN_INFO "Start test5\n"); + id = idr_alloc(&test_idr, (void *)1, TEST5_START, 0, GFP_KERNEL); + BUG_ON(id != TEST5_START); + TEST_BUG_ON(idr_replace(&test_idr, (void *)2, TEST5_START) != (void *)1); + idr_destroy(&test_idr); + printk(KERN_INFO "End of test5\n"); + } + +Fix the bug by using idr_max() which correctly takes into account the +maximum allowed shift. + +sub_alloc() shares the same problem and may incorrectly fail with +-EAGAIN; however, this bug doesn't affect correct operation because +idr_get_empty_slot(), which already uses idr_max(), retries with the +increased @id in such cases. + +[tj@kernel.org: Updated patch description.] +Signed-off-by: Lai Jiangshan +Acked-by: Tejun Heo +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + lib/idr.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/lib/idr.c ++++ b/lib/idr.c +@@ -249,7 +249,7 @@ static int sub_alloc(struct idr *idp, in + id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; + + /* if already at the top layer, we need to grow */ +- if (id >= 1 << (idp->layers * IDR_BITS)) { ++ if (id > idr_max(idp->layers)) { + *starting_id = id; + return -EAGAIN; + } +@@ -811,12 +811,10 @@ void *idr_replace(struct idr *idp, void + if (!p) + return ERR_PTR(-EINVAL); + +- n = (p->layer+1) * IDR_BITS; +- +- if (id >= (1 << n)) ++ if (id > idr_max(p->layer + 1)) + return ERR_PTR(-EINVAL); + +- n -= IDR_BITS; ++ n = p->layer * IDR_BITS; + while ((n > 0) && p) { + p = p->ary[(id >> n) & IDR_MASK]; + n -= IDR_BITS; diff --git a/queue-3.15/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch b/queue-3.15/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch new file mode 100644 index 00000000000..2cc8f9c5925 --- /dev/null +++ b/queue-3.15/kthread-fix-return-value-of-kthread_create-upon-sigkill.patch @@ -0,0 +1,45 @@ +From 8fe6929cfd43c44834858a53e129ffdc7c166298 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Wed, 4 Jun 2014 16:05:36 -0700 +Subject: kthread: fix return value of kthread_create() upon SIGKILL. + +From: Tetsuo Handa + +commit 8fe6929cfd43c44834858a53e129ffdc7c166298 upstream. 
+ +Commit 786235eeba0e ("kthread: make kthread_create() killable") meant +for allowing kthread_create() to abort as soon as killed by the +OOM-killer. But returning -ENOMEM is wrong if killed by SIGKILL from +userspace. Change kthread_create() to return -EINTR upon SIGKILL. + +Signed-off-by: Tetsuo Handa +Cc: Oleg Nesterov +Acked-by: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/kthread.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -262,7 +262,7 @@ static void create_kthread(struct kthrea + * kthread_stop() has been called). The return value should be zero + * or a negative error number; it will be passed to kthread_stop(). + * +- * Returns a task_struct or ERR_PTR(-ENOMEM). ++ * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR). + */ + struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), + void *data, int node, +@@ -298,7 +298,7 @@ struct task_struct *kthread_create_on_no + * that thread. + */ + if (xchg(&create->done, NULL)) +- return ERR_PTR(-ENOMEM); ++ return ERR_PTR(-EINTR); + /* + * kthreadd (or new kernel thread) will call complete() + * shortly. diff --git a/queue-3.15/matroxfb-perform-a-dummy-read-of-m_status.patch b/queue-3.15/matroxfb-perform-a-dummy-read-of-m_status.patch new file mode 100644 index 00000000000..26cd06fddb2 --- /dev/null +++ b/queue-3.15/matroxfb-perform-a-dummy-read-of-m_status.patch @@ -0,0 +1,40 @@ +From 972754cfaee94d6e25acf94a497bc0a864d91b7e Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Thu, 15 May 2014 06:58:24 -0400 +Subject: matroxfb: perform a dummy read of M_STATUS + +From: Mikulas Patocka + +commit 972754cfaee94d6e25acf94a497bc0a864d91b7e upstream. + +I had occasional screen corruption with the matrox framebuffer driver and +I found out that the reason for the corruption is that the hardware +blitter accesses the videoram while it is being written to. + +The matrox driver has a macro WaitTillIdle() that should wait until the +blitter is idle, but it sometimes doesn't work. I added a dummy read +mga_inl(M_STATUS) to WaitTillIdle() to fix the problem. The dummy read +will flush the write buffer in the PCI chipset, and the next read of +M_STATUS will return the hardware status. + +Since applying this patch, I had no screen corruption at all. 
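+
+Expanded, the fixed macro amounts to the usual posted-write flush idiom
+(sketch only; mga_inl() is this driver's MMIO read accessor and 0x10000
+is the blitter-busy bit):
+
+	(void)mga_inl(M_STATUS);	/* dummy read: flush posted writes */
+	while (mga_inl(M_STATUS) & 0x10000)
+		;			/* poll the now-accurate busy bit */
+
+Reads are never posted on PCI, so the first read forces any buffered
+writes out to the device before the status is sampled.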
+ +Signed-off-by: Mikulas Patocka +Signed-off-by: Tomi Valkeinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/fbdev/matrox/matroxfb_base.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/video/fbdev/matrox/matroxfb_base.h ++++ b/drivers/video/fbdev/matrox/matroxfb_base.h +@@ -698,7 +698,7 @@ void matroxfb_unregister_driver(struct m + + #define mga_fifo(n) do {} while ((mga_inl(M_FIFOSTATUS) & 0xFF) < (n)) + +-#define WaitTillIdle() do {} while (mga_inl(M_STATUS) & 0x10000) ++#define WaitTillIdle() do { mga_inl(M_STATUS); do {} while (mga_inl(M_STATUS) & 0x10000); } while (0) + + /* code speedup */ + #ifdef CONFIG_FB_MATROX_MILLENIUM diff --git a/queue-3.15/memcg-do-not-hang-on-oom-when-killed-by-userspace-oom-access-to-memory-reserves.patch b/queue-3.15/memcg-do-not-hang-on-oom-when-killed-by-userspace-oom-access-to-memory-reserves.patch new file mode 100644 index 00000000000..07da691b77d --- /dev/null +++ b/queue-3.15/memcg-do-not-hang-on-oom-when-killed-by-userspace-oom-access-to-memory-reserves.patch @@ -0,0 +1,88 @@ +From d8dc595ce3909fbc131bdf5ab8c9808fe624b18d Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Wed, 4 Jun 2014 16:07:36 -0700 +Subject: memcg: do not hang on OOM when killed by userspace OOM access to memory reserves + +From: Michal Hocko + +commit d8dc595ce3909fbc131bdf5ab8c9808fe624b18d upstream. + +Eric has reported that he can see task(s) stuck in memcg OOM handler +regularly. The only way out is to + + echo 0 > $GROUP/memory.oom_control + +His usecase is: + +- Setup a hierarchy with memory and the freezer (disable kernel oom and + have a process watch for oom). + +- In that memory cgroup add a process with one thread per cpu. + +- In one thread slowly allocate once per second I think it is 16M of ram + and mlock and dirty it (just to force the pages into ram and stay + there). + +- When oom is achieved loop: + * attempt to freeze all of the tasks. + * if frozen send every task SIGKILL, unfreeze, remove the directory in + cgroupfs. + +Eric has then pinpointed the issue to be memcg specific. + +All tasks are sitting on the memcg_oom_waitq when memcg oom is disabled. +Those that have received fatal signal will bypass the charge and should +continue on their way out. The tricky part is that the exit path might +trigger a page fault (e.g. exit_robust_list), thus the memcg charge, +while its memcg is still under OOM because nobody has released any charges +yet. + +Unlike with the in-kernel OOM handler the exiting task doesn't get +TIF_MEMDIE set so it doesn't shortcut further charges of the killed task +and falls to the memcg OOM again without any way out of it as there are no +fatal signals pending anymore. + +This patch fixes the issue by checking PF_EXITING early in +mem_cgroup_try_charge and bypass the charge same as if it had fatal +signal pending or TIF_MEMDIE set. + +Normally exiting tasks (aka not killed) will bypass the charge now but +this should be OK as the task is leaving and will release memory and +increasing the memory pressure just to release it in a moment seems +dubious wasting of cycles. Besides that charges after exit_signals should +be rare. + +I am bringing this patch again (rebased on the current mmotm tree). I +hope we can move forward finally. If there is still an opposition then +I would really appreciate a concurrent approach so that we can discuss +alternatives. 
+ +http://comments.gmane.org/gmane.linux.kernel.stable/77650 is a reference +to the followup discussion when the patch has been dropped from the mmotm +last time. + +Reported-by: Eric W. Biederman +Signed-off-by: Michal Hocko +Acked-by: David Rientjes +Acked-by: Johannes Weiner +Cc: KAMEZAWA Hiroyuki +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memcontrol.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -2684,7 +2684,8 @@ static int mem_cgroup_try_charge(struct + * free their memory. + */ + if (unlikely(test_thread_flag(TIF_MEMDIE) || +- fatal_signal_pending(current))) ++ fatal_signal_pending(current) || ++ current->flags & PF_EXITING)) + goto bypass; + + if (unlikely(task_in_memcg_oom(current))) diff --git a/queue-3.15/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch b/queue-3.15/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch new file mode 100644 index 00000000000..d35c97adf38 --- /dev/null +++ b/queue-3.15/mm-fix-sleeping-function-warning-from-__put_anon_vma.patch @@ -0,0 +1,66 @@ +From 7f39dda9d86fb4f4f17af0de170decf125726f8c Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Wed, 4 Jun 2014 16:05:33 -0700 +Subject: mm: fix sleeping function warning from __put_anon_vma + +From: Hugh Dickins + +commit 7f39dda9d86fb4f4f17af0de170decf125726f8c upstream. + +Trinity reports BUG: + + sleeping function called from invalid context at kernel/locking/rwsem.c:47 + in_atomic(): 0, irqs_disabled(): 0, pid: 5787, name: trinity-c27 + +__might_sleep < down_write < __put_anon_vma < page_get_anon_vma < +migrate_pages < compact_zone < compact_zone_order < try_to_compact_pages .. + +Right, since conversion to mutex then rwsem, we should not put_anon_vma() +from inside an rcu_read_lock()ed section: fix the two places that did so. +And add might_sleep() to anon_vma_free(), as suggested by Peter Zijlstra. + +Fixes: 88c22088bf23 ("mm: optimize page_lock_anon_vma() fast-path") +Reported-by: Dave Jones +Signed-off-by: Hugh Dickins +Cc: Peter Zijlstra +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/rmap.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -103,6 +103,7 @@ static inline void anon_vma_free(struct + * LOCK should suffice since the actual taking of the lock must + * happen _before_ what follows. + */ ++ might_sleep(); + if (rwsem_is_locked(&anon_vma->root->rwsem)) { + anon_vma_lock_write(anon_vma); + anon_vma_unlock_write(anon_vma); +@@ -426,8 +427,9 @@ struct anon_vma *page_get_anon_vma(struc + * above cannot corrupt). 
+ */ + if (!page_mapped(page)) { ++ rcu_read_unlock(); + put_anon_vma(anon_vma); +- anon_vma = NULL; ++ return NULL; + } + out: + rcu_read_unlock(); +@@ -477,9 +479,9 @@ struct anon_vma *page_lock_anon_vma_read + } + + if (!page_mapped(page)) { ++ rcu_read_unlock(); + put_anon_vma(anon_vma); +- anon_vma = NULL; +- goto out; ++ return NULL; + } + + /* we pinned the anon_vma, its safe to sleep */ diff --git a/queue-3.15/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch b/queue-3.15/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch new file mode 100644 index 00000000000..5296452f5c0 --- /dev/null +++ b/queue-3.15/mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch @@ -0,0 +1,123 @@ +From 74614de17db6fb472370c426d4f934d8d616edf2 Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Wed, 4 Jun 2014 16:11:01 -0700 +Subject: mm/memory-failure.c: don't let collect_procs() skip over processes for MF_ACTION_REQUIRED + +From: Tony Luck + +commit 74614de17db6fb472370c426d4f934d8d616edf2 upstream. + +When Linux sees an "action optional" machine check (where h/w has reported +an error that is not in the current execution path) we generally do not +want to signal a process, since most processes do not have a SIGBUS +handler - we'd just prematurely terminate the process for a problem that +they might never actually see. + +task_early_kill() decides whether to consider a process - and it checks +whether this specific process has been marked for early signals with +"prctl", or if the system administrator has requested early signals for +all processes using /proc/sys/vm/memory_failure_early_kill. + +But for MF_ACTION_REQUIRED case we must not defer. The error is in the +execution path of the current thread so we must send the SIGBUS +immediatley. + +Fix by passing a flag argument through collect_procs*() to +task_early_kill() so it knows whether we can defer or must take action. + +Signed-off-by: Tony Luck +Signed-off-by: Naoya Horiguchi +Cc: Andi Kleen +Cc: Borislav Petkov +Cc: Chen Gong +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory-failure.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -380,10 +380,12 @@ static void kill_procs(struct list_head + } + } + +-static int task_early_kill(struct task_struct *tsk) ++static int task_early_kill(struct task_struct *tsk, int force_early) + { + if (!tsk->mm) + return 0; ++ if (force_early) ++ return 1; + if (tsk->flags & PF_MCE_PROCESS) + return !!(tsk->flags & PF_MCE_EARLY); + return sysctl_memory_failure_early_kill; +@@ -393,7 +395,7 @@ static int task_early_kill(struct task_s + * Collect processes when the error hit an anonymous page. + */ + static void collect_procs_anon(struct page *page, struct list_head *to_kill, +- struct to_kill **tkc) ++ struct to_kill **tkc, int force_early) + { + struct vm_area_struct *vma; + struct task_struct *tsk; +@@ -409,7 +411,7 @@ static void collect_procs_anon(struct pa + for_each_process (tsk) { + struct anon_vma_chain *vmac; + +- if (!task_early_kill(tsk)) ++ if (!task_early_kill(tsk, force_early)) + continue; + anon_vma_interval_tree_foreach(vmac, &av->rb_root, + pgoff, pgoff) { +@@ -428,7 +430,7 @@ static void collect_procs_anon(struct pa + * Collect processes when the error hit a file mapped page. 
+ */ + static void collect_procs_file(struct page *page, struct list_head *to_kill, +- struct to_kill **tkc) ++ struct to_kill **tkc, int force_early) + { + struct vm_area_struct *vma; + struct task_struct *tsk; +@@ -439,7 +441,7 @@ static void collect_procs_file(struct pa + for_each_process(tsk) { + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + +- if (!task_early_kill(tsk)) ++ if (!task_early_kill(tsk, force_early)) + continue; + + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, +@@ -465,7 +467,8 @@ static void collect_procs_file(struct pa + * First preallocate one tokill structure outside the spin locks, + * so that we can kill at least one process reasonably reliable. + */ +-static void collect_procs(struct page *page, struct list_head *tokill) ++static void collect_procs(struct page *page, struct list_head *tokill, ++ int force_early) + { + struct to_kill *tk; + +@@ -476,9 +479,9 @@ static void collect_procs(struct page *p + if (!tk) + return; + if (PageAnon(page)) +- collect_procs_anon(page, tokill, &tk); ++ collect_procs_anon(page, tokill, &tk, force_early); + else +- collect_procs_file(page, tokill, &tk); ++ collect_procs_file(page, tokill, &tk, force_early); + kfree(tk); + } + +@@ -963,7 +966,7 @@ static int hwpoison_user_mappings(struct + * there's nothing that can be done. + */ + if (kill) +- collect_procs(ppage, &tokill); ++ collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED); + + ret = try_to_unmap(ppage, ttu); + if (ret != SWAP_SUCCESS) diff --git a/queue-3.15/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch b/queue-3.15/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch new file mode 100644 index 00000000000..6562c051bf9 --- /dev/null +++ b/queue-3.15/mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch @@ -0,0 +1,53 @@ +From a70ffcac741d31a406c1d2b832ae43d658e7e1cf Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Wed, 4 Jun 2014 16:10:59 -0700 +Subject: mm/memory-failure.c-failure: send right signal code to correct thread + +From: Tony Luck + +commit a70ffcac741d31a406c1d2b832ae43d658e7e1cf upstream. + +When a thread in a multi-threaded application hits a machine check because +of an uncorrectable error in memory - we want to send the SIGBUS with +si.si_code = BUS_MCEERR_AR to that thread. Currently we fail to do that +if the active thread is not the primary thread in the process. +collect_procs() just finds primary threads and this test: + + if ((flags & MF_ACTION_REQUIRED) && t == current) { + +will see that the thread we found isn't the current thread and so send a +si.si_code = BUS_MCEERR_AO to the primary (and nothing to the active +thread at this time). + +We can fix this by checking whether "current" shares the same mm with the +process that collect_procs() said owned the page. If so, we send the +SIGBUS to current (with code BUS_MCEERR_AR). 
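+
+From userspace the two cases are distinguished by si_code. A minimal
+illustrative handler (assumes a glibc that exposes BUS_MCEERR_* through
+<signal.h>; a production handler would also act on si_addr/si_addr_lsb):
+
+	#include <signal.h>
+	#include <unistd.h>
+
+	static void mce_handler(int sig, siginfo_t *si, void *uctx)
+	{
+		(void)sig; (void)uctx;
+		if (si->si_code == BUS_MCEERR_AR)
+			/* uncorrected error hit our own execution path */
+			write(2, "BUS_MCEERR_AR\n", 14);
+		else if (si->si_code == BUS_MCEERR_AO)
+			/* advisory: a mapped page was poisoned elsewhere */
+			write(2, "BUS_MCEERR_AO\n", 14);
+	}
+
+	int main(void)
+	{
+		struct sigaction sa = { .sa_sigaction = mce_handler,
+					.sa_flags = SA_SIGINFO };
+		sigaction(SIGBUS, &sa, NULL);
+		for (;;)
+			pause();	/* demo only: wait for signals */
+	}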
+ +Signed-off-by: Tony Luck +Signed-off-by: Naoya Horiguchi +Reported-by: Otto Bruggeman +Cc: Andi Kleen +Cc: Borislav Petkov +Cc: Chen Gong +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory-failure.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -204,9 +204,9 @@ static int kill_proc(struct task_struct + #endif + si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; + +- if ((flags & MF_ACTION_REQUIRED) && t == current) { ++ if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) { + si.si_code = BUS_MCEERR_AR; +- ret = force_sig_info(SIGBUS, &si, t); ++ ret = force_sig_info(SIGBUS, &si, current); + } else { + /* + * Don't use force here, it's convenient if the signal diff --git a/queue-3.15/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch b/queue-3.15/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch new file mode 100644 index 00000000000..90c00415580 --- /dev/null +++ b/queue-3.15/mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch @@ -0,0 +1,162 @@ +From 3ba08129e38437561df44c36b7ea9081185d5333 Mon Sep 17 00:00:00 2001 +From: Naoya Horiguchi +Date: Wed, 4 Jun 2014 16:11:02 -0700 +Subject: mm/memory-failure.c: support use of a dedicated thread to handle SIGBUS(BUS_MCEERR_AO) + +From: Naoya Horiguchi + +commit 3ba08129e38437561df44c36b7ea9081185d5333 upstream. + +Currently memory error handler handles action optional errors in the +deferred manner by default. And if a recovery aware application wants +to handle it immediately, it can do it by setting PF_MCE_EARLY flag. +However, such signal can be sent only to the main thread, so it's +problematic if the application wants to have a dedicated thread to +handler such signals. + +So this patch adds dedicated thread support to memory error handler. We +have PF_MCE_EARLY flags for each thread separately, so with this patch +AO signal is sent to the thread with PF_MCE_EARLY flag set, not the main +thread. If you want to implement a dedicated thread, you call prctl() +to set PF_MCE_EARLY on the thread. + +Memory error handler collects processes to be killed, so this patch lets +it check PF_MCE_EARLY flag on each thread in the collecting routines. + +No behavioral change for all non-early kill cases. + +Tony said: + +: The old behavior was crazy - someone with a multithreaded process might +: well expect that if they call prctl(PF_MCE_EARLY) in just one thread, then +: that thread would see the SIGBUS with si_code = BUS_MCEERR_A0 - even if +: that thread wasn't the main thread for the process. 
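+
+Usage, per the hwpoison.txt note added below, is a single prctl() call
+made from the designated handler thread (illustrative; PR_MCE_KILL and
+its arguments are the existing prctl API):
+
+	#include <sys/prctl.h>
+
+	/* PF_MCE_EARLY is now a per-thread property, so after this call
+	 * the SIGBUS(BUS_MCEERR_AO) is routed to this thread rather
+	 * than to the thread group leader */
+	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);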
+ +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Naoya Horiguchi +Reviewed-by: Tony Luck +Cc: Kamil Iskra +Cc: Andi Kleen +Cc: Borislav Petkov +Cc: Chen Gong +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/vm/hwpoison.txt | 5 +++ + mm/memory-failure.c | 56 ++++++++++++++++++++++++++++++++---------- + 2 files changed, 48 insertions(+), 13 deletions(-) + +--- a/Documentation/vm/hwpoison.txt ++++ b/Documentation/vm/hwpoison.txt +@@ -84,6 +84,11 @@ PR_MCE_KILL + PR_MCE_KILL_EARLY: Early kill + PR_MCE_KILL_LATE: Late kill + PR_MCE_KILL_DEFAULT: Use system global default ++ Note that if you want to have a dedicated thread which handles ++ the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should ++ call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise, ++ the SIGBUS is sent to the main thread. ++ + PR_MCE_KILL_GET + return current mode + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -380,15 +380,44 @@ static void kill_procs(struct list_head + } + } + +-static int task_early_kill(struct task_struct *tsk, int force_early) ++/* ++ * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO) ++ * on behalf of the thread group. Return task_struct of the (first found) ++ * dedicated thread if found, and return NULL otherwise. ++ * ++ * We already hold read_lock(&tasklist_lock) in the caller, so we don't ++ * have to call rcu_read_lock/unlock() in this function. ++ */ ++static struct task_struct *find_early_kill_thread(struct task_struct *tsk) + { ++ struct task_struct *t; ++ ++ for_each_thread(tsk, t) ++ if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY)) ++ return t; ++ return NULL; ++} ++ ++/* ++ * Determine whether a given process is "early kill" process which expects ++ * to be signaled when some page under the process is hwpoisoned. ++ * Return task_struct of the dedicated thread (main thread unless explicitly ++ * specified) if the process is "early kill," and otherwise returns NULL. 
++ */ ++static struct task_struct *task_early_kill(struct task_struct *tsk, ++ int force_early) ++{ ++ struct task_struct *t; + if (!tsk->mm) +- return 0; ++ return NULL; + if (force_early) +- return 1; +- if (tsk->flags & PF_MCE_PROCESS) +- return !!(tsk->flags & PF_MCE_EARLY); +- return sysctl_memory_failure_early_kill; ++ return tsk; ++ t = find_early_kill_thread(tsk); ++ if (t) ++ return t; ++ if (sysctl_memory_failure_early_kill) ++ return tsk; ++ return NULL; + } + + /* +@@ -410,16 +439,17 @@ static void collect_procs_anon(struct pa + read_lock(&tasklist_lock); + for_each_process (tsk) { + struct anon_vma_chain *vmac; ++ struct task_struct *t = task_early_kill(tsk, force_early); + +- if (!task_early_kill(tsk, force_early)) ++ if (!t) + continue; + anon_vma_interval_tree_foreach(vmac, &av->rb_root, + pgoff, pgoff) { + vma = vmac->vma; + if (!page_mapped_in_vma(page, vma)) + continue; +- if (vma->vm_mm == tsk->mm) +- add_to_kill(tsk, page, vma, to_kill, tkc); ++ if (vma->vm_mm == t->mm) ++ add_to_kill(t, page, vma, to_kill, tkc); + } + } + read_unlock(&tasklist_lock); +@@ -440,10 +470,10 @@ static void collect_procs_file(struct pa + read_lock(&tasklist_lock); + for_each_process(tsk) { + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); ++ struct task_struct *t = task_early_kill(tsk, force_early); + +- if (!task_early_kill(tsk, force_early)) ++ if (!t) + continue; +- + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, + pgoff) { + /* +@@ -453,8 +483,8 @@ static void collect_procs_file(struct pa + * Assume applications who requested early kill want + * to be informed of all such data corruptions. + */ +- if (vma->vm_mm == tsk->mm) +- add_to_kill(tsk, page, vma, to_kill, tkc); ++ if (vma->vm_mm == t->mm) ++ add_to_kill(t, page, vma, to_kill, tkc); + } + } + read_unlock(&tasklist_lock); diff --git a/queue-3.15/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch b/queue-3.15/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch new file mode 100644 index 00000000000..55f9c97df65 --- /dev/null +++ b/queue-3.15/mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch @@ -0,0 +1,237 @@ +From e58469bafd0524e848c3733bc3918d854595e20f Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Wed, 4 Jun 2014 16:10:16 -0700 +Subject: mm: page_alloc: use word-based accesses for get/set pageblock bitmaps + +From: Mel Gorman + +commit e58469bafd0524e848c3733bc3918d854595e20f upstream. + +The test_bit operations in get/set pageblock flags are expensive. This +patch reads the bitmap on a word basis and use shifts and masks to isolate +the bits of interest. Similarly masks are used to set a local copy of the +bitmap and then use cmpxchg to update the bitmap if there have been no +other changes made in parallel. + +In a test running dd onto tmpfs the overhead of the pageblock-related +functions went from 1.27% in profiles to 0.5%. + +In addition to the performance benefits, this patch closes races that are +possible between: + +a) get_ and set_pageblock_migratetype(), where get_pageblock_migratetype() + reads part of the bits before and other part of the bits after + set_pageblock_migratetype() has updated them. + +b) set_pageblock_migratetype() and set_pageblock_skip(), where the non-atomic + read-modify-update set bit operation in set_pageblock_skip() will cause + lost updates to some bits changed in the set_pageblock_migratetype(). + +Joonsoo Kim first reported the case a) via code inspection. 
Vlastimil
+Babka's testing with a debug patch showed that either a) or b) occurs
+roughly once per mmtests' stress-highalloc benchmark (although not
+necessarily in the same pageblock). Furthermore, during development of
+unrelated compaction patches it was observed that with frequent calls to
+{start,undo}_isolate_page_range() the race occurs several thousand
+times and results in NULL pointer dereferences in move_freepages()
+and free_one_page() in places where free_list[migratetype] is
+manipulated by e.g. list_move(). Further debugging confirmed that
+migratetype had an invalid value of 6, causing out-of-bounds access to
+the free_list array.
+
+That confirmed that the race exists, although it may be extremely rare,
+and is currently fatal only where page isolation is performed due to
+memory hot remove. Races on pageblocks being updated by
+set_pageblock_migratetype(), where both the old and new migratetype are
+lower than MIGRATE_RESERVE, currently cannot result in an invalid value
+being observed, although theoretically they may still lead to
+unexpected creation or destruction of MIGRATE_RESERVE pageblocks.
+Furthermore, things could get suddenly worse when memory isolation is
+used more, or when new migratetypes are added.
+
+After this patch, the race is no longer observed in testing.
+
+Signed-off-by: Mel Gorman
+Acked-by: Vlastimil Babka
+Reported-by: Joonsoo Kim
+Reported-and-tested-by: Vlastimil Babka
+Cc: Johannes Weiner
+Cc: Jan Kara
+Cc: Michal Hocko
+Cc: Hugh Dickins
+Cc: Dave Hansen
+Cc: Theodore Ts'o
+Cc: "Paul E. McKenney"
+Cc: Oleg Nesterov
+Cc: Rik van Riel
+Cc: Peter Zijlstra
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/mmzone.h          |    6 +++-
+ include/linux/pageblock-flags.h |   37 +++++++++++++++++++++++-----
+ mm/page_alloc.c                 |   52 ++++++++++++++++++++++++----------------
+ 3 files changed, 68 insertions(+), 27 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -75,9 +75,13 @@ enum {
+
+ extern int page_group_by_mobility_disabled;
+
++#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
++#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
++
+ static inline int get_pageblock_migratetype(struct page *page)
+ {
+-	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
++	BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
++	return get_pageblock_flags_mask(page, PB_migrate_end, MIGRATETYPE_MASK);
+ }
+
+ struct free_area {
+--- a/include/linux/pageblock-flags.h
++++ b/include/linux/pageblock-flags.h
+@@ -30,9 +30,12 @@ enum pageblock_bits {
+ 	PB_migrate,
+ 	PB_migrate_end = PB_migrate + 3 - 1,
+ 			/* 3 bits required for migrate types */
+-#ifdef CONFIG_COMPACTION
+ 	PB_migrate_skip,/* If set the block is skipped by compaction */
+-#endif /* CONFIG_COMPACTION */
++
++	/*
++	 * Assume the bits will always align on a word. If this assumption
++	 * changes then get/set pageblock needs updating.
++	 */
+ 	NR_PAGEBLOCK_BITS
+ };
+
+@@ -62,11 +65,33 @@ extern int pageblock_order;
+ /* Forward declaration */
+ struct page;
+
++unsigned long get_pageblock_flags_mask(struct page *page,
++				unsigned long end_bitidx,
++				unsigned long mask);
++void set_pageblock_flags_mask(struct page *page,
++				unsigned long flags,
++				unsigned long end_bitidx,
++				unsigned long mask);
++
+ /* Declarations for getting and setting flags.
See mm/page_alloc.c */ +-unsigned long get_pageblock_flags_group(struct page *page, +- int start_bitidx, int end_bitidx); +-void set_pageblock_flags_group(struct page *page, unsigned long flags, +- int start_bitidx, int end_bitidx); ++static inline unsigned long get_pageblock_flags_group(struct page *page, ++ int start_bitidx, int end_bitidx) ++{ ++ unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1; ++ unsigned long mask = (1 << nr_flag_bits) - 1; ++ ++ return get_pageblock_flags_mask(page, end_bitidx, mask); ++} ++ ++static inline void set_pageblock_flags_group(struct page *page, ++ unsigned long flags, ++ int start_bitidx, int end_bitidx) ++{ ++ unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1; ++ unsigned long mask = (1 << nr_flag_bits) - 1; ++ ++ set_pageblock_flags_mask(page, flags, end_bitidx, mask); ++} + + #ifdef CONFIG_COMPACTION + #define get_pageblock_skip(page) \ +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -6009,53 +6009,65 @@ static inline int pfn_to_bitidx(struct z + * @end_bitidx: The last bit of interest + * returns pageblock_bits flags + */ +-unsigned long get_pageblock_flags_group(struct page *page, +- int start_bitidx, int end_bitidx) ++unsigned long get_pageblock_flags_mask(struct page *page, ++ unsigned long end_bitidx, ++ unsigned long mask) + { + struct zone *zone; + unsigned long *bitmap; +- unsigned long pfn, bitidx; +- unsigned long flags = 0; +- unsigned long value = 1; ++ unsigned long pfn, bitidx, word_bitidx; ++ unsigned long word; + + zone = page_zone(page); + pfn = page_to_pfn(page); + bitmap = get_pageblock_bitmap(zone, pfn); + bitidx = pfn_to_bitidx(zone, pfn); ++ word_bitidx = bitidx / BITS_PER_LONG; ++ bitidx &= (BITS_PER_LONG-1); + +- for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) +- if (test_bit(bitidx + start_bitidx, bitmap)) +- flags |= value; +- +- return flags; ++ word = bitmap[word_bitidx]; ++ bitidx += end_bitidx; ++ return (word >> (BITS_PER_LONG - bitidx - 1)) & mask; + } + + /** +- * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages ++ * set_pageblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages + * @page: The page within the block of interest + * @start_bitidx: The first bit of interest + * @end_bitidx: The last bit of interest + * @flags: The flags to set + */ +-void set_pageblock_flags_group(struct page *page, unsigned long flags, +- int start_bitidx, int end_bitidx) ++void set_pageblock_flags_mask(struct page *page, unsigned long flags, ++ unsigned long end_bitidx, ++ unsigned long mask) + { + struct zone *zone; + unsigned long *bitmap; +- unsigned long pfn, bitidx; +- unsigned long value = 1; ++ unsigned long pfn, bitidx, word_bitidx; ++ unsigned long old_word, word; ++ ++ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); + + zone = page_zone(page); + pfn = page_to_pfn(page); + bitmap = get_pageblock_bitmap(zone, pfn); + bitidx = pfn_to_bitidx(zone, pfn); ++ word_bitidx = bitidx / BITS_PER_LONG; ++ bitidx &= (BITS_PER_LONG-1); ++ + VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page); + +- for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) +- if (flags & value) +- __set_bit(bitidx + start_bitidx, bitmap); +- else +- __clear_bit(bitidx + start_bitidx, bitmap); ++ bitidx += end_bitidx; ++ mask <<= (BITS_PER_LONG - bitidx - 1); ++ flags <<= (BITS_PER_LONG - bitidx - 1); ++ ++ word = ACCESS_ONCE(bitmap[word_bitidx]); ++ for (;;) { ++ old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags); ++ if 
(word == old_word) ++ break; ++ word = old_word; ++ } + } + + /* diff --git a/queue-3.15/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch b/queue-3.15/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch new file mode 100644 index 00000000000..c471ac22558 --- /dev/null +++ b/queue-3.15/mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch @@ -0,0 +1,94 @@ +From 71abdc15adf8c702a1dd535f8e30df50758848d2 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Fri, 6 Jun 2014 14:35:35 -0700 +Subject: mm: vmscan: clear kswapd's special reclaim powers before exiting + +From: Johannes Weiner + +commit 71abdc15adf8c702a1dd535f8e30df50758848d2 upstream. + +When kswapd exits, it can end up taking locks that were previously held +by allocating tasks while they waited for reclaim. Lockdep currently +warns about this: + +On Wed, May 28, 2014 at 06:06:34PM +0800, Gu Zheng wrote: +> inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-R} usage. +> kswapd2/1151 [HC0[0]:SC0[0]:HE1:SE1] takes: +> (&sig->group_rwsem){+++++?}, at: exit_signals+0x24/0x130 +> {RECLAIM_FS-ON-W} state was registered at: +> mark_held_locks+0xb9/0x140 +> lockdep_trace_alloc+0x7a/0xe0 +> kmem_cache_alloc_trace+0x37/0x240 +> flex_array_alloc+0x99/0x1a0 +> cgroup_attach_task+0x63/0x430 +> attach_task_by_pid+0x210/0x280 +> cgroup_procs_write+0x16/0x20 +> cgroup_file_write+0x120/0x2c0 +> vfs_write+0xc0/0x1f0 +> SyS_write+0x4c/0xa0 +> tracesys+0xdd/0xe2 +> irq event stamp: 49 +> hardirqs last enabled at (49): _raw_spin_unlock_irqrestore+0x36/0x70 +> hardirqs last disabled at (48): _raw_spin_lock_irqsave+0x2b/0xa0 +> softirqs last enabled at (0): copy_process.part.24+0x627/0x15f0 +> softirqs last disabled at (0): (null) +> +> other info that might help us debug this: +> Possible unsafe locking scenario: +> +> CPU0 +> ---- +> lock(&sig->group_rwsem); +> +> lock(&sig->group_rwsem); +> +> *** DEADLOCK *** +> +> no locks held by kswapd2/1151. +> +> stack backtrace: +> CPU: 30 PID: 1151 Comm: kswapd2 Not tainted 3.10.39+ #4 +> Call Trace: +> dump_stack+0x19/0x1b +> print_usage_bug+0x1f7/0x208 +> mark_lock+0x21d/0x2a0 +> __lock_acquire+0x52a/0xb60 +> lock_acquire+0xa2/0x140 +> down_read+0x51/0xa0 +> exit_signals+0x24/0x130 +> do_exit+0xb5/0xa50 +> kthread+0xdb/0x100 +> ret_from_fork+0x7c/0xb0 + +This is because the kswapd thread is still marked as a reclaimer at the +time of exit. But because it is exiting, nobody is actually waiting on +it to make reclaim progress anymore, and it's nothing but a regular +thread at this point. Be tidy and strip it of all its powers +(PF_MEMALLOC, PF_SWAPWRITE, PF_KSWAPD, and the lockdep reclaim state) +before returning from the thread function. 
+
+Signed-off-by: Johannes Weiner
+Reported-by: Gu Zheng
+Cc: Yasuaki Ishimatsu
+Cc: Tang Chen
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/vmscan.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3333,7 +3333,10 @@ static int kswapd(void *p)
+ 		}
+ 	}
+
++	tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+ 	current->reclaim_state = NULL;
++	lockdep_clear_current_reclaim_state();
++
+ 	return 0;
+ }
+
diff --git a/queue-3.15/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch b/queue-3.15/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
new file mode 100644
index 00000000000..505edf16aa2
--- /dev/null
+++ b/queue-3.15/mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
@@ -0,0 +1,106 @@
+From 675becce15f320337499bc1a9356260409a5ba29 Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Wed, 4 Jun 2014 16:07:35 -0700
+Subject: mm: vmscan: do not throttle based on pfmemalloc reserves if node has no ZONE_NORMAL
+
+From: Mel Gorman
+
+commit 675becce15f320337499bc1a9356260409a5ba29 upstream.
+
+throttle_direct_reclaim() is meant to trigger during swap-over-network
+during which the min watermark is treated as a pfmemalloc reserve. It
+throttles on the first node in the zonelist, but this is flawed.
+
+The user-visible impact is that a process running on a CPU whose local
+memory node has no ZONE_NORMAL will stall for prolonged periods of time,
+possibly indefinitely. This is due to throttle_direct_reclaim thinking
+the pfmemalloc reserves are depleted when in fact they don't exist on
+that node.
+
+On a NUMA machine running a 32-bit kernel (I know) allocation requests
+from CPUs on node 1 would detect no pfmemalloc reserves and the process
+gets throttled. This patch adjusts throttling of direct reclaim to
+throttle based on the first node in the zonelist that has a usable
+ZONE_NORMAL or lower zone.
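+
+As a minimal userspace sketch of the fixed check (the zone array and
+field names below are invented for illustration; the kernel operates
+on struct zone and its real watermarks), the reserve accounting now
+skips unpopulated zones and refuses to throttle when a node has no
+reserves at all:
+
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	#define ZONE_NORMAL 2
+
+	struct zone { unsigned long present, min_wmark, free; };
+
+	static bool pfmemalloc_watermark_ok(const struct zone *z)
+	{
+		unsigned long reserve = 0, free = 0;
+		int i;
+
+		for (i = 0; i <= ZONE_NORMAL; i++) {
+			if (!z[i].present)	/* skip unpopulated zones */
+				continue;
+			reserve += z[i].min_wmark;
+			free += z[i].free;
+		}
+		if (!reserve)	/* no reserves on this node: never throttle */
+			return true;
+		return free > reserve / 2;
+	}
+
+	int main(void)
+	{
+		/* node with no populated ZONE_NORMAL or lower zone */
+		struct zone node1[ZONE_NORMAL + 1] = { { 0 } };
+
+		printf("throttle: %s\n",
+		       pfmemalloc_watermark_ok(node1) ? "no" : "yes");
+		return 0;
+	}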
+ +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmscan.c | 43 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 37 insertions(+), 6 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2525,10 +2525,17 @@ static bool pfmemalloc_watermark_ok(pg_d + + for (i = 0; i <= ZONE_NORMAL; i++) { + zone = &pgdat->node_zones[i]; ++ if (!populated_zone(zone)) ++ continue; ++ + pfmemalloc_reserve += min_wmark_pages(zone); + free_pages += zone_page_state(zone, NR_FREE_PAGES); + } + ++ /* If there are no reserves (unexpected config) then do not throttle */ ++ if (!pfmemalloc_reserve) ++ return true; ++ + wmark_ok = free_pages > pfmemalloc_reserve / 2; + + /* kswapd must be awake if processes are being throttled */ +@@ -2553,9 +2560,9 @@ static bool pfmemalloc_watermark_ok(pg_d + static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, + nodemask_t *nodemask) + { ++ struct zoneref *z; + struct zone *zone; +- int high_zoneidx = gfp_zone(gfp_mask); +- pg_data_t *pgdat; ++ pg_data_t *pgdat = NULL; + + /* + * Kernel threads should not be throttled as they may be indirectly +@@ -2574,10 +2581,34 @@ static bool throttle_direct_reclaim(gfp_ + if (fatal_signal_pending(current)) + goto out; + +- /* Check if the pfmemalloc reserves are ok */ +- first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone); +- pgdat = zone->zone_pgdat; +- if (pfmemalloc_watermark_ok(pgdat)) ++ /* ++ * Check if the pfmemalloc reserves are ok by finding the first node ++ * with a usable ZONE_NORMAL or lower zone. The expectation is that ++ * GFP_KERNEL will be required for allocating network buffers when ++ * swapping over the network so ZONE_HIGHMEM is unusable. ++ * ++ * Throttling is based on the first usable node and throttled processes ++ * wait on a queue until kswapd makes progress and wakes them. There ++ * is an affinity then between processes waking up and where reclaim ++ * progress has been made assuming the process wakes on the same node. ++ * More importantly, processes running on remote nodes will not compete ++ * for remote pfmemalloc reserves and processes on different nodes ++ * should make reasonable progress. ++ */ ++ for_each_zone_zonelist_nodemask(zone, z, zonelist, ++ gfp_mask, nodemask) { ++ if (zone_idx(zone) > ZONE_NORMAL) ++ continue; ++ ++ /* Throttle based on the first usable node */ ++ pgdat = zone->zone_pgdat; ++ if (pfmemalloc_watermark_ok(pgdat)) ++ goto out; ++ break; ++ } ++ ++ /* If no zone was usable by the allocation flags then do not throttle */ ++ if (!pgdat) + goto out; + + /* Account for the throttling */ diff --git a/queue-3.15/ptrace-fix-fork-event-messages-across-pid-namespaces.patch b/queue-3.15/ptrace-fix-fork-event-messages-across-pid-namespaces.patch new file mode 100644 index 00000000000..10e4568a61c --- /dev/null +++ b/queue-3.15/ptrace-fix-fork-event-messages-across-pid-namespaces.patch @@ -0,0 +1,118 @@ +From 4e52365f279564cef0ddd41db5237f0471381093 Mon Sep 17 00:00:00 2001 +From: Matthew Dempsky +Date: Fri, 6 Jun 2014 14:36:42 -0700 +Subject: ptrace: fix fork event messages across pid namespaces + +From: Matthew Dempsky + +commit 4e52365f279564cef0ddd41db5237f0471381093 upstream. + +When tracing a process in another pid namespace, it's important for fork +event messages to contain the child's pid as seen from the tracer's pid +namespace, not the parent's. 
Otherwise, the tracer won't be able to +correlate the fork event with later SIGTRAP signals it receives from the +child. + +We still risk a race condition if a ptracer from a different pid +namespace attaches after we compute the pid_t value. However, sending a +bogus fork event message in this unlikely scenario is still a vast +improvement over the status quo where we always send bogus fork event +messages to debuggers in a different pid namespace than the forking +process. + +Signed-off-by: Matthew Dempsky +Acked-by: Oleg Nesterov +Cc: Kees Cook +Cc: Julien Tinnes +Cc: Roland McGrath +Cc: Jan Kratochvil +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/ptrace.h | 32 ++++++++++++++++++++++++++++++++ + kernel/fork.c | 10 +++++++--- + 2 files changed, 39 insertions(+), 3 deletions(-) + +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -5,6 +5,7 @@ + #include /* For struct task_struct. */ + #include /* for IS_ERR_VALUE */ + #include /* For BUG_ON. */ ++#include /* For task_active_pid_ns. */ + #include + + /* +@@ -129,6 +130,37 @@ static inline void ptrace_event(int even + } + + /** ++ * ptrace_event_pid - possibly stop for a ptrace event notification ++ * @event: %PTRACE_EVENT_* value to report ++ * @pid: process identifier for %PTRACE_GETEVENTMSG to return ++ * ++ * Check whether @event is enabled and, if so, report @event and @pid ++ * to the ptrace parent. @pid is reported as the pid_t seen from the ++ * the ptrace parent's pid namespace. ++ * ++ * Called without locks. ++ */ ++static inline void ptrace_event_pid(int event, struct pid *pid) ++{ ++ /* ++ * FIXME: There's a potential race if a ptracer in a different pid ++ * namespace than parent attaches between computing message below and ++ * when we acquire tasklist_lock in ptrace_stop(). If this happens, ++ * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG. 
++ */ ++ unsigned long message = 0; ++ struct pid_namespace *ns; ++ ++ rcu_read_lock(); ++ ns = task_active_pid_ns(rcu_dereference(current->parent)); ++ if (ns) ++ message = pid_nr_ns(pid, ns); ++ rcu_read_unlock(); ++ ++ ptrace_event(event, message); ++} ++ ++/** + * ptrace_init_task - initialize ptrace state for a new child + * @child: new child task + * @ptrace: true if child should be ptrace'd by parent's tracer +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1606,10 +1606,12 @@ long do_fork(unsigned long clone_flags, + */ + if (!IS_ERR(p)) { + struct completion vfork; ++ struct pid *pid; + + trace_sched_process_fork(current, p); + +- nr = task_pid_vnr(p); ++ pid = get_task_pid(p, PIDTYPE_PID); ++ nr = pid_vnr(pid); + + if (clone_flags & CLONE_PARENT_SETTID) + put_user(nr, parent_tidptr); +@@ -1624,12 +1626,14 @@ long do_fork(unsigned long clone_flags, + + /* forking complete and child started to run, tell ptracer */ + if (unlikely(trace)) +- ptrace_event(trace, nr); ++ ptrace_event_pid(trace, pid); + + if (clone_flags & CLONE_VFORK) { + if (!wait_for_vfork_done(p, &vfork)) +- ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); ++ ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); + } ++ ++ put_pid(pid); + } else { + nr = PTR_ERR(p); + } diff --git a/queue-3.15/regulator-s2mpa01-fix-accidental-enable-of-buck4-ramp-delay.patch b/queue-3.15/regulator-s2mpa01-fix-accidental-enable-of-buck4-ramp-delay.patch new file mode 100644 index 00000000000..c6438f331a4 --- /dev/null +++ b/queue-3.15/regulator-s2mpa01-fix-accidental-enable-of-buck4-ramp-delay.patch @@ -0,0 +1,49 @@ +From 51e2fc0a251ba64c68207e4c6f6ac33c891b2465 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Tue, 6 May 2014 08:37:37 +0200 +Subject: regulator: s2mpa01: Fix accidental enable of buck4 ramp delay + +From: Krzysztof Kozlowski + +commit 51e2fc0a251ba64c68207e4c6f6ac33c891b2465 upstream. + +S2MPA01 supports enabling/disabling ramp delay only for buck[1234]. +Other bucks have ramp delay enabled always. + +However the bit shift for enabling buck4 ramp delay in register is equal +to 0. When ramp delay was set for the bucks unsupporting enable/disable +(buck[56789] and buck10), the ramp delay for buck4 was also enabled. 
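+
+As a toy illustration of the mechanism (the register layout here is
+invented for the example, not the real S2MPA01 map): with enable_shift
+left at its default of 0, the "enable" mask becomes 1 << 0, so the
+unconditional register update flips buck4's bit even when a different
+buck is being configured:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned int ramp1 = 0;		/* model of the ramp register */
+		unsigned int enable_shift = 0;	/* never assigned for buck[5-10] */
+
+		/* old code path: update_bits(1 << enable_shift) for any buck */
+		ramp1 |= 1u << enable_shift;
+
+		/* bit 0 is buck4's ramp enable in this model */
+		printf("RAMP1 = %#x (buck4 accidentally enabled)\n", ramp1);
+		return 0;
+	}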
+ +Fixes: f7b1a8dc1c1c ("regulator: s2mpa01: Don't check enable_shift before setting enable ramp rate") +Signed-off-by: Krzysztof Kozlowski +Reviewed-by: Axel Lin +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/regulator/s2mpa01.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/drivers/regulator/s2mpa01.c ++++ b/drivers/regulator/s2mpa01.c +@@ -191,11 +191,15 @@ static int s2mpa01_set_ramp_delay(struct + if (!ramp_enable) + goto ramp_disable; + +- ret = regmap_update_bits(rdev->regmap, S2MPA01_REG_RAMP1, +- 1 << enable_shift, 1 << enable_shift); +- if (ret) { +- dev_err(&rdev->dev, "failed to enable ramp rate\n"); +- return ret; ++ /* Ramp delay can be enabled/disabled only for buck[1234] */ ++ if (rdev_get_id(rdev) >= S2MPA01_BUCK1 && ++ rdev_get_id(rdev) <= S2MPA01_BUCK4) { ++ ret = regmap_update_bits(rdev->regmap, S2MPA01_REG_RAMP1, ++ 1 << enable_shift, 1 << enable_shift); ++ if (ret) { ++ dev_err(&rdev->dev, "failed to enable ramp rate\n"); ++ return ret; ++ } + } + + ramp_val = get_ramp_delay(ramp_delay); diff --git a/queue-3.15/regulator-s2mpa01-use-correct-register-for-buck1-ramp-delay.patch b/queue-3.15/regulator-s2mpa01-use-correct-register-for-buck1-ramp-delay.patch new file mode 100644 index 00000000000..3c467c356a4 --- /dev/null +++ b/queue-3.15/regulator-s2mpa01-use-correct-register-for-buck1-ramp-delay.patch @@ -0,0 +1,36 @@ +From 112da5cb43427b843e49b8710f53ecdbb3471d9f Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Mon, 26 May 2014 10:26:46 +0200 +Subject: regulator: s2mpa01: Use correct register for buck1 ramp delay + +From: Krzysztof Kozlowski + +commit 112da5cb43427b843e49b8710f53ecdbb3471d9f upstream. + +Fix the register for ramp delay of buck1 regulator. Buck1 and buck6 +share the field (offset 4) in ramp delay register S2MPA01_REG_RAMP2. + +The driver used the same register and field for ramp delay of buck3 and +buck1. This lead to updating of ramp delay of buck3 when setting buck1 +and actually the ramp delay of buck1 was never set. + +Fixes: f18792714608 ("regulator: Add support for S2MPA01 regulator") +Signed-off-by: Krzysztof Kozlowski +Reviewed-by: Sachin Kamat +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/regulator/s2mpa01.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/regulator/s2mpa01.c ++++ b/drivers/regulator/s2mpa01.c +@@ -116,7 +116,6 @@ static int s2mpa01_set_ramp_delay(struct + ramp_delay = s2mpa01->ramp_delay16; + + ramp_shift = S2MPA01_BUCK16_RAMP_SHIFT; +- ramp_reg = S2MPA01_REG_RAMP1; + break; + case S2MPA01_BUCK2: + enable_shift = S2MPA01_BUCK2_RAMP_EN_SHIFT; diff --git a/queue-3.15/regulator-s2mps11-fix-accidental-enable-of-buck6-ramp-delay.patch b/queue-3.15/regulator-s2mps11-fix-accidental-enable-of-buck6-ramp-delay.patch new file mode 100644 index 00000000000..c06e5b76c0f --- /dev/null +++ b/queue-3.15/regulator-s2mps11-fix-accidental-enable-of-buck6-ramp-delay.patch @@ -0,0 +1,50 @@ +From b203e0dfe1a2b0ae5e2681e9285056e4ae8560af Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Tue, 6 May 2014 08:37:36 +0200 +Subject: regulator: s2mps11: Fix accidental enable of buck6 ramp delay + +From: Krzysztof Kozlowski + +commit b203e0dfe1a2b0ae5e2681e9285056e4ae8560af upstream. + +S2MPS11 supports enabling/disabling ramp delay only for buck[2346]. +Other bucks have ramp delay enabled always. + +However the bit shift for enabling buck6 ramp delay in register is equal +to 0. 
When ramp delay was set for the bucks unsupporting enable/disable +(buck[15789] and buck10), the ramp delay for buck6 was also enabled. + +Fixes: b96244fad953 ("regulator: s2mps11: Don't check enable_shift before setting enable ramp rate") +Signed-off-by: Krzysztof Kozlowski +Reviewed-by: Axel Lin +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/regulator/s2mps11.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/drivers/regulator/s2mps11.c ++++ b/drivers/regulator/s2mps11.c +@@ -202,11 +202,16 @@ static int s2mps11_set_ramp_delay(struct + if (!ramp_enable) + goto ramp_disable; + +- ret = regmap_update_bits(rdev->regmap, S2MPS11_REG_RAMP, +- 1 << enable_shift, 1 << enable_shift); +- if (ret) { +- dev_err(&rdev->dev, "failed to enable ramp rate\n"); +- return ret; ++ /* Ramp delay can be enabled/disabled only for buck[2346] */ ++ if ((rdev_get_id(rdev) >= S2MPS11_BUCK2 && ++ rdev_get_id(rdev) <= S2MPS11_BUCK4) || ++ rdev_get_id(rdev) == S2MPS11_BUCK6) { ++ ret = regmap_update_bits(rdev->regmap, S2MPS11_REG_RAMP, ++ 1 << enable_shift, 1 << enable_shift); ++ if (ret) { ++ dev_err(&rdev->dev, "failed to enable ramp rate\n"); ++ return ret; ++ } + } + + ramp_val = get_ramp_delay(ramp_delay); diff --git a/queue-3.15/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch b/queue-3.15/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch new file mode 100644 index 00000000000..4b2f08c0eef --- /dev/null +++ b/queue-3.15/s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch @@ -0,0 +1,63 @@ +From 993072ee67aa179c48c85eb19869804e68887d86 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Mon, 26 May 2014 21:55:08 +0200 +Subject: s390/lowcore: reserve 96 bytes for IRB in lowcore + +From: Christian Borntraeger + +commit 993072ee67aa179c48c85eb19869804e68887d86 upstream. + +The IRB might be 96 bytes if the extended-I/O-measurement facility is +used. This feature is currently not used by Linux, but struct irb +already has the emw defined. So let's make the irb in lowcore match the +size of the internal data structure to be future proof. +We also have to add a pad, to correctly align the paste. + +The bigger irb field also circumvents a bug in some QEMU versions that +always write the emw field on test subchannel and therefore destroy the +paste definitions of this CPU. Running under these QEMU version broke +some timing functions in the VDSO and all users of these functions, +e.g. some JREs. + +Signed-off-by: Christian Borntraeger +Signed-off-by: Martin Schwidefsky +Cc: Heiko Carstens +Cc: Sebastian Ott +Cc: Cornelia Huck +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/include/asm/lowcore.h | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/arch/s390/include/asm/lowcore.h ++++ b/arch/s390/include/asm/lowcore.h +@@ -142,9 +142,9 @@ struct _lowcore { + __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ + + /* Interrupt response block */ +- __u8 irb[64]; /* 0x0300 */ ++ __u8 irb[96]; /* 0x0300 */ + +- __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ ++ __u8 pad_0x0360[0x0e00-0x0360]; /* 0x0360 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information +@@ -288,12 +288,13 @@ struct _lowcore { + __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ + + /* Interrupt response block. 
*/
+-	__u8	irb[64];			/* 0x0400 */
++	__u8	irb[96];			/* 0x0400 */
++	__u8	pad_0x0460[0x0480-0x0460];	/* 0x0460 */
+
+ 	/* Per cpu primary space access list */
+-	__u32	paste[16];			/* 0x0440 */
++	__u32	paste[16];			/* 0x0480 */
+
+-	__u8	pad_0x0480[0x0e00-0x0480];	/* 0x0480 */
++	__u8	pad_0x04c0[0x0e00-0x04c0];	/* 0x04c0 */
+
+ 	/*
+ 	 * 0xe00 contains the address of the IPL Parameter Information
diff --git a/queue-3.15/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch b/queue-3.15/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch
new file mode 100644
index 00000000000..1797a630f49
--- /dev/null
+++ b/queue-3.15/s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch
@@ -0,0 +1,32 @@
+From b6f4296279ab3ada554d993d12844272fd86b36a Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky
+Date: Tue, 20 May 2014 17:21:35 +0200
+Subject: s390/time: cast tv_nsec to u64 prior to shift in update_vsyscall
+
+From: Martin Schwidefsky
+
+commit b6f4296279ab3ada554d993d12844272fd86b36a upstream.
+
+Analogous to git commit 28b92e09e25bdc0ae864b22eacf195a74f861389,
+first cast tk->wall_to_monotonic.tv_nsec to u64 before doing
+the shift by tk->shift to avoid losing relevant bits on a
+32-bit kernel.
+
+Signed-off-by: Martin Schwidefsky
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/s390/kernel/time.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/time.c
++++ b/arch/s390/kernel/time.c
+@@ -226,7 +226,7 @@ void update_vsyscall(struct timekeeper *
+ 	vdso_data->wtom_clock_sec =
+ 		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ 	vdso_data->wtom_clock_nsec = tk->xtime_nsec +
+-		+ (tk->wall_to_monotonic.tv_nsec << tk->shift);
++		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
+ 	nsecps = (u64) NSEC_PER_SEC << tk->shift;
+ 	while (vdso_data->wtom_clock_nsec >= nsecps) {
+ 		vdso_data->wtom_clock_nsec -= nsecps;
diff --git a/queue-3.15/series b/queue-3.15/series
index edfa00c8af8..7d1e004e0f0 100644
--- a/queue-3.15/series
+++ b/queue-3.15/series
@@ -18,3 +18,34 @@ usb-dwc3-gadget-clear-stall-when-disabling-endpoint.patch
 arm-omap-replace-checks-for-config_usb_gadget_omap.patch
 usb-ehci-avoid-bios-handover-on-the-hasee-e200.patch
 usb-option-fix-runtime-pm-handling.patch
+tools-vm-page-types.c-catch-sigbus-if-raced-with-truncate.patch
+hugetlb-restrict-hugepage_migration_support-to-x86_64.patch
+kthread-fix-return-value-of-kthread_create-upon-sigkill.patch
+mm-vmscan-do-not-throttle-based-on-pfmemalloc-reserves-if-node-has-no-zone_normal.patch
+memcg-do-not-hang-on-oom-when-killed-by-userspace-oom-access-to-memory-reserves.patch
+mm-page_alloc-use-word-based-accesses-for-get-set-pageblock-bitmaps.patch
+mm-memory-failure.c-failure-send-right-signal-code-to-correct-thread.patch
+mm-memory-failure.c-don-t-let-collect_procs-skip-over-processes-for-mf_action_required.patch
+mm-memory-failure.c-support-use-of-a-dedicated-thread-to-handle-sigbus-bus_mceerr_ao.patch
+zram-correct-offset-usage-in-zram_bio_discard.patch
+mm-fix-sleeping-function-warning-from-__put_anon_vma.patch
+hid-core-fix-validation-of-report-id-0.patch
+mm-vmscan-clear-kswapd-s-special-reclaim-powers-before-exiting.patch
+ptrace-fix-fork-event-messages-across-pid-namespaces.patch
+arm64-ptrace-change-fs-when-passing-kernel-pointer-to-regset-code.patch
+arm64-ptrace-fix-empty-registers-set-in-prstatus-of-aarch32-process-core.patch
+idr-fix-overflow-bug-during-maximum-id-calculation-at-maximum-height.patch
+s390-time-cast-tv_nsec-to-u64-prior-to-shift-in-update_vsyscall.patch
+s390-lowcore-reserve-96-bytes-for-irb-in-lowcore.patch +regulator-s2mpa01-use-correct-register-for-buck1-ramp-delay.patch +regulator-s2mps11-fix-accidental-enable-of-buck6-ramp-delay.patch +regulator-s2mpa01-fix-accidental-enable-of-buck4-ramp-delay.patch +ext4-fix-data-integrity-sync-in-ordered-mode.patch +ext4-fix-zeroing-of-page-during-writeback.patch +ext4-fix-zero_range-test-failure-in-data-journalling.patch +ext4-fix-wrong-assert-in-ext4_mb_normalize_request.patch +matroxfb-perform-a-dummy-read-of-m_status.patch +usb-usb_wwan-fix-urb-leak-in-write-error-path.patch +usb-usb_wwan-fix-race-between-write-and-resume.patch +usb-usb_wwan-fix-write-and-suspend-race.patch +usb-usb_wwan-fix-urb-leak-at-shutdown.patch diff --git a/queue-3.15/tools-vm-page-types.c-catch-sigbus-if-raced-with-truncate.patch b/queue-3.15/tools-vm-page-types.c-catch-sigbus-if-raced-with-truncate.patch new file mode 100644 index 00000000000..5fe294d1c81 --- /dev/null +++ b/queue-3.15/tools-vm-page-types.c-catch-sigbus-if-raced-with-truncate.patch @@ -0,0 +1,113 @@ +From 1d46598b7903cd5ec83c49adbd741f43bb0ffcdc Mon Sep 17 00:00:00 2001 +From: Konstantin Khlebnikov +Date: Wed, 4 Jun 2014 16:05:30 -0700 +Subject: tools/vm/page-types.c: catch sigbus if raced with truncate + +From: Konstantin Khlebnikov + +commit 1d46598b7903cd5ec83c49adbd741f43bb0ffcdc upstream. + +Recently added page-cache dumping is known to be a little bit racy. +But after race with truncate it just dies due to unhandled SIGBUS +when it tries to poke pages beyond the new end of file. +This patch adds handler for SIGBUS which skips the rest of the file. + +Signed-off-by: Konstantin Khlebnikov +Cc: Naoya Horiguchi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + tools/vm/page-types.c | 35 ++++++++++++++++++++++++++++++++--- + 1 file changed, 32 insertions(+), 3 deletions(-) + +--- a/tools/vm/page-types.c ++++ b/tools/vm/page-types.c +@@ -32,6 +32,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -824,21 +826,38 @@ static void show_file(const char *name, + atime, now - st->st_atime); + } + ++static sigjmp_buf sigbus_jmp; ++ ++static void * volatile sigbus_addr; ++ ++static void sigbus_handler(int sig, siginfo_t *info, void *ucontex) ++{ ++ (void)sig; ++ (void)ucontex; ++ sigbus_addr = info ? info->si_addr : NULL; ++ siglongjmp(sigbus_jmp, 1); ++} ++ ++static struct sigaction sigbus_action = { ++ .sa_sigaction = sigbus_handler, ++ .sa_flags = SA_SIGINFO, ++}; ++ + static void walk_file(const char *name, const struct stat *st) + { + uint8_t vec[PAGEMAP_BATCH]; + uint64_t buf[PAGEMAP_BATCH], flags; + unsigned long nr_pages, pfn, i; ++ off_t off, end = st->st_size; + int fd; +- off_t off; + ssize_t len; + void *ptr; + int first = 1; + + fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW); + +- for (off = 0; off < st->st_size; off += len) { +- nr_pages = (st->st_size - off + page_size - 1) / page_size; ++ for (off = 0; off < end; off += len) { ++ nr_pages = (end - off + page_size - 1) / page_size; + if (nr_pages > PAGEMAP_BATCH) + nr_pages = PAGEMAP_BATCH; + len = nr_pages * page_size; +@@ -855,11 +874,19 @@ static void walk_file(const char *name, + if (madvise(ptr, len, MADV_RANDOM)) + fatal("madvice failed: %s", name); + ++ if (sigsetjmp(sigbus_jmp, 1)) { ++ end = off + sigbus_addr ? 
sigbus_addr - ptr : 0;
++		fprintf(stderr, "got sigbus at offset %lld: %s\n",
++				(long long)end, name);
++		goto got_sigbus;
++	}
++
+ 	/* populate ptes */
+ 	for (i = 0; i < nr_pages ; i++) {
+ 		if (vec[i] & 1)
+ 			(void)*(volatile int *)(ptr + i * page_size);
+ 	}
++got_sigbus:
+
+ 	/* turn off harvesting reference bits */
+ 	if (madvise(ptr, len, MADV_SEQUENTIAL))
+@@ -910,6 +937,7 @@ static void walk_page_cache(void)
+
+ 	kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
+ 	pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
++	sigaction(SIGBUS, &sigbus_action, NULL);
+
+ 	if (stat(opt_file, &st))
+ 		fatal("stat failed: %s\n", opt_file);
+@@ -925,6 +953,7 @@ static void walk_page_cache(void)
+
+ 	close(kpageflags_fd);
+ 	close(pagemap_fd);
++	signal(SIGBUS, SIG_DFL);
+ }
+
+ static void parse_file(const char *name)
diff --git a/queue-3.15/usb-usb_wwan-fix-race-between-write-and-resume.patch b/queue-3.15/usb-usb_wwan-fix-race-between-write-and-resume.patch
new file mode 100644
index 00000000000..ed40379ceb0
--- /dev/null
+++ b/queue-3.15/usb-usb_wwan-fix-race-between-write-and-resume.patch
@@ -0,0 +1,65 @@
+From d9e93c08d8d985e5ef89436ebc9f4aad7e31559f Mon Sep 17 00:00:00 2001
+From: xiao jin
+Date: Mon, 26 May 2014 19:23:14 +0200
+Subject: USB: usb_wwan: fix race between write and resume
+
+From: xiao jin
+
+commit d9e93c08d8d985e5ef89436ebc9f4aad7e31559f upstream.
+
+We found a race between write and resume. usb_wwan_resume runs
+play_delayed() and drops the spinlock, but intfdata->suspended has not
+yet been set to zero. If usb_wwan_write is called at this point, it
+anchors the urb to the delayed list. Resume then keeps running, but the
+delayed urb has no chance to be submitted until the next resume. If the
+next resume is far away, the tty will be blocked in tty_wait_until_sent
+the whole time. The race can also lead to writes being reordered.
+
+This patch puts play_delayed() and the clearing of intfdata->suspended
+inside the same critical section to avoid the write race during resume.
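+
+A sketch of the fixed ordering, with a pthread mutex standing in for
+susp_lock and invented helper names (this is a model, not the driver's
+API): replaying the delayed urbs and clearing the suspended flag now
+happen in one critical section, so a concurrent write can no longer
+queue an urb in between that nothing would ever submit:
+
+	#include <pthread.h>
+	#include <stdbool.h>
+
+	static pthread_mutex_t susp_lock = PTHREAD_MUTEX_INITIALIZER;
+	static bool suspended = true;
+	static int delayed;	/* urbs queued while suspended */
+
+	static void play_delayed(void)
+	{
+		delayed = 0;	/* submit everything queued so far */
+	}
+
+	static void wwan_write(void)
+	{
+		pthread_mutex_lock(&susp_lock);
+		if (suspended)
+			delayed++;	/* resume must pick this up */
+		/* else: submit the urb directly */
+		pthread_mutex_unlock(&susp_lock);
+	}
+
+	static void wwan_resume(void)
+	{
+		pthread_mutex_lock(&susp_lock);
+		play_delayed();
+		suspended = false;	/* same critical section as the replay */
+		pthread_mutex_unlock(&susp_lock);
+	}
+
+	int main(void)
+	{
+		wwan_write();	/* queued: suspended is still set */
+		wwan_resume();	/* replays and clears the flag atomically */
+		return delayed;	/* 0: nothing left stranded */
+	}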
+ +Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the +option driver") + +Signed-off-by: xiao jin +Signed-off-by: Zhang, Qi1 +Reviewed-by: David Cohen +Signed-off-by: Johan Hovold +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/serial/usb_wwan.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/usb/serial/usb_wwan.c ++++ b/drivers/usb/serial/usb_wwan.c +@@ -660,17 +660,15 @@ int usb_wwan_resume(struct usb_serial *s + } + } + ++ spin_lock_irq(&intfdata->susp_lock); + for (i = 0; i < serial->num_ports; i++) { + /* walk all ports */ + port = serial->port[i]; + portdata = usb_get_serial_port_data(port); + + /* skip closed ports */ +- spin_lock_irq(&intfdata->susp_lock); +- if (!portdata || !portdata->opened) { +- spin_unlock_irq(&intfdata->susp_lock); ++ if (!portdata || !portdata->opened) + continue; +- } + + for (j = 0; j < N_IN_URB; j++) { + urb = portdata->in_urbs[j]; +@@ -683,9 +681,7 @@ int usb_wwan_resume(struct usb_serial *s + } + } + play_delayed(port); +- spin_unlock_irq(&intfdata->susp_lock); + } +- spin_lock_irq(&intfdata->susp_lock); + intfdata->suspended = 0; + spin_unlock_irq(&intfdata->susp_lock); + err_out: diff --git a/queue-3.15/usb-usb_wwan-fix-urb-leak-at-shutdown.patch b/queue-3.15/usb-usb_wwan-fix-urb-leak-at-shutdown.patch new file mode 100644 index 00000000000..1b45f062605 --- /dev/null +++ b/queue-3.15/usb-usb_wwan-fix-urb-leak-at-shutdown.patch @@ -0,0 +1,93 @@ +From 79eed03e77d481b55d85d1cfe5a1636a0d3897fd Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Mon, 26 May 2014 19:23:16 +0200 +Subject: USB: usb_wwan: fix urb leak at shutdown + +From: Johan Hovold + +commit 79eed03e77d481b55d85d1cfe5a1636a0d3897fd upstream. + +The delayed-write queue was never emptied at shutdown (close), something +which could lead to leaked urbs if the port is closed before being +runtime resumed due to a write. + +When this happens the output buffer would not drain on close +(closing_wait timeout), and after consecutive opens, writes could be +corrupted with previously buffered data, transfered with reduced +throughput or completely blocked. + +Note that unbusy_queued_urb() was simply moved out of CONFIG_PM. 
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: Johan Hovold
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/usb/serial/usb_wwan.c |   34 ++++++++++++++++++++++------------
+ 1 file changed, 22 insertions(+), 12 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -414,12 +414,26 @@ int usb_wwan_open(struct tty_struct *tty
+ }
+ EXPORT_SYMBOL(usb_wwan_open);
+
++static void unbusy_queued_urb(struct urb *urb,
++			      struct usb_wwan_port_private *portdata)
++{
++	int i;
++
++	for (i = 0; i < N_OUT_URB; i++) {
++		if (urb == portdata->out_urbs[i]) {
++			clear_bit(i, &portdata->out_busy);
++			break;
++		}
++	}
++}
++
+ void usb_wwan_close(struct usb_serial_port *port)
+ {
+ 	int i;
+ 	struct usb_serial *serial = port->serial;
+ 	struct usb_wwan_port_private *portdata;
+ 	struct usb_wwan_intf_private *intfdata = port->serial->private;
++	struct urb *urb;
+
+ 	portdata = usb_get_serial_port_data(port);
+
+@@ -428,6 +442,14 @@ void usb_wwan_close(struct usb_serial_po
+ 	portdata->opened = 0;
+ 	spin_unlock_irq(&intfdata->susp_lock);
+
++	for (;;) {
++		urb = usb_get_from_anchor(&portdata->delayed);
++		if (!urb)
++			break;
++		unbusy_queued_urb(urb, portdata);
++		usb_autopm_put_interface_async(serial->interface);
++	}
++
+ 	for (i = 0; i < N_IN_URB; i++)
+ 		usb_kill_urb(portdata->in_urbs[i]);
+ 	for (i = 0; i < N_OUT_URB; i++)
+@@ -596,18 +618,6 @@ int usb_wwan_suspend(struct usb_serial *
+ }
+ EXPORT_SYMBOL(usb_wwan_suspend);
+
+-static void unbusy_queued_urb(struct urb *urb, struct usb_wwan_port_private *portdata)
+-{
+-	int i;
+-
+-	for (i = 0; i < N_OUT_URB; i++) {
+-		if (urb == portdata->out_urbs[i]) {
+-			clear_bit(i, &portdata->out_busy);
+-			break;
+-		}
+-	}
+-}
+-
+ static void play_delayed(struct usb_serial_port *port)
+ {
+ 	struct usb_wwan_intf_private *data;
diff --git a/queue-3.15/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch b/queue-3.15/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch
new file mode 100644
index 00000000000..37f60bf708b
--- /dev/null
+++ b/queue-3.15/usb-usb_wwan-fix-urb-leak-in-write-error-path.patch
@@ -0,0 +1,48 @@
+From db0904737947d509844e171c9863ecc5b4534005 Mon Sep 17 00:00:00 2001
+From: xiao jin
+Date: Mon, 26 May 2014 19:23:13 +0200
+Subject: USB: usb_wwan: fix urb leak in write error path
+
+From: xiao jin
+
+commit db0904737947d509844e171c9863ecc5b4534005 upstream.
+
+When USB serial is enabled for modem data, the tty sometimes blocks in
+tty_wait_until_sent because portdata->out_busy is set and never gets a
+chance to be cleared.
+
+There is a bug in the write error path: usb_wwan_write first sets
+portdata->out_busy, then tries an async autopm get, which can fail. In
+that case no out urb is submitted and no usb_wwan_outdat_callback runs
+for this write, so portdata->out_busy can never be cleared.
+
+This patch clears portdata->out_busy if the async autopm get in
+usb_wwan_write fails.
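+
+The shape of the fix, as a small standalone model (names are invented
+for illustration): state claimed before a step that can fail must be
+rolled back on the failure path, otherwise the slot stays busy forever:
+
+	#include <stdio.h>
+
+	static unsigned long out_busy;
+
+	static int autopm_get(void)
+	{
+		return -1;	/* simulate the async autopm failure */
+	}
+
+	static int write_urb(int i)
+	{
+		out_busy |= 1UL << i;	/* claim the out-urb slot */
+		if (autopm_get() < 0) {
+			/* the fix: release the claim before bailing out */
+			out_busy &= ~(1UL << i);
+			return -1;
+		}
+		/* ... submit; the completion callback clears the bit ... */
+		return 0;
+	}
+
+	int main(void)
+	{
+		write_urb(0);
+		printf("out_busy = %#lx (0 means no leaked slot)\n", out_busy);
+		return 0;
+	}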
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: xiao jin
+Signed-off-by: Zhang, Qi1
+Reviewed-by: David Cohen
+Signed-off-by: Johan Hovold
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/usb/serial/usb_wwan.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -228,8 +228,10 @@ int usb_wwan_write(struct tty_struct *tt
+ 			usb_pipeendpoint(this_urb->pipe), i);
+
+ 		err = usb_autopm_get_interface_async(port->serial->interface);
+-		if (err < 0)
++		if (err < 0) {
++			clear_bit(i, &portdata->out_busy);
+ 			break;
++		}
+
+ 		/* send the data */
+ 		memcpy(this_urb->transfer_buffer, buf, todo);
diff --git a/queue-3.15/usb-usb_wwan-fix-write-and-suspend-race.patch b/queue-3.15/usb-usb_wwan-fix-write-and-suspend-race.patch
new file mode 100644
index 00000000000..c7f3a48ec6d
--- /dev/null
+++ b/queue-3.15/usb-usb_wwan-fix-write-and-suspend-race.patch
@@ -0,0 +1,55 @@
+From 170fad9e22df0063eba0701adb966786d7a4ec5a Mon Sep 17 00:00:00 2001
+From: Johan Hovold
+Date: Mon, 26 May 2014 19:23:15 +0200
+Subject: USB: usb_wwan: fix write and suspend race
+
+From: Johan Hovold
+
+commit 170fad9e22df0063eba0701adb966786d7a4ec5a upstream.
+
+Fix race between write() and suspend() which could lead to writes being
+dropped (or I/O while suspended) if the device is runtime suspended
+while a write request is being processed.
+
+Specifically, suspend() releases the susp_lock after determining the
+device is idle but before setting the suspended flag, thus leaving a
+window where a concurrent write() can submit an urb.
+
+Fixes: 383cedc3bb43 ("USB: serial: full autosuspend support for the
+option driver")
+
+Signed-off-by: Johan Hovold
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/usb/serial/usb_wwan.c |   13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -579,20 +579,17 @@ static void stop_read_write_urbs(struct
+ int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message)
+ {
+ 	struct usb_wwan_intf_private *intfdata = serial->private;
+-	int b;
+
++	spin_lock_irq(&intfdata->susp_lock);
+ 	if (PMSG_IS_AUTO(message)) {
+-		spin_lock_irq(&intfdata->susp_lock);
+-		b = intfdata->in_flight;
+-		spin_unlock_irq(&intfdata->susp_lock);
+-
+-		if (b)
++		if (intfdata->in_flight) {
++			spin_unlock_irq(&intfdata->susp_lock);
+ 			return -EBUSY;
++		}
+ 	}
+-
+-	spin_lock_irq(&intfdata->susp_lock);
+ 	intfdata->suspended = 1;
+ 	spin_unlock_irq(&intfdata->susp_lock);
++
+ 	stop_read_write_urbs(serial);
+
+ 	return 0;
diff --git a/queue-3.15/zram-correct-offset-usage-in-zram_bio_discard.patch b/queue-3.15/zram-correct-offset-usage-in-zram_bio_discard.patch
new file mode 100644
index 00000000000..9fe84e56a00
--- /dev/null
+++ b/queue-3.15/zram-correct-offset-usage-in-zram_bio_discard.patch
@@ -0,0 +1,54 @@
+From 38515c73398a4c58059ecf1087e844561b58ee0f Mon Sep 17 00:00:00 2001
+From: Weijie Yang
+Date: Wed, 4 Jun 2014 16:11:06 -0700
+Subject: zram: correct offset usage in zram_bio_discard
+
+From: Weijie Yang
+
+commit 38515c73398a4c58059ecf1087e844561b58ee0f upstream.
+
+We want to skip the physical block (PAGE_SIZE) that is only partially
+covered by the discard bio, so we check the remaining size and subtract
+it if we need to go to the next physical block.
+
+The current offset usage in zram_bio_discard is incorrect and can break
+the filesystem on top of the zram disk. Consider the following scenario:
+
+On some architecture or config PAGE_SIZE is 64K, for example, a
+filesystem is set up on the zram disk without PAGE_SIZE alignment, and
+a discard bio leads to an offset = 4K and size = 72K. Normally, it
+should not really discard any physical block, as it only partially
+covers two physical blocks. However, with the current offset usage, it
+will discard the second physical block and free its memory, which will
+break the filesystem.
+
+This patch corrects the offset usage in zram_bio_discard.
+
+Signed-off-by: Weijie Yang
+Cc: Minchan Kim
+Cc: Nitin Gupta
+Acked-by: Joonsoo Kim
+Cc: Sergey Senozhatsky
+Cc: Bob Liu
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/block/zram/zram_drv.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -572,10 +572,10 @@ static void zram_bio_discard(struct zram
+ 	 * skipping this logical block is appropriate here.
+ 	 */
+ 	if (offset) {
+-		if (n < offset)
++		if (n <= (PAGE_SIZE - offset))
+ 			return;
+
+-		n -= offset;
++		n -= (PAGE_SIZE - offset);
+ 		index++;
+ 	}
+
-- 
2.47.3