git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Oct 2013 21:19:27 +0000 (14:19 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Oct 2013 21:19:27 +0000 (14:19 -0700)
added patches:
dm-snapshot-fix-data-corruption.patch
fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
wireless-radiotap-fix-parsing-buffer-overrun.patch
writeback-fix-negative-bdi-max-pause.patch

queue-3.4/dm-snapshot-fix-data-corruption.patch [new file with mode: 0644]
queue-3.4/fs-buffer-move-allocation-failure-loop-into-the-allocator.patch [new file with mode: 0644]
queue-3.4/mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch [new file with mode: 0644]
queue-3.4/series
queue-3.4/wireless-radiotap-fix-parsing-buffer-overrun.patch [new file with mode: 0644]
queue-3.4/writeback-fix-negative-bdi-max-pause.patch [new file with mode: 0644]

diff --git a/queue-3.4/dm-snapshot-fix-data-corruption.patch b/queue-3.4/dm-snapshot-fix-data-corruption.patch
new file mode 100644 (file)
index 0000000..113824c
--- /dev/null
@@ -0,0 +1,88 @@
+From e9c6a182649f4259db704ae15a91ac820e63b0ca Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Wed, 16 Oct 2013 03:17:47 +0100
+Subject: dm snapshot: fix data corruption
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit e9c6a182649f4259db704ae15a91ac820e63b0ca upstream.
+
+This patch fixes a particular type of data corruption that has been
+encountered when loading a snapshot's metadata from disk.
+
+When we allocate a new chunk in persistent_prepare, we increment
+ps->next_free and we make sure that it doesn't point to a metadata area
+by further incrementing it if necessary.
+
+When we load metadata from disk on device activation, ps->next_free is
+positioned after the last used data chunk. However, if this last used
+data chunk is followed by a metadata area, ps->next_free is positioned
+erroneously to the metadata area. A newly-allocated chunk is placed at
+the same location as the metadata area, resulting in data or metadata
+corruption.
+
+This patch changes the code so that ps->next_free skips the metadata
+area when metadata are loaded in function read_exceptions.
+
+The patch also moves a piece of code from persistent_prepare_exception
+to a separate function skip_metadata to avoid code duplication.
+
+CVE-2013-4299
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-snap-persistent.c |   18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-snap-persistent.c
++++ b/drivers/md/dm-snap-persistent.c
+@@ -269,6 +269,14 @@ static chunk_t area_location(struct psto
+       return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
+ }
++static void skip_metadata(struct pstore *ps)
++{
++      uint32_t stride = ps->exceptions_per_area + 1;
++      chunk_t next_free = ps->next_free;
++      if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
++              ps->next_free++;
++}
++
+ /*
+  * Read or write a metadata area.  Remembering to skip the first
+  * chunk which holds the header.
+@@ -502,6 +510,8 @@ static int read_exceptions(struct pstore
+       ps->current_area--;
++      skip_metadata(ps);
++
+       return 0;
+ }
+@@ -616,8 +626,6 @@ static int persistent_prepare_exception(
+                                       struct dm_exception *e)
+ {
+       struct pstore *ps = get_info(store);
+-      uint32_t stride;
+-      chunk_t next_free;
+       sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
+       /* Is there enough room ? */
+@@ -630,10 +638,8 @@ static int persistent_prepare_exception(
+        * Move onto the next free pending, making sure to take
+        * into account the location of the metadata chunks.
+        */
+-      stride = (ps->exceptions_per_area + 1);
+-      next_free = ++ps->next_free;
+-      if (sector_div(next_free, stride) == 1)
+-              ps->next_free++;
++      ps->next_free++;
++      skip_metadata(ps);
+       atomic_inc(&ps->pending_count);
+       return 0;
diff --git a/queue-3.4/fs-buffer-move-allocation-failure-loop-into-the-allocator.patch b/queue-3.4/fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
new file mode 100644 (file)
index 0000000..440cee6
--- /dev/null
@@ -0,0 +1,75 @@
+From 84235de394d9775bfaa7fa9762a59d91fef0c1fc Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Wed, 16 Oct 2013 13:47:00 -0700
+Subject: fs: buffer: move allocation failure loop into the allocator
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 84235de394d9775bfaa7fa9762a59d91fef0c1fc upstream.
+
+Buffer allocation has a very crude indefinite loop around waking the
+flusher threads and performing global NOFS direct reclaim because it can
+not handle allocation failures.
+
+The most immediate problem with this is that the allocation may fail due
+to a memory cgroup limit, where flushers + direct reclaim might not make
+any progress towards resolving the situation at all.  Because unlike the
+global case, a memory cgroup may not have any cache at all, only
+anonymous pages but no swap.  This situation will lead to a reclaim
+livelock with insane IO from waking the flushers and thrashing unrelated
+filesystem cache in a tight loop.
+
+Use __GFP_NOFAIL allocations for buffers for now.  This makes sure that
+any looping happens in the page allocator, which knows how to
+orchestrate kswapd, direct reclaim, and the flushers sensibly.  It also
+allows memory cgroups to detect allocations that can't handle failure
+and will allow them to ultimately bypass the limit if reclaim can not
+make progress.
+
+Reported-by: azurIt <azurit@pobox.sk>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/buffer.c     |   14 ++++++++++++--
+ mm/memcontrol.c |    2 ++
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -957,9 +957,19 @@ grow_dev_page(struct block_device *bdev,
+       struct buffer_head *bh;
+       sector_t end_block;
+       int ret = 0;            /* Will call free_more_memory() */
++      gfp_t gfp_mask;
+-      page = find_or_create_page(inode->i_mapping, index,
+-              (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
++      gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
++      gfp_mask |= __GFP_MOVABLE;
++      /*
++       * XXX: __getblk_slow() can not really deal with failure and
++       * will endlessly loop on improvised global reclaim.  Prefer
++       * looping in the allocator rather than here, at least that
++       * code knows what it's doing.
++       */
++      gfp_mask |= __GFP_NOFAIL;
++
++      page = find_or_create_page(inode->i_mapping, index, gfp_mask);
+       if (!page)
+               return ret;
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2412,6 +2412,8 @@ done:
+       return 0;
+ nomem:
+       *ptr = NULL;
++      if (gfp_mask & __GFP_NOFAIL)
++              return 0;
+       return -ENOMEM;
+ bypass:
+       *ptr = root_mem_cgroup;
diff --git a/queue-3.4/mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch b/queue-3.4/mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
new file mode 100644 (file)
index 0000000..7ac0ea0
--- /dev/null
@@ -0,0 +1,80 @@
+From c3d16e16522fe3fe8759735850a0676da18f4b1d Mon Sep 17 00:00:00 2001
+From: Cyrill Gorcunov <gorcunov@gmail.com>
+Date: Wed, 16 Oct 2013 13:46:51 -0700
+Subject: mm: migration: do not lose soft dirty bit if page is in migration state
+
+From: Cyrill Gorcunov <gorcunov@gmail.com>
+
+commit c3d16e16522fe3fe8759735850a0676da18f4b1d upstream.
+
+If page migration is turned on in config and the page is migrating, we
+may lose the soft dirty bit.  If fork and mprotect are called on
+migrating pages (once migration is complete) pages do not obtain the
+soft dirty bit in the correspond pte entries.  Fix it adding an
+appropriate test on swap entries.
+
+Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Matt Mackall <mpm@selenic.com>
+Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory.c   |    2 ++
+ mm/migrate.c  |    2 ++
+ mm/mprotect.c |    7 +++++--
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -876,6 +876,8 @@ copy_one_pte(struct mm_struct *dst_mm, s
+                                        */
+                                       make_migration_entry_read(&entry);
+                                       pte = swp_entry_to_pte(entry);
++                                      if (pte_swp_soft_dirty(*src_pte))
++                                              pte = pte_swp_mksoft_dirty(pte);
+                                       set_pte_at(src_mm, addr, src_pte, pte);
+                               }
+                       }
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -139,6 +139,8 @@ static int remove_migration_pte(struct p
+       get_page(new);
+       pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
++      if (pte_swp_soft_dirty(*ptep))
++              pte = pte_mksoft_dirty(pte);
+       if (is_write_migration_entry(entry))
+               pte = pte_mkwrite(pte);
+ #ifdef CONFIG_HUGETLB_PAGE
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -64,13 +64,16 @@ static void change_pte_range(struct mm_s
+                       swp_entry_t entry = pte_to_swp_entry(oldpte);
+                       if (is_write_migration_entry(entry)) {
++                              pte_t newpte;
+                               /*
+                                * A protection check is difficult so
+                                * just be safe and disable write
+                                */
+                               make_migration_entry_read(&entry);
+-                              set_pte_at(mm, addr, pte,
+-                                      swp_entry_to_pte(entry));
++                              newpte = swp_entry_to_pte(entry);
++                              if (pte_swp_soft_dirty(oldpte))
++                                      newpte = pte_swp_mksoft_dirty(newpte);
++                              set_pte_at(mm, addr, pte, newpte);
+                       }
+               }
+       } while (pte++, addr += PAGE_SIZE, addr != end);
index 792fcbb866c52172f28a437739eaa1baa61a7213..a7952a67ccc3d85bc24fb1a5cb32e0b00861c53c 100644 (file)
@@ -24,3 +24,8 @@ net-fix-cipso-packet-validation-when-netlabel.patch
 inet-fix-possible-memory-corruption-with-udp_cork-and-ufo.patch
 davinci_emac.c-fix-iff_allmulti-setup.patch
 ext3-return-32-64-bit-dir-name-hash-according-to-usage-type.patch
+dm-snapshot-fix-data-corruption.patch
+mm-migration-do-not-lose-soft-dirty-bit-if-page-is-in-migration-state.patch
+writeback-fix-negative-bdi-max-pause.patch
+wireless-radiotap-fix-parsing-buffer-overrun.patch
+fs-buffer-move-allocation-failure-loop-into-the-allocator.patch
diff --git a/queue-3.4/wireless-radiotap-fix-parsing-buffer-overrun.patch b/queue-3.4/wireless-radiotap-fix-parsing-buffer-overrun.patch
new file mode 100644 (file)
index 0000000..8b66665
--- /dev/null
@@ -0,0 +1,54 @@
+From f5563318ff1bde15b10e736e97ffce13be08bc1a Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Fri, 11 Oct 2013 14:47:05 +0200
+Subject: wireless: radiotap: fix parsing buffer overrun
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit f5563318ff1bde15b10e736e97ffce13be08bc1a upstream.
+
+When parsing an invalid radiotap header, the parser can overrun
+the buffer that is passed in because it doesn't correctly check
+ 1) the minimum radiotap header size
+ 2) the space for extended bitmaps
+
+The first issue doesn't affect any in-kernel user as they all
+check the minimum size before calling the radiotap function.
+The second issue could potentially affect the kernel if an skb
+is passed in that consists only of the radiotap header with a
+lot of extended bitmaps that extend past the SKB. In that case
+a read-only buffer overrun by at most 4 bytes is possible.
+
+Fix this by adding the appropriate checks to the parser.
+
+Reported-by: Evan Huus <eapache@gmail.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/wireless/radiotap.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/wireless/radiotap.c
++++ b/net/wireless/radiotap.c
+@@ -95,6 +95,10 @@ int ieee80211_radiotap_iterator_init(
+       struct ieee80211_radiotap_header *radiotap_header,
+       int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns)
+ {
++      /* check the radiotap header can actually be present */
++      if (max_length < sizeof(struct ieee80211_radiotap_header))
++              return -EINVAL;
++
+       /* Linux only supports version 0 radiotap format */
+       if (radiotap_header->it_version)
+               return -EINVAL;
+@@ -129,7 +133,8 @@ int ieee80211_radiotap_iterator_init(
+                        */
+                       if ((unsigned long)iterator->_arg -
+-                          (unsigned long)iterator->_rtheader >
++                          (unsigned long)iterator->_rtheader +
++                          sizeof(uint32_t) >
+                           (unsigned long)iterator->_max_length)
+                               return -EINVAL;
+               }
diff --git a/queue-3.4/writeback-fix-negative-bdi-max-pause.patch b/queue-3.4/writeback-fix-negative-bdi-max-pause.patch
new file mode 100644 (file)
index 0000000..48dcf5e
--- /dev/null
@@ -0,0 +1,93 @@
+From e3b6c655b91e01a1dade056cfa358581b47a5351 Mon Sep 17 00:00:00 2001
+From: Fengguang Wu <fengguang.wu@intel.com>
+Date: Wed, 16 Oct 2013 13:47:03 -0700
+Subject: writeback: fix negative bdi max pause
+
+From: Fengguang Wu <fengguang.wu@intel.com>
+
+commit e3b6c655b91e01a1dade056cfa358581b47a5351 upstream.
+
+Toralf runs trinity on UML/i386.  After some time it hangs and the last
+message line is
+
+       BUG: soft lockup - CPU#0 stuck for 22s! [trinity-child0:1521]
+
+It's found that pages_dirtied becomes very large.  More than 1000000000
+pages in this case:
+
+       period = HZ * pages_dirtied / task_ratelimit;
+       BUG_ON(pages_dirtied > 2000000000);
+       BUG_ON(pages_dirtied > 1000000000);      <---------
+
+UML debug printf shows that we got negative pause here:
+
+       ick: pause : -984
+       ick: pages_dirtied : 0
+       ick: task_ratelimit: 0
+
+        pause:
+       +       if (pause < 0)  {
+       +               extern int printf(char *, ...);
+       +               printf("ick : pause : %li\n", pause);
+       +               printf("ick: pages_dirtied : %lu\n", pages_dirtied);
+       +               printf("ick: task_ratelimit: %lu\n", task_ratelimit);
+       +               BUG_ON(1);
+       +       }
+               trace_balance_dirty_pages(bdi,
+
+Since pause is bounded by [min_pause, max_pause] where min_pause is also
+bounded by max_pause.  It's suspected and demonstrated that the
+max_pause calculation goes wrong:
+
+       ick: pause : -717
+       ick: min_pause : -177
+       ick: max_pause : -717
+       ick: pages_dirtied : 14
+       ick: task_ratelimit: 0
+
+The problem lies in the two "long = unsigned long" assignments in
+bdi_max_pause() which might go negative if the highest bit is 1, and the
+min_t(long, ...) check failed to protect it falling under 0.  Fix all of
+them by using "unsigned long" throughout the function.
+
+Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
+Reported-by: Toralf Förster <toralf.foerster@gmx.de>
+Tested-by: Toralf Förster <toralf.foerster@gmx.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Richard Weinberger <richard@nod.at>
+Cc: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page-writeback.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1072,11 +1072,11 @@ static unsigned long dirty_poll_interval
+       return 1;
+ }
+-static long bdi_max_pause(struct backing_dev_info *bdi,
+-                        unsigned long bdi_dirty)
++static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
++                                 unsigned long bdi_dirty)
+ {
+-      long bw = bdi->avg_write_bandwidth;
+-      long t;
++      unsigned long bw = bdi->avg_write_bandwidth;
++      unsigned long t;
+       /*
+        * Limit pause time for small memory systems. If sleeping for too long
+@@ -1088,7 +1088,7 @@ static long bdi_max_pause(struct backing
+       t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
+       t++;
+-      return min_t(long, t, MAX_PAUSE);
++      return min_t(unsigned long, t, MAX_PAUSE);
+ }
+ static long bdi_min_pause(struct backing_dev_info *bdi,