]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.1 patches
authorGreg Kroah-Hartman <gregkh@suse.de>
Thu, 3 Nov 2011 18:59:50 +0000 (11:59 -0700)
committerGreg Kroah-Hartman <gregkh@suse.de>
Thu, 3 Nov 2011 18:59:50 +0000 (11:59 -0700)
queue-3.1/alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch [new file with mode: 0644]
queue-3.1/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch [new file with mode: 0644]
queue-3.1/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch [new file with mode: 0644]
queue-3.1/mm-thp-tail-page-refcounting-fix.patch [new file with mode: 0644]
queue-3.1/proc-fix-races-against-execve-of-proc-pid-fd.patch [new file with mode: 0644]
queue-3.1/series

diff --git a/queue-3.1/alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch b/queue-3.1/alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch
new file mode 100644 (file)
index 0000000..10fc263
--- /dev/null
@@ -0,0 +1,81 @@
+From tiwai@suse.de  Thu Nov  3 10:55:14 2011
+From: Takashi Iwai <tiwai@suse.de>
+Date: Thu, 03 Nov 2011 15:56:55 +0100
+Subject: ALSA: hda - Add missing static ADC tables for ALC269 quirks
+To: stable@vger.kernel.org
+Cc: Chris Vine <chris@cvine.freeserve.co.uk>
+Message-ID: <s5hfwi59ruw.wl%tiwai@suse.de>
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[There is no upstream commit for this patch since the corresponding
+code was removed from 3.2 kernel.  This is a regression found only in
+3.1 kernel, so please apply this only to 3.1.x series.]
+
+Some ALC269 quirks define their own .cap_mixer field but without the
+static adc_nids[].  This resulted in the mismatch of ADC because ALC269
+may have the widget 0x07 for another audio-in, and the auto-parser picks
+this up instead.
+
+This patch fixes the problem by adding the static adc_nids[] and co
+again to these entries.
+
+Tested-by: Chris Vine <chris@cvine.freeserve.co.uk>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ sound/pci/hda/alc269_quirks.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/sound/pci/hda/alc269_quirks.c
++++ b/sound/pci/hda/alc269_quirks.c
+@@ -577,6 +577,9 @@ static const struct alc_config_preset al
+                               alc269_laptop_amic_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc269_dac_nids),
+               .dac_nids = alc269_dac_nids,
++              .adc_nids = alc269_adc_nids,
++              .capsrc_nids = alc269_capsrc_nids,
++              .num_adc_nids = ARRAY_SIZE(alc269_adc_nids),
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc269_modes),
+               .channel_mode = alc269_modes,
+@@ -591,6 +594,9 @@ static const struct alc_config_preset al
+                               alc269_laptop_dmic_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc269_dac_nids),
+               .dac_nids = alc269_dac_nids,
++              .adc_nids = alc269_adc_nids,
++              .capsrc_nids = alc269_capsrc_nids,
++              .num_adc_nids = ARRAY_SIZE(alc269_adc_nids),
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc269_modes),
+               .channel_mode = alc269_modes,
+@@ -605,6 +611,9 @@ static const struct alc_config_preset al
+                               alc269vb_laptop_amic_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc269_dac_nids),
+               .dac_nids = alc269_dac_nids,
++              .adc_nids = alc269vb_adc_nids,
++              .capsrc_nids = alc269vb_capsrc_nids,
++              .num_adc_nids = ARRAY_SIZE(alc269vb_adc_nids),
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc269_modes),
+               .channel_mode = alc269_modes,
+@@ -619,6 +628,9 @@ static const struct alc_config_preset al
+                               alc269vb_laptop_dmic_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc269_dac_nids),
+               .dac_nids = alc269_dac_nids,
++              .adc_nids = alc269vb_adc_nids,
++              .capsrc_nids = alc269vb_capsrc_nids,
++              .num_adc_nids = ARRAY_SIZE(alc269vb_adc_nids),
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc269_modes),
+               .channel_mode = alc269_modes,
+@@ -633,6 +645,8 @@ static const struct alc_config_preset al
+                               alc269_laptop_dmic_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc269_dac_nids),
+               .dac_nids = alc269_dac_nids,
++              .adc_nids = alc269_adc_nids,
++              .capsrc_nids = alc269_capsrc_nids,
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc269_modes),
+               .channel_mode = alc269_modes,
diff --git a/queue-3.1/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch b/queue-3.1/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch
new file mode 100644 (file)
index 0000000..184425f
--- /dev/null
@@ -0,0 +1,55 @@
+From a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 Mon Sep 17 00:00:00 2001
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Wed, 2 Nov 2011 13:37:41 -0700
+Subject: binfmt_elf: fix PIE execution with randomization disabled
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+commit a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 upstream.
+
+The case of address space randomization being disabled in runtime through
+randomize_va_space sysctl is not treated properly in load_elf_binary(),
+resulting in SIGKILL coming at exec() time for certain PIE-linked binaries
+in case the randomization has been disabled at runtime prior to calling
+exec().
+
+Handle the randomize_va_space == 0 case the same way as if we were not
+supporting .text randomization at all.
+
+Based on original patch by H.J. Lu and Josh Boyer.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Russell King <rmk@arm.linux.org.uk>
+Cc: H.J. Lu <hongjiu.lu@intel.com>
+Cc: <stable@kernel.org>
+Tested-by: Josh Boyer <jwboyer@redhat.com>
+Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/binfmt_elf.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/binfmt_elf.c
++++ b/fs/binfmt_elf.c
+@@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_
+                        * might try to exec.  This is because the brk will
+                        * follow the loader, and is not movable.  */
+ #if defined(CONFIG_X86) || defined(CONFIG_ARM)
+-                      load_bias = 0;
++                      /* Memory randomization might have been switched off
++                       * in runtime via sysctl.
++                       * If that is the case, retain the original non-zero
++                       * load_bias value in order to establish proper
++                       * non-randomized mappings.
++                       */
++                      if (current->flags & PF_RANDOMIZE)
++                              load_bias = 0;
++                      else
++                              load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+ #else
+                       load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+ #endif
diff --git a/queue-3.1/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch b/queue-3.1/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch
new file mode 100644 (file)
index 0000000..56b713d
--- /dev/null
@@ -0,0 +1,37 @@
+From e0c87bd95e8dad455c23bc56513af8dcb1737e55 Mon Sep 17 00:00:00 2001
+From: Alexandre Bounine <alexandre.bounine@idt.com>
+Date: Wed, 2 Nov 2011 13:39:15 -0700
+Subject: drivers/net/rionet.c: fix ethernet address macros for LE platforms
+
+From: Alexandre Bounine <alexandre.bounine@idt.com>
+
+commit e0c87bd95e8dad455c23bc56513af8dcb1737e55 upstream.
+
+Modify Ethernet addess macros to be compatible with BE/LE platforms
+
+Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
+Cc: Chul Kim <chul.kim@idt.com>
+Cc: Kumar Gala <galak@kernel.crashing.org>
+Cc: Matt Porter <mporter@kernel.crashing.org>
+Cc: Li Yang <leoli@freescale.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/rionet.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/rionet.c
++++ b/drivers/net/rionet.c
+@@ -88,8 +88,8 @@ static struct rio_dev **rionet_active;
+ #define dev_rionet_capable(dev) \
+       is_rionet_capable(dev->src_ops, dev->dst_ops)
+-#define RIONET_MAC_MATCH(x)   (*(u32 *)x == 0x00010001)
+-#define RIONET_GET_DESTID(x)  (*(u16 *)(x + 4))
++#define RIONET_MAC_MATCH(x)   (!memcmp((x), "\00\01\00\01", 4))
++#define RIONET_GET_DESTID(x)  ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5))
+ static int rionet_rx_clean(struct net_device *ndev)
+ {
diff --git a/queue-3.1/mm-thp-tail-page-refcounting-fix.patch b/queue-3.1/mm-thp-tail-page-refcounting-fix.patch
new file mode 100644 (file)
index 0000000..3bf21a6
--- /dev/null
@@ -0,0 +1,491 @@
+From 70b50f94f1644e2aa7cb374819cfd93f3c28d725 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Wed, 2 Nov 2011 13:36:59 -0700
+Subject: mm: thp: tail page refcounting fix
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit 70b50f94f1644e2aa7cb374819cfd93f3c28d725 upstream.
+
+Michel while working on the working set estimation code, noticed that
+calling get_page_unless_zero() on a random pfn_to_page(random_pfn)
+wasn't safe, if the pfn ended up being a tail page of a transparent
+hugepage under splitting by __split_huge_page_refcount().
+
+He then found the problem could also theoretically materialize with
+page_cache_get_speculative() during the speculative radix tree lookups
+that uses get_page_unless_zero() in SMP if the radix tree page is freed
+and reallocated and get_user_pages is called on it before
+page_cache_get_speculative has a chance to call get_page_unless_zero().
+
+So the best way to fix the problem is to keep page_tail->_count zero at
+all times.  This will guarantee that get_page_unless_zero() can never
+succeed on any tail page.  page_tail->_mapcount is guaranteed zero and
+is unused for all tail pages of a compound page, so we can simply
+account the tail page references there and transfer them to
+tail_page->_count in __split_huge_page_refcount() (in addition to the
+head_page->_mapcount).
+
+While debugging this s/_count/_mapcount/ change I also noticed get_page is
+called by direct-io.c on pages returned by get_user_pages.  That wasn't
+entirely safe because the two atomic_inc in get_page weren't atomic.  As
+opposed to other get_user_page users like secondary-MMU page fault to
+establish the shadow pagetables would never call any superflous get_page
+after get_user_page returns.  It's safer to make get_page universally safe
+for tail pages and to use get_page_foll() within follow_page (inside
+get_user_pages()).  get_page_foll() is safe to do the refcounting for tail
+pages without taking any locks because it is run within PT lock protected
+critical sections (PT lock for pte and page_table_lock for
+pmd_trans_huge).
+
+The standard get_page() as invoked by direct-io instead will now take
+the compound_lock but still only for tail pages.  The direct-io paths
+are usually I/O bound and the compound_lock is per THP so very
+finegrined, so there's no risk of scalability issues with it.  A simple
+direct-io benchmarks with all lockdep prove locking and spinlock
+debugging infrastructure enabled shows identical performance and no
+overhead.  So it's worth it.  Ideally direct-io should stop calling
+get_page() on pages returned by get_user_pages().  The spinlock in
+get_page() is already optimized away for no-THP builds but doing
+get_page() on tail pages returned by GUP is generally a rare operation
+and usually only run in I/O paths.
+
+This new refcounting on page_tail->_mapcount in addition to avoiding new
+RCU critical sections will also allow the working set estimation code to
+work without any further complexity associated to the tail page
+refcounting with THP.
+
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Reported-by: Michel Lespinasse <walken@google.com>
+Reviewed-by: Michel Lespinasse <walken@google.com>
+Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/mm/gup.c    |    5 +-
+ arch/x86/mm/gup.c        |    5 +-
+ include/linux/mm.h       |   56 ++++++++++++-------------------
+ include/linux/mm_types.h |   21 +++++++++--
+ mm/huge_memory.c         |   37 ++++++++++++++------
+ mm/internal.h            |   46 ++++++++++++++++++++++++++
+ mm/memory.c              |    2 -
+ mm/swap.c                |   83 ++++++++++++++++++++++++++++++-----------------
+ 8 files changed, 171 insertions(+), 84 deletions(-)
+
+--- a/arch/powerpc/mm/gup.c
++++ b/arch/powerpc/mm/gup.c
+@@ -22,8 +22,9 @@ static inline void get_huge_page_tail(st
+        * __split_huge_page_refcount() cannot run
+        * from under us.
+        */
+-      VM_BUG_ON(atomic_read(&page->_count) < 0);
+-      atomic_inc(&page->_count);
++      VM_BUG_ON(page_mapcount(page) < 0);
++      VM_BUG_ON(atomic_read(&page->_count) != 0);
++      atomic_inc(&page->_mapcount);
+ }
+ /*
+--- a/arch/x86/mm/gup.c
++++ b/arch/x86/mm/gup.c
+@@ -114,8 +114,9 @@ static inline void get_huge_page_tail(st
+        * __split_huge_page_refcount() cannot run
+        * from under us.
+        */
+-      VM_BUG_ON(atomic_read(&page->_count) < 0);
+-      atomic_inc(&page->_count);
++      VM_BUG_ON(page_mapcount(page) < 0);
++      VM_BUG_ON(atomic_read(&page->_count) != 0);
++      atomic_inc(&page->_mapcount);
+ }
+ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -356,36 +356,39 @@ static inline struct page *compound_head
+       return page;
+ }
++/*
++ * The atomic page->_mapcount, starts from -1: so that transitions
++ * both from it and to it can be tracked, using atomic_inc_and_test
++ * and atomic_add_negative(-1).
++ */
++static inline void reset_page_mapcount(struct page *page)
++{
++      atomic_set(&(page)->_mapcount, -1);
++}
++
++static inline int page_mapcount(struct page *page)
++{
++      return atomic_read(&(page)->_mapcount) + 1;
++}
++
+ static inline int page_count(struct page *page)
+ {
+       return atomic_read(&compound_head(page)->_count);
+ }
++extern bool __get_page_tail(struct page *page);
++
+ static inline void get_page(struct page *page)
+ {
++      if (unlikely(PageTail(page)))
++              if (likely(__get_page_tail(page)))
++                      return;
+       /*
+        * Getting a normal page or the head of a compound page
+-       * requires to already have an elevated page->_count. Only if
+-       * we're getting a tail page, the elevated page->_count is
+-       * required only in the head page, so for tail pages the
+-       * bugcheck only verifies that the page->_count isn't
+-       * negative.
++       * requires to already have an elevated page->_count.
+        */
+-      VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
++      VM_BUG_ON(atomic_read(&page->_count) <= 0);
+       atomic_inc(&page->_count);
+-      /*
+-       * Getting a tail page will elevate both the head and tail
+-       * page->_count(s).
+-       */
+-      if (unlikely(PageTail(page))) {
+-              /*
+-               * This is safe only because
+-               * __split_huge_page_refcount can't run under
+-               * get_page().
+-               */
+-              VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+-              atomic_inc(&page->first_page->_count);
+-      }
+ }
+ static inline struct page *virt_to_head_page(const void *x)
+@@ -804,21 +807,6 @@ static inline pgoff_t page_index(struct
+ }
+ /*
+- * The atomic page->_mapcount, like _count, starts from -1:
+- * so that transitions both from it and to it can be tracked,
+- * using atomic_inc_and_test and atomic_add_negative(-1).
+- */
+-static inline void reset_page_mapcount(struct page *page)
+-{
+-      atomic_set(&(page)->_mapcount, -1);
+-}
+-
+-static inline int page_mapcount(struct page *page)
+-{
+-      return atomic_read(&(page)->_mapcount) + 1;
+-}
+-
+-/*
+  * Return true if this page is mapped into pagetables.
+  */
+ static inline int page_mapped(struct page *page)
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -62,10 +62,23 @@ struct page {
+                       struct {
+                               union {
+-                                      atomic_t _mapcount;     /* Count of ptes mapped in mms,
+-                                                       * to show when page is mapped
+-                                                       * & limit reverse map searches.
+-                                                       */
++                                      /*
++                                       * Count of ptes mapped in
++                                       * mms, to show when page is
++                                       * mapped & limit reverse map
++                                       * searches.
++                                       *
++                                       * Used also for tail pages
++                                       * refcounting instead of
++                                       * _count. Tail pages cannot
++                                       * be mapped and keeping the
++                                       * tail page _count zero at
++                                       * all times guarantees
++                                       * get_page_unless_zero() will
++                                       * never succeed on tail
++                                       * pages.
++                                       */
++                                      atomic_t _mapcount;
+                                       struct {
+                                               unsigned inuse:16;
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -989,7 +989,7 @@ struct page *follow_trans_huge_pmd(struc
+       page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
+       VM_BUG_ON(!PageCompound(page));
+       if (flags & FOLL_GET)
+-              get_page(page);
++              get_page_foll(page);
+ out:
+       return page;
+@@ -1156,6 +1156,7 @@ static void __split_huge_page_refcount(s
+       unsigned long head_index = page->index;
+       struct zone *zone = page_zone(page);
+       int zonestat;
++      int tail_count = 0;
+       /* prevent PageLRU to go away from under us, and freeze lru stats */
+       spin_lock_irq(&zone->lru_lock);
+@@ -1164,11 +1165,27 @@ static void __split_huge_page_refcount(s
+       for (i = 1; i < HPAGE_PMD_NR; i++) {
+               struct page *page_tail = page + i;
+-              /* tail_page->_count cannot change */
+-              atomic_sub(atomic_read(&page_tail->_count), &page->_count);
+-              BUG_ON(page_count(page) <= 0);
+-              atomic_add(page_mapcount(page) + 1, &page_tail->_count);
+-              BUG_ON(atomic_read(&page_tail->_count) <= 0);
++              /* tail_page->_mapcount cannot change */
++              BUG_ON(page_mapcount(page_tail) < 0);
++              tail_count += page_mapcount(page_tail);
++              /* check for overflow */
++              BUG_ON(tail_count < 0);
++              BUG_ON(atomic_read(&page_tail->_count) != 0);
++              /*
++               * tail_page->_count is zero and not changing from
++               * under us. But get_page_unless_zero() may be running
++               * from under us on the tail_page. If we used
++               * atomic_set() below instead of atomic_add(), we
++               * would then run atomic_set() concurrently with
++               * get_page_unless_zero(), and atomic_set() is
++               * implemented in C not using locked ops. spin_unlock
++               * on x86 sometime uses locked ops because of PPro
++               * errata 66, 92, so unless somebody can guarantee
++               * atomic_set() here would be safe on all archs (and
++               * not only on x86), it's safer to use atomic_add().
++               */
++              atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
++                         &page_tail->_count);
+               /* after clearing PageTail the gup refcount can be released */
+               smp_mb();
+@@ -1186,10 +1203,7 @@ static void __split_huge_page_refcount(s
+                                     (1L << PG_uptodate)));
+               page_tail->flags |= (1L << PG_dirty);
+-              /*
+-               * 1) clear PageTail before overwriting first_page
+-               * 2) clear PageTail before clearing PageHead for VM_BUG_ON
+-               */
++              /* clear PageTail before overwriting first_page */
+               smp_wmb();
+               /*
+@@ -1206,7 +1220,6 @@ static void __split_huge_page_refcount(s
+                * status is achieved setting a reserved bit in the
+                * pmd, not by clearing the present bit.
+               */
+-              BUG_ON(page_mapcount(page_tail));
+               page_tail->_mapcount = page->_mapcount;
+               BUG_ON(page_tail->mapping);
+@@ -1223,6 +1236,8 @@ static void __split_huge_page_refcount(s
+               lru_add_page_tail(zone, page, page_tail);
+       }
++      atomic_sub(tail_count, &page->_count);
++      BUG_ON(atomic_read(&page->_count) <= 0);
+       __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+       __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -37,6 +37,52 @@ static inline void __put_page(struct pag
+       atomic_dec(&page->_count);
+ }
++static inline void __get_page_tail_foll(struct page *page,
++                                      bool get_page_head)
++{
++      /*
++       * If we're getting a tail page, the elevated page->_count is
++       * required only in the head page and we will elevate the head
++       * page->_count and tail page->_mapcount.
++       *
++       * We elevate page_tail->_mapcount for tail pages to force
++       * page_tail->_count to be zero at all times to avoid getting
++       * false positives from get_page_unless_zero() with
++       * speculative page access (like in
++       * page_cache_get_speculative()) on tail pages.
++       */
++      VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
++      VM_BUG_ON(atomic_read(&page->_count) != 0);
++      VM_BUG_ON(page_mapcount(page) < 0);
++      if (get_page_head)
++              atomic_inc(&page->first_page->_count);
++      atomic_inc(&page->_mapcount);
++}
++
++/*
++ * This is meant to be called as the FOLL_GET operation of
++ * follow_page() and it must be called while holding the proper PT
++ * lock while the pte (or pmd_trans_huge) is still mapping the page.
++ */
++static inline void get_page_foll(struct page *page)
++{
++      if (unlikely(PageTail(page)))
++              /*
++               * This is safe only because
++               * __split_huge_page_refcount() can't run under
++               * get_page_foll() because we hold the proper PT lock.
++               */
++              __get_page_tail_foll(page, true);
++      else {
++              /*
++               * Getting a normal page or the head of a compound page
++               * requires to already have an elevated page->_count.
++               */
++              VM_BUG_ON(atomic_read(&page->_count) <= 0);
++              atomic_inc(&page->_count);
++      }
++}
++
+ extern unsigned long highest_memmap_pfn;
+ /*
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1503,7 +1503,7 @@ split_fallthrough:
+       }
+       if (flags & FOLL_GET)
+-              get_page(page);
++              get_page_foll(page);
+       if (flags & FOLL_TOUCH) {
+               if ((flags & FOLL_WRITE) &&
+                   !pte_dirty(pte) && !PageDirty(page))
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -78,39 +78,22 @@ static void put_compound_page(struct pag
+ {
+       if (unlikely(PageTail(page))) {
+               /* __split_huge_page_refcount can run under us */
+-              struct page *page_head = page->first_page;
+-              smp_rmb();
+-              /*
+-               * If PageTail is still set after smp_rmb() we can be sure
+-               * that the page->first_page we read wasn't a dangling pointer.
+-               * See __split_huge_page_refcount() smp_wmb().
+-               */
+-              if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
++              struct page *page_head = compound_trans_head(page);
++
++              if (likely(page != page_head &&
++                         get_page_unless_zero(page_head))) {
+                       unsigned long flags;
+                       /*
+-                       * Verify that our page_head wasn't converted
+-                       * to a a regular page before we got a
+-                       * reference on it.
++                       * page_head wasn't a dangling pointer but it
++                       * may not be a head page anymore by the time
++                       * we obtain the lock. That is ok as long as it
++                       * can't be freed from under us.
+                        */
+-                      if (unlikely(!PageHead(page_head))) {
+-                              /* PageHead is cleared after PageTail */
+-                              smp_rmb();
+-                              VM_BUG_ON(PageTail(page));
+-                              goto out_put_head;
+-                      }
+-                      /*
+-                       * Only run compound_lock on a valid PageHead,
+-                       * after having it pinned with
+-                       * get_page_unless_zero() above.
+-                       */
+-                      smp_mb();
+-                      /* page_head wasn't a dangling pointer */
+                       flags = compound_lock_irqsave(page_head);
+                       if (unlikely(!PageTail(page))) {
+                               /* __split_huge_page_refcount run before us */
+                               compound_unlock_irqrestore(page_head, flags);
+                               VM_BUG_ON(PageHead(page_head));
+-                      out_put_head:
+                               if (put_page_testzero(page_head))
+                                       __put_single_page(page_head);
+                       out_put_single:
+@@ -121,16 +104,17 @@ static void put_compound_page(struct pag
+                       VM_BUG_ON(page_head != page->first_page);
+                       /*
+                        * We can release the refcount taken by
+-                       * get_page_unless_zero now that
+-                       * split_huge_page_refcount is blocked on the
+-                       * compound_lock.
++                       * get_page_unless_zero() now that
++                       * __split_huge_page_refcount() is blocked on
++                       * the compound_lock.
+                        */
+                       if (put_page_testzero(page_head))
+                               VM_BUG_ON(1);
+                       /* __split_huge_page_refcount will wait now */
+-                      VM_BUG_ON(atomic_read(&page->_count) <= 0);
+-                      atomic_dec(&page->_count);
++                      VM_BUG_ON(page_mapcount(page) <= 0);
++                      atomic_dec(&page->_mapcount);
+                       VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
++                      VM_BUG_ON(atomic_read(&page->_count) != 0);
+                       compound_unlock_irqrestore(page_head, flags);
+                       if (put_page_testzero(page_head)) {
+                               if (PageHead(page_head))
+@@ -160,6 +144,45 @@ void put_page(struct page *page)
+ }
+ EXPORT_SYMBOL(put_page);
++/*
++ * This function is exported but must not be called by anything other
++ * than get_page(). It implements the slow path of get_page().
++ */
++bool __get_page_tail(struct page *page)
++{
++      /*
++       * This takes care of get_page() if run on a tail page
++       * returned by one of the get_user_pages/follow_page variants.
++       * get_user_pages/follow_page itself doesn't need the compound
++       * lock because it runs __get_page_tail_foll() under the
++       * proper PT lock that already serializes against
++       * split_huge_page().
++       */
++      unsigned long flags;
++      bool got = false;
++      struct page *page_head = compound_trans_head(page);
++
++      if (likely(page != page_head && get_page_unless_zero(page_head))) {
++              /*
++               * page_head wasn't a dangling pointer but it
++               * may not be a head page anymore by the time
++               * we obtain the lock. That is ok as long as it
++               * can't be freed from under us.
++               */
++              flags = compound_lock_irqsave(page_head);
++              /* here __split_huge_page_refcount won't run anymore */
++              if (likely(PageTail(page))) {
++                      __get_page_tail_foll(page, false);
++                      got = true;
++              }
++              compound_unlock_irqrestore(page_head, flags);
++              if (unlikely(!got))
++                      put_page(page_head);
++      }
++      return got;
++}
++EXPORT_SYMBOL(__get_page_tail);
++
+ /**
+  * put_pages_list() - release a list of pages
+  * @pages: list of pages threaded on page->lru
diff --git a/queue-3.1/proc-fix-races-against-execve-of-proc-pid-fd.patch b/queue-3.1/proc-fix-races-against-execve-of-proc-pid-fd.patch
new file mode 100644 (file)
index 0000000..7b52590
--- /dev/null
@@ -0,0 +1,261 @@
+From aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov <segoon@openwall.com>
+Date: Wed, 2 Nov 2011 13:38:44 -0700
+Subject: proc: fix races against execve() of /proc/PID/fd**
+
+From: Vasiliy Kulikov <segoon@openwall.com>
+
+commit aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 upstream.
+
+fd* files are restricted to the task's owner, and other users may not get
+direct access to them.  But one may open any of these files and run any
+setuid program, keeping opened file descriptors.  As there are permission
+checks on open(), but not on readdir() and read(), operations on the kept
+file descriptors will not be checked.  It makes it possible to violate
+procfs permission model.
+
+Reading fdinfo/* may disclose current fds' position and flags, reading
+directory contents of fdinfo/ and fd/ may disclose the number of opened
+files by the target task.  This information is not sensitive per se, but it
+can reveal some private information (like length of a password stored in a
+file) under certain conditions.
+
+Used existing (un)lock_trace functions to check for ptrace_may_access(),
+but instead of using EPERM return code from it use EACCES to be consistent
+with existing proc_pid_follow_link()/proc_pid_readlink() return code.  If
+they differ, attacker can guess what fds exist by analyzing stat() return
+code.  Patched handlers: stat() for fd/*, stat() and read() for fdinfo/*,
+readdir() and lookup() for fd/ and fdinfo/.
+
+Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
+Cc: Cyrill Gorcunov <gorcunov@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/base.c |  146 ++++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 103 insertions(+), 43 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -1665,12 +1665,46 @@ out:
+       return error;
+ }
++static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
++              struct kstat *stat)
++{
++      struct inode *inode = dentry->d_inode;
++      struct task_struct *task = get_proc_task(inode);
++      int rc;
++
++      if (task == NULL)
++              return -ESRCH;
++
++      rc = -EACCES;
++      if (lock_trace(task))
++              goto out_task;
++
++      generic_fillattr(inode, stat);
++      unlock_trace(task);
++      rc = 0;
++out_task:
++      put_task_struct(task);
++      return rc;
++}
++
+ static const struct inode_operations proc_pid_link_inode_operations = {
+       .readlink       = proc_pid_readlink,
+       .follow_link    = proc_pid_follow_link,
+       .setattr        = proc_setattr,
+ };
++static const struct inode_operations proc_fdinfo_link_inode_operations = {
++      .setattr        = proc_setattr,
++      .getattr        = proc_pid_fd_link_getattr,
++};
++
++static const struct inode_operations proc_fd_link_inode_operations = {
++      .readlink       = proc_pid_readlink,
++      .follow_link    = proc_pid_follow_link,
++      .setattr        = proc_setattr,
++      .getattr        = proc_pid_fd_link_getattr,
++};
++
+ /* building an inode */
+@@ -1902,49 +1936,61 @@ out:
+ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+ {
+-      struct task_struct *task = get_proc_task(inode);
+-      struct files_struct *files = NULL;
++      struct task_struct *task;
++      struct files_struct *files;
+       struct file *file;
+       int fd = proc_fd(inode);
++      int rc;
+-      if (task) {
+-              files = get_files_struct(task);
+-              put_task_struct(task);
+-      }
+-      if (files) {
+-              /*
+-               * We are not taking a ref to the file structure, so we must
+-               * hold ->file_lock.
+-               */
+-              spin_lock(&files->file_lock);
+-              file = fcheck_files(files, fd);
+-              if (file) {
+-                      unsigned int f_flags;
+-                      struct fdtable *fdt;
+-
+-                      fdt = files_fdtable(files);
+-                      f_flags = file->f_flags & ~O_CLOEXEC;
+-                      if (FD_ISSET(fd, fdt->close_on_exec))
+-                              f_flags |= O_CLOEXEC;
+-
+-                      if (path) {
+-                              *path = file->f_path;
+-                              path_get(&file->f_path);
+-                      }
+-                      if (info)
+-                              snprintf(info, PROC_FDINFO_MAX,
+-                                       "pos:\t%lli\n"
+-                                       "flags:\t0%o\n",
+-                                       (long long) file->f_pos,
+-                                       f_flags);
+-                      spin_unlock(&files->file_lock);
+-                      put_files_struct(files);
+-                      return 0;
++      task = get_proc_task(inode);
++      if (!task)
++              return -ENOENT;
++
++      rc = -EACCES;
++      if (lock_trace(task))
++              goto out_task;
++
++      rc = -ENOENT;
++      files = get_files_struct(task);
++      if (files == NULL)
++              goto out_unlock;
++
++      /*
++       * We are not taking a ref to the file structure, so we must
++       * hold ->file_lock.
++       */
++      spin_lock(&files->file_lock);
++      file = fcheck_files(files, fd);
++      if (file) {
++              unsigned int f_flags;
++              struct fdtable *fdt;
++
++              fdt = files_fdtable(files);
++              f_flags = file->f_flags & ~O_CLOEXEC;
++              if (FD_ISSET(fd, fdt->close_on_exec))
++                      f_flags |= O_CLOEXEC;
++
++              if (path) {
++                      *path = file->f_path;
++                      path_get(&file->f_path);
+               }
+-              spin_unlock(&files->file_lock);
+-              put_files_struct(files);
+-      }
+-      return -ENOENT;
++              if (info)
++                      snprintf(info, PROC_FDINFO_MAX,
++                               "pos:\t%lli\n"
++                               "flags:\t0%o\n",
++                               (long long) file->f_pos,
++                               f_flags);
++              rc = 0;
++      } else
++              rc = -ENOENT;
++      spin_unlock(&files->file_lock);
++      put_files_struct(files);
++
++out_unlock:
++      unlock_trace(task);
++out_task:
++      put_task_struct(task);
++      return rc;
+ }
+ static int proc_fd_link(struct inode *inode, struct path *path)
+@@ -2039,7 +2085,7 @@ static struct dentry *proc_fd_instantiat
+       spin_unlock(&files->file_lock);
+       put_files_struct(files);
+-      inode->i_op = &proc_pid_link_inode_operations;
++      inode->i_op = &proc_fd_link_inode_operations;
+       inode->i_size = 64;
+       ei->op.proc_get_link = proc_fd_link;
+       d_set_d_op(dentry, &tid_fd_dentry_operations);
+@@ -2071,7 +2117,12 @@ static struct dentry *proc_lookupfd_comm
+       if (fd == ~0U)
+               goto out;
++      result = ERR_PTR(-EACCES);
++      if (lock_trace(task))
++              goto out;
++
+       result = instantiate(dir, dentry, task, &fd);
++      unlock_trace(task);
+ out:
+       put_task_struct(task);
+ out_no_task:
+@@ -2091,23 +2142,28 @@ static int proc_readfd_common(struct fil
+       retval = -ENOENT;
+       if (!p)
+               goto out_no_task;
++
++      retval = -EACCES;
++      if (lock_trace(p))
++              goto out;
++
+       retval = 0;
+       fd = filp->f_pos;
+       switch (fd) {
+               case 0:
+                       if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+-                              goto out;
++                              goto out_unlock;
+                       filp->f_pos++;
+               case 1:
+                       ino = parent_ino(dentry);
+                       if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+-                              goto out;
++                              goto out_unlock;
+                       filp->f_pos++;
+               default:
+                       files = get_files_struct(p);
+                       if (!files)
+-                              goto out;
++                              goto out_unlock;
+                       rcu_read_lock();
+                       for (fd = filp->f_pos-2;
+                            fd < files_fdtable(files)->max_fds;
+@@ -2131,6 +2187,9 @@ static int proc_readfd_common(struct fil
+                       rcu_read_unlock();
+                       put_files_struct(files);
+       }
++
++out_unlock:
++      unlock_trace(p);
+ out:
+       put_task_struct(p);
+ out_no_task:
+@@ -2208,6 +2267,7 @@ static struct dentry *proc_fdinfo_instan
+       ei->fd = fd;
+       inode->i_mode = S_IFREG | S_IRUSR;
+       inode->i_fop = &proc_fdinfo_file_operations;
++      inode->i_op = &proc_fdinfo_link_inode_operations;
+       d_set_d_op(dentry, &tid_fd_dentry_operations);
+       d_add(dentry, inode);
+       /* Close the race of the process dying before we return the dentry */
index 3140cd56a5f358157971cff0d882660d3e1dfa8a..66a781bd5f2a01775b54a4b08ae41e956048764e 100644 (file)
@@ -158,3 +158,8 @@ jsm-remove-buggy-write-queue.patch
 ipv4-fix-ipsec-forward-performance-regression.patch
 ipv6-fix-route-error-binding-peer-in-func-icmp6_dst_alloc.patch
 tg3-fix-tigon3_dma_hwbug_workaround.patch
+mm-thp-tail-page-refcounting-fix.patch
+binfmt_elf-fix-pie-execution-with-randomization-disabled.patch
+proc-fix-races-against-execve-of-proc-pid-fd.patch
+alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch
+drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch