From: Greg Kroah-Hartman Date: Thu, 3 Nov 2011 18:59:50 +0000 (-0700) Subject: 3.1 patches X-Git-Tag: v3.0.9~36 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2c1ef88872deed217ce8f3fcd9b035a4f4a11956;p=thirdparty%2Fkernel%2Fstable-queue.git 3.1 patches --- diff --git a/queue-3.1/alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch b/queue-3.1/alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch new file mode 100644 index 00000000000..10fc263de3d --- /dev/null +++ b/queue-3.1/alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch @@ -0,0 +1,81 @@ +From tiwai@suse.de Thu Nov 3 10:55:14 2011 +From: Takashi Iwai +Date: Thu, 03 Nov 2011 15:56:55 +0100 +Subject: ALSA: hda - Add missing static ADC tables for ALC269 quirks +To: stable@vger.kernel.org +Cc: Chris Vine +Message-ID: + +From: Takashi Iwai + +[There is no upstream commit for this patch since the corresponding +code was removed from 3.2 kernel. This is a regression found only in +3.1 kernel, so please apply this only to 3.1.x series.] + +Some ALC269 quirks define their own .cap_mixer field but without the +static adc_nids[]. This resulted in the mismatch of ADC because ALC269 +may have the widget 0x07 for another audio-in, and the auto-parser picks +this up instead. + +This patch fixes the problem by adding the static adc_nids[] and co +again to these entries. + +Tested-by: Chris Vine +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman + +--- + sound/pci/hda/alc269_quirks.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/sound/pci/hda/alc269_quirks.c ++++ b/sound/pci/hda/alc269_quirks.c +@@ -577,6 +577,9 @@ static const struct alc_config_preset al + alc269_laptop_amic_init_verbs }, + .num_dacs = ARRAY_SIZE(alc269_dac_nids), + .dac_nids = alc269_dac_nids, ++ .adc_nids = alc269_adc_nids, ++ .capsrc_nids = alc269_capsrc_nids, ++ .num_adc_nids = ARRAY_SIZE(alc269_adc_nids), + .hp_nid = 0x03, + .num_channel_mode = ARRAY_SIZE(alc269_modes), + .channel_mode = alc269_modes, +@@ -591,6 +594,9 @@ static const struct alc_config_preset al + alc269_laptop_dmic_init_verbs }, + .num_dacs = ARRAY_SIZE(alc269_dac_nids), + .dac_nids = alc269_dac_nids, ++ .adc_nids = alc269_adc_nids, ++ .capsrc_nids = alc269_capsrc_nids, ++ .num_adc_nids = ARRAY_SIZE(alc269_adc_nids), + .hp_nid = 0x03, + .num_channel_mode = ARRAY_SIZE(alc269_modes), + .channel_mode = alc269_modes, +@@ -605,6 +611,9 @@ static const struct alc_config_preset al + alc269vb_laptop_amic_init_verbs }, + .num_dacs = ARRAY_SIZE(alc269_dac_nids), + .dac_nids = alc269_dac_nids, ++ .adc_nids = alc269vb_adc_nids, ++ .capsrc_nids = alc269vb_capsrc_nids, ++ .num_adc_nids = ARRAY_SIZE(alc269vb_adc_nids), + .hp_nid = 0x03, + .num_channel_mode = ARRAY_SIZE(alc269_modes), + .channel_mode = alc269_modes, +@@ -619,6 +628,9 @@ static const struct alc_config_preset al + alc269vb_laptop_dmic_init_verbs }, + .num_dacs = ARRAY_SIZE(alc269_dac_nids), + .dac_nids = alc269_dac_nids, ++ .adc_nids = alc269vb_adc_nids, ++ .capsrc_nids = alc269vb_capsrc_nids, ++ .num_adc_nids = ARRAY_SIZE(alc269vb_adc_nids), + .hp_nid = 0x03, + .num_channel_mode = ARRAY_SIZE(alc269_modes), + .channel_mode = alc269_modes, +@@ -633,6 +645,8 @@ static const struct alc_config_preset al + alc269_laptop_dmic_init_verbs }, + .num_dacs = ARRAY_SIZE(alc269_dac_nids), + .dac_nids = alc269_dac_nids, ++ .adc_nids = alc269_adc_nids, ++ .capsrc_nids = alc269_capsrc_nids, + .hp_nid = 0x03, + .num_channel_mode = ARRAY_SIZE(alc269_modes), + .channel_mode = alc269_modes, diff --git 
a/queue-3.1/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch b/queue-3.1/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch new file mode 100644 index 00000000000..184425fd927 --- /dev/null +++ b/queue-3.1/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch @@ -0,0 +1,55 @@ +From a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 Mon Sep 17 00:00:00 2001 +From: Jiri Kosina +Date: Wed, 2 Nov 2011 13:37:41 -0700 +Subject: binfmt_elf: fix PIE execution with randomization disabled + +From: Jiri Kosina + +commit a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 upstream. + +The case of address space randomization being disabled in runtime through +randomize_va_space sysctl is not treated properly in load_elf_binary(), +resulting in SIGKILL coming at exec() time for certain PIE-linked binaries +in case the randomization has been disabled at runtime prior to calling +exec(). + +Handle the randomize_va_space == 0 case the same way as if we were not +supporting .text randomization at all. + +Based on original patch by H.J. Lu and Josh Boyer. + +Signed-off-by: Jiri Kosina +Cc: Ingo Molnar +Cc: Russell King +Cc: H.J. Lu +Cc: +Tested-by: Josh Boyer +Acked-by: Nicolas Pitre +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/binfmt_elf.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/binfmt_elf.c ++++ b/fs/binfmt_elf.c +@@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_ + * might try to exec. This is because the brk will + * follow the loader, and is not movable. */ + #if defined(CONFIG_X86) || defined(CONFIG_ARM) +- load_bias = 0; ++ /* Memory randomization might have been switched off ++ * in runtime via sysctl. ++ * If that is the case, retain the original non-zero ++ * load_bias value in order to establish proper ++ * non-randomized mappings. ++ */ ++ if (current->flags & PF_RANDOMIZE) ++ load_bias = 0; ++ else ++ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + #else + load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + #endif diff --git a/queue-3.1/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch b/queue-3.1/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch new file mode 100644 index 00000000000..56b713de449 --- /dev/null +++ b/queue-3.1/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch @@ -0,0 +1,37 @@ +From e0c87bd95e8dad455c23bc56513af8dcb1737e55 Mon Sep 17 00:00:00 2001 +From: Alexandre Bounine +Date: Wed, 2 Nov 2011 13:39:15 -0700 +Subject: drivers/net/rionet.c: fix ethernet address macros for LE platforms + +From: Alexandre Bounine + +commit e0c87bd95e8dad455c23bc56513af8dcb1737e55 upstream. 
+ +Modify Ethernet addess macros to be compatible with BE/LE platforms + +Signed-off-by: Alexandre Bounine +Cc: Chul Kim +Cc: Kumar Gala +Cc: Matt Porter +Cc: Li Yang +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/rionet.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/rionet.c ++++ b/drivers/net/rionet.c +@@ -88,8 +88,8 @@ static struct rio_dev **rionet_active; + #define dev_rionet_capable(dev) \ + is_rionet_capable(dev->src_ops, dev->dst_ops) + +-#define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001) +-#define RIONET_GET_DESTID(x) (*(u16 *)(x + 4)) ++#define RIONET_MAC_MATCH(x) (!memcmp((x), "\00\01\00\01", 4)) ++#define RIONET_GET_DESTID(x) ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5)) + + static int rionet_rx_clean(struct net_device *ndev) + { diff --git a/queue-3.1/mm-thp-tail-page-refcounting-fix.patch b/queue-3.1/mm-thp-tail-page-refcounting-fix.patch new file mode 100644 index 00000000000..3bf21a69f61 --- /dev/null +++ b/queue-3.1/mm-thp-tail-page-refcounting-fix.patch @@ -0,0 +1,491 @@ +From 70b50f94f1644e2aa7cb374819cfd93f3c28d725 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Wed, 2 Nov 2011 13:36:59 -0700 +Subject: mm: thp: tail page refcounting fix + +From: Andrea Arcangeli + +commit 70b50f94f1644e2aa7cb374819cfd93f3c28d725 upstream. + +Michel while working on the working set estimation code, noticed that +calling get_page_unless_zero() on a random pfn_to_page(random_pfn) +wasn't safe, if the pfn ended up being a tail page of a transparent +hugepage under splitting by __split_huge_page_refcount(). + +He then found the problem could also theoretically materialize with +page_cache_get_speculative() during the speculative radix tree lookups +that uses get_page_unless_zero() in SMP if the radix tree page is freed +and reallocated and get_user_pages is called on it before +page_cache_get_speculative has a chance to call get_page_unless_zero(). + +So the best way to fix the problem is to keep page_tail->_count zero at +all times. This will guarantee that get_page_unless_zero() can never +succeed on any tail page. page_tail->_mapcount is guaranteed zero and +is unused for all tail pages of a compound page, so we can simply +account the tail page references there and transfer them to +tail_page->_count in __split_huge_page_refcount() (in addition to the +head_page->_mapcount). + +While debugging this s/_count/_mapcount/ change I also noticed get_page is +called by direct-io.c on pages returned by get_user_pages. That wasn't +entirely safe because the two atomic_inc in get_page weren't atomic. As +opposed to other get_user_page users like secondary-MMU page fault to +establish the shadow pagetables would never call any superflous get_page +after get_user_page returns. It's safer to make get_page universally safe +for tail pages and to use get_page_foll() within follow_page (inside +get_user_pages()). get_page_foll() is safe to do the refcounting for tail +pages without taking any locks because it is run within PT lock protected +critical sections (PT lock for pte and page_table_lock for +pmd_trans_huge). + +The standard get_page() as invoked by direct-io instead will now take +the compound_lock but still only for tail pages. The direct-io paths +are usually I/O bound and the compound_lock is per THP so very +finegrined, so there's no risk of scalability issues with it. 
A simple +direct-io benchmarks with all lockdep prove locking and spinlock +debugging infrastructure enabled shows identical performance and no +overhead. So it's worth it. Ideally direct-io should stop calling +get_page() on pages returned by get_user_pages(). The spinlock in +get_page() is already optimized away for no-THP builds but doing +get_page() on tail pages returned by GUP is generally a rare operation +and usually only run in I/O paths. + +This new refcounting on page_tail->_mapcount in addition to avoiding new +RCU critical sections will also allow the working set estimation code to +work without any further complexity associated to the tail page +refcounting with THP. + +Signed-off-by: Andrea Arcangeli +Reported-by: Michel Lespinasse +Reviewed-by: Michel Lespinasse +Reviewed-by: Minchan Kim +Cc: Peter Zijlstra +Cc: Hugh Dickins +Cc: Johannes Weiner +Cc: Rik van Riel +Cc: Mel Gorman +Cc: KOSAKI Motohiro +Cc: Benjamin Herrenschmidt +Cc: David Gibson +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/gup.c | 5 +- + arch/x86/mm/gup.c | 5 +- + include/linux/mm.h | 56 ++++++++++++------------------- + include/linux/mm_types.h | 21 +++++++++-- + mm/huge_memory.c | 37 ++++++++++++++------ + mm/internal.h | 46 ++++++++++++++++++++++++++ + mm/memory.c | 2 - + mm/swap.c | 83 ++++++++++++++++++++++++++++++----------------- + 8 files changed, 171 insertions(+), 84 deletions(-) + +--- a/arch/powerpc/mm/gup.c ++++ b/arch/powerpc/mm/gup.c +@@ -22,8 +22,9 @@ static inline void get_huge_page_tail(st + * __split_huge_page_refcount() cannot run + * from under us. + */ +- VM_BUG_ON(atomic_read(&page->_count) < 0); +- atomic_inc(&page->_count); ++ VM_BUG_ON(page_mapcount(page) < 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); ++ atomic_inc(&page->_mapcount); + } + + /* +--- a/arch/x86/mm/gup.c ++++ b/arch/x86/mm/gup.c +@@ -114,8 +114,9 @@ static inline void get_huge_page_tail(st + * __split_huge_page_refcount() cannot run + * from under us. + */ +- VM_BUG_ON(atomic_read(&page->_count) < 0); +- atomic_inc(&page->_count); ++ VM_BUG_ON(page_mapcount(page) < 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); ++ atomic_inc(&page->_mapcount); + } + + static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -356,36 +356,39 @@ static inline struct page *compound_head + return page; + } + ++/* ++ * The atomic page->_mapcount, starts from -1: so that transitions ++ * both from it and to it can be tracked, using atomic_inc_and_test ++ * and atomic_add_negative(-1). ++ */ ++static inline void reset_page_mapcount(struct page *page) ++{ ++ atomic_set(&(page)->_mapcount, -1); ++} ++ ++static inline int page_mapcount(struct page *page) ++{ ++ return atomic_read(&(page)->_mapcount) + 1; ++} ++ + static inline int page_count(struct page *page) + { + return atomic_read(&compound_head(page)->_count); + } + ++extern bool __get_page_tail(struct page *page); ++ + static inline void get_page(struct page *page) + { ++ if (unlikely(PageTail(page))) ++ if (likely(__get_page_tail(page))) ++ return; + /* + * Getting a normal page or the head of a compound page +- * requires to already have an elevated page->_count. Only if +- * we're getting a tail page, the elevated page->_count is +- * required only in the head page, so for tail pages the +- * bugcheck only verifies that the page->_count isn't +- * negative. ++ * requires to already have an elevated page->_count. 
+ */ +- VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); ++ VM_BUG_ON(atomic_read(&page->_count) <= 0); + atomic_inc(&page->_count); +- /* +- * Getting a tail page will elevate both the head and tail +- * page->_count(s). +- */ +- if (unlikely(PageTail(page))) { +- /* +- * This is safe only because +- * __split_huge_page_refcount can't run under +- * get_page(). +- */ +- VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); +- atomic_inc(&page->first_page->_count); +- } + } + + static inline struct page *virt_to_head_page(const void *x) +@@ -804,21 +807,6 @@ static inline pgoff_t page_index(struct + } + + /* +- * The atomic page->_mapcount, like _count, starts from -1: +- * so that transitions both from it and to it can be tracked, +- * using atomic_inc_and_test and atomic_add_negative(-1). +- */ +-static inline void reset_page_mapcount(struct page *page) +-{ +- atomic_set(&(page)->_mapcount, -1); +-} +- +-static inline int page_mapcount(struct page *page) +-{ +- return atomic_read(&(page)->_mapcount) + 1; +-} +- +-/* + * Return true if this page is mapped into pagetables. + */ + static inline int page_mapped(struct page *page) +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -62,10 +62,23 @@ struct page { + struct { + + union { +- atomic_t _mapcount; /* Count of ptes mapped in mms, +- * to show when page is mapped +- * & limit reverse map searches. +- */ ++ /* ++ * Count of ptes mapped in ++ * mms, to show when page is ++ * mapped & limit reverse map ++ * searches. ++ * ++ * Used also for tail pages ++ * refcounting instead of ++ * _count. Tail pages cannot ++ * be mapped and keeping the ++ * tail page _count zero at ++ * all times guarantees ++ * get_page_unless_zero() will ++ * never succeed on tail ++ * pages. ++ */ ++ atomic_t _mapcount; + + struct { + unsigned inuse:16; +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -989,7 +989,7 @@ struct page *follow_trans_huge_pmd(struc + page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; + VM_BUG_ON(!PageCompound(page)); + if (flags & FOLL_GET) +- get_page(page); ++ get_page_foll(page); + + out: + return page; +@@ -1156,6 +1156,7 @@ static void __split_huge_page_refcount(s + unsigned long head_index = page->index; + struct zone *zone = page_zone(page); + int zonestat; ++ int tail_count = 0; + + /* prevent PageLRU to go away from under us, and freeze lru stats */ + spin_lock_irq(&zone->lru_lock); +@@ -1164,11 +1165,27 @@ static void __split_huge_page_refcount(s + for (i = 1; i < HPAGE_PMD_NR; i++) { + struct page *page_tail = page + i; + +- /* tail_page->_count cannot change */ +- atomic_sub(atomic_read(&page_tail->_count), &page->_count); +- BUG_ON(page_count(page) <= 0); +- atomic_add(page_mapcount(page) + 1, &page_tail->_count); +- BUG_ON(atomic_read(&page_tail->_count) <= 0); ++ /* tail_page->_mapcount cannot change */ ++ BUG_ON(page_mapcount(page_tail) < 0); ++ tail_count += page_mapcount(page_tail); ++ /* check for overflow */ ++ BUG_ON(tail_count < 0); ++ BUG_ON(atomic_read(&page_tail->_count) != 0); ++ /* ++ * tail_page->_count is zero and not changing from ++ * under us. But get_page_unless_zero() may be running ++ * from under us on the tail_page. If we used ++ * atomic_set() below instead of atomic_add(), we ++ * would then run atomic_set() concurrently with ++ * get_page_unless_zero(), and atomic_set() is ++ * implemented in C not using locked ops. 
spin_unlock ++ * on x86 sometime uses locked ops because of PPro ++ * errata 66, 92, so unless somebody can guarantee ++ * atomic_set() here would be safe on all archs (and ++ * not only on x86), it's safer to use atomic_add(). ++ */ ++ atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1, ++ &page_tail->_count); + + /* after clearing PageTail the gup refcount can be released */ + smp_mb(); +@@ -1186,10 +1203,7 @@ static void __split_huge_page_refcount(s + (1L << PG_uptodate))); + page_tail->flags |= (1L << PG_dirty); + +- /* +- * 1) clear PageTail before overwriting first_page +- * 2) clear PageTail before clearing PageHead for VM_BUG_ON +- */ ++ /* clear PageTail before overwriting first_page */ + smp_wmb(); + + /* +@@ -1206,7 +1220,6 @@ static void __split_huge_page_refcount(s + * status is achieved setting a reserved bit in the + * pmd, not by clearing the present bit. + */ +- BUG_ON(page_mapcount(page_tail)); + page_tail->_mapcount = page->_mapcount; + + BUG_ON(page_tail->mapping); +@@ -1223,6 +1236,8 @@ static void __split_huge_page_refcount(s + + lru_add_page_tail(zone, page, page_tail); + } ++ atomic_sub(tail_count, &page->_count); ++ BUG_ON(atomic_read(&page->_count) <= 0); + + __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -37,6 +37,52 @@ static inline void __put_page(struct pag + atomic_dec(&page->_count); + } + ++static inline void __get_page_tail_foll(struct page *page, ++ bool get_page_head) ++{ ++ /* ++ * If we're getting a tail page, the elevated page->_count is ++ * required only in the head page and we will elevate the head ++ * page->_count and tail page->_mapcount. ++ * ++ * We elevate page_tail->_mapcount for tail pages to force ++ * page_tail->_count to be zero at all times to avoid getting ++ * false positives from get_page_unless_zero() with ++ * speculative page access (like in ++ * page_cache_get_speculative()) on tail pages. ++ */ ++ VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); ++ VM_BUG_ON(page_mapcount(page) < 0); ++ if (get_page_head) ++ atomic_inc(&page->first_page->_count); ++ atomic_inc(&page->_mapcount); ++} ++ ++/* ++ * This is meant to be called as the FOLL_GET operation of ++ * follow_page() and it must be called while holding the proper PT ++ * lock while the pte (or pmd_trans_huge) is still mapping the page. ++ */ ++static inline void get_page_foll(struct page *page) ++{ ++ if (unlikely(PageTail(page))) ++ /* ++ * This is safe only because ++ * __split_huge_page_refcount() can't run under ++ * get_page_foll() because we hold the proper PT lock. ++ */ ++ __get_page_tail_foll(page, true); ++ else { ++ /* ++ * Getting a normal page or the head of a compound page ++ * requires to already have an elevated page->_count. 
++ */ ++ VM_BUG_ON(atomic_read(&page->_count) <= 0); ++ atomic_inc(&page->_count); ++ } ++} ++ + extern unsigned long highest_memmap_pfn; + + /* +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1503,7 +1503,7 @@ split_fallthrough: + } + + if (flags & FOLL_GET) +- get_page(page); ++ get_page_foll(page); + if (flags & FOLL_TOUCH) { + if ((flags & FOLL_WRITE) && + !pte_dirty(pte) && !PageDirty(page)) +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -78,39 +78,22 @@ static void put_compound_page(struct pag + { + if (unlikely(PageTail(page))) { + /* __split_huge_page_refcount can run under us */ +- struct page *page_head = page->first_page; +- smp_rmb(); +- /* +- * If PageTail is still set after smp_rmb() we can be sure +- * that the page->first_page we read wasn't a dangling pointer. +- * See __split_huge_page_refcount() smp_wmb(). +- */ +- if (likely(PageTail(page) && get_page_unless_zero(page_head))) { ++ struct page *page_head = compound_trans_head(page); ++ ++ if (likely(page != page_head && ++ get_page_unless_zero(page_head))) { + unsigned long flags; + /* +- * Verify that our page_head wasn't converted +- * to a a regular page before we got a +- * reference on it. ++ * page_head wasn't a dangling pointer but it ++ * may not be a head page anymore by the time ++ * we obtain the lock. That is ok as long as it ++ * can't be freed from under us. + */ +- if (unlikely(!PageHead(page_head))) { +- /* PageHead is cleared after PageTail */ +- smp_rmb(); +- VM_BUG_ON(PageTail(page)); +- goto out_put_head; +- } +- /* +- * Only run compound_lock on a valid PageHead, +- * after having it pinned with +- * get_page_unless_zero() above. +- */ +- smp_mb(); +- /* page_head wasn't a dangling pointer */ + flags = compound_lock_irqsave(page_head); + if (unlikely(!PageTail(page))) { + /* __split_huge_page_refcount run before us */ + compound_unlock_irqrestore(page_head, flags); + VM_BUG_ON(PageHead(page_head)); +- out_put_head: + if (put_page_testzero(page_head)) + __put_single_page(page_head); + out_put_single: +@@ -121,16 +104,17 @@ static void put_compound_page(struct pag + VM_BUG_ON(page_head != page->first_page); + /* + * We can release the refcount taken by +- * get_page_unless_zero now that +- * split_huge_page_refcount is blocked on the +- * compound_lock. ++ * get_page_unless_zero() now that ++ * __split_huge_page_refcount() is blocked on ++ * the compound_lock. + */ + if (put_page_testzero(page_head)) + VM_BUG_ON(1); + /* __split_huge_page_refcount will wait now */ +- VM_BUG_ON(atomic_read(&page->_count) <= 0); +- atomic_dec(&page->_count); ++ VM_BUG_ON(page_mapcount(page) <= 0); ++ atomic_dec(&page->_mapcount); + VM_BUG_ON(atomic_read(&page_head->_count) <= 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); + compound_unlock_irqrestore(page_head, flags); + if (put_page_testzero(page_head)) { + if (PageHead(page_head)) +@@ -160,6 +144,45 @@ void put_page(struct page *page) + } + EXPORT_SYMBOL(put_page); + ++/* ++ * This function is exported but must not be called by anything other ++ * than get_page(). It implements the slow path of get_page(). ++ */ ++bool __get_page_tail(struct page *page) ++{ ++ /* ++ * This takes care of get_page() if run on a tail page ++ * returned by one of the get_user_pages/follow_page variants. ++ * get_user_pages/follow_page itself doesn't need the compound ++ * lock because it runs __get_page_tail_foll() under the ++ * proper PT lock that already serializes against ++ * split_huge_page(). 
++ */ ++ unsigned long flags; ++ bool got = false; ++ struct page *page_head = compound_trans_head(page); ++ ++ if (likely(page != page_head && get_page_unless_zero(page_head))) { ++ /* ++ * page_head wasn't a dangling pointer but it ++ * may not be a head page anymore by the time ++ * we obtain the lock. That is ok as long as it ++ * can't be freed from under us. ++ */ ++ flags = compound_lock_irqsave(page_head); ++ /* here __split_huge_page_refcount won't run anymore */ ++ if (likely(PageTail(page))) { ++ __get_page_tail_foll(page, false); ++ got = true; ++ } ++ compound_unlock_irqrestore(page_head, flags); ++ if (unlikely(!got)) ++ put_page(page_head); ++ } ++ return got; ++} ++EXPORT_SYMBOL(__get_page_tail); ++ + /** + * put_pages_list() - release a list of pages + * @pages: list of pages threaded on page->lru diff --git a/queue-3.1/proc-fix-races-against-execve-of-proc-pid-fd.patch b/queue-3.1/proc-fix-races-against-execve-of-proc-pid-fd.patch new file mode 100644 index 00000000000..7b525905456 --- /dev/null +++ b/queue-3.1/proc-fix-races-against-execve-of-proc-pid-fd.patch @@ -0,0 +1,261 @@ +From aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 Mon Sep 17 00:00:00 2001 +From: Vasiliy Kulikov +Date: Wed, 2 Nov 2011 13:38:44 -0700 +Subject: proc: fix races against execve() of /proc/PID/fd** + +From: Vasiliy Kulikov + +commit aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 upstream. + +fd* files are restricted to the task's owner, and other users may not get +direct access to them. But one may open any of these files and run any +setuid program, keeping opened file descriptors. As there are permission +checks on open(), but not on readdir() and read(), operations on the kept +file descriptors will not be checked. It makes it possible to violate +procfs permission model. + +Reading fdinfo/* may disclosure current fds' position and flags, reading +directory contents of fdinfo/ and fd/ may disclosure the number of opened +files by the target task. This information is not sensible per se, but it +can reveal some private information (like length of a password stored in a +file) under certain conditions. + +Used existing (un)lock_trace functions to check for ptrace_may_access(), +but instead of using EPERM return code from it use EACCES to be consistent +with existing proc_pid_follow_link()/proc_pid_readlink() return code. If +they differ, attacker can guess what fds exist by analyzing stat() return +code. Patched handlers: stat() for fd/*, stat() and read() for fdindo/*, +readdir() and lookup() for fd/ and fdinfo/. 
+ +Signed-off-by: Vasiliy Kulikov +Cc: Cyrill Gorcunov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 146 ++++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 103 insertions(+), 43 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -1665,12 +1665,46 @@ out: + return error; + } + ++static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct task_struct *task = get_proc_task(inode); ++ int rc; ++ ++ if (task == NULL) ++ return -ESRCH; ++ ++ rc = -EACCES; ++ if (lock_trace(task)) ++ goto out_task; ++ ++ generic_fillattr(inode, stat); ++ unlock_trace(task); ++ rc = 0; ++out_task: ++ put_task_struct(task); ++ return rc; ++} ++ + static const struct inode_operations proc_pid_link_inode_operations = { + .readlink = proc_pid_readlink, + .follow_link = proc_pid_follow_link, + .setattr = proc_setattr, + }; + ++static const struct inode_operations proc_fdinfo_link_inode_operations = { ++ .setattr = proc_setattr, ++ .getattr = proc_pid_fd_link_getattr, ++}; ++ ++static const struct inode_operations proc_fd_link_inode_operations = { ++ .readlink = proc_pid_readlink, ++ .follow_link = proc_pid_follow_link, ++ .setattr = proc_setattr, ++ .getattr = proc_pid_fd_link_getattr, ++}; ++ + + /* building an inode */ + +@@ -1902,49 +1936,61 @@ out: + + static int proc_fd_info(struct inode *inode, struct path *path, char *info) + { +- struct task_struct *task = get_proc_task(inode); +- struct files_struct *files = NULL; ++ struct task_struct *task; ++ struct files_struct *files; + struct file *file; + int fd = proc_fd(inode); ++ int rc; + +- if (task) { +- files = get_files_struct(task); +- put_task_struct(task); +- } +- if (files) { +- /* +- * We are not taking a ref to the file structure, so we must +- * hold ->file_lock. +- */ +- spin_lock(&files->file_lock); +- file = fcheck_files(files, fd); +- if (file) { +- unsigned int f_flags; +- struct fdtable *fdt; +- +- fdt = files_fdtable(files); +- f_flags = file->f_flags & ~O_CLOEXEC; +- if (FD_ISSET(fd, fdt->close_on_exec)) +- f_flags |= O_CLOEXEC; +- +- if (path) { +- *path = file->f_path; +- path_get(&file->f_path); +- } +- if (info) +- snprintf(info, PROC_FDINFO_MAX, +- "pos:\t%lli\n" +- "flags:\t0%o\n", +- (long long) file->f_pos, +- f_flags); +- spin_unlock(&files->file_lock); +- put_files_struct(files); +- return 0; ++ task = get_proc_task(inode); ++ if (!task) ++ return -ENOENT; ++ ++ rc = -EACCES; ++ if (lock_trace(task)) ++ goto out_task; ++ ++ rc = -ENOENT; ++ files = get_files_struct(task); ++ if (files == NULL) ++ goto out_unlock; ++ ++ /* ++ * We are not taking a ref to the file structure, so we must ++ * hold ->file_lock. 
++ */ ++ spin_lock(&files->file_lock); ++ file = fcheck_files(files, fd); ++ if (file) { ++ unsigned int f_flags; ++ struct fdtable *fdt; ++ ++ fdt = files_fdtable(files); ++ f_flags = file->f_flags & ~O_CLOEXEC; ++ if (FD_ISSET(fd, fdt->close_on_exec)) ++ f_flags |= O_CLOEXEC; ++ ++ if (path) { ++ *path = file->f_path; ++ path_get(&file->f_path); + } +- spin_unlock(&files->file_lock); +- put_files_struct(files); +- } +- return -ENOENT; ++ if (info) ++ snprintf(info, PROC_FDINFO_MAX, ++ "pos:\t%lli\n" ++ "flags:\t0%o\n", ++ (long long) file->f_pos, ++ f_flags); ++ rc = 0; ++ } else ++ rc = -ENOENT; ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++ ++out_unlock: ++ unlock_trace(task); ++out_task: ++ put_task_struct(task); ++ return rc; + } + + static int proc_fd_link(struct inode *inode, struct path *path) +@@ -2039,7 +2085,7 @@ static struct dentry *proc_fd_instantiat + spin_unlock(&files->file_lock); + put_files_struct(files); + +- inode->i_op = &proc_pid_link_inode_operations; ++ inode->i_op = &proc_fd_link_inode_operations; + inode->i_size = 64; + ei->op.proc_get_link = proc_fd_link; + d_set_d_op(dentry, &tid_fd_dentry_operations); +@@ -2071,7 +2117,12 @@ static struct dentry *proc_lookupfd_comm + if (fd == ~0U) + goto out; + ++ result = ERR_PTR(-EACCES); ++ if (lock_trace(task)) ++ goto out; ++ + result = instantiate(dir, dentry, task, &fd); ++ unlock_trace(task); + out: + put_task_struct(task); + out_no_task: +@@ -2091,23 +2142,28 @@ static int proc_readfd_common(struct fil + retval = -ENOENT; + if (!p) + goto out_no_task; ++ ++ retval = -EACCES; ++ if (lock_trace(p)) ++ goto out; ++ + retval = 0; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) +- goto out; ++ goto out_unlock; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) +- goto out; ++ goto out_unlock; + filp->f_pos++; + default: + files = get_files_struct(p); + if (!files) +- goto out; ++ goto out_unlock; + rcu_read_lock(); + for (fd = filp->f_pos-2; + fd < files_fdtable(files)->max_fds; +@@ -2131,6 +2187,9 @@ static int proc_readfd_common(struct fil + rcu_read_unlock(); + put_files_struct(files); + } ++ ++out_unlock: ++ unlock_trace(p); + out: + put_task_struct(p); + out_no_task: +@@ -2208,6 +2267,7 @@ static struct dentry *proc_fdinfo_instan + ei->fd = fd; + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; ++ inode->i_op = &proc_fdinfo_link_inode_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ diff --git a/queue-3.1/series b/queue-3.1/series index 3140cd56a5f..66a781bd5f2 100644 --- a/queue-3.1/series +++ b/queue-3.1/series @@ -158,3 +158,8 @@ jsm-remove-buggy-write-queue.patch ipv4-fix-ipsec-forward-performance-regression.patch ipv6-fix-route-error-binding-peer-in-func-icmp6_dst_alloc.patch tg3-fix-tigon3_dma_hwbug_workaround.patch +mm-thp-tail-page-refcounting-fix.patch +binfmt_elf-fix-pie-execution-with-randomization-disabled.patch +proc-fix-races-against-execve-of-proc-pid-fd.patch +alsa-hda-add-missing-static-adc-tables-for-alc269-quirks.patch +drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch
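
The rionet fix above replaces a type-punned u32 comparison, which only matches the 00:01:00:01 prefix on big-endian hosts, with byte-wise matching and an explicit big-endian read of the 16-bit destination ID. What follows is a minimal user-space sketch of that difference, illustrative only: the sample MAC value and helper names are invented, and memcpy stands in for the kernel's direct pointer cast so the standalone program avoids alignment and aliasing issues.

/*
 * Toy demonstration of why the old RIONET_MAC_MATCH was endian-dependent.
 * Not kernel code; only the 00:01:00:01 prefix convention comes from the patch.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Old style: reinterpret the first 4 bytes as a host-endian u32 and compare
 * against a constant -- this matches only on big-endian machines. */
static int mac_match_old(const unsigned char *x)
{
	uint32_t v;

	memcpy(&v, x, sizeof(v));	/* reads the same bytes the old cast read */
	return v == 0x00010001;
}

/* New style: compare the raw bytes, independent of host endianness. */
static int mac_match_new(const unsigned char *x)
{
	return !memcmp(x, "\x00\x01\x00\x01", 4);
}

/* New destid extraction: bytes 4 and 5 interpreted as a big-endian u16. */
static uint16_t get_destid(const unsigned char *x)
{
	return (uint16_t)((x[4] << 8) | x[5]);
}

int main(void)
{
	/* Hypothetical rionet-style MAC: 00:01:00:01 prefix + destid 0x1234. */
	const unsigned char mac[6] = { 0x00, 0x01, 0x00, 0x01, 0x12, 0x34 };

	printf("old match: %d (1 only on big-endian hosts)\n", mac_match_old(mac));
	printf("new match: %d (1 everywhere)\n", mac_match_new(mac));
	printf("destid:    0x%04x\n", get_destid(mac));
	return 0;
}

Built as a normal C program, the old-style comparison prints 0 on little-endian machines and 1 on big-endian ones, while the byte-wise version matches everywhere, which is the behaviour the patch restores for LE platforms.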
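
The THP tail-page refcounting patch above is built around one invariant: page_tail->_count stays zero at all times, so a speculative get_page_unless_zero() can never succeed on a tail page; GUP-style references on tail pages are instead accounted in page_tail->_mapcount plus the head page's _count, and transferred back in __split_huge_page_refcount(). Below is a drastically simplified user-space model of that invariant using C11 atomics; the struct layout, locking, splitting and the real kernel helpers are all omitted, and the function bodies are invented stand-ins rather than the kernel implementation.

/*
 * Minimal user-space model of the tail-page invariant: a tail page's _count
 * stays zero, so a speculative get_page_unless_zero() on it must fail, while
 * GUP references are accounted in the tail's _mapcount and the head's _count.
 * Purely illustrative -- no compound_lock, no splitting, no real struct page.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page {
	atomic_int _count;		/* 1 for an allocated head page */
	atomic_int _mapcount;		/* starts at -1, as in the kernel */
	struct page *first_page;	/* head page, for tail pages */
};

/* Speculative ref: succeeds only if _count was already non-zero. */
static bool get_page_unless_zero(struct page *p)
{
	int c = atomic_load(&p->_count);

	while (c > 0) {
		if (atomic_compare_exchange_weak(&p->_count, &c, c + 1))
			return true;
	}
	return false;
}

/* Rough analogue of __get_page_tail_foll(page, true): pin the head page and
 * account the tail reference in _mapcount so the tail's _count stays zero. */
static void get_page_foll_tail(struct page *tail)
{
	atomic_fetch_add(&tail->first_page->_count, 1);
	atomic_fetch_add(&tail->_mapcount, 1);
}

int main(void)
{
	struct page head = { ._count = 1, ._mapcount = -1, .first_page = NULL };
	struct page tail = { ._count = 0, ._mapcount = -1, .first_page = &head };

	get_page_foll_tail(&tail);	/* e.g. get_user_pages() took a tail ref */

	printf("head _count    = %d\n", atomic_load(&head._count));	/* 2 */
	printf("tail _count    = %d\n", atomic_load(&tail._count));	/* still 0 */
	printf("tail _mapcount = %d (one pinned reference)\n",
	       atomic_load(&tail._mapcount));
	printf("get_page_unless_zero(tail) = %d\n",
	       get_page_unless_zero(&tail));	/* 0: speculative ref refused */
	return 0;
}

Running the model shows the head's _count going from 1 to 2 while the tail's _count stays 0, which is why a stale speculative lookup that lands on a tail page fails cleanly instead of taking a bogus reference that __split_huge_page_refcount() would then race against.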