From: Greg Kroah-Hartman Date: Thu, 3 Nov 2011 19:05:08 +0000 (-0700) Subject: 3.0 patches X-Git-Tag: v3.0.9~35 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=612d75393513d20c8e0530e06dfef17913b797e7;p=thirdparty%2Fkernel%2Fstable-queue.git 3.0 patches --- diff --git a/queue-3.0/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch b/queue-3.0/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch new file mode 100644 index 00000000000..0f6f370b109 --- /dev/null +++ b/queue-3.0/binfmt_elf-fix-pie-execution-with-randomization-disabled.patch @@ -0,0 +1,55 @@ +From a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 Mon Sep 17 00:00:00 2001 +From: Jiri Kosina +Date: Wed, 2 Nov 2011 13:37:41 -0700 +Subject: binfmt_elf: fix PIE execution with randomization disabled + +From: Jiri Kosina + +commit a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 upstream. + +The case of address space randomization being disabled in runtime through +randomize_va_space sysctl is not treated properly in load_elf_binary(), +resulting in SIGKILL coming at exec() time for certain PIE-linked binaries +in case the randomization has been disabled at runtime prior to calling +exec(). + +Handle the randomize_va_space == 0 case the same way as if we were not +supporting .text randomization at all. + +Based on original patch by H.J. Lu and Josh Boyer. + +Signed-off-by: Jiri Kosina +Cc: Ingo Molnar +Cc: Russell King +Cc: H.J. Lu +Cc: +Tested-by: Josh Boyer +Acked-by: Nicolas Pitre +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/binfmt_elf.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/binfmt_elf.c ++++ b/fs/binfmt_elf.c +@@ -796,7 +796,16 @@ static int load_elf_binary(struct linux_ + * might try to exec. This is because the brk will + * follow the loader, and is not movable. */ + #if defined(CONFIG_X86) || defined(CONFIG_ARM) +- load_bias = 0; ++ /* Memory randomization might have been switched off ++ * in runtime via sysctl. ++ * If that is the case, retain the original non-zero ++ * load_bias value in order to establish proper ++ * non-randomized mappings. ++ */ ++ if (current->flags & PF_RANDOMIZE) ++ load_bias = 0; ++ else ++ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + #else + load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); + #endif diff --git a/queue-3.0/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch b/queue-3.0/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch new file mode 100644 index 00000000000..56b713de449 --- /dev/null +++ b/queue-3.0/drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch @@ -0,0 +1,37 @@ +From e0c87bd95e8dad455c23bc56513af8dcb1737e55 Mon Sep 17 00:00:00 2001 +From: Alexandre Bounine +Date: Wed, 2 Nov 2011 13:39:15 -0700 +Subject: drivers/net/rionet.c: fix ethernet address macros for LE platforms + +From: Alexandre Bounine + +commit e0c87bd95e8dad455c23bc56513af8dcb1737e55 upstream. 
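A minimal userspace sketch of the byte-order pitfall addressed by this change, using an illustrative 6-byte address rather than the driver's real data: the old test compared the first 32-bit word of the MAC against 0x00010001, which only matches the 00:01:00:01 prefix on big-endian CPUs, while a byte-wise memcmp() and explicit assembly of the destination ID behave identically on both.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: models the old and new match logic on a fabricated
 * 6-byte address; this is not the driver's actual code path. */
int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x01, 0x00, 0x01, 0x12, 0x34 };
	uint32_t word;

	memcpy(&word, mac, sizeof(word));	/* also avoids the unaligned cast */

	/* Old test: the numeric value of the first word depends on CPU byte
	 * order -- 0x00010001 on big-endian, 0x01000100 on little-endian. */
	printf("u32 compare matches: %d\n", word == 0x00010001);

	/* New test: byte-wise comparison is byte-order independent. */
	printf("memcmp matches:      %d\n", !memcmp(mac, "\x00\x01\x00\x01", 4));

	/* Destination ID rebuilt explicitly from bytes 4 and 5 (big-endian). */
	printf("destid: 0x%04x\n", (mac[4] << 8) | mac[5]);
	return 0;
}
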
+ +Modify Ethernet addess macros to be compatible with BE/LE platforms + +Signed-off-by: Alexandre Bounine +Cc: Chul Kim +Cc: Kumar Gala +Cc: Matt Porter +Cc: Li Yang +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/rionet.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/rionet.c ++++ b/drivers/net/rionet.c +@@ -88,8 +88,8 @@ static struct rio_dev **rionet_active; + #define dev_rionet_capable(dev) \ + is_rionet_capable(dev->src_ops, dev->dst_ops) + +-#define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001) +-#define RIONET_GET_DESTID(x) (*(u16 *)(x + 4)) ++#define RIONET_MAC_MATCH(x) (!memcmp((x), "\00\01\00\01", 4)) ++#define RIONET_GET_DESTID(x) ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5)) + + static int rionet_rx_clean(struct net_device *ndev) + { diff --git a/queue-3.0/iwlagn-do-not-use-interruptible-waits.patch b/queue-3.0/iwlagn-do-not-use-interruptible-waits.patch new file mode 100644 index 00000000000..2ed3b4c20a7 --- /dev/null +++ b/queue-3.0/iwlagn-do-not-use-interruptible-waits.patch @@ -0,0 +1,130 @@ +From johannes@sipsolutions.net Thu Nov 3 10:55:59 2011 +From: Johannes Berg +Date: Thu, 03 Nov 2011 13:46:08 +0100 +Subject: iwlagn: do not use interruptible waits +To: stable@vger.kernel.org +Message-ID: <1320324368.3950.44.camel@jlt3.sipsolutions.net> + + +From: Johannes Berg + +Upstream commit effd4d9aece9184f526e6556786a94d335e38b71. + +Since the dawn of its time, iwlwifi has used +interruptible waits to wait for synchronous +commands and firmware loading. + +This leads to "interesting" bugs, because it +can't actually handle the interruptions; for +example when a command sending is interrupted +it will assume the command completed fully, +and then leave it pending, which leads to all +kinds of trouble when the command finishes +later. + +Since there's no easy way to gracefully deal +with interruptions, fix the driver to not use +interruptible waits. + +This at least fixes the error +iwlagn 0000:02:00.0: Error: Response NULL in 'REPLY_SCAN_ABORT_CMD' + +I have seen in P2P testing, but it is likely +that there are other errors caused by this. + +Cc: Stanislaw Gruszka +Signed-off-by: Johannes Berg +Signed-off-by: Wey-Yi Guy +Signed-off-by: John W. 
Linville +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/iwlwifi/iwl-agn-ucode.c | 9 ++------- + drivers/net/wireless/iwlwifi/iwl-agn.c | 2 +- + drivers/net/wireless/iwlwifi/iwl-core.c | 4 ++-- + drivers/net/wireless/iwlwifi/iwl-hcmd.c | 2 +- + drivers/net/wireless/iwlwifi/iwl-rx.c | 2 +- + drivers/net/wireless/iwlwifi/iwl-tx.c | 2 +- + 6 files changed, 8 insertions(+), 13 deletions(-) + +--- a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c ++++ b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c +@@ -144,13 +144,8 @@ static int iwlagn_load_section(struct iw + FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); + + IWL_DEBUG_INFO(priv, "%s uCode section being loaded...\n", name); +- ret = wait_event_interruptible_timeout(priv->wait_command_queue, +- priv->ucode_write_complete, 5 * HZ); +- if (ret == -ERESTARTSYS) { +- IWL_ERR(priv, "Could not load the %s uCode section due " +- "to interrupt\n", name); +- return ret; +- } ++ ret = wait_event_timeout(priv->wait_command_queue, ++ priv->ucode_write_complete, 5 * HZ); + if (!ret) { + IWL_ERR(priv, "Could not load the %s uCode section\n", + name); +--- a/drivers/net/wireless/iwlwifi/iwl-agn.c ++++ b/drivers/net/wireless/iwlwifi/iwl-agn.c +@@ -797,7 +797,7 @@ static void iwl_irq_tasklet(struct iwl_p + handled |= CSR_INT_BIT_FH_TX; + /* Wake up uCode load routine, now that load is complete */ + priv->ucode_write_complete = 1; +- wake_up_interruptible(&priv->wait_command_queue); ++ wake_up(&priv->wait_command_queue); + } + + if (inta & ~handled) { +--- a/drivers/net/wireless/iwlwifi/iwl-core.c ++++ b/drivers/net/wireless/iwlwifi/iwl-core.c +@@ -899,7 +899,7 @@ void iwlagn_fw_error(struct iwl_priv *pr + * commands by clearing the ready bit */ + clear_bit(STATUS_READY, &priv->status); + +- wake_up_interruptible(&priv->wait_command_queue); ++ wake_up(&priv->wait_command_queue); + + if (!ondemand) { + /* +@@ -950,7 +950,7 @@ void iwl_irq_handle_error(struct iwl_pri + */ + clear_bit(STATUS_READY, &priv->status); + clear_bit(STATUS_HCMD_ACTIVE, &priv->status); +- wake_up_interruptible(&priv->wait_command_queue); ++ wake_up(&priv->wait_command_queue); + IWL_ERR(priv, "RF is used by WiMAX\n"); + return; + } +--- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c ++++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c +@@ -194,7 +194,7 @@ int iwl_send_cmd_sync(struct iwl_priv *p + return ret; + } + +- ret = wait_event_interruptible_timeout(priv->wait_command_queue, ++ ret = wait_event_timeout(priv->wait_command_queue, + !test_bit(STATUS_HCMD_ACTIVE, &priv->status), + HOST_COMPLETE_TIMEOUT); + if (!ret) { +--- a/drivers/net/wireless/iwlwifi/iwl-rx.c ++++ b/drivers/net/wireless/iwlwifi/iwl-rx.c +@@ -738,7 +738,7 @@ static void iwl_rx_card_state_notif(stru + wiphy_rfkill_set_hw_state(priv->hw->wiphy, + test_bit(STATUS_RF_KILL_HW, &priv->status)); + else +- wake_up_interruptible(&priv->wait_command_queue); ++ wake_up(&priv->wait_command_queue); + } + + static void iwl_rx_missed_beacon_notif(struct iwl_priv *priv, +--- a/drivers/net/wireless/iwlwifi/iwl-tx.c ++++ b/drivers/net/wireless/iwlwifi/iwl-tx.c +@@ -821,7 +821,7 @@ void iwl_tx_cmd_complete(struct iwl_priv + clear_bit(STATUS_HCMD_ACTIVE, &priv->status); + IWL_DEBUG_INFO(priv, "Clearing HCMD_ACTIVE for command %s\n", + get_cmd_string(cmd->hdr.cmd)); +- wake_up_interruptible(&priv->wait_command_queue); ++ wake_up(&priv->wait_command_queue); + } + + /* Mark as unmapped */ diff --git a/queue-3.0/mm-thp-tail-page-refcounting-fix.patch b/queue-3.0/mm-thp-tail-page-refcounting-fix.patch new file mode 100644 index 
00000000000..983badc0f79 --- /dev/null +++ b/queue-3.0/mm-thp-tail-page-refcounting-fix.patch @@ -0,0 +1,492 @@ +From 70b50f94f1644e2aa7cb374819cfd93f3c28d725 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Wed, 2 Nov 2011 13:36:59 -0700 +Subject: mm: thp: tail page refcounting fix + +From: Andrea Arcangeli + +commit 70b50f94f1644e2aa7cb374819cfd93f3c28d725 upstream. + +Michel while working on the working set estimation code, noticed that +calling get_page_unless_zero() on a random pfn_to_page(random_pfn) +wasn't safe, if the pfn ended up being a tail page of a transparent +hugepage under splitting by __split_huge_page_refcount(). + +He then found the problem could also theoretically materialize with +page_cache_get_speculative() during the speculative radix tree lookups +that uses get_page_unless_zero() in SMP if the radix tree page is freed +and reallocated and get_user_pages is called on it before +page_cache_get_speculative has a chance to call get_page_unless_zero(). + +So the best way to fix the problem is to keep page_tail->_count zero at +all times. This will guarantee that get_page_unless_zero() can never +succeed on any tail page. page_tail->_mapcount is guaranteed zero and +is unused for all tail pages of a compound page, so we can simply +account the tail page references there and transfer them to +tail_page->_count in __split_huge_page_refcount() (in addition to the +head_page->_mapcount). + +While debugging this s/_count/_mapcount/ change I also noticed get_page is +called by direct-io.c on pages returned by get_user_pages. That wasn't +entirely safe because the two atomic_inc in get_page weren't atomic. As +opposed to other get_user_page users like secondary-MMU page fault to +establish the shadow pagetables would never call any superflous get_page +after get_user_page returns. It's safer to make get_page universally safe +for tail pages and to use get_page_foll() within follow_page (inside +get_user_pages()). get_page_foll() is safe to do the refcounting for tail +pages without taking any locks because it is run within PT lock protected +critical sections (PT lock for pte and page_table_lock for +pmd_trans_huge). + +The standard get_page() as invoked by direct-io instead will now take +the compound_lock but still only for tail pages. The direct-io paths +are usually I/O bound and the compound_lock is per THP so very +finegrined, so there's no risk of scalability issues with it. A simple +direct-io benchmarks with all lockdep prove locking and spinlock +debugging infrastructure enabled shows identical performance and no +overhead. So it's worth it. Ideally direct-io should stop calling +get_page() on pages returned by get_user_pages(). The spinlock in +get_page() is already optimized away for no-THP builds but doing +get_page() on tail pages returned by GUP is generally a rare operation +and usually only run in I/O paths. + +This new refcounting on page_tail->_mapcount in addition to avoiding new +RCU critical sections will also allow the working set estimation code to +work without any further complexity associated to the tail page +refcounting with THP. 
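The invariant the patch builds on can be shown with a small userspace model (not kernel code; fake_page and get_unless_zero() are stand-ins for page->_count, page->_mapcount and get_page_unless_zero()): an atomic inc-not-zero can never succeed on a counter that is held at zero, so parking tail-page pins in _mapcount instead of _count makes speculative lookups on tail pages fail by construction.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for struct page's two counters; illustrative only. */
struct fake_page {
	atomic_int count;	/* head page: real refcount; tail page: kept at 0  */
	atomic_int mapcount;	/* tail page: where the GUP-style pins are parked  */
};

/* Models get_page_unless_zero(): take a reference only if at least one
 * already exists (atomic inc-not-zero). */
static bool get_unless_zero(struct fake_page *p)
{
	int old = atomic_load(&p->count);
	while (old != 0)
		if (atomic_compare_exchange_weak(&p->count, &old, old + 1))
			return true;
	return false;		/* count was (or became) zero: refuse the pin */
}

int main(void)
{
	struct fake_page head = { 1, -1 };	/* one real reference held   */
	struct fake_page tail = { 0, -1 };	/* _count never leaves zero  */

	printf("speculative pin on head: %d\n", get_unless_zero(&head)); /* 1 */
	printf("speculative pin on tail: %d\n", get_unless_zero(&tail)); /* 0 */

	/* A pin taken through follow_page()/GUP on the tail goes to the tail's
	 * mapcount plus the head's count, never to the tail's count, so the
	 * speculative pin above keeps failing. */
	atomic_fetch_add(&tail.mapcount, 1);
	atomic_fetch_add(&head.count, 1);
	printf("tail count still zero:   %d\n", atomic_load(&tail.count) == 0);
	return 0;
}
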
+ +Signed-off-by: Andrea Arcangeli +Reported-by: Michel Lespinasse +Reviewed-by: Michel Lespinasse +Reviewed-by: Minchan Kim +Cc: Peter Zijlstra +Cc: Hugh Dickins +Cc: Johannes Weiner +Cc: Rik van Riel +Cc: Mel Gorman +Cc: KOSAKI Motohiro +Cc: Benjamin Herrenschmidt +Cc: David Gibson +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/gup.c | 5 +- + arch/x86/mm/gup.c | 5 +- + include/linux/mm.h | 56 ++++++++++++------------------- + include/linux/mm_types.h | 22 ++++++++++-- + mm/huge_memory.c | 37 ++++++++++++++------ + mm/internal.h | 46 ++++++++++++++++++++++++++ + mm/memory.c | 2 - + mm/swap.c | 83 ++++++++++++++++++++++++++++++----------------- + 8 files changed, 172 insertions(+), 84 deletions(-) + +--- a/arch/powerpc/mm/gup.c ++++ b/arch/powerpc/mm/gup.c +@@ -22,8 +22,9 @@ static inline void get_huge_page_tail(st + * __split_huge_page_refcount() cannot run + * from under us. + */ +- VM_BUG_ON(atomic_read(&page->_count) < 0); +- atomic_inc(&page->_count); ++ VM_BUG_ON(page_mapcount(page) < 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); ++ atomic_inc(&page->_mapcount); + } + + /* +--- a/arch/x86/mm/gup.c ++++ b/arch/x86/mm/gup.c +@@ -114,8 +114,9 @@ static inline void get_huge_page_tail(st + * __split_huge_page_refcount() cannot run + * from under us. + */ +- VM_BUG_ON(atomic_read(&page->_count) < 0); +- atomic_inc(&page->_count); ++ VM_BUG_ON(page_mapcount(page) < 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); ++ atomic_inc(&page->_mapcount); + } + + static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -355,36 +355,39 @@ static inline struct page *compound_head + return page; + } + ++/* ++ * The atomic page->_mapcount, starts from -1: so that transitions ++ * both from it and to it can be tracked, using atomic_inc_and_test ++ * and atomic_add_negative(-1). ++ */ ++static inline void reset_page_mapcount(struct page *page) ++{ ++ atomic_set(&(page)->_mapcount, -1); ++} ++ ++static inline int page_mapcount(struct page *page) ++{ ++ return atomic_read(&(page)->_mapcount) + 1; ++} ++ + static inline int page_count(struct page *page) + { + return atomic_read(&compound_head(page)->_count); + } + ++extern bool __get_page_tail(struct page *page); ++ + static inline void get_page(struct page *page) + { ++ if (unlikely(PageTail(page))) ++ if (likely(__get_page_tail(page))) ++ return; + /* + * Getting a normal page or the head of a compound page +- * requires to already have an elevated page->_count. Only if +- * we're getting a tail page, the elevated page->_count is +- * required only in the head page, so for tail pages the +- * bugcheck only verifies that the page->_count isn't +- * negative. ++ * requires to already have an elevated page->_count. + */ +- VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); ++ VM_BUG_ON(atomic_read(&page->_count) <= 0); + atomic_inc(&page->_count); +- /* +- * Getting a tail page will elevate both the head and tail +- * page->_count(s). +- */ +- if (unlikely(PageTail(page))) { +- /* +- * This is safe only because +- * __split_huge_page_refcount can't run under +- * get_page(). 
+- */ +- VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); +- atomic_inc(&page->first_page->_count); +- } + } + + static inline struct page *virt_to_head_page(const void *x) +@@ -803,21 +806,6 @@ static inline pgoff_t page_index(struct + } + + /* +- * The atomic page->_mapcount, like _count, starts from -1: +- * so that transitions both from it and to it can be tracked, +- * using atomic_inc_and_test and atomic_add_negative(-1). +- */ +-static inline void reset_page_mapcount(struct page *page) +-{ +- atomic_set(&(page)->_mapcount, -1); +-} +- +-static inline int page_mapcount(struct page *page) +-{ +- return atomic_read(&(page)->_mapcount) + 1; +-} +- +-/* + * Return true if this page is mapped into pagetables. + */ + static inline int page_mapped(struct page *page) +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -36,10 +36,24 @@ struct page { + * updated asynchronously */ + atomic_t _count; /* Usage count, see below. */ + union { +- atomic_t _mapcount; /* Count of ptes mapped in mms, +- * to show when page is mapped +- * & limit reverse map searches. +- */ ++ /* ++ * Count of ptes mapped in ++ * mms, to show when page is ++ * mapped & limit reverse map ++ * searches. ++ * ++ * Used also for tail pages ++ * refcounting instead of ++ * _count. Tail pages cannot ++ * be mapped and keeping the ++ * tail page _count zero at ++ * all times guarantees ++ * get_page_unless_zero() will ++ * never succeed on tail ++ * pages. ++ */ ++ atomic_t _mapcount; ++ + struct { /* SLUB */ + u16 inuse; + u16 objects; +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -989,7 +989,7 @@ struct page *follow_trans_huge_pmd(struc + page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; + VM_BUG_ON(!PageCompound(page)); + if (flags & FOLL_GET) +- get_page(page); ++ get_page_foll(page); + + out: + return page; +@@ -1156,6 +1156,7 @@ static void __split_huge_page_refcount(s + unsigned long head_index = page->index; + struct zone *zone = page_zone(page); + int zonestat; ++ int tail_count = 0; + + /* prevent PageLRU to go away from under us, and freeze lru stats */ + spin_lock_irq(&zone->lru_lock); +@@ -1164,11 +1165,27 @@ static void __split_huge_page_refcount(s + for (i = 1; i < HPAGE_PMD_NR; i++) { + struct page *page_tail = page + i; + +- /* tail_page->_count cannot change */ +- atomic_sub(atomic_read(&page_tail->_count), &page->_count); +- BUG_ON(page_count(page) <= 0); +- atomic_add(page_mapcount(page) + 1, &page_tail->_count); +- BUG_ON(atomic_read(&page_tail->_count) <= 0); ++ /* tail_page->_mapcount cannot change */ ++ BUG_ON(page_mapcount(page_tail) < 0); ++ tail_count += page_mapcount(page_tail); ++ /* check for overflow */ ++ BUG_ON(tail_count < 0); ++ BUG_ON(atomic_read(&page_tail->_count) != 0); ++ /* ++ * tail_page->_count is zero and not changing from ++ * under us. But get_page_unless_zero() may be running ++ * from under us on the tail_page. If we used ++ * atomic_set() below instead of atomic_add(), we ++ * would then run atomic_set() concurrently with ++ * get_page_unless_zero(), and atomic_set() is ++ * implemented in C not using locked ops. spin_unlock ++ * on x86 sometime uses locked ops because of PPro ++ * errata 66, 92, so unless somebody can guarantee ++ * atomic_set() here would be safe on all archs (and ++ * not only on x86), it's safer to use atomic_add(). 
++ */ ++ atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1, ++ &page_tail->_count); + + /* after clearing PageTail the gup refcount can be released */ + smp_mb(); +@@ -1186,10 +1203,7 @@ static void __split_huge_page_refcount(s + (1L << PG_uptodate))); + page_tail->flags |= (1L << PG_dirty); + +- /* +- * 1) clear PageTail before overwriting first_page +- * 2) clear PageTail before clearing PageHead for VM_BUG_ON +- */ ++ /* clear PageTail before overwriting first_page */ + smp_wmb(); + + /* +@@ -1206,7 +1220,6 @@ static void __split_huge_page_refcount(s + * status is achieved setting a reserved bit in the + * pmd, not by clearing the present bit. + */ +- BUG_ON(page_mapcount(page_tail)); + page_tail->_mapcount = page->_mapcount; + + BUG_ON(page_tail->mapping); +@@ -1223,6 +1236,8 @@ static void __split_huge_page_refcount(s + + lru_add_page_tail(zone, page, page_tail); + } ++ atomic_sub(tail_count, &page->_count); ++ BUG_ON(atomic_read(&page->_count) <= 0); + + __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -37,6 +37,52 @@ static inline void __put_page(struct pag + atomic_dec(&page->_count); + } + ++static inline void __get_page_tail_foll(struct page *page, ++ bool get_page_head) ++{ ++ /* ++ * If we're getting a tail page, the elevated page->_count is ++ * required only in the head page and we will elevate the head ++ * page->_count and tail page->_mapcount. ++ * ++ * We elevate page_tail->_mapcount for tail pages to force ++ * page_tail->_count to be zero at all times to avoid getting ++ * false positives from get_page_unless_zero() with ++ * speculative page access (like in ++ * page_cache_get_speculative()) on tail pages. ++ */ ++ VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); ++ VM_BUG_ON(page_mapcount(page) < 0); ++ if (get_page_head) ++ atomic_inc(&page->first_page->_count); ++ atomic_inc(&page->_mapcount); ++} ++ ++/* ++ * This is meant to be called as the FOLL_GET operation of ++ * follow_page() and it must be called while holding the proper PT ++ * lock while the pte (or pmd_trans_huge) is still mapping the page. ++ */ ++static inline void get_page_foll(struct page *page) ++{ ++ if (unlikely(PageTail(page))) ++ /* ++ * This is safe only because ++ * __split_huge_page_refcount() can't run under ++ * get_page_foll() because we hold the proper PT lock. ++ */ ++ __get_page_tail_foll(page, true); ++ else { ++ /* ++ * Getting a normal page or the head of a compound page ++ * requires to already have an elevated page->_count. ++ */ ++ VM_BUG_ON(atomic_read(&page->_count) <= 0); ++ atomic_inc(&page->_count); ++ } ++} ++ + extern unsigned long highest_memmap_pfn; + + /* +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1514,7 +1514,7 @@ split_fallthrough: + } + + if (flags & FOLL_GET) +- get_page(page); ++ get_page_foll(page); + if (flags & FOLL_TOUCH) { + if ((flags & FOLL_WRITE) && + !pte_dirty(pte) && !PageDirty(page)) +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -78,39 +78,22 @@ static void put_compound_page(struct pag + { + if (unlikely(PageTail(page))) { + /* __split_huge_page_refcount can run under us */ +- struct page *page_head = page->first_page; +- smp_rmb(); +- /* +- * If PageTail is still set after smp_rmb() we can be sure +- * that the page->first_page we read wasn't a dangling pointer. +- * See __split_huge_page_refcount() smp_wmb(). 
+- */ +- if (likely(PageTail(page) && get_page_unless_zero(page_head))) { ++ struct page *page_head = compound_trans_head(page); ++ ++ if (likely(page != page_head && ++ get_page_unless_zero(page_head))) { + unsigned long flags; + /* +- * Verify that our page_head wasn't converted +- * to a a regular page before we got a +- * reference on it. ++ * page_head wasn't a dangling pointer but it ++ * may not be a head page anymore by the time ++ * we obtain the lock. That is ok as long as it ++ * can't be freed from under us. + */ +- if (unlikely(!PageHead(page_head))) { +- /* PageHead is cleared after PageTail */ +- smp_rmb(); +- VM_BUG_ON(PageTail(page)); +- goto out_put_head; +- } +- /* +- * Only run compound_lock on a valid PageHead, +- * after having it pinned with +- * get_page_unless_zero() above. +- */ +- smp_mb(); +- /* page_head wasn't a dangling pointer */ + flags = compound_lock_irqsave(page_head); + if (unlikely(!PageTail(page))) { + /* __split_huge_page_refcount run before us */ + compound_unlock_irqrestore(page_head, flags); + VM_BUG_ON(PageHead(page_head)); +- out_put_head: + if (put_page_testzero(page_head)) + __put_single_page(page_head); + out_put_single: +@@ -121,16 +104,17 @@ static void put_compound_page(struct pag + VM_BUG_ON(page_head != page->first_page); + /* + * We can release the refcount taken by +- * get_page_unless_zero now that +- * split_huge_page_refcount is blocked on the +- * compound_lock. ++ * get_page_unless_zero() now that ++ * __split_huge_page_refcount() is blocked on ++ * the compound_lock. + */ + if (put_page_testzero(page_head)) + VM_BUG_ON(1); + /* __split_huge_page_refcount will wait now */ +- VM_BUG_ON(atomic_read(&page->_count) <= 0); +- atomic_dec(&page->_count); ++ VM_BUG_ON(page_mapcount(page) <= 0); ++ atomic_dec(&page->_mapcount); + VM_BUG_ON(atomic_read(&page_head->_count) <= 0); ++ VM_BUG_ON(atomic_read(&page->_count) != 0); + compound_unlock_irqrestore(page_head, flags); + if (put_page_testzero(page_head)) { + if (PageHead(page_head)) +@@ -160,6 +144,45 @@ void put_page(struct page *page) + } + EXPORT_SYMBOL(put_page); + ++/* ++ * This function is exported but must not be called by anything other ++ * than get_page(). It implements the slow path of get_page(). ++ */ ++bool __get_page_tail(struct page *page) ++{ ++ /* ++ * This takes care of get_page() if run on a tail page ++ * returned by one of the get_user_pages/follow_page variants. ++ * get_user_pages/follow_page itself doesn't need the compound ++ * lock because it runs __get_page_tail_foll() under the ++ * proper PT lock that already serializes against ++ * split_huge_page(). ++ */ ++ unsigned long flags; ++ bool got = false; ++ struct page *page_head = compound_trans_head(page); ++ ++ if (likely(page != page_head && get_page_unless_zero(page_head))) { ++ /* ++ * page_head wasn't a dangling pointer but it ++ * may not be a head page anymore by the time ++ * we obtain the lock. That is ok as long as it ++ * can't be freed from under us. 
++ */ ++ flags = compound_lock_irqsave(page_head); ++ /* here __split_huge_page_refcount won't run anymore */ ++ if (likely(PageTail(page))) { ++ __get_page_tail_foll(page, false); ++ got = true; ++ } ++ compound_unlock_irqrestore(page_head, flags); ++ if (unlikely(!got)) ++ put_page(page_head); ++ } ++ return got; ++} ++EXPORT_SYMBOL(__get_page_tail); ++ + /** + * put_pages_list() - release a list of pages + * @pages: list of pages threaded on page->lru diff --git a/queue-3.0/proc-fix-races-against-execve-of-proc-pid-fd.patch b/queue-3.0/proc-fix-races-against-execve-of-proc-pid-fd.patch new file mode 100644 index 00000000000..20688cf6371 --- /dev/null +++ b/queue-3.0/proc-fix-races-against-execve-of-proc-pid-fd.patch @@ -0,0 +1,261 @@ +From aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 Mon Sep 17 00:00:00 2001 +From: Vasiliy Kulikov +Date: Wed, 2 Nov 2011 13:38:44 -0700 +Subject: proc: fix races against execve() of /proc/PID/fd** + +From: Vasiliy Kulikov + +commit aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 upstream. + +fd* files are restricted to the task's owner, and other users may not get +direct access to them. But one may open any of these files and run any +setuid program, keeping opened file descriptors. As there are permission +checks on open(), but not on readdir() and read(), operations on the kept +file descriptors will not be checked. It makes it possible to violate +procfs permission model. + +Reading fdinfo/* may disclosure current fds' position and flags, reading +directory contents of fdinfo/ and fd/ may disclosure the number of opened +files by the target task. This information is not sensible per se, but it +can reveal some private information (like length of a password stored in a +file) under certain conditions. + +Used existing (un)lock_trace functions to check for ptrace_may_access(), +but instead of using EPERM return code from it use EACCES to be consistent +with existing proc_pid_follow_link()/proc_pid_readlink() return code. If +they differ, attacker can guess what fds exist by analyzing stat() return +code. Patched handlers: stat() for fd/*, stat() and read() for fdindo/*, +readdir() and lookup() for fd/ and fdinfo/. 
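A short userspace sketch of the information at stake (the file path is only an example): reading /proc/<pid>/fdinfo/<fd> returns the descriptor's current position and flags, which is exactly what the patched handlers now refuse with EACCES unless ptrace_may_access() allows it.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Open an arbitrary file (path is illustrative) and move its offset
	 * so the disclosed position is visible. */
	int fd = open("/etc/hostname", O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }
	lseek(fd, 3, SEEK_SET);

	/* Read back our own fdinfo entry: the per-fd position/flags data the
	 * patch restricts when the target task is not ours to trace. */
	char path[64], buf[256];
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	FILE *f = fopen(path, "r");
	if (!f) { perror("fopen"); return 1; }
	size_t n = fread(buf, 1, sizeof(buf) - 1, f);
	buf[n] = '\0';
	fputs(buf, stdout);	/* typically "pos:\t3" and "flags:\t0100000" */
	fclose(f);
	close(fd);
	return 0;
}
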
+ +Signed-off-by: Vasiliy Kulikov +Cc: Cyrill Gorcunov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 146 ++++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 103 insertions(+), 43 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -1666,12 +1666,46 @@ out: + return error; + } + ++static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct task_struct *task = get_proc_task(inode); ++ int rc; ++ ++ if (task == NULL) ++ return -ESRCH; ++ ++ rc = -EACCES; ++ if (lock_trace(task)) ++ goto out_task; ++ ++ generic_fillattr(inode, stat); ++ unlock_trace(task); ++ rc = 0; ++out_task: ++ put_task_struct(task); ++ return rc; ++} ++ + static const struct inode_operations proc_pid_link_inode_operations = { + .readlink = proc_pid_readlink, + .follow_link = proc_pid_follow_link, + .setattr = proc_setattr, + }; + ++static const struct inode_operations proc_fdinfo_link_inode_operations = { ++ .setattr = proc_setattr, ++ .getattr = proc_pid_fd_link_getattr, ++}; ++ ++static const struct inode_operations proc_fd_link_inode_operations = { ++ .readlink = proc_pid_readlink, ++ .follow_link = proc_pid_follow_link, ++ .setattr = proc_setattr, ++ .getattr = proc_pid_fd_link_getattr, ++}; ++ + + /* building an inode */ + +@@ -1903,49 +1937,61 @@ out: + + static int proc_fd_info(struct inode *inode, struct path *path, char *info) + { +- struct task_struct *task = get_proc_task(inode); +- struct files_struct *files = NULL; ++ struct task_struct *task; ++ struct files_struct *files; + struct file *file; + int fd = proc_fd(inode); ++ int rc; + +- if (task) { +- files = get_files_struct(task); +- put_task_struct(task); +- } +- if (files) { +- /* +- * We are not taking a ref to the file structure, so we must +- * hold ->file_lock. +- */ +- spin_lock(&files->file_lock); +- file = fcheck_files(files, fd); +- if (file) { +- unsigned int f_flags; +- struct fdtable *fdt; +- +- fdt = files_fdtable(files); +- f_flags = file->f_flags & ~O_CLOEXEC; +- if (FD_ISSET(fd, fdt->close_on_exec)) +- f_flags |= O_CLOEXEC; +- +- if (path) { +- *path = file->f_path; +- path_get(&file->f_path); +- } +- if (info) +- snprintf(info, PROC_FDINFO_MAX, +- "pos:\t%lli\n" +- "flags:\t0%o\n", +- (long long) file->f_pos, +- f_flags); +- spin_unlock(&files->file_lock); +- put_files_struct(files); +- return 0; ++ task = get_proc_task(inode); ++ if (!task) ++ return -ENOENT; ++ ++ rc = -EACCES; ++ if (lock_trace(task)) ++ goto out_task; ++ ++ rc = -ENOENT; ++ files = get_files_struct(task); ++ if (files == NULL) ++ goto out_unlock; ++ ++ /* ++ * We are not taking a ref to the file structure, so we must ++ * hold ->file_lock. 
++ */ ++ spin_lock(&files->file_lock); ++ file = fcheck_files(files, fd); ++ if (file) { ++ unsigned int f_flags; ++ struct fdtable *fdt; ++ ++ fdt = files_fdtable(files); ++ f_flags = file->f_flags & ~O_CLOEXEC; ++ if (FD_ISSET(fd, fdt->close_on_exec)) ++ f_flags |= O_CLOEXEC; ++ ++ if (path) { ++ *path = file->f_path; ++ path_get(&file->f_path); + } +- spin_unlock(&files->file_lock); +- put_files_struct(files); +- } +- return -ENOENT; ++ if (info) ++ snprintf(info, PROC_FDINFO_MAX, ++ "pos:\t%lli\n" ++ "flags:\t0%o\n", ++ (long long) file->f_pos, ++ f_flags); ++ rc = 0; ++ } else ++ rc = -ENOENT; ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++ ++out_unlock: ++ unlock_trace(task); ++out_task: ++ put_task_struct(task); ++ return rc; + } + + static int proc_fd_link(struct inode *inode, struct path *path) +@@ -2040,7 +2086,7 @@ static struct dentry *proc_fd_instantiat + spin_unlock(&files->file_lock); + put_files_struct(files); + +- inode->i_op = &proc_pid_link_inode_operations; ++ inode->i_op = &proc_fd_link_inode_operations; + inode->i_size = 64; + ei->op.proc_get_link = proc_fd_link; + d_set_d_op(dentry, &tid_fd_dentry_operations); +@@ -2072,7 +2118,12 @@ static struct dentry *proc_lookupfd_comm + if (fd == ~0U) + goto out; + ++ result = ERR_PTR(-EACCES); ++ if (lock_trace(task)) ++ goto out; ++ + result = instantiate(dir, dentry, task, &fd); ++ unlock_trace(task); + out: + put_task_struct(task); + out_no_task: +@@ -2092,23 +2143,28 @@ static int proc_readfd_common(struct fil + retval = -ENOENT; + if (!p) + goto out_no_task; ++ ++ retval = -EACCES; ++ if (lock_trace(p)) ++ goto out; ++ + retval = 0; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) +- goto out; ++ goto out_unlock; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) +- goto out; ++ goto out_unlock; + filp->f_pos++; + default: + files = get_files_struct(p); + if (!files) +- goto out; ++ goto out_unlock; + rcu_read_lock(); + for (fd = filp->f_pos-2; + fd < files_fdtable(files)->max_fds; +@@ -2132,6 +2188,9 @@ static int proc_readfd_common(struct fil + rcu_read_unlock(); + put_files_struct(files); + } ++ ++out_unlock: ++ unlock_trace(p); + out: + put_task_struct(p); + out_no_task: +@@ -2209,6 +2268,7 @@ static struct dentry *proc_fdinfo_instan + ei->fd = fd; + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; ++ inode->i_op = &proc_fdinfo_link_inode_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ diff --git a/queue-3.0/series b/queue-3.0/series index 647b8f7b8e0..1ced05fc8f1 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -145,3 +145,9 @@ tg3-negate-use_phylib-flag-check.patch ipv6-nullify-ipv6_ac_list-and-ipv6_fl_list-when-creating-new-socket.patch make-packet_statistics-getsockopt-report-consistently-between-ring-and-non-ring.patch net-xen-netback-correctly-restart-tx-after-a-vm-restore-migrate.patch +mm-thp-tail-page-refcounting-fix.patch +binfmt_elf-fix-pie-execution-with-randomization-disabled.patch +vfs-show-o_cloexe-bit-properly-in-proc-pid-fdinfo-fd-files.patch +proc-fix-races-against-execve-of-proc-pid-fd.patch +iwlagn-do-not-use-interruptible-waits.patch +drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch diff --git a/queue-3.0/vfs-show-o_cloexe-bit-properly-in-proc-pid-fdinfo-fd-files.patch 
b/queue-3.0/vfs-show-o_cloexe-bit-properly-in-proc-pid-fdinfo-fd-files.patch new file mode 100644 index 00000000000..8823d27db35 --- /dev/null +++ b/queue-3.0/vfs-show-o_cloexe-bit-properly-in-proc-pid-fdinfo-fd-files.patch @@ -0,0 +1,59 @@ +From 1117f72ea0217ba0cc19f05adbbd8b9a397f5ab7 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sat, 6 Aug 2011 11:51:33 -0700 +Subject: vfs: show O_CLOEXE bit properly in /proc//fdinfo/ files + +From: Linus Torvalds + +commit 1117f72ea0217ba0cc19f05adbbd8b9a397f5ab7 upstream. + +The CLOEXE bit is magical, and for performance (and semantic) reasons we +don't actually maintain it in the file descriptor itself, but in a +separate bit array. Which means that when we show f_flags, the CLOEXE +status is shown incorrectly: we show the status not as it is now, but as +it was when the file was opened. + +Fix that by looking up the bit properly in the 'fdt->close_on_exec' bit +array. + +Uli needs this in order to re-implement the pfiles program: + + "For normal file descriptors (not sockets) this was the last piece of + information which wasn't available. This is all part of my 'give + Solaris users no reason to not switch' effort. I intend to offer the + code to the util-linux-ng maintainers." + +Requested-by: Ulrich Drepper +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -1920,6 +1920,14 @@ static int proc_fd_info(struct inode *in + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { ++ unsigned int f_flags; ++ struct fdtable *fdt; ++ ++ fdt = files_fdtable(files); ++ f_flags = file->f_flags & ~O_CLOEXEC; ++ if (FD_ISSET(fd, fdt->close_on_exec)) ++ f_flags |= O_CLOEXEC; ++ + if (path) { + *path = file->f_path; + path_get(&file->f_path); +@@ -1929,7 +1937,7 @@ static int proc_fd_info(struct inode *in + "pos:\t%lli\n" + "flags:\t0%o\n", + (long long) file->f_pos, +- file->f_flags); ++ f_flags); + spin_unlock(&files->file_lock); + put_files_struct(files); + return 0;
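
To round out the O_CLOEXEC patch above, a hedged userspace sketch (no kernel code involved) of why the separate close_on_exec bitmap matters: the close-on-exec bit is a per-descriptor flag, so it is not reflected in the open-file status flags that back file->f_flags, and fdinfo has to merge it in explicitly.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/null", O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	/* Turn on close-on-exec after the fact -- the case the old fdinfo
	 * code reported wrongly, since it only saw the open()-time flags. */
	fcntl(fd, F_SETFD, FD_CLOEXEC);

	int fdflags = fcntl(fd, F_GETFD);	/* per-descriptor flags (CLOEXEC)   */
	int stflags = fcntl(fd, F_GETFL);	/* open-file status flags (f_flags) */

	printf("FD_CLOEXEC set:        %d\n", !!(fdflags & FD_CLOEXEC));	/* 1 */
	printf("O_CLOEXEC via F_GETFL: %d\n", !!(stflags & O_CLOEXEC));	/* 0 */

	close(fd);
	return 0;
}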