--- /dev/null
+From a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 Mon Sep 17 00:00:00 2001
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Wed, 2 Nov 2011 13:37:41 -0700
+Subject: binfmt_elf: fix PIE execution with randomization disabled
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+commit a3defbe5c337dbc6da911f8cc49ae3cc3b49b453 upstream.
+
+The case of address space randomization being disabled at runtime through
+the randomize_va_space sysctl is not treated properly in load_elf_binary(),
+resulting in SIGKILL coming at exec() time for certain PIE-linked binaries
+if the randomization has been disabled at runtime prior to calling exec().
+
+Handle the randomize_va_space == 0 case the same way as if we were not
+supporting .text randomization at all.
+
+Based on original patch by H.J. Lu and Josh Boyer.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Russell King <rmk@arm.linux.org.uk>
+Cc: H.J. Lu <hongjiu.lu@intel.com>
+Cc: <stable@kernel.org>
+Tested-by: Josh Boyer <jwboyer@redhat.com>
+Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/binfmt_elf.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/binfmt_elf.c
++++ b/fs/binfmt_elf.c
+@@ -796,7 +796,16 @@ static int load_elf_binary(struct linux_
+ * might try to exec. This is because the brk will
+ * follow the loader, and is not movable. */
+ #if defined(CONFIG_X86) || defined(CONFIG_ARM)
+- load_bias = 0;
++ /* Memory randomization might have been switched off
++ * in runtime via sysctl.
++ * If that is the case, retain the original non-zero
++ * load_bias value in order to establish proper
++ * non-randomized mappings.
++ */
++ if (current->flags & PF_RANDOMIZE)
++ load_bias = 0;
++ else
++ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+ #else
+ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+ #endif
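
A minimal userspace sketch (not part of the patch; the file name and compile
flags are illustrative) of the mapping behaviour the fallback above preserves:
build it with "gcc -pie -fPIE aslr_probe.c -o aslr_probe" and run it a few
times. With randomize_va_space set to 2 the load address of main() changes
between runs; with the sysctl set to 0 it stays fixed, which is the
non-randomized PIE mapping load_elf_binary() now establishes by falling back
to a non-zero load_bias.

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/randomize_va_space", "r");
        int mode = -1;

        if (f) {
                if (fscanf(f, "%d", &mode) != 1)
                        mode = -1;
                fclose(f);
        }
        /* for a PIE binary this address reflects the chosen load bias */
        printf("randomize_va_space=%d  main() loaded at %p\n",
               mode, (void *)main);
        return 0;
}
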
--- /dev/null
+From e0c87bd95e8dad455c23bc56513af8dcb1737e55 Mon Sep 17 00:00:00 2001
+From: Alexandre Bounine <alexandre.bounine@idt.com>
+Date: Wed, 2 Nov 2011 13:39:15 -0700
+Subject: drivers/net/rionet.c: fix ethernet address macros for LE platforms
+
+From: Alexandre Bounine <alexandre.bounine@idt.com>
+
+commit e0c87bd95e8dad455c23bc56513af8dcb1737e55 upstream.
+
+Modify Ethernet address macros to be compatible with BE/LE platforms.
+
+Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
+Cc: Chul Kim <chul.kim@idt.com>
+Cc: Kumar Gala <galak@kernel.crashing.org>
+Cc: Matt Porter <mporter@kernel.crashing.org>
+Cc: Li Yang <leoli@freescale.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/rionet.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/rionet.c
++++ b/drivers/net/rionet.c
+@@ -88,8 +88,8 @@ static struct rio_dev **rionet_active;
+ #define dev_rionet_capable(dev) \
+ is_rionet_capable(dev->src_ops, dev->dst_ops)
+
+-#define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001)
+-#define RIONET_GET_DESTID(x) (*(u16 *)(x + 4))
++#define RIONET_MAC_MATCH(x) (!memcmp((x), "\00\01\00\01", 4))
++#define RIONET_GET_DESTID(x) ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5))
+
+ static int rionet_rx_clean(struct net_device *ndev)
+ {
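
A standalone userspace sketch (plain C, not kernel code; the example address
is made up) of why the old macros were endianness-dependent: the same
six-byte RIONET address compares differently when loaded as a host-order u32,
but identically with memcmp(), and the destid bytes are reassembled
explicitly the way the new RIONET_GET_DESTID() does.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
        const uint8_t mac[6] = { 0x00, 0x01, 0x00, 0x01, 0x12, 0x34 };
        uint32_t as_u32;

        memcpy(&as_u32, mac, sizeof(as_u32));   /* also avoids the unaligned cast */
        printf("u32 view:    0x%08x (equals 0x00010001 only on big-endian)\n",
               (unsigned int)as_u32);
        printf("memcmp view: %s on any endianness\n",
               memcmp(mac, "\x00\x01\x00\x01", 4) == 0 ? "match" : "no match");
        /* destid taken from bytes 4..5, mirroring the new macro */
        printf("destid:      0x%04x\n", (unsigned int)((mac[4] << 8) | mac[5]));
        return 0;
}
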
--- /dev/null
+From johannes@sipsolutions.net Thu Nov 3 10:55:59 2011
+From: Johannes Berg <johannes@sipsolutions.net>
+Date: Thu, 03 Nov 2011 13:46:08 +0100
+Subject: iwlagn: do not use interruptible waits
+To: stable@vger.kernel.org
+Message-ID: <1320324368.3950.44.camel@jlt3.sipsolutions.net>
+
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+Upstream commit effd4d9aece9184f526e6556786a94d335e38b71.
+
+Since the dawn of its time, iwlwifi has used
+interruptible waits to wait for synchronous
+commands and firmware loading.
+
+This leads to "interesting" bugs, because it
+can't actually handle the interruptions; for
+example, when the sending of a command is
+interrupted, the driver will assume the
+command completed fully, and then leave it
+pending, which leads to all kinds of trouble
+when the command finishes later.
+
+Since there's no easy way to gracefully deal
+with interruptions, fix the driver to not use
+interruptible waits.
+
+This at least fixes the error
+iwlagn 0000:02:00.0: Error: Response NULL in 'REPLY_SCAN_ABORT_CMD'
+
+I have seen in P2P testing, but it is likely
+that there are other errors caused by this.
+
+Cc: Stanislaw Gruszka <sgruszka@redhat.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
+Signed-off-by: John W. Linville <linville@tuxdriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ drivers/net/wireless/iwlwifi/iwl-agn-ucode.c | 9 ++-------
+ drivers/net/wireless/iwlwifi/iwl-agn.c | 2 +-
+ drivers/net/wireless/iwlwifi/iwl-core.c | 4 ++--
+ drivers/net/wireless/iwlwifi/iwl-hcmd.c | 2 +-
+ drivers/net/wireless/iwlwifi/iwl-rx.c | 2 +-
+ drivers/net/wireless/iwlwifi/iwl-tx.c | 2 +-
+ 6 files changed, 8 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
++++ b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
+@@ -144,13 +144,8 @@ static int iwlagn_load_section(struct iw
+ FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD);
+
+ IWL_DEBUG_INFO(priv, "%s uCode section being loaded...\n", name);
+- ret = wait_event_interruptible_timeout(priv->wait_command_queue,
+- priv->ucode_write_complete, 5 * HZ);
+- if (ret == -ERESTARTSYS) {
+- IWL_ERR(priv, "Could not load the %s uCode section due "
+- "to interrupt\n", name);
+- return ret;
+- }
++ ret = wait_event_timeout(priv->wait_command_queue,
++ priv->ucode_write_complete, 5 * HZ);
+ if (!ret) {
+ IWL_ERR(priv, "Could not load the %s uCode section\n",
+ name);
+--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
++++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
+@@ -797,7 +797,7 @@ static void iwl_irq_tasklet(struct iwl_p
+ handled |= CSR_INT_BIT_FH_TX;
+ /* Wake up uCode load routine, now that load is complete */
+ priv->ucode_write_complete = 1;
+- wake_up_interruptible(&priv->wait_command_queue);
++ wake_up(&priv->wait_command_queue);
+ }
+
+ if (inta & ~handled) {
+--- a/drivers/net/wireless/iwlwifi/iwl-core.c
++++ b/drivers/net/wireless/iwlwifi/iwl-core.c
+@@ -899,7 +899,7 @@ void iwlagn_fw_error(struct iwl_priv *pr
+ * commands by clearing the ready bit */
+ clear_bit(STATUS_READY, &priv->status);
+
+- wake_up_interruptible(&priv->wait_command_queue);
++ wake_up(&priv->wait_command_queue);
+
+ if (!ondemand) {
+ /*
+@@ -950,7 +950,7 @@ void iwl_irq_handle_error(struct iwl_pri
+ */
+ clear_bit(STATUS_READY, &priv->status);
+ clear_bit(STATUS_HCMD_ACTIVE, &priv->status);
+- wake_up_interruptible(&priv->wait_command_queue);
++ wake_up(&priv->wait_command_queue);
+ IWL_ERR(priv, "RF is used by WiMAX\n");
+ return;
+ }
+--- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c
++++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c
+@@ -194,7 +194,7 @@ int iwl_send_cmd_sync(struct iwl_priv *p
+ return ret;
+ }
+
+- ret = wait_event_interruptible_timeout(priv->wait_command_queue,
++ ret = wait_event_timeout(priv->wait_command_queue,
+ !test_bit(STATUS_HCMD_ACTIVE, &priv->status),
+ HOST_COMPLETE_TIMEOUT);
+ if (!ret) {
+--- a/drivers/net/wireless/iwlwifi/iwl-rx.c
++++ b/drivers/net/wireless/iwlwifi/iwl-rx.c
+@@ -738,7 +738,7 @@ static void iwl_rx_card_state_notif(stru
+ wiphy_rfkill_set_hw_state(priv->hw->wiphy,
+ test_bit(STATUS_RF_KILL_HW, &priv->status));
+ else
+- wake_up_interruptible(&priv->wait_command_queue);
++ wake_up(&priv->wait_command_queue);
+ }
+
+ static void iwl_rx_missed_beacon_notif(struct iwl_priv *priv,
+--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
++++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
+@@ -821,7 +821,7 @@ void iwl_tx_cmd_complete(struct iwl_priv
+ clear_bit(STATUS_HCMD_ACTIVE, &priv->status);
+ IWL_DEBUG_INFO(priv, "Clearing HCMD_ACTIVE for command %s\n",
+ get_cmd_string(cmd->hdr.cmd));
+- wake_up_interruptible(&priv->wait_command_queue);
++ wake_up(&priv->wait_command_queue);
+ }
+
+ /* Mark as unmapped */
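
A minimal kernel-module sketch (illustrative only, not part of the driver;
the names are made up) of the calling convention the patch relies on:
wait_event_timeout() returns 0 on timeout or the remaining jiffies on
completion and can never return -ERESTARTSYS, so the interruption branch
removed above has no equivalent and the "!ret" timeout check is all a
caller needs.

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/err.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static bool demo_done;

static int demo_waker(void *unused)
{
        msleep(100);            /* stand-in for the firmware finishing its work */
        demo_done = true;
        wake_up(&demo_wq);      /* plain wake_up() reaches non-interruptible waiters */
        return 0;
}

static int __init demo_init(void)
{
        struct task_struct *tsk;
        long ret;

        tsk = kthread_run(demo_waker, NULL, "wait_demo");
        if (IS_ERR(tsk))
                return PTR_ERR(tsk);

        ret = wait_event_timeout(demo_wq, demo_done, 5 * HZ);
        if (!ret)               /* 0 means timeout; -ERESTARTSYS cannot happen here */
                pr_err("wait_demo: timed out\n");
        else
                pr_info("wait_demo: completed with %ld jiffies to spare\n", ret);
        return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
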
--- /dev/null
+From 70b50f94f1644e2aa7cb374819cfd93f3c28d725 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Wed, 2 Nov 2011 13:36:59 -0700
+Subject: mm: thp: tail page refcounting fix
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit 70b50f94f1644e2aa7cb374819cfd93f3c28d725 upstream.
+
+Michel, while working on the working set estimation code, noticed that
+calling get_page_unless_zero() on a random pfn_to_page(random_pfn)
+wasn't safe if the pfn ended up being a tail page of a transparent
+hugepage under splitting by __split_huge_page_refcount().
+
+He then found the problem could also theoretically materialize with
+page_cache_get_speculative() during the speculative radix tree lookups
+that use get_page_unless_zero() in SMP, if the radix tree page is freed
+and reallocated and get_user_pages is called on it before
+page_cache_get_speculative has a chance to call get_page_unless_zero().
+
+So the best way to fix the problem is to keep page_tail->_count zero at
+all times. This will guarantee that get_page_unless_zero() can never
+succeed on any tail page. page_tail->_mapcount is guaranteed zero and
+is unused for all tail pages of a compound page, so we can simply
+account the tail page references there and transfer them to
+tail_page->_count in __split_huge_page_refcount() (in addition to the
+head_page->_mapcount).
+
+While debugging this s/_count/_mapcount/ change I also noticed get_page is
+called by direct-io.c on pages returned by get_user_pages. That wasn't
+entirely safe because the two atomic_inc in get_page weren't atomic as a
+pair. Other get_user_pages users, like the secondary-MMU page fault code
+that establishes the shadow pagetables, never call any superfluous
+get_page after get_user_pages returns. It's safer to make get_page
+universally safe for tail pages and to use get_page_foll() within
+follow_page (inside get_user_pages()). get_page_foll() can safely do the
+refcounting for tail pages without taking any locks because it runs within
+PT-lock-protected critical sections (the PT lock for pte and
+page_table_lock for pmd_trans_huge).
+
+The standard get_page() as invoked by direct-io will instead now take
+the compound_lock, but still only for tail pages. The direct-io paths
+are usually I/O bound and the compound_lock is per THP and thus very
+fine-grained, so there's no risk of scalability issues with it. A simple
+direct-io benchmark with all the lockdep prove-locking and spinlock
+debugging infrastructure enabled shows identical performance and no
+overhead. So it's worth it. Ideally direct-io should stop calling
+get_page() on pages returned by get_user_pages(). The spinlock in
+get_page() is already optimized away for no-THP builds, but doing
+get_page() on tail pages returned by GUP is generally a rare operation
+and usually only run in I/O paths.
+
+This new refcounting on page_tail->_mapcount, in addition to avoiding new
+RCU critical sections, will also allow the working set estimation code to
+work without any further complexity associated with the tail page
+refcounting with THP.
+
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Reported-by: Michel Lespinasse <walken@google.com>
+Reviewed-by: Michel Lespinasse <walken@google.com>
+Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/mm/gup.c | 5 +-
+ arch/x86/mm/gup.c | 5 +-
+ include/linux/mm.h | 56 ++++++++++++-------------------
+ include/linux/mm_types.h | 22 ++++++++++--
+ mm/huge_memory.c | 37 ++++++++++++++------
+ mm/internal.h | 46 ++++++++++++++++++++++++++
+ mm/memory.c | 2 -
+ mm/swap.c | 83 ++++++++++++++++++++++++++++++-----------------
+ 8 files changed, 172 insertions(+), 84 deletions(-)
+
+--- a/arch/powerpc/mm/gup.c
++++ b/arch/powerpc/mm/gup.c
+@@ -22,8 +22,9 @@ static inline void get_huge_page_tail(st
+ * __split_huge_page_refcount() cannot run
+ * from under us.
+ */
+- VM_BUG_ON(atomic_read(&page->_count) < 0);
+- atomic_inc(&page->_count);
++ VM_BUG_ON(page_mapcount(page) < 0);
++ VM_BUG_ON(atomic_read(&page->_count) != 0);
++ atomic_inc(&page->_mapcount);
+ }
+
+ /*
+--- a/arch/x86/mm/gup.c
++++ b/arch/x86/mm/gup.c
+@@ -114,8 +114,9 @@ static inline void get_huge_page_tail(st
+ * __split_huge_page_refcount() cannot run
+ * from under us.
+ */
+- VM_BUG_ON(atomic_read(&page->_count) < 0);
+- atomic_inc(&page->_count);
++ VM_BUG_ON(page_mapcount(page) < 0);
++ VM_BUG_ON(atomic_read(&page->_count) != 0);
++ atomic_inc(&page->_mapcount);
+ }
+
+ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -355,36 +355,39 @@ static inline struct page *compound_head
+ return page;
+ }
+
++/*
++ * The atomic page->_mapcount, starts from -1: so that transitions
++ * both from it and to it can be tracked, using atomic_inc_and_test
++ * and atomic_add_negative(-1).
++ */
++static inline void reset_page_mapcount(struct page *page)
++{
++ atomic_set(&(page)->_mapcount, -1);
++}
++
++static inline int page_mapcount(struct page *page)
++{
++ return atomic_read(&(page)->_mapcount) + 1;
++}
++
+ static inline int page_count(struct page *page)
+ {
+ return atomic_read(&compound_head(page)->_count);
+ }
+
++extern bool __get_page_tail(struct page *page);
++
+ static inline void get_page(struct page *page)
+ {
++ if (unlikely(PageTail(page)))
++ if (likely(__get_page_tail(page)))
++ return;
+ /*
+ * Getting a normal page or the head of a compound page
+- * requires to already have an elevated page->_count. Only if
+- * we're getting a tail page, the elevated page->_count is
+- * required only in the head page, so for tail pages the
+- * bugcheck only verifies that the page->_count isn't
+- * negative.
++ * requires to already have an elevated page->_count.
+ */
+- VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
++ VM_BUG_ON(atomic_read(&page->_count) <= 0);
+ atomic_inc(&page->_count);
+- /*
+- * Getting a tail page will elevate both the head and tail
+- * page->_count(s).
+- */
+- if (unlikely(PageTail(page))) {
+- /*
+- * This is safe only because
+- * __split_huge_page_refcount can't run under
+- * get_page().
+- */
+- VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+- atomic_inc(&page->first_page->_count);
+- }
+ }
+
+ static inline struct page *virt_to_head_page(const void *x)
+@@ -803,21 +806,6 @@ static inline pgoff_t page_index(struct
+ }
+
+ /*
+- * The atomic page->_mapcount, like _count, starts from -1:
+- * so that transitions both from it and to it can be tracked,
+- * using atomic_inc_and_test and atomic_add_negative(-1).
+- */
+-static inline void reset_page_mapcount(struct page *page)
+-{
+- atomic_set(&(page)->_mapcount, -1);
+-}
+-
+-static inline int page_mapcount(struct page *page)
+-{
+- return atomic_read(&(page)->_mapcount) + 1;
+-}
+-
+-/*
+ * Return true if this page is mapped into pagetables.
+ */
+ static inline int page_mapped(struct page *page)
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -36,10 +36,24 @@ struct page {
+ * updated asynchronously */
+ atomic_t _count; /* Usage count, see below. */
+ union {
+- atomic_t _mapcount; /* Count of ptes mapped in mms,
+- * to show when page is mapped
+- * & limit reverse map searches.
+- */
++ /*
++ * Count of ptes mapped in
++ * mms, to show when page is
++ * mapped & limit reverse map
++ * searches.
++ *
++ * Used also for tail pages
++ * refcounting instead of
++ * _count. Tail pages cannot
++ * be mapped and keeping the
++ * tail page _count zero at
++ * all times guarantees
++ * get_page_unless_zero() will
++ * never succeed on tail
++ * pages.
++ */
++ atomic_t _mapcount;
++
+ struct { /* SLUB */
+ u16 inuse;
+ u16 objects;
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -989,7 +989,7 @@ struct page *follow_trans_huge_pmd(struc
+ page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
+ VM_BUG_ON(!PageCompound(page));
+ if (flags & FOLL_GET)
+- get_page(page);
++ get_page_foll(page);
+
+ out:
+ return page;
+@@ -1156,6 +1156,7 @@ static void __split_huge_page_refcount(s
+ unsigned long head_index = page->index;
+ struct zone *zone = page_zone(page);
+ int zonestat;
++ int tail_count = 0;
+
+ /* prevent PageLRU to go away from under us, and freeze lru stats */
+ spin_lock_irq(&zone->lru_lock);
+@@ -1164,11 +1165,27 @@ static void __split_huge_page_refcount(s
+ for (i = 1; i < HPAGE_PMD_NR; i++) {
+ struct page *page_tail = page + i;
+
+- /* tail_page->_count cannot change */
+- atomic_sub(atomic_read(&page_tail->_count), &page->_count);
+- BUG_ON(page_count(page) <= 0);
+- atomic_add(page_mapcount(page) + 1, &page_tail->_count);
+- BUG_ON(atomic_read(&page_tail->_count) <= 0);
++ /* tail_page->_mapcount cannot change */
++ BUG_ON(page_mapcount(page_tail) < 0);
++ tail_count += page_mapcount(page_tail);
++ /* check for overflow */
++ BUG_ON(tail_count < 0);
++ BUG_ON(atomic_read(&page_tail->_count) != 0);
++ /*
++ * tail_page->_count is zero and not changing from
++ * under us. But get_page_unless_zero() may be running
++ * from under us on the tail_page. If we used
++ * atomic_set() below instead of atomic_add(), we
++ * would then run atomic_set() concurrently with
++ * get_page_unless_zero(), and atomic_set() is
++ * implemented in C not using locked ops. spin_unlock
++ * on x86 sometime uses locked ops because of PPro
++ * errata 66, 92, so unless somebody can guarantee
++ * atomic_set() here would be safe on all archs (and
++ * not only on x86), it's safer to use atomic_add().
++ */
++ atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
++ &page_tail->_count);
+
+ /* after clearing PageTail the gup refcount can be released */
+ smp_mb();
+@@ -1186,10 +1203,7 @@ static void __split_huge_page_refcount(s
+ (1L << PG_uptodate)));
+ page_tail->flags |= (1L << PG_dirty);
+
+- /*
+- * 1) clear PageTail before overwriting first_page
+- * 2) clear PageTail before clearing PageHead for VM_BUG_ON
+- */
++ /* clear PageTail before overwriting first_page */
+ smp_wmb();
+
+ /*
+@@ -1206,7 +1220,6 @@ static void __split_huge_page_refcount(s
+ * status is achieved setting a reserved bit in the
+ * pmd, not by clearing the present bit.
+ */
+- BUG_ON(page_mapcount(page_tail));
+ page_tail->_mapcount = page->_mapcount;
+
+ BUG_ON(page_tail->mapping);
+@@ -1223,6 +1236,8 @@ static void __split_huge_page_refcount(s
+
+ lru_add_page_tail(zone, page, page_tail);
+ }
++ atomic_sub(tail_count, &page->_count);
++ BUG_ON(atomic_read(&page->_count) <= 0);
+
+ __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+ __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -37,6 +37,52 @@ static inline void __put_page(struct pag
+ atomic_dec(&page->_count);
+ }
+
++static inline void __get_page_tail_foll(struct page *page,
++ bool get_page_head)
++{
++ /*
++ * If we're getting a tail page, the elevated page->_count is
++ * required only in the head page and we will elevate the head
++ * page->_count and tail page->_mapcount.
++ *
++ * We elevate page_tail->_mapcount for tail pages to force
++ * page_tail->_count to be zero at all times to avoid getting
++ * false positives from get_page_unless_zero() with
++ * speculative page access (like in
++ * page_cache_get_speculative()) on tail pages.
++ */
++ VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
++ VM_BUG_ON(atomic_read(&page->_count) != 0);
++ VM_BUG_ON(page_mapcount(page) < 0);
++ if (get_page_head)
++ atomic_inc(&page->first_page->_count);
++ atomic_inc(&page->_mapcount);
++}
++
++/*
++ * This is meant to be called as the FOLL_GET operation of
++ * follow_page() and it must be called while holding the proper PT
++ * lock while the pte (or pmd_trans_huge) is still mapping the page.
++ */
++static inline void get_page_foll(struct page *page)
++{
++ if (unlikely(PageTail(page)))
++ /*
++ * This is safe only because
++ * __split_huge_page_refcount() can't run under
++ * get_page_foll() because we hold the proper PT lock.
++ */
++ __get_page_tail_foll(page, true);
++ else {
++ /*
++ * Getting a normal page or the head of a compound page
++ * requires to already have an elevated page->_count.
++ */
++ VM_BUG_ON(atomic_read(&page->_count) <= 0);
++ atomic_inc(&page->_count);
++ }
++}
++
+ extern unsigned long highest_memmap_pfn;
+
+ /*
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1514,7 +1514,7 @@ split_fallthrough:
+ }
+
+ if (flags & FOLL_GET)
+- get_page(page);
++ get_page_foll(page);
+ if (flags & FOLL_TOUCH) {
+ if ((flags & FOLL_WRITE) &&
+ !pte_dirty(pte) && !PageDirty(page))
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -78,39 +78,22 @@ static void put_compound_page(struct pag
+ {
+ if (unlikely(PageTail(page))) {
+ /* __split_huge_page_refcount can run under us */
+- struct page *page_head = page->first_page;
+- smp_rmb();
+- /*
+- * If PageTail is still set after smp_rmb() we can be sure
+- * that the page->first_page we read wasn't a dangling pointer.
+- * See __split_huge_page_refcount() smp_wmb().
+- */
+- if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
++ struct page *page_head = compound_trans_head(page);
++
++ if (likely(page != page_head &&
++ get_page_unless_zero(page_head))) {
+ unsigned long flags;
+ /*
+- * Verify that our page_head wasn't converted
+- * to a a regular page before we got a
+- * reference on it.
++ * page_head wasn't a dangling pointer but it
++ * may not be a head page anymore by the time
++ * we obtain the lock. That is ok as long as it
++ * can't be freed from under us.
+ */
+- if (unlikely(!PageHead(page_head))) {
+- /* PageHead is cleared after PageTail */
+- smp_rmb();
+- VM_BUG_ON(PageTail(page));
+- goto out_put_head;
+- }
+- /*
+- * Only run compound_lock on a valid PageHead,
+- * after having it pinned with
+- * get_page_unless_zero() above.
+- */
+- smp_mb();
+- /* page_head wasn't a dangling pointer */
+ flags = compound_lock_irqsave(page_head);
+ if (unlikely(!PageTail(page))) {
+ /* __split_huge_page_refcount run before us */
+ compound_unlock_irqrestore(page_head, flags);
+ VM_BUG_ON(PageHead(page_head));
+- out_put_head:
+ if (put_page_testzero(page_head))
+ __put_single_page(page_head);
+ out_put_single:
+@@ -121,16 +104,17 @@ static void put_compound_page(struct pag
+ VM_BUG_ON(page_head != page->first_page);
+ /*
+ * We can release the refcount taken by
+- * get_page_unless_zero now that
+- * split_huge_page_refcount is blocked on the
+- * compound_lock.
++ * get_page_unless_zero() now that
++ * __split_huge_page_refcount() is blocked on
++ * the compound_lock.
+ */
+ if (put_page_testzero(page_head))
+ VM_BUG_ON(1);
+ /* __split_huge_page_refcount will wait now */
+- VM_BUG_ON(atomic_read(&page->_count) <= 0);
+- atomic_dec(&page->_count);
++ VM_BUG_ON(page_mapcount(page) <= 0);
++ atomic_dec(&page->_mapcount);
+ VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
++ VM_BUG_ON(atomic_read(&page->_count) != 0);
+ compound_unlock_irqrestore(page_head, flags);
+ if (put_page_testzero(page_head)) {
+ if (PageHead(page_head))
+@@ -160,6 +144,45 @@ void put_page(struct page *page)
+ }
+ EXPORT_SYMBOL(put_page);
+
++/*
++ * This function is exported but must not be called by anything other
++ * than get_page(). It implements the slow path of get_page().
++ */
++bool __get_page_tail(struct page *page)
++{
++ /*
++ * This takes care of get_page() if run on a tail page
++ * returned by one of the get_user_pages/follow_page variants.
++ * get_user_pages/follow_page itself doesn't need the compound
++ * lock because it runs __get_page_tail_foll() under the
++ * proper PT lock that already serializes against
++ * split_huge_page().
++ */
++ unsigned long flags;
++ bool got = false;
++ struct page *page_head = compound_trans_head(page);
++
++ if (likely(page != page_head && get_page_unless_zero(page_head))) {
++ /*
++ * page_head wasn't a dangling pointer but it
++ * may not be a head page anymore by the time
++ * we obtain the lock. That is ok as long as it
++ * can't be freed from under us.
++ */
++ flags = compound_lock_irqsave(page_head);
++ /* here __split_huge_page_refcount won't run anymore */
++ if (likely(PageTail(page))) {
++ __get_page_tail_foll(page, false);
++ got = true;
++ }
++ compound_unlock_irqrestore(page_head, flags);
++ if (unlikely(!got))
++ put_page(page_head);
++ }
++ return got;
++}
++EXPORT_SYMBOL(__get_page_tail);
++
+ /**
+ * put_pages_list() - release a list of pages
+ * @pages: list of pages threaded on page->lru
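
A userspace sketch (plain C11 atomics, not kernel code; the struct and
helper names are simplified stand-ins) of the invariant the patch
establishes: a tail page keeps _count at zero at all times, so a
speculative get_page_unless_zero() can never succeed on it, while the
references taken under the PT lock are accounted in _mapcount instead and
only folded back into the head page's _count at split time.

#include <stdatomic.h>
#include <stdio.h>

struct fake_page {
        atomic_int _count;      /* pinned at 0 for tail pages */
        atomic_int _mapcount;   /* starts at -1, carries the tail references */
};

/* simplified get_page_unless_zero(): increment _count unless it is zero */
static int get_unless_zero(struct fake_page *p)
{
        int v = atomic_load(&p->_count);

        while (v != 0)
                if (atomic_compare_exchange_weak(&p->_count, &v, v + 1))
                        return 1;
        return 0;
}

/* simplified __get_page_tail_foll(): account the reference in _mapcount */
static void get_tail_foll(struct fake_page *tail)
{
        atomic_fetch_add(&tail->_mapcount, 1);
}

int main(void)
{
        struct fake_page tail;

        atomic_init(&tail._count, 0);
        atomic_init(&tail._mapcount, -1);

        get_tail_foll(&tail);           /* a gup-style reference on the tail */
        printf("speculative get: %s\n",
               get_unless_zero(&tail) ? "succeeded (would be a bug)" :
                                        "failed, as required");
        printf("_count=%d, references carried in _mapcount=%d\n",
               atomic_load(&tail._count), atomic_load(&tail._mapcount) + 1);
        return 0;
}
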
--- /dev/null
+From aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov <segoon@openwall.com>
+Date: Wed, 2 Nov 2011 13:38:44 -0700
+Subject: proc: fix races against execve() of /proc/PID/fd**
+
+From: Vasiliy Kulikov <segoon@openwall.com>
+
+commit aa6afca5bcaba8101f3ea09d5c3e4100b2b9f0e5 upstream.
+
+fd* files are restricted to the task's owner, and other users may not get
+direct access to them. But one may open any of these files and then run a
+setuid program while keeping the opened file descriptors. As there are
+permission checks on open(), but not on readdir() and read(), operations
+on the kept file descriptors will not be checked. This makes it possible
+to violate the procfs permission model.
+
+Reading fdinfo/* may disclose the current fds' positions and flags, and
+reading the directory contents of fdinfo/ and fd/ may disclose the number
+of files opened by the target task. This information is not sensitive per
+se, but it can reveal some private information (like the length of a
+password stored in a file) under certain conditions.
+
+Use the existing (un)lock_trace functions to check for ptrace_may_access(),
+but instead of using the EPERM return code from it, use EACCES to be
+consistent with the existing proc_pid_follow_link()/proc_pid_readlink()
+return code. If they differ, an attacker can guess what fds exist by
+analyzing the stat() return code. Patched handlers: stat() for fd/*,
+stat() and read() for fdinfo/*, readdir() and lookup() for fd/ and fdinfo/.
+
+Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
+Cc: Cyrill Gorcunov <gorcunov@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/base.c | 146 ++++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 103 insertions(+), 43 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -1666,12 +1666,46 @@ out:
+ return error;
+ }
+
++static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ struct inode *inode = dentry->d_inode;
++ struct task_struct *task = get_proc_task(inode);
++ int rc;
++
++ if (task == NULL)
++ return -ESRCH;
++
++ rc = -EACCES;
++ if (lock_trace(task))
++ goto out_task;
++
++ generic_fillattr(inode, stat);
++ unlock_trace(task);
++ rc = 0;
++out_task:
++ put_task_struct(task);
++ return rc;
++}
++
+ static const struct inode_operations proc_pid_link_inode_operations = {
+ .readlink = proc_pid_readlink,
+ .follow_link = proc_pid_follow_link,
+ .setattr = proc_setattr,
+ };
+
++static const struct inode_operations proc_fdinfo_link_inode_operations = {
++ .setattr = proc_setattr,
++ .getattr = proc_pid_fd_link_getattr,
++};
++
++static const struct inode_operations proc_fd_link_inode_operations = {
++ .readlink = proc_pid_readlink,
++ .follow_link = proc_pid_follow_link,
++ .setattr = proc_setattr,
++ .getattr = proc_pid_fd_link_getattr,
++};
++
+
+ /* building an inode */
+
+@@ -1903,49 +1937,61 @@ out:
+
+ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+ {
+- struct task_struct *task = get_proc_task(inode);
+- struct files_struct *files = NULL;
++ struct task_struct *task;
++ struct files_struct *files;
+ struct file *file;
+ int fd = proc_fd(inode);
++ int rc;
+
+- if (task) {
+- files = get_files_struct(task);
+- put_task_struct(task);
+- }
+- if (files) {
+- /*
+- * We are not taking a ref to the file structure, so we must
+- * hold ->file_lock.
+- */
+- spin_lock(&files->file_lock);
+- file = fcheck_files(files, fd);
+- if (file) {
+- unsigned int f_flags;
+- struct fdtable *fdt;
+-
+- fdt = files_fdtable(files);
+- f_flags = file->f_flags & ~O_CLOEXEC;
+- if (FD_ISSET(fd, fdt->close_on_exec))
+- f_flags |= O_CLOEXEC;
+-
+- if (path) {
+- *path = file->f_path;
+- path_get(&file->f_path);
+- }
+- if (info)
+- snprintf(info, PROC_FDINFO_MAX,
+- "pos:\t%lli\n"
+- "flags:\t0%o\n",
+- (long long) file->f_pos,
+- f_flags);
+- spin_unlock(&files->file_lock);
+- put_files_struct(files);
+- return 0;
++ task = get_proc_task(inode);
++ if (!task)
++ return -ENOENT;
++
++ rc = -EACCES;
++ if (lock_trace(task))
++ goto out_task;
++
++ rc = -ENOENT;
++ files = get_files_struct(task);
++ if (files == NULL)
++ goto out_unlock;
++
++ /*
++ * We are not taking a ref to the file structure, so we must
++ * hold ->file_lock.
++ */
++ spin_lock(&files->file_lock);
++ file = fcheck_files(files, fd);
++ if (file) {
++ unsigned int f_flags;
++ struct fdtable *fdt;
++
++ fdt = files_fdtable(files);
++ f_flags = file->f_flags & ~O_CLOEXEC;
++ if (FD_ISSET(fd, fdt->close_on_exec))
++ f_flags |= O_CLOEXEC;
++
++ if (path) {
++ *path = file->f_path;
++ path_get(&file->f_path);
+ }
+- spin_unlock(&files->file_lock);
+- put_files_struct(files);
+- }
+- return -ENOENT;
++ if (info)
++ snprintf(info, PROC_FDINFO_MAX,
++ "pos:\t%lli\n"
++ "flags:\t0%o\n",
++ (long long) file->f_pos,
++ f_flags);
++ rc = 0;
++ } else
++ rc = -ENOENT;
++ spin_unlock(&files->file_lock);
++ put_files_struct(files);
++
++out_unlock:
++ unlock_trace(task);
++out_task:
++ put_task_struct(task);
++ return rc;
+ }
+
+ static int proc_fd_link(struct inode *inode, struct path *path)
+@@ -2040,7 +2086,7 @@ static struct dentry *proc_fd_instantiat
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+- inode->i_op = &proc_pid_link_inode_operations;
++ inode->i_op = &proc_fd_link_inode_operations;
+ inode->i_size = 64;
+ ei->op.proc_get_link = proc_fd_link;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
+@@ -2072,7 +2118,12 @@ static struct dentry *proc_lookupfd_comm
+ if (fd == ~0U)
+ goto out;
+
++ result = ERR_PTR(-EACCES);
++ if (lock_trace(task))
++ goto out;
++
+ result = instantiate(dir, dentry, task, &fd);
++ unlock_trace(task);
+ out:
+ put_task_struct(task);
+ out_no_task:
+@@ -2092,23 +2143,28 @@ static int proc_readfd_common(struct fil
+ retval = -ENOENT;
+ if (!p)
+ goto out_no_task;
++
++ retval = -EACCES;
++ if (lock_trace(p))
++ goto out;
++
+ retval = 0;
+
+ fd = filp->f_pos;
+ switch (fd) {
+ case 0:
+ if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+- goto out;
++ goto out_unlock;
+ filp->f_pos++;
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+- goto out;
++ goto out_unlock;
+ filp->f_pos++;
+ default:
+ files = get_files_struct(p);
+ if (!files)
+- goto out;
++ goto out_unlock;
+ rcu_read_lock();
+ for (fd = filp->f_pos-2;
+ fd < files_fdtable(files)->max_fds;
+@@ -2132,6 +2188,9 @@ static int proc_readfd_common(struct fil
+ rcu_read_unlock();
+ put_files_struct(files);
+ }
++
++out_unlock:
++ unlock_trace(p);
+ out:
+ put_task_struct(p);
+ out_no_task:
+@@ -2209,6 +2268,7 @@ static struct dentry *proc_fdinfo_instan
+ ei->fd = fd;
+ inode->i_mode = S_IFREG | S_IRUSR;
+ inode->i_fop = &proc_fdinfo_file_operations;
++ inode->i_op = &proc_fdinfo_link_inode_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
+ d_add(dentry, inode);
+ /* Close the race of the process dying before we return the dentry */
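
A small userspace sketch of what the now-guarded fdinfo/<fd> handler
reports: the descriptor's current position and octal flags. Reading your
own task's fdinfo keeps working after this patch, since ptrace_may_access()
always permits self-access; the path opened below is only an example.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        char path[64], line[128];
        FILE *info;
        int fd = open("/etc/hostname", O_RDONLY | O_CLOEXEC);

        if (fd < 0)
                return 1;
        lseek(fd, 3, SEEK_SET);         /* move the position so it shows up */

        snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
        info = fopen(path, "r");        /* same task, so the access check passes */
        if (!info)
                return 1;
        while (fgets(line, sizeof(line), info))
                fputs(line, stdout);    /* prints "pos:\t3" and "flags:\t0..." */
        fclose(info);
        close(fd);
        return 0;
}
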
ipv6-nullify-ipv6_ac_list-and-ipv6_fl_list-when-creating-new-socket.patch
make-packet_statistics-getsockopt-report-consistently-between-ring-and-non-ring.patch
net-xen-netback-correctly-restart-tx-after-a-vm-restore-migrate.patch
+mm-thp-tail-page-refcounting-fix.patch
+binfmt_elf-fix-pie-execution-with-randomization-disabled.patch
+vfs-show-o_cloexe-bit-properly-in-proc-pid-fdinfo-fd-files.patch
+proc-fix-races-against-execve-of-proc-pid-fd.patch
+iwlagn-do-not-use-interruptible-waits.patch
+drivers-net-rionet.c-fix-ethernet-address-macros-for-le-platforms.patch
--- /dev/null
+From 1117f72ea0217ba0cc19f05adbbd8b9a397f5ab7 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sat, 6 Aug 2011 11:51:33 -0700
+Subject: vfs: show O_CLOEXE bit properly in /proc/<pid>/fdinfo/<fd> files
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 1117f72ea0217ba0cc19f05adbbd8b9a397f5ab7 upstream.
+
+The CLOEXEC bit is magical, and for performance (and semantic) reasons we
+don't actually maintain it in the file descriptor itself, but in a
+separate bit array. This means that when we show f_flags, the CLOEXEC
+status is shown incorrectly: we show the status not as it is now, but as
+it was when the file was opened.
+
+Fix that by looking up the bit properly in the 'fdt->close_on_exec' bit
+array.
+
+Uli needs this in order to re-implement the pfiles program:
+
+ "For normal file descriptors (not sockets) this was the last piece of
+ information which wasn't available. This is all part of my 'give
+ Solaris users no reason to not switch' effort. I intend to offer the
+ code to the util-linux-ng maintainers."
+
+Requested-by: Ulrich Drepper <drepper@akkadia.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/base.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -1920,6 +1920,14 @@ static int proc_fd_info(struct inode *in
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (file) {
++ unsigned int f_flags;
++ struct fdtable *fdt;
++
++ fdt = files_fdtable(files);
++ f_flags = file->f_flags & ~O_CLOEXEC;
++ if (FD_ISSET(fd, fdt->close_on_exec))
++ f_flags |= O_CLOEXEC;
++
+ if (path) {
+ *path = file->f_path;
+ path_get(&file->f_path);
+@@ -1929,7 +1937,7 @@ static int proc_fd_info(struct inode *in
+ "pos:\t%lli\n"
+ "flags:\t0%o\n",
+ (long long) file->f_pos,
+- file->f_flags);
++ f_flags);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ return 0;
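
A userspace sketch of the behaviour this patch corrects: the close-on-exec
bit is flipped with fcntl() after open(), so only a kernel that consults
fdt->close_on_exec (rather than the stale file->f_flags) shows the change
in fdinfo. The file path is illustrative and the octal O_CLOEXEC value
mentioned in the comment is the asm-generic one.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

static void dump_fdinfo(int fd)
{
        char path[64], line[128];
        FILE *info;

        snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
        info = fopen(path, "r");
        if (!info)
                return;
        while (fgets(line, sizeof(line), info))
                fputs(line, stdout);
        fclose(info);
}

int main(void)
{
        int fd = open("/etc/hostname", O_RDONLY);       /* no O_CLOEXEC at open time */

        if (fd < 0)
                return 1;
        puts("before FD_CLOEXEC:");
        dump_fdinfo(fd);

        fcntl(fd, F_SETFD, FD_CLOEXEC);                  /* flip the bit afterwards */
        puts("after FD_CLOEXEC:");
        dump_fdinfo(fd);        /* flags now include 02000000 on a fixed kernel */
        close(fd);
        return 0;
}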