--- /dev/null
+From 0b8c82190c12e530eb6003720dac103bf63e146e Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 15 Feb 2016 16:37:24 +0100
+Subject: ALSA: hda - Cancel probe work instead of flush at remove
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 0b8c82190c12e530eb6003720dac103bf63e146e upstream.
+
+The commit [991f86d7ae4e: ALSA: hda - Flush the pending probe work at
+remove] introduced the sync of async probe work at remove for fixing
+the race. However, this may lead to another hangup when the module
+removal is performed quickly before starting the probe work, because
+it issues flush_work() and it's blocked forever.
+
+The workaround is to use cancel_work_sync() instead of flush_work()
+there.
+
+Fixes: 991f86d7ae4e ('ALSA: hda - Flush the pending probe work at remove')
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/pci/hda/hda_intel.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -2143,10 +2143,10 @@ static void azx_remove(struct pci_dev *p
+ struct hda_intel *hda;
+
+ if (card) {
+- /* flush the pending probing work */
++ /* cancel the pending probing work */
+ chip = card->private_data;
+ hda = container_of(chip, struct hda_intel, chip);
+- flush_work(&hda->probe_work);
++ cancel_work_sync(&hda->probe_work);
+
+ snd_card_free(card);
+ }
--- /dev/null
+From 67ec1072b053c15564e6090ab30127895dc77a89 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Wed, 17 Feb 2016 14:30:26 +0100
+Subject: ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 67ec1072b053c15564e6090ab30127895dc77a89 upstream.
+
+A non-atomic PCM stream may take snd_pcm_link_rwsem rw semaphore twice
+in the same code path, e.g. one in snd_pcm_action_nonatomic() and
+another in snd_pcm_stream_lock(). Usually this is OK, but when a
+write lock is issued between these two read locks, the problem
+happens: the write lock is blocked due to the first read lock, and
+the second read lock is also blocked by the write lock. This
+eventually deadlocks.
+
+The reason is the way rwsem manages waiters; it's queued like FIFO, so
+even if the writer itself doesn't take the lock yet, it blocks all the
+waiters (including reads) queued after it.
+
+As a workaround, in this patch, we replace the standard down_write()
+with a spinning loop. This is far from optimal, but it's good
+enough, as the spinning time is supposed to be relatively short for
+normal PCM operations, and the code paths requiring the write lock
+aren't called so often.
+
+Reported-by: Vinod Koul <vinod.koul@intel.com>
+Tested-by: Ramesh Babu <ramesh.babu@intel.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/core/pcm_native.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/sound/core/pcm_native.c
++++ b/sound/core/pcm_native.c
+@@ -74,6 +74,18 @@ static int snd_pcm_open(struct file *fil
+ static DEFINE_RWLOCK(snd_pcm_link_rwlock);
+ static DECLARE_RWSEM(snd_pcm_link_rwsem);
+
++/* Writer in rwsem may block readers even during its waiting in queue,
++ * and this may lead to a deadlock when the code path takes read sem
++ * twice (e.g. one in snd_pcm_action_nonatomic() and another in
++ * snd_pcm_stream_lock()). As a (suboptimal) workaround, let writer to
++ * spin until it gets the lock.
++ */
++static inline void down_write_nonblock(struct rw_semaphore *lock)
++{
++ while (!down_write_trylock(lock))
++ cond_resched();
++}
++
+ /**
+ * snd_pcm_stream_lock - Lock the PCM stream
+ * @substream: PCM substream
+@@ -1813,7 +1825,7 @@ static int snd_pcm_link(struct snd_pcm_s
+ res = -ENOMEM;
+ goto _nolock;
+ }
+- down_write(&snd_pcm_link_rwsem);
++ down_write_nonblock(&snd_pcm_link_rwsem);
+ write_lock_irq(&snd_pcm_link_rwlock);
+ if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN ||
+ substream->runtime->status->state != substream1->runtime->status->state ||
+@@ -1860,7 +1872,7 @@ static int snd_pcm_unlink(struct snd_pcm
+ struct snd_pcm_substream *s;
+ int res = 0;
+
+- down_write(&snd_pcm_link_rwsem);
++ down_write_nonblock(&snd_pcm_link_rwsem);
+ write_lock_irq(&snd_pcm_link_rwlock);
+ if (!snd_pcm_stream_linked(substream)) {
+ res = -EALREADY;
--- /dev/null
+From 13d5e5d4725c64ec06040d636832e78453f477b7 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Tue, 16 Feb 2016 14:15:59 +0100
+Subject: ALSA: seq: Fix double port list deletion
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 13d5e5d4725c64ec06040d636832e78453f477b7 upstream.
+
+The commit [7f0973e973cd: ALSA: seq: Fix lockdep warnings due to
+double mutex locks] split the management of two linked lists (source
+and destination) into two individual calls for avoiding the AB/BA
+deadlock. However, this may leave the possible double deletion of one
+of two lists when the counterpart is being deleted concurrently.
+It ends up with a list corruption, as revealed by syzkaller fuzzer.
+
+This patch fixes it by checking the list emptiness and skipping the
+deletion and the following process.
+
+BugLink: http://lkml.kernel.org/r/CACT4Y+bay9qsrz6dQu31EcGaH9XwfW7o3oBzSQUG9fMszoh=Sg@mail.gmail.com
+Fixes: 7f0973e973cd ('ALSA: seq: Fix lockdep warnings due to double mutex locks')
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/core/seq/seq_ports.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/sound/core/seq/seq_ports.c
++++ b/sound/core/seq/seq_ports.c
+@@ -535,19 +535,22 @@ static void delete_and_unsubscribe_port(
+ bool is_src, bool ack)
+ {
+ struct snd_seq_port_subs_info *grp;
++ struct list_head *list;
++ bool empty;
+
+ grp = is_src ? &port->c_src : &port->c_dest;
++ list = is_src ? &subs->src_list : &subs->dest_list;
+ down_write(&grp->list_mutex);
+ write_lock_irq(&grp->list_lock);
+- if (is_src)
+- list_del(&subs->src_list);
+- else
+- list_del(&subs->dest_list);
++ empty = list_empty(list);
++ if (!empty)
++ list_del_init(list);
+ grp->exclusive = 0;
+ write_unlock_irq(&grp->list_lock);
+ up_write(&grp->list_mutex);
+
+- unsubscribe_port(client, port, grp, &subs->info, ack);
++ if (!empty)
++ unsubscribe_port(client, port, grp, &subs->info, ack);
+ }
+
+ /* connect two ports */
--- /dev/null
+From d99a36f4728fcbcc501b78447f625bdcce15b842 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 15 Feb 2016 16:20:24 +0100
+Subject: ALSA: seq: Fix leak of pool buffer at concurrent writes
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit d99a36f4728fcbcc501b78447f625bdcce15b842 upstream.
+
+When multiple concurrent writes happen on the ALSA sequencer device
+right after the open, it may try to allocate vmalloc buffer for each
+write and leak some of them. It's because the presence check and the
+assignment of the buffer are done outside the spinlock for the pool.
+
+The fix is to move the check and the assignment into the spinlock.
+
+(The current implementation is suboptimal, as there can be multiple
+ unnecessary vmallocs because the allocation is done before the check
+ in the spinlock. But the pool size is already checked beforehand, so
+ this isn't a big problem; that is, the only possible path is the
+ multiple writes before any pool assignment, and practically seen, the
+ current coverage should be "good enough".)
+
+The issue was triggered by syzkaller fuzzer.
+
+BugLink: http://lkml.kernel.org/r/CACT4Y+bSzazpXNvtAr=WXaL8hptqjHwqEyFA+VN2AWEx=aurkg@mail.gmail.com
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/core/seq/seq_memory.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/sound/core/seq/seq_memory.c
++++ b/sound/core/seq/seq_memory.c
+@@ -383,15 +383,20 @@ int snd_seq_pool_init(struct snd_seq_poo
+
+ if (snd_BUG_ON(!pool))
+ return -EINVAL;
+- if (pool->ptr) /* should be atomic? */
+- return 0;
+
+- pool->ptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size);
+- if (!pool->ptr)
++ cellptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size);
++ if (!cellptr)
+ return -ENOMEM;
+
+ /* add new cells to the free cell list */
+ spin_lock_irqsave(&pool->lock, flags);
++ if (pool->ptr) {
++ spin_unlock_irqrestore(&pool->lock, flags);
++ vfree(cellptr);
++ return 0;
++ }
++
++ pool->ptr = cellptr;
+ pool->free = NULL;
+
+ for (cell = 0; cell < pool->size; cell++) {
--- /dev/null
+From 5070fb14a0154f075c8b418e5bc58a620ae85a45 Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Mon, 8 Feb 2016 09:14:37 +0100
+Subject: ARM: 8517/1: ICST: avoid arithmetic overflow in icst_hz()
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit 5070fb14a0154f075c8b418e5bc58a620ae85a45 upstream.
+
+When trying to set the ICST 307 clock to 25174000 Hz I ran into
+this arithmetic error: the icst_hz_to_vco() correctly figures out
+DIVIDE=2, RDW=100 and VDW=99 yielding a frequency of
+25174000 Hz out of the VCO. (I replicated the icst_hz() function
+in a spreadsheet to verify this.)
+
+However, when I called icst_hz() on these VCO settings it would
+instead return 4122709 Hz. This causes an error in the common
+clock driver for ICST as the common clock framework will call
+.round_rate() on the clock which will utilize icst_hz_to_vco()
+followed by icst_hz() suggesting the erroneous frequency, and
+then the clock gets set to this.
+
+The error did not manifest in the old clock framework since
+this high frequency was only used by the CLCD, which calls
+clk_set_rate() without first calling clk_round_rate() and since
+the old clock framework would not call clk_round_rate() before
+setting the frequency, the correct values propagated into
+the VCO.
+
+After some experimenting I figured out that it was due to a simple
+arithmetic overflow: the dividend for a 24MHz reference frequency
+becomes 24000000*2*(99+8)=0x132212400 and the "1"
+in bit 32 overflows and is lost.
+
+By introducing an explicit 64-by-32 bit do_div() and casting
+the dividend into (u64) we get the right frequency back, and the
+right frequency gets set.
+
+Tested on the ARM Versatile.
+
+Cc: linux-clk@vger.kernel.org
+Cc: Pawel Moll <pawel.moll@arm.com>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/common/icst.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/common/icst.c
++++ b/arch/arm/common/icst.c
+@@ -16,7 +16,7 @@
+ */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+-
++#include <asm/div64.h>
+ #include <asm/hardware/icst.h>
+
+ /*
+@@ -29,7 +29,11 @@ EXPORT_SYMBOL(icst525_s2div);
+
+ unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco)
+ {
+- return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]);
++ u64 dividend = p->ref * 2 * (u64)(vco.v + 8);
++ u32 divisor = (vco.r + 2) * p->s2div[vco.s];
++
++ do_div(dividend, divisor);
++ return (unsigned long)dividend;
+ }
+
+ EXPORT_SYMBOL(icst_hz);
--- /dev/null
+From e972c37459c813190461dabfeaac228e00aae259 Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Wed, 10 Feb 2016 09:25:17 +0100
+Subject: ARM: 8519/1: ICST: try other dividends than 1
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit e972c37459c813190461dabfeaac228e00aae259 upstream.
+
+Since the dawn of time the ICST code has only supported divide
+by one or hang in an eternal loop. Luckily we were always dividing
+by one because the reference frequency for the systems using
+the ICSTs is 24MHz and the [min,max] values for the PLL input
+are [10,320] MHz for ICST307 and [6,200] MHz for ICST525, so the loop
+will always terminate immediately without assigning any divisor
+for the reference frequency.
+
+But for the code to make sense, let's insert the missing i++
+
+Reported-by: David Binderman <dcb314@hotmail.com>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/common/icst.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/common/icst.c
++++ b/arch/arm/common/icst.c
+@@ -58,6 +58,7 @@ icst_hz_to_vco(const struct icst_params
+
+ if (f > p->vco_min && f <= p->vco_max)
+ break;
++ i++;
+ } while (i < 8);
+
+ if (i >= 8)
--- /dev/null
+From 929e883f2bfdf68d4bd3aec43912e956417005c7 Mon Sep 17 00:00:00 2001
+From: Mohamed Jamsheeth Hajanajubudeen
+ <mohamedjamsheeth.hajanajubudeen@atmel.com>
+Date: Fri, 11 Dec 2015 17:06:26 +0530
+Subject: ARM: dts: at91: sama5d4: fix instance id of DBGU
+
+From: Mohamed Jamsheeth Hajanajubudeen <mohamedjamsheeth.hajanajubudeen@atmel.com>
+
+commit 929e883f2bfdf68d4bd3aec43912e956417005c7 upstream.
+
+Change instance id of DBGU to 45.
+
+Signed-off-by: Mohamed Jamsheeth Hajanajubudeen <mohamedjamsheeth.hajanajubudeen@atmel.com>
+Fixes: 7c661394c56c ("ARM: at91: dt: add device tree file for SAMA5D4 SoC")
+Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/sama5d4.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/boot/dts/sama5d4.dtsi
++++ b/arch/arm/boot/dts/sama5d4.dtsi
+@@ -1342,7 +1342,7 @@
+ dbgu: serial@fc069000 {
+ compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ reg = <0xfc069000 0x200>;
+- interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>;
++ interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_dbgu>;
+ clocks = <&dbgu_clk>;
--- /dev/null
+From e873cc022ce5e2c04bbc53b5874494b657e29d3f Mon Sep 17 00:00:00 2001
+From: Nicolas Ferre <nicolas.ferre@atmel.com>
+Date: Wed, 27 Jan 2016 11:03:02 +0100
+Subject: ARM: dts: at91: sama5d4 xplained: fix phy0 IRQ type
+
+From: Nicolas Ferre <nicolas.ferre@atmel.com>
+
+commit e873cc022ce5e2c04bbc53b5874494b657e29d3f upstream.
+
+For phy0 KSZ8081, the type of GPIO IRQ should be "level low" instead of
+"edge falling".
+
+Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/at91-sama5d4_xplained.dts | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+@@ -91,7 +91,7 @@
+
+ phy0: ethernet-phy@1 {
+ interrupt-parent = <&pioE>;
+- interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
++ interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+ reg = <1>;
+ };
+ };
--- /dev/null
+From f505dba762ae826bb68978a85ee5c8ced7dea8d7 Mon Sep 17 00:00:00 2001
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Date: Fri, 15 Jan 2016 09:30:18 +0100
+Subject: ARM: dts: at91: sama5d4 xplained: properly mux phy interrupt
+
+From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+
+commit f505dba762ae826bb68978a85ee5c8ced7dea8d7 upstream.
+
+No interrupts were received from the phy because PIOE 1 may not be properly
+muxed. It prevented proper link detection, especially since commit
+321beec5047a ("net: phy: Use interrupts when available in NOLINK state")
+disables polling.
+
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/at91-sama5d4_xplained.dts | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+@@ -86,6 +86,8 @@
+ macb0: ethernet@f8020000 {
+ phy-mode = "rmii";
+ status = "okay";
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
+
+ phy0: ethernet-phy@1 {
+ interrupt-parent = <&pioE>;
+@@ -152,6 +154,10 @@
+ atmel,pins =
+ <AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+ };
++ pinctrl_macb0_phy_irq: macb0_phy_irq_0 {
++ atmel,pins =
++ <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
++ };
+ };
+ };
+ };
--- /dev/null
+From aae6b18f5c95b9dc78de66d1e27e8afeee2763b7 Mon Sep 17 00:00:00 2001
+From: Wenyou Yang <wenyou.yang@atmel.com>
+Date: Wed, 27 Jan 2016 13:16:24 +0800
+Subject: ARM: dts: at91: sama5d4ek: add phy address and IRQ for macb0
+
+From: Wenyou Yang <wenyou.yang@atmel.com>
+
+commit aae6b18f5c95b9dc78de66d1e27e8afeee2763b7 upstream.
+
+On SAMA5D4EK board, the Ethernet doesn't work after resuming from the suspend
+state.
+
+Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com>
+[nicolas.ferre@atmel.com: adapt to newer kernel]
+Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board")
+Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/at91-sama5d4ek.dts | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/arm/boot/dts/at91-sama5d4ek.dts
++++ b/arch/arm/boot/dts/at91-sama5d4ek.dts
+@@ -160,8 +160,15 @@
+ };
+
+ macb0: ethernet@f8020000 {
++ pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>;
+ phy-mode = "rmii";
+ status = "okay";
++
++ ethernet-phy@1 {
++ reg = <0x1>;
++ interrupt-parent = <&pioE>;
++ interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
++ };
+ };
+
+ mmc1: mmc@fc000000 {
+@@ -193,6 +200,10 @@
+
+ pinctrl@fc06a000 {
+ board {
++ pinctrl_macb0_phy_irq: macb0_phy_irq {
++ atmel,pins =
++ <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
++ };
+ pinctrl_mmc0_cd: mmc0_cd {
+ atmel,pins =
+ <AT91_PIOE 5 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
--- /dev/null
+From af756bbccff85504ce05c63a50f80b9d7823c500 Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Mon, 11 Jan 2016 14:35:24 -0800
+Subject: ARM: dts: Fix omap5 PMIC control lines for RTC writes
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit af756bbccff85504ce05c63a50f80b9d7823c500 upstream.
+
+The palmas PMIC has two control lines that need to be muxed properly
+for things to work. The sys_nirq pin is used for interrupts, and msecure
+pin is used for enabling writes to some PMIC registers.
+
+Without these pins configured properly things can fail in mysterious
+ways. For example, we can't update the RTC registers on palmas PMIC
+unless the msecure pin is configured. And this is probably the reason
+why we had RTC missing from the omap5 dts file.
+
+According to "OMAP5430 ES2.0 Data Manual [Public] VErsion A (Rev. F)"
+swps052f.pdf, mux mode 1 is for sys_drm_msecure so in theory there
+should be no need to configure it as a GPIO pin.
+
+However, it seems there are some reliability issues using the msecure
+mux mode. And the TI trees configure the msecure pin as GPIO out high
+instead.
+
+As the PMIC only cares that the msecure line is high to allow access
+to the RTC registers, let's use a GPIO hog as suggested by Nishanth
+Menon <nm@ti.com>. Also the use of the internal pull was considered
+but supposedly that may not be capable of keeping the line high in
+a noisy environment.
+
+If we ever see high security omap5 products in the mainline tree,
+those need to skip the msecure pin muxing and ignore setting the GPIO
+hog. Chances are the related pin mux registers are locked in that case
+and the msecure pin is managed by whatever software may be running in
+the ARM TrustZone.
+
+Who knows what the original intention of the msecure pin was. Maybe
+it was supposed to prevent the system time to be set back for some
+game demo modes to time out? Anyways, it seems that later PMICs like
+tps659037 have recycled this pin for "powerhold" and devices like
+beagle-x15 do not need changes to the msecure pin configuration.
+
+To avoid further confusion with TWL variant PMICs, beagle-x15 does
+not have a back-up battery for RTC palmas. Instead the mcp79410 RTC
+is used with rtc-ds1307 driver. There is a "powerhold" jumper j5
+holes near the palmas PMIC, and shorting it seems to power up
+beagle-x15 automatically. It is unknown if it also has other side
+effects to the beagle-x15 power up sequence.
+
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/omap5-board-common.dtsi | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/arch/arm/boot/dts/omap5-board-common.dtsi
++++ b/arch/arm/boot/dts/omap5-board-common.dtsi
+@@ -130,6 +130,16 @@
+ };
+ };
+
++&gpio8 {
++ /* TI trees use GPIO instead of msecure, see also muxing */
++ p234 {
++ gpio-hog;
++ gpios = <10 GPIO_ACTIVE_HIGH>;
++ output-high;
++ line-name = "gpio8_234/msecure";
++ };
++};
++
+ &omap5_pmx_core {
+ pinctrl-names = "default";
+ pinctrl-0 = <
+@@ -213,6 +223,13 @@
+ >;
+ };
+
++ /* TI trees use GPIO mode; msecure mode does not work reliably? */
++ palmas_msecure_pins: palmas_msecure_pins {
++ pinctrl-single,pins = <
++ OMAP5_IOPAD(0x180, PIN_OUTPUT | MUX_MODE6) /* gpio8_234 */
++ >;
++ };
++
+ usbhost_pins: pinmux_usbhost_pins {
+ pinctrl-single,pins = <
+ 0x84 (PIN_INPUT | MUX_MODE0) /* usbb2_hsic_strobe */
+@@ -278,6 +295,12 @@
+ &usbhost_wkup_pins
+ >;
+
++ palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins {
++ pinctrl-single,pins = <
++ OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */
++ >;
++ };
++
+ usbhost_wkup_pins: pinmux_usbhost_wkup_pins {
+ pinctrl-single,pins = <
+ 0x1A (PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */
+@@ -345,6 +368,8 @@
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ ti,system-power-controller;
++ pinctrl-names = "default";
++ pinctrl-0 = <&palmas_sys_nirq_pins &palmas_msecure_pins>;
+
+ extcon_usb3: palmas_usb {
+ compatible = "ti,palmas-usb-vid";
--- /dev/null
+From 0ea24daae053a9ba65d2f3eb20523002c1a8af38 Mon Sep 17 00:00:00 2001
+From: Adam Ford <aford173@gmail.com>
+Date: Thu, 21 Jan 2016 11:03:20 -0600
+Subject: ARM: dts: Fix wl12xx missing clocks that cause hangs
+
+From: Adam Ford <aford173@gmail.com>
+
+commit 0ea24daae053a9ba65d2f3eb20523002c1a8af38 upstream.
+
+The tcxo-clock-frequency binding is listed as optional,
+but without it the wl12xx used on the torpedo + wireless
+may hang. Scanning also appears broken without this patch.
+
+Signed-off-by: Adam Ford <aford173@gmail.com>
+Fixes: 687c27676151 ("ARM: dts: Add minimal support for LogicPD Torpedo DM3730 devkit")
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
++++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+@@ -122,6 +122,7 @@
+ interrupt-parent = <&gpio5>;
+ interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
+ ref-clock-frequency = <26000000>;
++ tcxo-clock-frequency = <26000000>;
+ };
+ };
+
--- /dev/null
+From c08659d431b40ad5beb97d7dde49ad9796cb812c Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Tue, 5 Jan 2016 13:01:37 +0100
+Subject: ARM: dts: omap5-board-common: enable rtc and charging of backup battery
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit c08659d431b40ad5beb97d7dde49ad9796cb812c upstream.
+
+Tested on OMAP5432 EVM.
+
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/omap5-board-common.dtsi | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/arm/boot/dts/omap5-board-common.dtsi
++++ b/arch/arm/boot/dts/omap5-board-common.dtsi
+@@ -383,6 +383,14 @@
+ #clock-cells = <0>;
+ };
+
++ rtc {
++ compatible = "ti,palmas-rtc";
++ interrupt-parent = <&palmas>;
++ interrupts = <8 IRQ_TYPE_NONE>;
++ ti,backup-battery-chargeable;
++ ti,backup-battery-charge-high-current;
++ };
++
+ palmas_pmic {
+ compatible = "ti,palmas-pmic";
+ interrupt-parent = <&palmas>;
--- /dev/null
+From 079ae0c121fd23287f4ad2be9e9f8a13f63cae73 Mon Sep 17 00:00:00 2001
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Date: Fri, 4 Dec 2015 14:29:02 +0100
+Subject: ARM: mvebu: remove duplicated regulator definition in Armada 388 GP
+
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+
+commit 079ae0c121fd23287f4ad2be9e9f8a13f63cae73 upstream.
+
+The Armada 388 GP Device Tree file describes two times a regulator
+named 'reg_usb2_1_vbus', with the exact same description. This has
+been wrong since Armada 388 GP support was introduced.
+
+Fixes: 928413bd859c0 ("ARM: mvebu: Add Armada 388 General Purpose Development Board support")
+Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/armada-388-gp.dts | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/arch/arm/boot/dts/armada-388-gp.dts
++++ b/arch/arm/boot/dts/armada-388-gp.dts
+@@ -303,16 +303,6 @@
+ gpio = <&expander0 4 GPIO_ACTIVE_HIGH>;
+ };
+
+- reg_usb2_1_vbus: v5-vbus1 {
+- compatible = "regulator-fixed";
+- regulator-name = "v5.0-vbus1";
+- regulator-min-microvolt = <5000000>;
+- regulator-max-microvolt = <5000000>;
+- enable-active-high;
+- regulator-always-on;
+- gpio = <&expander0 4 GPIO_ACTIVE_HIGH>;
+- };
+-
+ reg_sata0: pwr-sata0 {
+ compatible = "regulator-fixed";
+ regulator-name = "pwr_en_sata0";
--- /dev/null
+From 418d5516568b3fdbc4e7b53677dd78aed8514565 Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Mon, 1 Feb 2016 14:18:57 +0100
+Subject: ARM: nomadik: fix up SD/MMC DT settings
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit 418d5516568b3fdbc4e7b53677dd78aed8514565 upstream.
+
+The DTSI file for the Nomadik does not properly specify how the
+PL180 levelshifter is connected: the Nomadik actually needs all
+the five st,sig-dir-* flags set to properly control all lines out.
+
+Further this board supports full power cycling of the card, and
+since this variant has no hardware clock gating, it needs a
+ridiculously low frequency setting to keep up with the ever
+overflowing FIFO.
+
+The pin configuration set-up is a bit of a mystery, because of
+course these pins are a mix of inputs and outputs. However the
+reference implementation sets all pins to "output" with
+unspecified initial value, so let's do that here as well.
+
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Olof Johansson <olof@lixom.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 37 +++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 17 deletions(-)
+
+--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
++++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+@@ -127,22 +127,14 @@
+ };
+ mmcsd_default_mode: mmcsd_default {
+ mmcsd_default_cfg1 {
+- /* MCCLK */
+- pins = "GPIO8_B10";
+- ste,output = <0>;
+- };
+- mmcsd_default_cfg2 {
+- /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */
+- pins = "GPIO10_C11", "GPIO15_A12",
+- "GPIO16_C13", "GPIO23_D15";
+- ste,output = <1>;
+- };
+- mmcsd_default_cfg3 {
+- /* MCCMD, MCDAT3-0, MCMSFBCLK */
+- pins = "GPIO9_A10", "GPIO11_B11",
+- "GPIO12_A11", "GPIO13_C12",
+- "GPIO14_B12", "GPIO24_C15";
+- ste,input = <1>;
++ /*
++ * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2
++ * MCCMD, MCDAT3-0, MCMSFBCLK
++ */
++ pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11",
++ "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12",
++ "GPIO16_C13", "GPIO23_D15", "GPIO24_C15";
++ ste,output = <2>;
+ };
+ };
+ };
+@@ -802,10 +794,21 @@
+ clock-names = "mclk", "apb_pclk";
+ interrupt-parent = <&vica>;
+ interrupts = <22>;
+- max-frequency = <48000000>;
++ max-frequency = <400000>;
+ bus-width = <4>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
++ full-pwr-cycle;
++ /*
++ * The STw4811 circuit used with the Nomadik strictly
++ * requires that all of these signal direction pins be
++ * routed and used for its 4-bit levelshifter.
++ */
++ st,sig-dir-dat0;
++ st,sig-dir-dat2;
++ st,sig-dir-dat31;
++ st,sig-dir-cmd;
++ st,sig-pin-fbclk;
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>;
+ vmmc-supply = <&vmmc_regulator>;
--- /dev/null
+From 0a0b13275558c32bbf6241464a7244b1ffd5afb3 Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Thu, 14 Jan 2016 12:20:47 -0800
+Subject: ARM: OMAP2+: Fix l2_inv_api_params for rodata
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit 0a0b13275558c32bbf6241464a7244b1ffd5afb3 upstream.
+
+We don't want to write to .text, so let's move l2_inv_api_params
+to .data and access it via a pointer.
+
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Laura Abbott <labbott@redhat.com>
+Cc: Nishanth Menon <nm@ti.com>
+Cc: Richard Woodruff <r-woodruff2@ti.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: Tero Kristo <t-kristo@ti.com>
+Acked-by: Nicolas Pitre <nico@linaro.org>
+Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be non-executable")
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-omap2/sleep34xx.S | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/mach-omap2/sleep34xx.S
++++ b/arch/arm/mach-omap2/sleep34xx.S
+@@ -427,12 +427,14 @@ skipl2dis:
+ and r1, #0x700
+ cmp r1, #0x300
+ beq l2_inv_gp
++ adr r0, l2_inv_api_params_offset
++ ldr r3, [r0]
++ add r3, r3, r0 @ r3 points to dummy parameters
+ mov r0, #40 @ set service ID for PPA
+ mov r12, r0 @ copy secure Service ID in r12
+ mov r1, #0 @ set task id for ROM code in r1
+ mov r2, #4 @ set some flags in r2, r6
+ mov r6, #0xff
+- adr r3, l2_inv_api_params @ r3 points to dummy parameters
+ dsb @ data write barrier
+ dmb @ data memory barrier
+ smc #1 @ call SMI monitor (smi #1)
+@@ -466,8 +468,8 @@ skipl2dis:
+ b logic_l1_restore
+
+ .align
+-l2_inv_api_params:
+- .word 0x1, 0x00
++l2_inv_api_params_offset:
++ .long l2_inv_api_params - .
+ l2_inv_gp:
+ /* Execute smi to invalidate L2 cache */
+ mov r12, #0x1 @ set up to invalidate L2
+@@ -516,6 +518,10 @@ control_mem_rta:
+ l2dis_3630:
+ .word 0
+
++ .data
++l2_inv_api_params:
++ .word 0x1, 0x00
++
+ /*
+ * Internal functions
+ */
--- /dev/null
+From eeaf9646aca89d097861caa24d9818434e48810e Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Thu, 14 Jan 2016 12:20:47 -0800
+Subject: ARM: OMAP2+: Fix l2dis_3630 for rodata
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit eeaf9646aca89d097861caa24d9818434e48810e upstream.
+
+We don't want to write to .text section. Let's move l2dis_3630
+to .data and access it via a pointer.
+
+For calculating the offset, let's optimize out the add and do it
+in ldr/str as suggested by Nicolas Pitre <nicolas.pitre@linaro.org>.
+
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Laura Abbott <labbott@redhat.com>
+Cc: Nishanth Menon <nm@ti.com>
+Cc: Richard Woodruff <r-woodruff2@ti.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: Tero Kristo <t-kristo@ti.com>
+Acked-by: Nicolas Pitre <nico@linaro.org>
+Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be non-executable")
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-omap2/sleep34xx.S | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/arch/arm/mach-omap2/sleep34xx.S
++++ b/arch/arm/mach-omap2/sleep34xx.S
+@@ -86,8 +86,9 @@ ENTRY(enable_omap3630_toggle_l2_on_resto
+ stmfd sp!, {lr} @ save registers on stack
+ /* Setup so that we will disable and enable l2 */
+ mov r1, #0x1
+- adrl r2, l2dis_3630 @ may be too distant for plain adr
+- str r1, [r2]
++ adrl r3, l2dis_3630_offset @ may be too distant for plain adr
++ ldr r2, [r3] @ value for offset
++ str r1, [r2, r3] @ write to l2dis_3630
+ ldmfd sp!, {pc} @ restore regs and return
+ ENDPROC(enable_omap3630_toggle_l2_on_restore)
+
+@@ -415,7 +416,9 @@ ENTRY(omap3_restore)
+ cmp r2, #0x0 @ Check if target power state was OFF or RET
+ bne logic_l1_restore
+
+- ldr r0, l2dis_3630
++ adr r1, l2dis_3630_offset @ address for offset
++ ldr r0, [r1] @ value for offset
++ ldr r0, [r1, r0] @ value at l2dis_3630
+ cmp r0, #0x1 @ should we disable L2 on 3630?
+ bne skipl2dis
+ mrc p15, 0, r0, c1, c0, 1
+@@ -486,7 +489,9 @@ l2_inv_gp:
+ mov r12, #0x2
+ smc #0 @ Call SMI monitor (smieq)
+ logic_l1_restore:
+- ldr r1, l2dis_3630
++ adr r0, l2dis_3630_offset @ adress for offset
++ ldr r1, [r0] @ value for offset
++ ldr r1, [r0, r1] @ value at l2dis_3630
+ cmp r1, #0x1 @ Test if L2 re-enable needed on 3630
+ bne skipl2reen
+ mrc p15, 0, r1, c1, c0, 1
+@@ -515,6 +520,10 @@ control_stat:
+ .word CONTROL_STAT
+ control_mem_rta:
+ .word CONTROL_MEM_RTA_CTRL
++l2dis_3630_offset:
++ .long l2dis_3630 - .
++
++ .data
+ l2dis_3630:
+ .word 0
+
--- /dev/null
+From 4da597d16602d14405b71a18d45e1c59f28f0fd2 Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Thu, 14 Jan 2016 12:20:48 -0800
+Subject: ARM: OMAP2+: Fix ppa_zero_params and ppa_por_params for rodata
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit 4da597d16602d14405b71a18d45e1c59f28f0fd2 upstream.
+
+We don't want to write to .text so let's move ppa_zero_params and
+ppa_por_params to .data and access them via pointers.
+
+Note that I have not been able to test this as I don't have a HS
+omap4 to test with. The code has been changed in a similar way as
+for omap3 though.
+
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Laura Abbott <labbott@redhat.com>
+Cc: Nishanth Menon <nm@ti.com>
+Cc: Richard Woodruff <r-woodruff2@ti.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: Tero Kristo <t-kristo@ti.com>
+Acked-by: Nicolas Pitre <nico@linaro.org>
+Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be
+non-executable")
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-omap2/sleep44xx.S | 25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/arch/arm/mach-omap2/sleep44xx.S
++++ b/arch/arm/mach-omap2/sleep44xx.S
+@@ -29,12 +29,6 @@
+ dsb
+ .endm
+
+-ppa_zero_params:
+- .word 0x0
+-
+-ppa_por_params:
+- .word 1, 0
+-
+ #ifdef CONFIG_ARCH_OMAP4
+
+ /*
+@@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume)
+ beq skip_ns_smp_enable
+ ppa_actrl_retry:
+ mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX
+- adr r3, ppa_zero_params @ Pointer to parameters
++ adr r1, ppa_zero_params_offset
++ ldr r3, [r1]
++ add r3, r3, r1 @ Pointer to ppa_zero_params
+ mov r1, #0x0 @ Process ID
+ mov r2, #0x4 @ Flag
+ mov r6, #0xff
+@@ -303,7 +299,9 @@ skip_ns_smp_enable:
+ ldr r0, =OMAP4_PPA_L2_POR_INDEX
+ ldr r1, =OMAP44XX_SAR_RAM_BASE
+ ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET]
+- adr r3, ppa_por_params
++ adr r1, ppa_por_params_offset
++ ldr r3, [r1]
++ add r3, r3, r1 @ Pointer to ppa_por_params
+ str r4, [r3, #0x04]
+ mov r1, #0x0 @ Process ID
+ mov r2, #0x4 @ Flag
+@@ -328,6 +326,8 @@ skip_l2en:
+ #endif
+
+ b cpu_resume @ Jump to generic resume
++ppa_por_params_offset:
++ .long ppa_por_params - .
+ ENDPROC(omap4_cpu_resume)
+ #endif /* CONFIG_ARCH_OMAP4 */
+
+@@ -380,4 +380,13 @@ ENTRY(omap_do_wfi)
+ nop
+
+ ldmfd sp!, {pc}
++ppa_zero_params_offset:
++ .long ppa_zero_params - .
+ ENDPROC(omap_do_wfi)
++
++ .data
++ppa_zero_params:
++ .word 0
++
++ppa_por_params:
++ .word 1, 0
--- /dev/null
+From a5311d4d13df80bd71a9e47f9ecaf327f478fab1 Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Thu, 14 Jan 2016 12:20:47 -0800
+Subject: ARM: OMAP2+: Fix save_secure_ram_context for rodata
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit a5311d4d13df80bd71a9e47f9ecaf327f478fab1 upstream.
+
+We don't want to write to .text and we can move save_secure_ram_context
+into .data as it all gets copied into SRAM anyways.
+
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Laura Abbott <labbott@redhat.com>
+Cc: Nishanth Menon <nm@ti.com>
+Cc: Richard Woodruff <r-woodruff2@ti.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Cc: Tero Kristo <t-kristo@ti.com>
+Acked-by: Nicolas Pitre <nico@linaro.org>
+Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be
+non-executable")
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-omap2/sleep34xx.S | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/mach-omap2/sleep34xx.S
++++ b/arch/arm/mach-omap2/sleep34xx.S
+@@ -92,8 +92,12 @@ ENTRY(enable_omap3630_toggle_l2_on_resto
+ ldmfd sp!, {pc} @ restore regs and return
+ ENDPROC(enable_omap3630_toggle_l2_on_restore)
+
+- .text
+-/* Function to call rom code to save secure ram context */
++/*
++ * Function to call rom code to save secure ram context. This gets
++ * relocated to SRAM, so it can be all in .data section. Otherwise
++ * we need to initialize api_params separately.
++ */
++ .data
+ .align 3
+ ENTRY(save_secure_ram_context)
+ stmfd sp!, {r4 - r11, lr} @ save registers on stack
+@@ -127,6 +131,8 @@ ENDPROC(save_secure_ram_context)
+ ENTRY(save_secure_ram_context_sz)
+ .word . - save_secure_ram_context
+
++ .text
++
+ /*
+ * ======================
+ * == Idle entry point ==
--- /dev/null
+From d9db59103305eb5ec2a86369f32063e9921b6ac5 Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Thu, 14 Jan 2016 12:20:47 -0800
+Subject: ARM: OMAP2+: Fix wait_dll_lock_timed for rodata
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit d9db59103305eb5ec2a86369f32063e9921b6ac5 upstream.
+
+We don't want to be writing to .text so it can be set rodata.
+Fix error "Unable to handle kernel paging request at virtual address
+c012396c" in wait_dll_lock_timed if CONFIG_DEBUG_RODATA is selected.
+
+As these counters are for debugging only and unused, we can just
+remove them.
+
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Laura Abbott <labbott@redhat.com>
+Cc: Nishanth Menon <nm@ti.com>
+Cc: Richard Woodruff <r-woodruff2@ti.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: Tero Kristo <t-kristo@ti.com>
+Acked-by: Nicolas Pitre <nico@linaro.org>
+Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be
+non-executable")
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-omap2/sleep34xx.S | 22 ----------------------
+ 1 file changed, 22 deletions(-)
+
+--- a/arch/arm/mach-omap2/sleep34xx.S
++++ b/arch/arm/mach-omap2/sleep34xx.S
+@@ -289,12 +289,6 @@ wait_sdrc_ready:
+ bic r5, r5, #0x40
+ str r5, [r4]
+
+-/*
+- * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a
+- * base instead.
+- * Be careful not to clobber r7 when maintaing this code.
+- */
+-
+ is_dll_in_lock_mode:
+ /* Is dll in lock mode? */
+ ldr r4, sdrc_dlla_ctrl
+@@ -302,11 +296,7 @@ is_dll_in_lock_mode:
+ tst r5, #0x4
+ bne exit_nonoff_modes @ Return if locked
+ /* wait till dll locks */
+- adr r7, kick_counter
+ wait_dll_lock_timed:
+- ldr r4, wait_dll_lock_counter
+- add r4, r4, #1
+- str r4, [r7, #wait_dll_lock_counter - kick_counter]
+ ldr r4, sdrc_dlla_status
+ /* Wait 20uS for lock */
+ mov r6, #8
+@@ -330,9 +320,6 @@ kick_dll:
+ orr r6, r6, #(1<<3) @ enable dll
+ str r6, [r4]
+ dsb
+- ldr r4, kick_counter
+- add r4, r4, #1
+- str r4, [r7] @ kick_counter
+ b wait_dll_lock_timed
+
+ exit_nonoff_modes:
+@@ -360,15 +347,6 @@ sdrc_dlla_status:
+ .word SDRC_DLLA_STATUS_V
+ sdrc_dlla_ctrl:
+ .word SDRC_DLLA_CTRL_V
+- /*
+- * When exporting to userspace while the counters are in SRAM,
+- * these 2 words need to be at the end to facilitate retrival!
+- */
+-kick_counter:
+- .word 0
+-wait_dll_lock_counter:
+- .word 0
+-
+ ENTRY(omap3_do_wfi_sz)
+ .word . - omap3_do_wfi
+
--- /dev/null
+From 722ec35f7faefcc34d12616eca7976a848870f9d Mon Sep 17 00:00:00 2001
+From: Marek Szyprowski <m.szyprowski@samsung.com>
+Date: Tue, 16 Feb 2016 15:14:44 +0100
+Subject: arm64: dma-mapping: fix handling of devices registered before arch_initcall
+
+From: Marek Szyprowski <m.szyprowski@samsung.com>
+
+commit 722ec35f7faefcc34d12616eca7976a848870f9d upstream.
+
+This patch ensures that devices, which got registered before arch_initcall
+will be handled correctly by IOMMU-based DMA-mapping code.
+
+Fixes: 13b8629f6511 ("arm64: Add IOMMU dma_ops")
+Acked-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/dma-mapping.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/arm64/mm/dma-mapping.c
++++ b/arch/arm64/mm/dma-mapping.c
+@@ -933,6 +933,10 @@ static int __init __iommu_dma_init(void)
+ ret = register_iommu_dma_ops_notifier(&platform_bus_type);
+ if (!ret)
+ ret = register_iommu_dma_ops_notifier(&amba_bustype);
++
++ /* handle devices queued before this arch_initcall */
++ if (!ret)
++ __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
+ return ret;
+ }
+ arch_initcall(__iommu_dma_init);
--- /dev/null
+From 57adec866c0440976c96a4b8f5b59fb411b1cacb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Mika=20Penttil=C3=A4?= <mika.penttila@nextfour.com>
+Date: Tue, 26 Jan 2016 15:47:25 +0000
+Subject: arm64: mm: avoid calling apply_to_page_range on empty range
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mika Penttilä <mika.penttila@nextfour.com>
+
+commit 57adec866c0440976c96a4b8f5b59fb411b1cacb upstream.
+
+Calling apply_to_page_range with an empty range results in a BUG_ON
+from the core code. This can be triggered by trying to load the st_drv
+module with CONFIG_DEBUG_SET_MODULE_RONX enabled:
+
+ kernel BUG at mm/memory.c:1874!
+ Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
+ Modules linked in:
+ CPU: 3 PID: 1764 Comm: insmod Not tainted 4.5.0-rc1+ #2
+ Hardware name: ARM Juno development board (r0) (DT)
+ task: ffffffc9763b8000 ti: ffffffc975af8000 task.ti: ffffffc975af8000
+ PC is at apply_to_page_range+0x2cc/0x2d0
+ LR is at change_memory_common+0x80/0x108
+
+This patch fixes the issue by making change_memory_common (called by the
+set_memory_* functions) a NOP when numpages == 0, therefore avoiding the
+erroneous call to apply_to_page_range and bringing us into line with x86
+and s390.
+
+Reviewed-by: Laura Abbott <labbott@redhat.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Mika Penttilä <mika.penttila@nextfour.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/pageattr.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/mm/pageattr.c
++++ b/arch/arm64/mm/pageattr.c
+@@ -57,6 +57,9 @@ static int change_memory_common(unsigned
+ if (end < MODULES_VADDR || end >= MODULES_END)
+ return -EINVAL;
+
++ if (!numpages)
++ return 0;
++
+ data.set_mask = set_mask;
+ data.clear_mask = clear_mask;
+
--- /dev/null
+From 1636d1d77ef4e01e57f706a4cae3371463896136 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 15 Feb 2016 16:20:26 +0000
+Subject: Btrfs: fix direct IO requests not reporting IO error to user space
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 1636d1d77ef4e01e57f706a4cae3371463896136 upstream.
+
+If a bio for a direct IO request fails, we were not setting the error in
+the parent bio (the main DIO bio), making us not return the error to
+user space in btrfs_direct_IO(), that is, it made __blockdev_direct_IO()
+return the number of bytes issued for IO and not the error a bio created
+and submitted by btrfs_submit_direct() got from the block layer.
+This essentially happens because when we call:
+
+ dio_end_io(dio_bio, bio->bi_error);
+
+It does not set dio_bio->bi_error to the value of the second argument.
+So just add this missing assignment in endio callbacks, just as we do in
+the error path at btrfs_submit_direct() when we fail to clone the dio bio
+or allocate its private object. This follows the convention of what is
+done with other similar APIs such as bio_endio() where the caller is
+responsible for setting the bi_error field in the bio it passes as an
+argument to bio_endio().
+
+This was detected by the new generic test cases in xfstests: 271, 272,
+276 and 278. Which essentially setup a dm error target, then load the
+error table, do a direct IO write and unload the error table. They
+expect the write to fail with -EIO, which was not getting reported
+when testing against btrfs.
+
+Fixes: 4246a0b63bd8 ("block: add a bi_error field to struct bio")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7997,6 +7997,7 @@ static void btrfs_endio_direct_read(stru
+
+ kfree(dip);
+
++ dio_bio->bi_error = bio->bi_error;
+ dio_end_io(dio_bio, bio->bi_error);
+
+ if (io_bio->end_io)
+@@ -8042,6 +8043,7 @@ out_test:
+
+ kfree(dip);
+
++ dio_bio->bi_error = bio->bi_error;
+ dio_end_io(dio_bio, bio->bi_error);
+ bio_put(bio);
+ }
--- /dev/null
+From 8cdc7c5b00d945a3c823fc4277af304abb9cb43d Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 6 Jan 2016 22:42:35 +0000
+Subject: Btrfs: fix fitrim discarding device area reserved for boot loader's use
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8cdc7c5b00d945a3c823fc4277af304abb9cb43d upstream.
+
+As of the 4.3 kernel release, the fitrim ioctl can now discard any region
+of a disk that is not allocated to any chunk/block group, including the
+first megabyte which is used for our primary superblock and by the boot
+loader (grub for example).
+
+Fix this by not allowing trim/discard of any region in the device that
+starts at an offset lower than max(alloc_start_mount_option, 1Mb), just
+as it was not possible before 4.3.
+
+A reproducer test case for xfstests follows.
+
+ seq=`basename $0`
+ seqres=$RESULT_DIR/$seq
+ echo "QA output created by $seq"
+ tmp=/tmp/$$
+ status=1 # failure is the default!
+ trap "_cleanup; exit \$status" 0 1 2 3 15
+
+ _cleanup()
+ {
+ cd /
+ rm -f $tmp.*
+ }
+
+ # get standard environment, filters and checks
+ . ./common/rc
+ . ./common/filter
+
+ # real QA test starts here
+ _need_to_be_root
+ _supported_fs btrfs
+ _supported_os Linux
+ _require_scratch
+
+ rm -f $seqres.full
+
+ _scratch_mkfs >>$seqres.full 2>&1
+
+ # Write to the [0, 64Kb[ and [68Kb, 1Mb[ ranges of the device. These ranges are
+ # reserved for a boot loader to use (GRUB for example) and btrfs should never
+ # use them - neither for allocating metadata/data nor should trim/discard them.
+ # The range [64Kb, 68Kb[ is used for the primary superblock of the filesystem.
+ $XFS_IO_PROG -c "pwrite -S 0xfd 0 64K" $SCRATCH_DEV | _filter_xfs_io
+ $XFS_IO_PROG -c "pwrite -S 0xfd 68K 956K" $SCRATCH_DEV | _filter_xfs_io
+
+ # Now mount the filesystem and perform a fitrim against it.
+ _scratch_mount
+ _require_batched_discard $SCRATCH_MNT
+ $FSTRIM_PROG $SCRATCH_MNT
+
+ # Now unmount the filesystem and verify the content of the ranges was not
+ # modified (no trim/discard happened on them).
+ _scratch_unmount
+ echo "Content of the ranges [0, 64Kb] and [68Kb, 1Mb[ after fitrim:"
+ od -t x1 -N $((64 * 1024)) $SCRATCH_DEV
+ od -t x1 -j $((68 * 1024)) -N $((956 * 1024)) $SCRATCH_DEV
+
+ status=0
+ exit
+
+Reported-by: Vincent Petry <PVince81@yahoo.fr>
+Reported-by: Andrei Borzenkov <arvidjaar@gmail.com>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109341
+Fixes: 499f377f49f0 (btrfs: iterate over unused chunk space in FITRIM)
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1257,6 +1257,15 @@ int find_free_dev_extent_start(struct bt
+ int ret;
+ int slot;
+ struct extent_buffer *l;
++ u64 min_search_start;
++
++ /*
++ * We don't want to overwrite the superblock on the drive nor any area
++ * used by the boot loader (grub for example), so we make sure to start
++ * at an offset of at least 1MB.
++ */
++ min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
++ search_start = max(search_start, min_search_start);
+
+ path = btrfs_alloc_path();
+ if (!path)
+@@ -1397,18 +1406,9 @@ int find_free_dev_extent(struct btrfs_tr
+ struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *len)
+ {
+- struct btrfs_root *root = device->dev_root;
+- u64 search_start;
+-
+ /* FIXME use last free of some kind */
+-
+- /*
+- * we don't want to overwrite the superblock on the drive,
+- * so we make sure to start at an offset of at least 1MB
+- */
+- search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
+ return find_free_dev_extent_start(trans->transaction, device,
+- num_bytes, search_start, start, len);
++ num_bytes, 0, start, len);
+ }
+
+ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
--- /dev/null
+From 0c0fe3b0fa45082cd752553fdb3a4b42503a118e Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 3 Feb 2016 19:17:27 +0000
+Subject: Btrfs: fix hang on extent buffer lock caused by the inode_paths ioctl
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0c0fe3b0fa45082cd752553fdb3a4b42503a118e upstream.
+
+While doing some tests I ran into a hang on an extent buffer's rwlock
+that produced the following trace:
+
+[39389.800012] NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s! [fdm-stress:32166]
+[39389.800016] NMI watchdog: BUG: soft lockup - CPU#14 stuck for 22s! [fdm-stress:32165]
+[39389.800016] Modules linked in: btrfs dm_mod ppdev xor sha256_generic hmac raid6_pq drbg ansi_cprng aesni_intel i2c_piix4 acpi_cpufreq aes_x86_64 ablk_helper tpm_tis parport_pc i2c_core sg cryptd evdev psmouse lrw tpm parport gf128mul serio_raw pcspkr glue_helper processor button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel scsi_mod e1000 virtio floppy [last unloaded: btrfs]
+[39389.800016] irq event stamp: 0
+[39389.800016] hardirqs last enabled at (0): [< (null)>] (null)
+[39389.800016] hardirqs last disabled at (0): [<ffffffff8104e58d>] copy_process+0x638/0x1a35
+[39389.800016] softirqs last enabled at (0): [<ffffffff8104e58d>] copy_process+0x638/0x1a35
+[39389.800016] softirqs last disabled at (0): [< (null)>] (null)
+[39389.800016] CPU: 14 PID: 32165 Comm: fdm-stress Not tainted 4.4.0-rc6-btrfs-next-18+ #1
+[39389.800016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
+[39389.800016] task: ffff880175b1ca40 ti: ffff8800a185c000 task.ti: ffff8800a185c000
+[39389.800016] RIP: 0010:[<ffffffff810902af>] [<ffffffff810902af>] queued_spin_lock_slowpath+0x57/0x158
+[39389.800016] RSP: 0018:ffff8800a185fb80 EFLAGS: 00000202
+[39389.800016] RAX: 0000000000000101 RBX: ffff8801710c4e9c RCX: 0000000000000101
+[39389.800016] RDX: 0000000000000100 RSI: 0000000000000001 RDI: 0000000000000001
+[39389.800016] RBP: ffff8800a185fb98 R08: 0000000000000001 R09: 0000000000000000
+[39389.800016] R10: ffff8800a185fb68 R11: 6db6db6db6db6db7 R12: ffff8801710c4e98
+[39389.800016] R13: ffff880175b1ca40 R14: ffff8800a185fc10 R15: ffff880175b1ca40
+[39389.800016] FS: 00007f6d37fff700(0000) GS:ffff8802be9c0000(0000) knlGS:0000000000000000
+[39389.800016] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[39389.800016] CR2: 00007f6d300019b8 CR3: 0000000037c93000 CR4: 00000000001406e0
+[39389.800016] Stack:
+[39389.800016] ffff8801710c4e98 ffff8801710c4e98 ffff880175b1ca40 ffff8800a185fbb0
+[39389.800016] ffffffff81091e11 ffff8801710c4e98 ffff8800a185fbc8 ffffffff81091895
+[39389.800016] ffff8801710c4e98 ffff8800a185fbe8 ffffffff81486c5c ffffffffa067288c
+[39389.800016] Call Trace:
+[39389.800016] [<ffffffff81091e11>] queued_read_lock_slowpath+0x46/0x60
+[39389.800016] [<ffffffff81091895>] do_raw_read_lock+0x3e/0x41
+[39389.800016] [<ffffffff81486c5c>] _raw_read_lock+0x3d/0x44
+[39389.800016] [<ffffffffa067288c>] ? btrfs_tree_read_lock+0x54/0x125 [btrfs]
+[39389.800016] [<ffffffffa067288c>] btrfs_tree_read_lock+0x54/0x125 [btrfs]
+[39389.800016] [<ffffffffa0622ced>] ? btrfs_find_item+0xa7/0xd2 [btrfs]
+[39389.800016] [<ffffffffa069363f>] btrfs_ref_to_path+0xd6/0x174 [btrfs]
+[39389.800016] [<ffffffffa0693730>] inode_to_path+0x53/0xa2 [btrfs]
+[39389.800016] [<ffffffffa0693e2e>] paths_from_inode+0x117/0x2ec [btrfs]
+[39389.800016] [<ffffffffa0670cff>] btrfs_ioctl+0xd5b/0x2793 [btrfs]
+[39389.800016] [<ffffffff8108a8b0>] ? arch_local_irq_save+0x9/0xc
+[39389.800016] [<ffffffff81276727>] ? __this_cpu_preempt_check+0x13/0x15
+[39389.800016] [<ffffffff8108a8b0>] ? arch_local_irq_save+0x9/0xc
+[39389.800016] [<ffffffff8118b3d4>] ? rcu_read_unlock+0x3e/0x5d
+[39389.800016] [<ffffffff811822f8>] do_vfs_ioctl+0x42b/0x4ea
+[39389.800016] [<ffffffff8118b4f3>] ? __fget_light+0x62/0x71
+[39389.800016] [<ffffffff8118240e>] SyS_ioctl+0x57/0x79
+[39389.800016] [<ffffffff814872d7>] entry_SYSCALL_64_fastpath+0x12/0x6f
+[39389.800016] Code: b9 01 01 00 00 f7 c6 00 ff ff ff 75 32 83 fe 01 89 ca 89 f0 0f 45 d7 f0 0f b1 13 39 f0 74 04 89 c6 eb e2 ff ca 0f 84 fa 00 00 00 <8b> 03 84 c0 74 04 f3 90 eb f6 66 c7 03 01 00 e9 e6 00 00 00 e8
+[39389.800012] Modules linked in: btrfs dm_mod ppdev xor sha256_generic hmac raid6_pq drbg ansi_cprng aesni_intel i2c_piix4 acpi_cpufreq aes_x86_64 ablk_helper tpm_tis parport_pc i2c_core sg cryptd evdev psmouse lrw tpm parport gf128mul serio_raw pcspkr glue_helper processor button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel scsi_mod e1000 virtio floppy [last unloaded: btrfs]
+[39389.800012] irq event stamp: 0
+[39389.800012] hardirqs last enabled at (0): [< (null)>] (null)
+[39389.800012] hardirqs last disabled at (0): [<ffffffff8104e58d>] copy_process+0x638/0x1a35
+[39389.800012] softirqs last enabled at (0): [<ffffffff8104e58d>] copy_process+0x638/0x1a35
+[39389.800012] softirqs last disabled at (0): [< (null)>] (null)
+[39389.800012] CPU: 15 PID: 32166 Comm: fdm-stress Tainted: G L 4.4.0-rc6-btrfs-next-18+ #1
+[39389.800012] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
+[39389.800012] task: ffff880179294380 ti: ffff880034a60000 task.ti: ffff880034a60000
+[39389.800012] RIP: 0010:[<ffffffff81091e8d>] [<ffffffff81091e8d>] queued_write_lock_slowpath+0x62/0x72
+[39389.800012] RSP: 0018:ffff880034a639f0 EFLAGS: 00000206
+[39389.800012] RAX: 0000000000000101 RBX: ffff8801710c4e98 RCX: 0000000000000000
+[39389.800012] RDX: 00000000000000ff RSI: 0000000000000000 RDI: ffff8801710c4e9c
+[39389.800012] RBP: ffff880034a639f8 R08: 0000000000000001 R09: 0000000000000000
+[39389.800012] R10: ffff880034a639b0 R11: 0000000000001000 R12: ffff8801710c4e98
+[39389.800012] R13: 0000000000000001 R14: ffff880172cbc000 R15: ffff8801710c4e00
+[39389.800012] FS: 00007f6d377fe700(0000) GS:ffff8802be9e0000(0000) knlGS:0000000000000000
+[39389.800012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[39389.800012] CR2: 00007f6d3d3c1000 CR3: 0000000037c93000 CR4: 00000000001406e0
+[39389.800012] Stack:
+[39389.800012] ffff8801710c4e98 ffff880034a63a10 ffffffff81091963 ffff8801710c4e98
+[39389.800012] ffff880034a63a30 ffffffff81486f1b ffffffffa0672cb3 ffff8801710c4e00
+[39389.800012] ffff880034a63a78 ffffffffa0672cb3 ffff8801710c4e00 ffff880034a63a58
+[39389.800012] Call Trace:
+[39389.800012] [<ffffffff81091963>] do_raw_write_lock+0x72/0x8c
+[39389.800012] [<ffffffff81486f1b>] _raw_write_lock+0x3a/0x41
+[39389.800012] [<ffffffffa0672cb3>] ? btrfs_tree_lock+0x119/0x251 [btrfs]
+[39389.800012] [<ffffffffa0672cb3>] btrfs_tree_lock+0x119/0x251 [btrfs]
+[39389.800012] [<ffffffffa061aeba>] ? rcu_read_unlock+0x5b/0x5d [btrfs]
+[39389.800012] [<ffffffffa061ce13>] ? btrfs_root_node+0xda/0xe6 [btrfs]
+[39389.800012] [<ffffffffa061ce83>] btrfs_lock_root_node+0x22/0x42 [btrfs]
+[39389.800012] [<ffffffffa062046b>] btrfs_search_slot+0x1b8/0x758 [btrfs]
+[39389.800012] [<ffffffff810fc6b0>] ? time_hardirqs_on+0x15/0x28
+[39389.800012] [<ffffffffa06365db>] btrfs_lookup_inode+0x31/0x95 [btrfs]
+[39389.800012] [<ffffffff8108d62f>] ? trace_hardirqs_on+0xd/0xf
+[39389.800012] [<ffffffff8148482b>] ? mutex_lock_nested+0x397/0x3bc
+[39389.800012] [<ffffffffa068821b>] __btrfs_update_delayed_inode+0x59/0x1c0 [btrfs]
+[39389.800012] [<ffffffffa068858e>] __btrfs_commit_inode_delayed_items+0x194/0x5aa [btrfs]
+[39389.800012] [<ffffffff81486ab7>] ? _raw_spin_unlock+0x31/0x44
+[39389.800012] [<ffffffffa0688a48>] __btrfs_run_delayed_items+0xa4/0x15c [btrfs]
+[39389.800012] [<ffffffffa0688d62>] btrfs_run_delayed_items+0x11/0x13 [btrfs]
+[39389.800012] [<ffffffffa064048e>] btrfs_commit_transaction+0x234/0x96e [btrfs]
+[39389.800012] [<ffffffffa0618d10>] btrfs_sync_fs+0x145/0x1ad [btrfs]
+[39389.800012] [<ffffffffa0671176>] btrfs_ioctl+0x11d2/0x2793 [btrfs]
+[39389.800012] [<ffffffff8108a8b0>] ? arch_local_irq_save+0x9/0xc
+[39389.800012] [<ffffffff81140261>] ? __might_fault+0x4c/0xa7
+[39389.800012] [<ffffffff81140261>] ? __might_fault+0x4c/0xa7
+[39389.800012] [<ffffffff8108a8b0>] ? arch_local_irq_save+0x9/0xc
+[39389.800012] [<ffffffff8118b3d4>] ? rcu_read_unlock+0x3e/0x5d
+[39389.800012] [<ffffffff811822f8>] do_vfs_ioctl+0x42b/0x4ea
+[39389.800012] [<ffffffff8118b4f3>] ? __fget_light+0x62/0x71
+[39389.800012] [<ffffffff8118240e>] SyS_ioctl+0x57/0x79
+[39389.800012] [<ffffffff814872d7>] entry_SYSCALL_64_fastpath+0x12/0x6f
+[39389.800012] Code: f0 0f b1 13 85 c0 75 ef eb 2a f3 90 8a 03 84 c0 75 f8 f0 0f b0 13 84 c0 75 f0 ba ff 00 00 00 eb 0a f0 0f b1 13 ff c8 74 0b f3 90 <8b> 03 83 f8 01 75 f7 eb ed c6 43 04 00 5b 5d c3 0f 1f 44 00 00
+
+This happens because in the code path executed by the inode_paths ioctl we
+end up nesting two calls to read lock a leaf's rwlock when after the first
+call to read_lock() and before the second call to read_lock(), another
+task (running the delayed items as part of a transaction commit) has
+already called write_lock() against the leaf's rwlock. This situation is
+illustrated by the following diagram:
+
+ Task A Task B
+
+ btrfs_ref_to_path() btrfs_commit_transaction()
+ read_lock(&eb->lock);
+
+ btrfs_run_delayed_items()
+ __btrfs_commit_inode_delayed_items()
+ __btrfs_update_delayed_inode()
+ btrfs_lookup_inode()
+
+ write_lock(&eb->lock);
+ --> task waits for lock
+
+ read_lock(&eb->lock);
+ --> makes this task hang
+ forever (and task B too
+ of course)
+
+So fix this by avoiding doing the nested read lock, which is easily
+avoidable. This issue does not happen if task B calls write_lock() after
+task A does the second call to read_lock(), however there does not seem
+to exist anything in the documentation that mentions what is the expected
+behaviour for recursive locking of rwlocks (leaving the idea that doing
+so is not a good usage of rwlocks).
+
+Also, as a side effect necessary for this fix, make sure we do not
+needlessly read lock extent buffers when the input path has skip_locking
+set (used when called from send).
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/backref.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -1417,7 +1417,8 @@ char *btrfs_ref_to_path(struct btrfs_roo
+ read_extent_buffer(eb, dest + bytes_left,
+ name_off, name_len);
+ if (eb != eb_in) {
+- btrfs_tree_read_unlock_blocking(eb);
++ if (!path->skip_locking)
++ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ }
+ ret = btrfs_find_item(fs_root, path, parent, 0,
+@@ -1437,9 +1438,10 @@ char *btrfs_ref_to_path(struct btrfs_roo
+ eb = path->nodes[0];
+ /* make sure we can use eb after releasing the path */
+ if (eb != eb_in) {
+- atomic_inc(&eb->refs);
+- btrfs_tree_read_lock(eb);
+- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++ if (!path->skip_locking)
++ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++ path->nodes[0] = NULL;
++ path->locks[0] = 0;
+ }
+ btrfs_release_path(path);
+ iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
--- /dev/null
+From e0bd70c67bf996b360f706b6c643000f2e384681 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 27 Jan 2016 10:20:58 +0000
+Subject: Btrfs: fix invalid page accesses in extent_same (dedup) ioctl
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit e0bd70c67bf996b360f706b6c643000f2e384681 upstream.
+
+In the extent_same ioctl we are getting the pages for the source and
+target ranges and unlocking them immediately after, which is incorrect
+because later we attempt to map them (with kmap_atomic) and access their
+contents at btrfs_cmp_data(). When we do such access the pages might have
+been relocated or removed from memory, which leads to an invalid memory
+access. This issue is detected on a kernel with CONFIG_DEBUG_PAGEALLOC=y
+which produces a trace like the following:
+
+[186736.677437] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
+[186736.680382] Modules linked in: btrfs dm_flakey dm_mod ppdev xor raid6_pq sha256_generic hmac drbg ansi_cprng acpi_cpufreq evdev sg aesni_intel aes_x86_64
+parport_pc ablk_helper tpm_tis psmouse parport i2c_piix4 tpm cryptd i2c_core lrw processor button serio_raw pcspkr gf128mul glue_helper loop autofs4 ext4
+crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel scsi_mod e1000 virtio floppy [last
+unloaded: btrfs]
+[186736.681319] CPU: 13 PID: 10222 Comm: duperemove Tainted: G W 4.4.0-rc6-btrfs-next-18+ #1
+[186736.681319] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
+[186736.681319] task: ffff880132600400 ti: ffff880362284000 task.ti: ffff880362284000
+[186736.681319] RIP: 0010:[<ffffffff81264d00>] [<ffffffff81264d00>] memcmp+0xb/0x22
+[186736.681319] RSP: 0018:ffff880362287d70 EFLAGS: 00010287
+[186736.681319] RAX: 000002c002468acf RBX: 0000000012345678 RCX: 0000000000000000
+[186736.681319] RDX: 0000000000001000 RSI: 0005d129c5cf9000 RDI: 0005d129c5cf9000
+[186736.681319] RBP: ffff880362287d70 R08: 0000000000000000 R09: 0000000000001000
+[186736.681319] R10: ffff880000000000 R11: 0000000000000476 R12: 0000000000001000
+[186736.681319] R13: ffff8802f91d4c88 R14: ffff8801f2a77830 R15: ffff880352e83e40
+[186736.681319] FS: 00007f27b37fe700(0000) GS:ffff88043dda0000(0000) knlGS:0000000000000000
+[186736.681319] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[186736.681319] CR2: 00007f27a406a000 CR3: 0000000217421000 CR4: 00000000001406e0
+[186736.681319] Stack:
+[186736.681319] ffff880362287ea0 ffffffffa048d0bd 000000000009f000 0000000000001000
+[186736.681319] 0100000000000000 ffff8801f2a77850 ffff8802f91d49b0 ffff880132600400
+[186736.681319] 00000000000004f8 ffff8801c1efbe41 0000000000000000 0000000000000038
+[186736.681319] Call Trace:
+[186736.681319] [<ffffffffa048d0bd>] btrfs_ioctl+0x24cb/0x2731 [btrfs]
+[186736.681319] [<ffffffff8108a8b0>] ? arch_local_irq_save+0x9/0xc
+[186736.681319] [<ffffffff8118b3d4>] ? rcu_read_unlock+0x3e/0x5d
+[186736.681319] [<ffffffff811822f8>] do_vfs_ioctl+0x42b/0x4ea
+[186736.681319] [<ffffffff8118b4f3>] ? __fget_light+0x62/0x71
+[186736.681319] [<ffffffff8118240e>] SyS_ioctl+0x57/0x79
+[186736.681319] [<ffffffff814872d7>] entry_SYSCALL_64_fastpath+0x12/0x6f
+[186736.681319] Code: 0a 3c 6e 74 0d 3c 79 74 04 3c 59 75 0c c6 06 01 eb 03 c6 06 00 31 c0 eb 05 b8 ea ff ff ff 5d c3 55 31 c9 48 89 e5 48 39 d1 74 13 <0f> b6
+04 0f 44 0f b6 04 0e 48 ff c1 44 29 c0 74 ea eb 02 31 c0
+
+(gdb) list *(btrfs_ioctl+0x24cb)
+0x5e0e1 is in btrfs_ioctl (fs/btrfs/ioctl.c:2972).
+2967 dst_addr = kmap_atomic(dst_page);
+2968
+2969 flush_dcache_page(src_page);
+2970 flush_dcache_page(dst_page);
+2971
+2972 if (memcmp(addr, dst_addr, cmp_len))
+2973 ret = BTRFS_SAME_DATA_DIFFERS;
+2974
+2975 kunmap_atomic(addr);
+2976 kunmap_atomic(dst_addr);
+
+So fix this by making sure we keep the pages locked and respect the same
+locking order as everywhere else: get and lock the pages first and then
+lock the range in the inode's io tree (like for example at
+__btrfs_buffered_write() and extent_readpages()). If an ordered extent
+is found after locking the range in the io tree, unlock the range,
+unlock the pages, wait for the ordered extent to complete and repeat the
+entire locking process until no overlapping ordered extents are found.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 76 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -2799,7 +2799,6 @@ static struct page *extent_same_get_page
+ return NULL;
+ }
+ }
+- unlock_page(page);
+
+ return page;
+ }
+@@ -2818,10 +2817,17 @@ static int gather_extent_pages(struct in
+ return 0;
+ }
+
+-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
++static int lock_extent_range(struct inode *inode, u64 off, u64 len,
++ bool retry_range_locking)
+ {
+- /* do any pending delalloc/csum calc on src, one way or
+- another, and lock file content */
++ /*
++ * Do any pending delalloc/csum calculations on inode, one way or
++ * another, and lock file content.
++ * The locking order is:
++ *
++ * 1) pages
++ * 2) range in the inode's io tree
++ */
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+ lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+@@ -2839,8 +2845,11 @@ static inline void lock_extent_range(str
+ unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
++ if (!retry_range_locking)
++ return -EAGAIN;
+ btrfs_wait_ordered_range(inode, off, len);
+ }
++ return 0;
+ }
+
+ static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
+@@ -2865,15 +2874,24 @@ static void btrfs_double_extent_unlock(s
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+ }
+
+-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+- struct inode *inode2, u64 loff2, u64 len)
++static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
++ struct inode *inode2, u64 loff2, u64 len,
++ bool retry_range_locking)
+ {
++ int ret;
++
+ if (inode1 < inode2) {
+ swap(inode1, inode2);
+ swap(loff1, loff2);
+ }
+- lock_extent_range(inode1, loff1, len);
+- lock_extent_range(inode2, loff2, len);
++ ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
++ if (ret)
++ return ret;
++ ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
++ if (ret)
++ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
++ loff1 + len - 1);
++ return ret;
+ }
+
+ struct cmp_pages {
+@@ -2889,11 +2907,15 @@ static void btrfs_cmp_data_free(struct c
+
+ for (i = 0; i < cmp->num_pages; i++) {
+ pg = cmp->src_pages[i];
+- if (pg)
++ if (pg) {
++ unlock_page(pg);
+ page_cache_release(pg);
++ }
+ pg = cmp->dst_pages[i];
+- if (pg)
++ if (pg) {
++ unlock_page(pg);
+ page_cache_release(pg);
++ }
+ }
+ kfree(cmp->src_pages);
+ kfree(cmp->dst_pages);
+@@ -2954,6 +2976,8 @@ static int btrfs_cmp_data(struct inode *
+
+ src_page = cmp->src_pages[i];
+ dst_page = cmp->dst_pages[i];
++ ASSERT(PageLocked(src_page));
++ ASSERT(PageLocked(dst_page));
+
+ addr = kmap_atomic(src_page);
+ dst_addr = kmap_atomic(dst_page);
+@@ -3066,14 +3090,46 @@ static int btrfs_extent_same(struct inod
+ goto out_unlock;
+ }
+
++again:
+ ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+ if (ret)
+ goto out_unlock;
+
+ if (same_inode)
+- lock_extent_range(src, same_lock_start, same_lock_len);
++ ret = lock_extent_range(src, same_lock_start, same_lock_len,
++ false);
+ else
+- btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
++ ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
++ false);
++ /*
++ * If one of the inodes has dirty pages in the respective range or
++ * ordered extents, we need to flush delalloc and wait for all ordered
++ * extents in the range. We must unlock the pages and the ranges in the
++ * io trees to avoid deadlocks when flushing delalloc (requires locking
++ * pages) and when waiting for ordered extents to complete (they require
++ * range locking).
++ */
++ if (ret == -EAGAIN) {
++ /*
++ * Ranges in the io trees already unlocked. Now unlock all
++ * pages before waiting for all IO to complete.
++ */
++ btrfs_cmp_data_free(&cmp);
++ if (same_inode) {
++ btrfs_wait_ordered_range(src, same_lock_start,
++ same_lock_len);
++ } else {
++ btrfs_wait_ordered_range(src, loff, len);
++ btrfs_wait_ordered_range(dst, dst_loff, len);
++ }
++ goto again;
++ }
++ ASSERT(ret == 0);
++ if (WARN_ON(ret)) {
++ /* ranges in the io trees already unlocked */
++ btrfs_cmp_data_free(&cmp);
++ return ret;
++ }
+
+ /* pass original length for comparison so we stay within i_size */
+ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
+@@ -3895,9 +3951,15 @@ static noinline long btrfs_ioctl_clone(s
+ u64 lock_start = min_t(u64, off, destoff);
+ u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
+
+- lock_extent_range(src, lock_start, lock_len);
++ ret = lock_extent_range(src, lock_start, lock_len, true);
+ } else {
+- btrfs_double_extent_lock(src, off, inode, destoff, len);
++ ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
++ true);
++ }
++ ASSERT(ret == 0);
++ if (WARN_ON(ret)) {
++ /* ranges in the io trees already unlocked */
++ goto out_unlock;
+ }
+
+ ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
--- /dev/null
+From 313140023026ae542ad76e7e268c56a1eaa2c28e Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 27 Jan 2016 18:37:47 +0000
+Subject: Btrfs: fix page reading in extent_same ioctl leading to csum errors
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 313140023026ae542ad76e7e268c56a1eaa2c28e upstream.
+
+In the extent_same ioctl, we were grabbing the pages (locked) and
+attempting to read them without bothering about any concurrent IO
+against them. That is, we were not checking for any ongoing ordered
+extents nor waiting for them to complete, which leads to a race where
+the extent_same() code gets a checksum verification error when it
+reads the pages, producing a message like the following in dmesg
+and making the operation fail to user space with -ENOMEM:
+
+[18990.161265] BTRFS warning (device sdc): csum failed ino 259 off 495616 csum 685204116 expected csum 1515870868
+
+Fix this by using btrfs_readpage() for reading the pages instead of
+extent_read_full_page_nolock(), which waits for any concurrent ordered
+extents to complete and locks the io range. Also do better error handling
+and don't treat all failures as -ENOMEM, as that's clearly misleading,
+becoming identical to the checks and operation of prepare_uptodate_page().
+
+The use of extent_read_full_page_nolock() was required before
+commit f441460202cb ("btrfs: fix deadlock with extent-same and readpage"),
+as we had the range locked in an inode's io tree before attempting to
+read the pages.
+
+Fixes: f441460202cb ("btrfs: fix deadlock with extent-same and readpage")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 29 +++++++++++++++++++++--------
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -2782,21 +2782,27 @@ out:
+ static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
+ {
+ struct page *page;
+- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ if (!PageUptodate(page)) {
+- if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
+- 0))
+- return NULL;
++ int ret;
++
++ ret = btrfs_readpage(NULL, page);
++ if (ret)
++ return ERR_PTR(ret);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+- return NULL;
++ return ERR_PTR(-EIO);
++ }
++ if (page->mapping != inode->i_mapping) {
++ unlock_page(page);
++ page_cache_release(page);
++ return ERR_PTR(-EAGAIN);
+ }
+ }
+
+@@ -2810,9 +2816,16 @@ static int gather_extent_pages(struct in
+ pgoff_t index = off >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < num_pages; i++) {
++again:
+ pages[i] = extent_same_get_page(inode, index + i);
+- if (!pages[i])
+- return -ENOMEM;
++ if (IS_ERR(pages[i])) {
++ int err = PTR_ERR(pages[i]);
++
++ if (err == -EAGAIN)
++ goto again;
++ pages[i] = NULL;
++ return err;
++ }
+ }
+ return 0;
+ }
--- /dev/null
+From f5cdedd73fa71b74dcc42f2a11a5735d89ce7c4f Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Mon, 30 Nov 2015 17:27:06 +0100
+Subject: btrfs: handle invalid num_stripes in sys_array
+
+From: David Sterba <dsterba@suse.com>
+
+commit f5cdedd73fa71b74dcc42f2a11a5735d89ce7c4f upstream.
+
+We can handle the special case of num_stripes == 0 directly inside
+btrfs_read_sys_array. The BUG_ON in btrfs_chunk_item_size is there to
+catch other unhandled cases where we fail to validate external data.
+
+A crafted or corrupted image crashes at mount time:
+
+BTRFS: device fsid 9006933e-2a9a-44f0-917f-514252aeec2c devid 1 transid 7 /dev/loop0
+BTRFS info (device loop0): disk space caching is enabled
+BUG: failure at fs/btrfs/ctree.h:337/btrfs_chunk_item_size()!
+Kernel panic - not syncing: BUG!
+CPU: 0 PID: 313 Comm: mount Not tainted 4.2.5-00657-ge047887-dirty #25
+Stack:
+ 637af890 60062489 602aeb2e 604192ba
+ 60387961 00000011 637af8a0 6038a835
+ 637af9c0 6038776b 634ef32b 00000000
+Call Trace:
+ [<6001c86d>] show_stack+0xfe/0x15b
+ [<6038a835>] dump_stack+0x2a/0x2c
+ [<6038776b>] panic+0x13e/0x2b3
+ [<6020f099>] btrfs_read_sys_array+0x25d/0x2ff
+ [<601cfbbe>] open_ctree+0x192d/0x27af
+ [<6019c2c1>] btrfs_mount+0x8f5/0xb9a
+ [<600bc9a7>] mount_fs+0x11/0xf3
+ [<600d5167>] vfs_kern_mount+0x75/0x11a
+ [<6019bcb0>] btrfs_mount+0x2e4/0xb9a
+ [<600bc9a7>] mount_fs+0x11/0xf3
+ [<600d5167>] vfs_kern_mount+0x75/0x11a
+ [<600d710b>] do_mount+0xa35/0xbc9
+ [<600d7557>] SyS_mount+0x95/0xc8
+ [<6001e884>] handle_syscall+0x6b/0x8e
+
+Reported-by: Jiri Slaby <jslaby@suse.com>
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6512,6 +6512,14 @@ int btrfs_read_sys_array(struct btrfs_ro
+ goto out_short_read;
+
+ num_stripes = btrfs_chunk_num_stripes(sb, chunk);
++ if (!num_stripes) {
++ printk(KERN_ERR
++ "BTRFS: invalid number of stripes %u in sys_array at offset %u\n",
++ num_stripes, cur_offset);
++ ret = -EIO;
++ break;
++ }
++
+ len = btrfs_chunk_item_size(num_stripes);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
--- /dev/null
+From bc4ef7592f657ae81b017207a1098817126ad4cb Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Fri, 13 Nov 2015 13:44:28 +0100
+Subject: btrfs: properly set the termination value of ctx->pos in readdir
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Sterba <dsterba@suse.com>
+
+commit bc4ef7592f657ae81b017207a1098817126ad4cb upstream.
+
+The value of ctx->pos in the last readdir call is supposed to be set to
+INT_MAX due to 32bit compatibility, unless 'pos' is intentionally set to a
+larger value, then it's LLONG_MAX.
+
+There's a report from PaX SIZE_OVERFLOW plugin that "ctx->pos++"
+overflows (https://forums.grsecurity.net/viewtopic.php?f=1&t=4284), on a
+64bit arch, where the value is 0x7fffffffffffffff ie. LLONG_MAX before
+the increment.
+
+We can get to that situation like that:
+
+* emit all regular readdir entries
+* still in the same call to readdir, bump the last pos to INT_MAX
+* next call to readdir will not emit any entries, but will reach the
+ bump code again, finds pos to be INT_MAX and sets it to LLONG_MAX
+
+Normally this is not a problem, but if we call readdir again, we'll find
+'pos' set to LLONG_MAX and the unconditional increment will overflow.
+
+The report from Victor at
+(http://thread.gmane.org/gmane.comp.file-systems.btrfs/49500) with debugging
+print shows that pattern:
+
+ Overflow: e
+ Overflow: 7fffffff
+ Overflow: 7fffffffffffffff
+ PAX: size overflow detected in function btrfs_real_readdir
+ fs/btrfs/inode.c:5760 cicus.935_282 max, count: 9, decl: pos; num: 0;
+ context: dir_context;
+ CPU: 0 PID: 2630 Comm: polkitd Not tainted 4.2.3-grsec #1
+ Hardware name: Gigabyte Technology Co., Ltd. H81ND2H/H81ND2H, BIOS F3 08/11/2015
+ ffffffff81901608 0000000000000000 ffffffff819015e6 ffffc90004973d48
+ ffffffff81742f0f 0000000000000007 ffffffff81901608 ffffc90004973d78
+ ffffffff811cb706 0000000000000000 ffff8800d47359e0 ffffc90004973ed8
+ Call Trace:
+ [<ffffffff81742f0f>] dump_stack+0x4c/0x7f
+ [<ffffffff811cb706>] report_size_overflow+0x36/0x40
+ [<ffffffff812ef0bc>] btrfs_real_readdir+0x69c/0x6d0
+ [<ffffffff811dafc8>] iterate_dir+0xa8/0x150
+ [<ffffffff811e6d8d>] ? __fget_light+0x2d/0x70
+ [<ffffffff811dba3a>] SyS_getdents+0xba/0x1c0
+ Overflow: 1a
+ [<ffffffff811db070>] ? iterate_dir+0x150/0x150
+ [<ffffffff81749b69>] entry_SYSCALL_64_fastpath+0x12/0x83
+
+The jump from 7fffffff to 7fffffffffffffff happens when new dir entries
+are not yet synced and are processed from the delayed list. Then the code
+could go to the bump section again even though it might not emit any new
+dir entries from the delayed list.
+
+The fix avoids entering the "bump" section again once we've finished
+emitting the entries, both for synced and delayed entries.
+
+References: https://forums.grsecurity.net/viewtopic.php?f=1&t=4284
+Reported-by: Victor <services@swwu.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Tested-by: Holger Hoffstätte <holger.hoffstaette@googlemail.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/delayed-inode.c | 3 ++-
+ fs/btrfs/delayed-inode.h | 2 +-
+ fs/btrfs/inode.c | 14 +++++++++++++-
+ 3 files changed, 16 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1694,7 +1694,7 @@ int btrfs_should_delete_dir_index(struct
+ *
+ */
+ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
+- struct list_head *ins_list)
++ struct list_head *ins_list, bool *emitted)
+ {
+ struct btrfs_dir_item *di;
+ struct btrfs_delayed_item *curr, *next;
+@@ -1738,6 +1738,7 @@ int btrfs_readdir_delayed_dir_index(stru
+
+ if (over)
+ return 1;
++ *emitted = true;
+ }
+ return 0;
+ }
+--- a/fs/btrfs/delayed-inode.h
++++ b/fs/btrfs/delayed-inode.h
+@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list
+ int btrfs_should_delete_dir_index(struct list_head *del_list,
+ u64 index);
+ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
+- struct list_head *ins_list);
++ struct list_head *ins_list, bool *emitted);
+
+ /* for init */
+ int __init btrfs_delayed_inode_init(void);
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5741,6 +5741,7 @@ static int btrfs_real_readdir(struct fil
+ char *name_ptr;
+ int name_len;
+ int is_curr = 0; /* ctx->pos points to the current index? */
++ bool emitted;
+
+ /* FIXME, use a real flag for deciding about the key type */
+ if (root->fs_info->tree_root == root)
+@@ -5769,6 +5770,7 @@ static int btrfs_real_readdir(struct fil
+ if (ret < 0)
+ goto err;
+
++ emitted = false;
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+@@ -5848,6 +5850,7 @@ skip:
+
+ if (over)
+ goto nopos;
++ emitted = true;
+ di_len = btrfs_dir_name_len(leaf, di) +
+ btrfs_dir_data_len(leaf, di) + sizeof(*di);
+ di_cur += di_len;
+@@ -5860,11 +5863,20 @@ next:
+ if (key_type == BTRFS_DIR_INDEX_KEY) {
+ if (is_curr)
+ ctx->pos++;
+- ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
++ ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
+ if (ret)
+ goto nopos;
+ }
+
++ /*
++ * If we haven't emitted any dir entry, we must not touch ctx->pos as
++ * it was set to the termination value in previous call. We assume
++ * that "." and ".." were emitted if we reach this point and set the
++ * termination value as well for an empty directory.
++ */
++ if (ctx->pos > 2 && !emitted)
++ goto nopos;
++
+ /* Reached end of directory/root. Bump pos past the last item. */
+ ctx->pos++;
+
--- /dev/null
+From 4b550af519854421dfec9f7732cdddeb057134b2 Mon Sep 17 00:00:00 2001
+From: Anton Protopopov <a.s.protopopov@gmail.com>
+Date: Wed, 10 Feb 2016 12:50:21 -0500
+Subject: cifs: fix erroneous return value
+
+From: Anton Protopopov <a.s.protopopov@gmail.com>
+
+commit 4b550af519854421dfec9f7732cdddeb057134b2 upstream.
+
+The setup_ntlmv2_rsp() function may return positive value ENOMEM instead
+of -ENOMEM in case of kmalloc failure.
+
+Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsencrypt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/cifs/cifsencrypt.c
++++ b/fs/cifs/cifsencrypt.c
+@@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, c
+
+ ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+- rc = ENOMEM;
++ rc = -ENOMEM;
+ ses->auth_key.len = 0;
+ goto setup_ntlmv2_rsp_ret;
+ }
--- /dev/null
+From 820962dc700598ffe8cd21b967e30e7520c34748 Mon Sep 17 00:00:00 2001
+From: Rabin Vincent <rabin.vincent@axis.com>
+Date: Wed, 23 Dec 2015 07:32:41 +0100
+Subject: cifs: fix race between call_async() and reconnect()
+
+From: Rabin Vincent <rabin.vincent@axis.com>
+
+commit 820962dc700598ffe8cd21b967e30e7520c34748 upstream.
+
+cifs_call_async() queues the MID to the pending list and calls
+smb_send_rqst(). If smb_send_rqst() performs a partial send, it sets
+the tcpStatus to CifsNeedReconnect and returns an error code to
+cifs_call_async(). In this case, cifs_call_async() removes the MID
+from the list and returns to the caller.
+
+However, cifs_call_async() releases the server mutex _before_ removing
+the MID. This means that a cifs_reconnect() can race with this function
+and manage to remove the MID from the list and delete the entry before
+cifs_call_async() calls cifs_delete_mid(). This leads to various
+crashes due to the use after free in cifs_delete_mid().
+
+Task1 Task2
+
+cifs_call_async():
+ - rc = -EAGAIN
+ - mutex_unlock(srv_mutex)
+
+ cifs_reconnect():
+ - mutex_lock(srv_mutex)
+ - mutex_unlock(srv_mutex)
+ - list_delete(mid)
+ - mid->callback()
+ cifs_writev_callback():
+ - mutex_lock(srv_mutex)
+ - delete(mid)
+ - mutex_unlock(srv_mutex)
+
+ - cifs_delete_mid(mid) <---- use after free
+
+Fix this by removing the MID in cifs_call_async() before releasing the
+srv_mutex. Also hold the srv_mutex in cifs_reconnect() until the MIDs
+are moved out of the pending list.
+
+Signed-off-by: Rabin Vincent <rabin.vincent@axis.com>
+Acked-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
+Signed-off-by: Steve French <sfrench@localhost.localdomain>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/connect.c | 2 +-
+ fs/cifs/transport.c | 6 ++++--
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -368,7 +368,6 @@ cifs_reconnect(struct TCP_Server_Info *s
+ server->session_key.response = NULL;
+ server->session_key.len = 0;
+ server->lstrp = jiffies;
+- mutex_unlock(&server->srv_mutex);
+
+ /* mark submitted MIDs for retry and issue callback */
+ INIT_LIST_HEAD(&retry_list);
+@@ -381,6 +380,7 @@ cifs_reconnect(struct TCP_Server_Info *s
+ list_move(&mid_entry->qhead, &retry_list);
+ }
+ spin_unlock(&GlobalMid_Lock);
++ mutex_unlock(&server->srv_mutex);
+
+ cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
+ list_for_each_safe(tmp, tmp2, &retry_list) {
+--- a/fs/cifs/transport.c
++++ b/fs/cifs/transport.c
+@@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *
+ cifs_in_send_dec(server);
+ cifs_save_when_sent(mid);
+
+- if (rc < 0)
++ if (rc < 0) {
+ server->sequence_number -= 2;
++ cifs_delete_mid(mid);
++ }
++
+ mutex_unlock(&server->srv_mutex);
+
+ if (rc == 0)
+ return 0;
+
+- cifs_delete_mid(mid);
+ add_credits_and_wake_if(server, credits, optype);
+ return rc;
+ }
--- /dev/null
+From ec7147a99e33a9e4abad6fc6e1b40d15df045d53 Mon Sep 17 00:00:00 2001
+From: Jamie Bainbridge <jamie.bainbridge@gmail.com>
+Date: Sat, 7 Nov 2015 22:13:49 +1000
+Subject: cifs: Ratelimit kernel log messages
+
+From: Jamie Bainbridge <jamie.bainbridge@gmail.com>
+
+commit ec7147a99e33a9e4abad6fc6e1b40d15df045d53 upstream.
+
+Under some conditions, CIFS can repeatedly call the cifs_dbg() logging
+wrapper. If done rapidly enough, the console framebuffer can softlockup
+or "rcu_sched self-detected stall". Apply the built-in log ratelimiters
+to prevent such hangs.
+
+Signed-off-by: Jamie Bainbridge <jamie.bainbridge@gmail.com>
+Signed-off-by: Steve French <smfrench@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifs_debug.c | 2 +-
+ fs/cifs/cifs_debug.h | 9 ++++-----
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/cifs/cifs_debug.c
++++ b/fs/cifs/cifs_debug.c
+@@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+- pr_err("CIFS VFS: %pV", &vaf);
++ pr_err_ratelimited("CIFS VFS: %pV", &vaf);
+
+ va_end(args);
+ }
+--- a/fs/cifs/cifs_debug.h
++++ b/fs/cifs/cifs_debug.h
+@@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const c
+ /* information message: e.g., configuration, major event */
+ #define cifs_dbg(type, fmt, ...) \
+ do { \
+- if (type == FYI) { \
+- if (cifsFYI & CIFS_INFO) { \
+- pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \
+- } \
++ if (type == FYI && cifsFYI & CIFS_INFO) { \
++ pr_debug_ratelimited("%s: " \
++ fmt, __FILE__, ##__VA_ARGS__); \
+ } else if (type == VFS) { \
+ cifs_vfs_err(fmt, ##__VA_ARGS__); \
+ } else if (type == NOISY && type != 0) { \
+- pr_debug(fmt, ##__VA_ARGS__); \
++ pr_debug_ratelimited(fmt, ##__VA_ARGS__); \
+ } \
+ } while (0)
+
--- /dev/null
+From 01b9b0b28626db4a47d7f48744d70abca9914ef1 Mon Sep 17 00:00:00 2001
+From: Vasily Averin <vvs@virtuozzo.com>
+Date: Thu, 14 Jan 2016 13:41:14 +0300
+Subject: cifs_dbg() outputs an uninitialized buffer in cifs_readdir()
+
+From: Vasily Averin <vvs@virtuozzo.com>
+
+commit 01b9b0b28626db4a47d7f48744d70abca9914ef1 upstream.
+
+In some cases tmp_buf can be not filled in cifs_filldir and stay uninitialized,
+therefore its printk with "%s" modifier can leak content of kernelspace memory.
+If old content of this buffer does not contain '\0' access beyond end of
+allocated object can crash the host.
+
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Signed-off-by: Steve French <sfrench@localhost.localdomain>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/readdir.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/cifs/readdir.c
++++ b/fs/cifs/readdir.c
+@@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, stru
+ * if buggy server returns . and .. late do we want to
+ * check for that here?
+ */
++ *tmp_buf = 0;
+ rc = cifs_filldir(current_entry, file, ctx,
+ tmp_buf, max_len);
+ if (rc) {
--- /dev/null
+From 9273a8bbf58a15051e53a777389a502420ddc60e Mon Sep 17 00:00:00 2001
+From: Toshi Kani <toshi.kani@hpe.com>
+Date: Wed, 17 Feb 2016 13:11:29 -0800
+Subject: devm_memremap_release(): fix memremap'd addr handling
+
+From: Toshi Kani <toshi.kani@hpe.com>
+
+commit 9273a8bbf58a15051e53a777389a502420ddc60e upstream.
+
+The pmem driver calls devm_memremap() to map a persistent memory range.
+When the pmem driver is unloaded, this memremap'd range is not released
+so the kernel will leak a vma.
+
+Fix devm_memremap_release() to handle a given memremap'd address
+properly.
+
+Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
+Acked-by: Dan Williams <dan.j.williams@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Cc: Matthew Wilcox <willy@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/memremap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/memremap.c
++++ b/kernel/memremap.c
+@@ -111,7 +111,7 @@ EXPORT_SYMBOL(memunmap);
+
+ static void devm_memremap_release(struct device *dev, void *res)
+ {
+- memunmap(res);
++ memunmap(*(void **)res);
+ }
+
+ static int devm_memremap_match(struct device *dev, void *res, void *match_data)
--- /dev/null
+From ea535e418c01837d07b6c94e817540f50bfdadb0 Mon Sep 17 00:00:00 2001
+From: Laura Abbott <labbott@fedoraproject.org>
+Date: Thu, 14 Jan 2016 15:16:50 -0800
+Subject: dma-debug: switch check from _text to _stext
+
+From: Laura Abbott <labbott@fedoraproject.org>
+
+commit ea535e418c01837d07b6c94e817540f50bfdadb0 upstream.
+
+In include/asm-generic/sections.h:
+
+ /*
+ * Usage guidelines:
+ * _text, _data: architecture specific, don't use them in
+ * arch-independent code
+ * [_stext, _etext]: contains .text.* sections, may also contain
+ * .rodata.*
+ * and/or .init.* sections
+
+_text is not guaranteed across architectures. Architectures such as ARM
+may reuse parts which are not actually text and erroneously trigger a bug.
+Switch to using _stext which is guaranteed to contain text sections.
+
+Came out of https://lkml.kernel.org/g/<567B1176.4000106@redhat.com>
+
+Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/dma-debug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/dma-debug.c
++++ b/lib/dma-debug.c
+@@ -1181,7 +1181,7 @@ static inline bool overlap(void *addr, u
+
+ static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
+ {
+- if (overlap(addr, len, _text, _etext) ||
++ if (overlap(addr, len, _stext, _etext) ||
+ overlap(addr, len, __start_rodata, __end_rodata))
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
+ }
--- /dev/null
+From c6400ba7e13a41539342f1b6e1f9e78419cb0148 Mon Sep 17 00:00:00 2001
+From: Matthew Wilcox <willy@linux.intel.com>
+Date: Tue, 2 Feb 2016 16:57:55 -0800
+Subject: drivers/hwspinlock: fix race between radix tree insertion and lookup
+
+From: Matthew Wilcox <willy@linux.intel.com>
+
+commit c6400ba7e13a41539342f1b6e1f9e78419cb0148 upstream.
+
+of_hwspin_lock_get_id() is protected by the RCU lock, which means that
+insertions can occur simultaneously with the lookup. If the radix tree
+transitions from a height of 0, we can see a slot with the indirect_ptr
+bit set, which will cause us to at least read random memory, and could
+cause other havoc.
+
+Fix this by using the newly introduced radix_tree_iter_retry().
+
+Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Ohad Ben-Cohen <ohad@wizery.com>
+Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hwspinlock/hwspinlock_core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/hwspinlock/hwspinlock_core.c
++++ b/drivers/hwspinlock/hwspinlock_core.c
+@@ -313,6 +313,10 @@ int of_hwspin_lock_get_id(struct device_
+ hwlock = radix_tree_deref_slot(slot);
+ if (unlikely(!hwlock))
+ continue;
++ if (radix_tree_is_indirect_ptr(hwlock)) {
++ slot = radix_tree_iter_retry(&iter);
++ continue;
++ }
+
+ if (hwlock->bank->dev->of_node == args.np) {
+ ret = 0;
--- /dev/null
+From 461c7fa126794157484dca48e88effa4963e3af3 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Tue, 2 Feb 2016 16:57:35 -0800
+Subject: drivers/scsi/sg.c: mark VMA as VM_IO to prevent migration
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 461c7fa126794157484dca48e88effa4963e3af3 upstream.
+
+Reduced testcase:
+
+ #include <fcntl.h>
+ #include <unistd.h>
+ #include <sys/mman.h>
+ #include <numaif.h>
+
+ #define SIZE 0x2000
+
+ int main()
+ {
+ int fd;
+ void *p;
+
+ fd = open("/dev/sg0", O_RDWR);
+ p = mmap(NULL, SIZE, PROT_EXEC, MAP_PRIVATE | MAP_LOCKED, fd, 0);
+ mbind(p, SIZE, 0, NULL, 0, MPOL_MF_MOVE);
+ return 0;
+ }
+
+We shouldn't try to migrate pages in sg VMA as we don't have a way to
+update Sg_scatter_hold::pages accordingly from mm core.
+
+Let's mark the VMA as VM_IO to indicate to mm core that the VMA is not
+migratable.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Doug Gilbert <dgilbert@interlog.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Shiraz Hashim <shashim@codeaurora.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: syzkaller <syzkaller@googlegroups.com>
+Cc: Kostya Serebryany <kcc@google.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sg.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/scsi/sg.c
++++ b/drivers/scsi/sg.c
+@@ -1261,7 +1261,7 @@ sg_mmap(struct file *filp, struct vm_are
+ }
+
+ sfp->mmap_called = 1;
+- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
++ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_private_data = sfp;
+ vma->vm_ops = &sg_mmap_vm_ops;
+ return 0;
--- /dev/null
+From d7ce36924344ace0dbdc855b1206cacc46b36d45 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 5 Feb 2016 15:36:16 -0800
+Subject: dump_stack: avoid potential deadlocks
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit d7ce36924344ace0dbdc855b1206cacc46b36d45 upstream.
+
+Some servers experienced fatal deadlocks because of a combination of
+bugs, leading to multiple cpus calling dump_stack().
+
+The checksumming bug was fixed in commit 34ae6a1aa054 ("ipv6: update
+skb->csum when CE mark is propagated").
+
+The second problem is a faulty locking in dump_stack()
+
+CPU1 runs in process context and calls dump_stack(), grabs dump_lock.
+
+ CPU2 receives a TCP packet under softirq, grabs socket spinlock, and
+ calls dump_stack() from netdev_rx_csum_fault().
+
+ dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since
+ dump_lock is owned by CPU1
+
+While dumping its stack, CPU1 is interrupted by a softirq, and happens
+to process a packet for the TCP socket locked by CPU2.
+
+CPU1 spins forever in spin_lock() : deadlock
+
+Stack trace on CPU1 looked like :
+
+ NMI backtrace for cpu 1
+ RIP: _raw_spin_lock+0x25/0x30
+ ...
+ Call Trace:
+ <IRQ>
+ tcp_v6_rcv+0x243/0x620
+ ip6_input_finish+0x11f/0x330
+ ip6_input+0x38/0x40
+ ip6_rcv_finish+0x3c/0x90
+ ipv6_rcv+0x2a9/0x500
+ process_backlog+0x461/0xaa0
+ net_rx_action+0x147/0x430
+ __do_softirq+0x167/0x2d0
+ call_softirq+0x1c/0x30
+ do_softirq+0x3f/0x80
+ irq_exit+0x6e/0xc0
+ smp_call_function_single_interrupt+0x35/0x40
+ call_function_single_interrupt+0x6a/0x70
+ <EOI>
+ printk+0x4d/0x4f
+ printk_address+0x31/0x33
+ print_trace_address+0x33/0x3c
+ print_context_stack+0x7f/0x119
+ dump_trace+0x26b/0x28e
+ show_trace_log_lvl+0x4f/0x5c
+ show_stack_log_lvl+0x104/0x113
+ show_stack+0x42/0x44
+ dump_stack+0x46/0x58
+ netdev_rx_csum_fault+0x38/0x3c
+ __skb_checksum_complete_head+0x6e/0x80
+ __skb_checksum_complete+0x11/0x20
+ tcp_rcv_established+0x2bd5/0x2fd0
+ tcp_v6_do_rcv+0x13c/0x620
+ sk_backlog_rcv+0x15/0x30
+ release_sock+0xd2/0x150
+ tcp_recvmsg+0x1c1/0xfc0
+ inet_recvmsg+0x7d/0x90
+ sock_recvmsg+0xaf/0xe0
+ ___sys_recvmsg+0x111/0x3b0
+ SyS_recvmsg+0x5c/0xb0
+ system_call_fastpath+0x16/0x1b
+
+Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Alex Thorlton <athorlton@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/dump_stack.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/lib/dump_stack.c
++++ b/lib/dump_stack.c
+@@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(
+
+ asmlinkage __visible void dump_stack(void)
+ {
++ unsigned long flags;
+ int was_locked;
+ int old;
+ int cpu;
+@@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(voi
+ * Permit this cpu to perform nested stack dumps while serialising
+ * against other CPUs
+ */
+- preempt_disable();
+-
+ retry:
++ local_irq_save(flags);
+ cpu = smp_processor_id();
+ old = atomic_cmpxchg(&dump_lock, -1, cpu);
+ if (old == -1) {
+@@ -43,6 +43,7 @@ retry:
+ } else if (old == cpu) {
+ was_locked = 1;
+ } else {
++ local_irq_restore(flags);
+ cpu_relax();
+ goto retry;
+ }
+@@ -52,7 +53,7 @@ retry:
+ if (!was_locked)
+ atomic_set(&dump_lock, -1);
+
+- preempt_enable();
++ local_irq_restore(flags);
+ }
+ #else
+ asmlinkage __visible void dump_stack(void)
--- /dev/null
+From bcff24887d00bce102e0857d7b0a8c44a40f53d1 Mon Sep 17 00:00:00 2001
+From: Eryu Guan <guaneryu@gmail.com>
+Date: Fri, 12 Feb 2016 01:20:43 -0500
+Subject: ext4: don't read blocks from disk after extents being swapped
+
+From: Eryu Guan <guaneryu@gmail.com>
+
+commit bcff24887d00bce102e0857d7b0a8c44a40f53d1 upstream.
+
+I notice ext4/307 fails occasionally on ppc64 host, reporting md5
+checksum mismatch after moving data from original file to donor file.
+
+The reason is that move_extent_per_page() calls __block_write_begin()
+and block_commit_write() to write saved data from original inode blocks
+to donor inode blocks, but __block_write_begin() not only maps buffer
+heads but also reads block content from disk if the size is not block
+size aligned. At this time the physical block number in mapped buffer
+head is pointing to the donor file not the original file, and that
+results in reading wrong data into the page, which gets written to disk
+in the following block_commit_write() call.
+
+This also can be reproduced by the following script on 1k block size ext4
+on x86_64 host:
+
+ mnt=/mnt/ext4
+ donorfile=$mnt/donor
+ testfile=$mnt/testfile
+ e4compact=~/xfstests/src/e4compact
+
+ rm -f $donorfile $testfile
+
+ # reserve space for donor file, written by 0xaa and sync to disk to
+ # avoid EBUSY on EXT4_IOC_MOVE_EXT
+ xfs_io -fc "pwrite -S 0xaa 0 1m" -c "fsync" $donorfile
+
+ # create test file written by 0xbb
+ xfs_io -fc "pwrite -S 0xbb 0 1023" -c "fsync" $testfile
+
+ # compute initial md5sum
+ md5sum $testfile | tee md5sum.txt
+ # drop cache, force e4compact to read data from disk
+ echo 3 > /proc/sys/vm/drop_caches
+
+ # test defrag
+ echo "$testfile" | $e4compact -i -v -f $donorfile
+ # check md5sum
+ md5sum -c md5sum.txt
+
+Fix it by creating & mapping buffer heads only but not reading blocks
+from disk, because all the data in page is guaranteed to be up-to-date
+in mext_page_mkuptodate().
+
+Signed-off-by: Eryu Guan <guaneryu@gmail.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ [ Also advance bh after each ext4_get_block() call so that every block is
+ mapped into its own buffer head rather than all into the first one. ]
+
+ fs/ext4/move_extent.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp
+ ext4_lblk_t orig_blk_offset, donor_blk_offset;
+ unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+ unsigned int tmp_data_size, data_size, replaced_size;
+- int err2, jblocks, retries = 0;
++ int i, err2, jblocks, retries = 0;
+ int replaced_count = 0;
+ int from = data_offset_in_page << orig_inode->i_blkbits;
+ int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ struct super_block *sb = orig_inode->i_sb;
++ struct buffer_head *bh = NULL;
+
+ /*
+ * It needs twice the amount of ordinary journal buffers because
+@@ -380,8 +381,17 @@ data_copy:
+ }
+ /* Perform all necessary steps similar write_begin()/write_end()
+ * but keeping in mind that i_size will not change */
+- *err = __block_write_begin(pagep[0], from, replaced_size,
+- ext4_get_block);
++ if (!page_has_buffers(pagep[0]))
++ create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
++ bh = page_buffers(pagep[0]);
++ for (i = 0; i < data_offset_in_page; i++)
++ bh = bh->b_this_page;
++ for (i = 0; i < block_len_in_page; i++) {
++ *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
++ if (*err < 0)
++ break;
++ bh = bh->b_this_page;
++ }
+ if (!*err)
+ *err = block_commit_write(pagep[0], from, from + replaced_size);
+
--- /dev/null
+From 46901760b46064964b41015d00c140c83aa05bcf Mon Sep 17 00:00:00 2001
+From: Insu Yun <wuninsu@gmail.com>
+Date: Fri, 12 Feb 2016 01:15:59 -0500
+Subject: ext4: fix potential integer overflow
+
+From: Insu Yun <wuninsu@gmail.com>
+
+commit 46901760b46064964b41015d00c140c83aa05bcf upstream.
+
+Since sizeof(ext_new_group_data) > sizeof(ext_new_flex_group_data),
+an integer overflow could happen.
+Therefore, the integer overflow sanity check needs to be fixed.
+
+Signed-off-by: Insu Yun <wuninsu@gmail.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/resize.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *
+ if (flex_gd == NULL)
+ goto out3;
+
+- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
++ if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
+ goto out2;
+ flex_gd->count = flexbg_size;
+
--- /dev/null
+From 05145bd799e498ce4e3b5145894174ee881f02b0 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 11 Feb 2016 23:15:12 -0500
+Subject: ext4: fix scheduling in atomic on group checksum failure
+
+From: Jan Kara <jack@suse.cz>
+
+commit 05145bd799e498ce4e3b5145894174ee881f02b0 upstream.
+
+When block group checksum is wrong, we call ext4_error() while holding
+group spinlock from ext4_init_block_bitmap() or
+ext4_init_inode_bitmap() which results in scheduling while in atomic.
+Fix the issue by calling ext4_error() later after dropping the spinlock.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/balloc.c | 7 ++++---
+ fs/ext4/ialloc.c | 6 ++++--
+ 2 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct
+ /* If checksum is bad mark all blocks used to prevent allocation
+ * essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
+- ext4_error(sb, "Checksum bad for group %u", block_group);
+ grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+@@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct sup
+ }
+ ext4_lock_group(sb, block_group);
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+-
+ err = ext4_init_block_bitmap(sb, bh, block_group, desc);
+ set_bitmap_uptodate(bh);
+ set_buffer_uptodate(bh);
+ ext4_unlock_group(sb, block_group);
+ unlock_buffer(bh);
+- if (err)
++ if (err) {
++ ext4_error(sb, "Failed to init block bitmap for group "
++ "%u: %d", block_group, err);
+ goto out;
++ }
+ goto verify;
+ }
+ ext4_unlock_group(sb, block_group);
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct
+ /* If checksum is bad mark all blocks and inodes use to prevent
+ * allocation, essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
+- ext4_error(sb, "Checksum bad for group %u", block_group);
+ grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+@@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_bloc
+ set_buffer_verified(bh);
+ ext4_unlock_group(sb, block_group);
+ unlock_buffer(bh);
+- if (err)
++ if (err) {
++ ext4_error(sb, "Failed to init inode bitmap for group "
++ "%u: %d", block_group, err);
+ goto out;
++ }
+ return bh;
+ }
+ ext4_unlock_group(sb, block_group);
--- /dev/null
+From 9aacdd354d197ad64685941b36d28ea20ab88757 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Fri, 15 Jan 2016 16:57:37 -0800
+Subject: fs/hugetlbfs/inode.c: fix bugs in hugetlb_vmtruncate_list()
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 9aacdd354d197ad64685941b36d28ea20ab88757 upstream.
+
+Hillf Danton noticed bugs in the hugetlb_vmtruncate_list routine. The
+argument end is of type pgoff_t. It was being converted to a vaddr
+offset and passed to unmap_hugepage_range. However, end was also being
+used as an argument to the vma_interval_tree_foreach controlling loop.
+In addition, the conversion of end to vaddr offset was incorrect.
+
+hugetlb_vmtruncate_list is called as part of a file truncate or
+fallocate hole punch operation.
+
+When truncating a hugetlbfs file, this bug could prevent some pages from
+being unmapped. This is possible if there are multiple vmas mapping the
+file, and there is a sufficiently sized hole between the mappings. The
+size of the hole between two vmas (A,B) must be such that the starting
+virtual address of B is greater than (ending virtual address of A <<
+PAGE_SHIFT). In this case, the pages in B would not be unmapped. If
+pages are not properly unmapped during truncate, the following BUG is
+hit:
+
+ kernel BUG at fs/hugetlbfs/inode.c:428!
+
+In the fallocate hole punch case, this bug could prevent pages from
+being unmapped as in the truncate case. However, for hole punch the
+result is that unmapped pages will not be removed during the operation.
+For hole punch, it is also possible that more pages than desired will be
+unmapped. This unnecessary unmapping will cause page faults to
+reestablish the mappings on subsequent page access.
+
+Fixes: 1bfad99ab (" hugetlbfs: hugetlb_vmtruncate_list() needs to take a range")
+Reported-by: Hillf Danton <hillf.zj@alibaba-inc.com>
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/hugetlbfs/inode.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -463,6 +463,7 @@ hugetlb_vmdelete_list(struct rb_root *ro
+ */
+ vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+ unsigned long v_offset;
++ unsigned long v_end;
+
+ /*
+ * Can the expression below overflow on 32-bit arches?
+@@ -475,15 +476,17 @@ hugetlb_vmdelete_list(struct rb_root *ro
+ else
+ v_offset = 0;
+
+- if (end) {
+- end = ((end - start) << PAGE_SHIFT) +
+- vma->vm_start + v_offset;
+- if (end > vma->vm_end)
+- end = vma->vm_end;
+- } else
+- end = vma->vm_end;
++ if (!end)
++ v_end = vma->vm_end;
++ else {
++ v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
++ + vma->vm_start;
++ if (v_end > vma->vm_end)
++ v_end = vma->vm_end;
++ }
+
+- unmap_hugepage_range(vma, vma->vm_start + v_offset, end, NULL);
++ unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
++ NULL);
+ }
+ }
+
--- /dev/null
+From fb75a4282d0d9a3c7c44d940582c2d226cf3acfb Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 19 Dec 2015 20:07:38 +0000
+Subject: futex: Drop refcount if requeue_pi() acquired the rtmutex
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit fb75a4282d0d9a3c7c44d940582c2d226cf3acfb upstream.
+
+If the proxy lock in the requeue loop acquires the rtmutex for a
+waiter then it acquired also refcount on the pi_state related to the
+futex, but the waiter side does not drop the reference count.
+
+Add the missing free_pi_state() call.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Darren Hart <darren@dvhart.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Bhuvanesh_Surachari@mentor.com
+Cc: Andy Lowe <Andy_Lowe@mentor.com>
+Link: http://lkml.kernel.org/r/20151219200607.178132067@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2755,6 +2755,11 @@ static int futex_wait_requeue_pi(u32 __u
+ if (q.pi_state && (q.pi_state->owner != current)) {
+ spin_lock(q.lock_ptr);
+ ret = fixup_pi_state_owner(uaddr2, &q, current);
++ /*
++ * Drop the reference to the pi state which
++ * the requeue_pi() code acquired for us.
++ */
++ free_pi_state(q.pi_state);
+ spin_unlock(q.lock_ptr);
+ }
+ } else {
--- /dev/null
+From 9d0be85d4e2cfa2519ae16efe7ff4a7150c43c0b Mon Sep 17 00:00:00 2001
+From: Jonathan Cameron <jic23@kernel.org>
+Date: Fri, 1 Jan 2016 18:05:34 +0000
+Subject: iio:adc:ti_am335x_adc Fix buffered mode by identifying as software buffer.
+
+From: Jonathan Cameron <jic23@kernel.org>
+
+commit 9d0be85d4e2cfa2519ae16efe7ff4a7150c43c0b upstream.
+
+Whilst this part has a hardware buffer, the identification that IIO cares
+about is the userspace facing end. In this case we push individual elements
+from the hardware fifo into the software interface (specifically a kfifo)
+rather than providing direct reads through to a hardware buffer
+(as we still do in the sca3000 for example).
+
+Technically the original specification as a hardware buffer could be
+considered wrong, but it didn't matter until the patch listed below.
+
+Result is that any attempt to enable the buffer will return -EINVAL
+
+Fixes: 225d59adf1c8 ("iio: Specify supported modes for buffers")
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/adc/ti_am335x_adc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iio/adc/ti_am335x_adc.c
++++ b/drivers/iio/adc/ti_am335x_adc.c
+@@ -289,7 +289,7 @@ static int tiadc_iio_buffered_hardware_s
+ goto error_kfifo_free;
+
+ indio_dev->setup_ops = setup_ops;
+- indio_dev->modes |= INDIO_BUFFER_HARDWARE;
++ indio_dev->modes |= INDIO_BUFFER_SOFTWARE;
+
+ return 0;
+
--- /dev/null
+From 005ce0713006a76d2b0c924ce0e2629e5d8510c3 Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Sat, 2 Jan 2016 14:04:40 +0100
+Subject: iio: add HAS_IOMEM dependency to VF610_ADC
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit 005ce0713006a76d2b0c924ce0e2629e5d8510c3 upstream.
+
+Ran into this on UML:
+
+drivers/built-in.o: In function `vf610_adc_probe':
+drivers/iio/adc/vf610_adc.c:744: undefined reference to `devm_ioremap_resource'
+
+devm_ioremap_resource() is defined only when HAS_IOMEM is selected.
+
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/adc/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/iio/adc/Kconfig
++++ b/drivers/iio/adc/Kconfig
+@@ -372,6 +372,7 @@ config TWL6030_GPADC
+ config VF610_ADC
+ tristate "Freescale vf610 ADC driver"
+ depends on OF
++ depends on HAS_IOMEM
+ select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
+ help
--- /dev/null
+From 01cc5235604d61018712c11a14d74230f6a38bf4 Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Sat, 2 Jan 2016 14:04:39 +0100
+Subject: iio: add IIO_TRIGGER dependency to STK8BA50
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit 01cc5235604d61018712c11a14d74230f6a38bf4 upstream.
+
+Ran into this on UML:
+
+drivers/iio/accel/stk8ba50.c: In function ‘stk8ba50_data_rdy_trigger_set_state’:
+drivers/iio/accel/stk8ba50.c:163:9: error: implicit declaration of function ‘iio_trigger_get_drvdata’ [-Werror=implicit-function-declaration]
+
+iio_trigger_get_drvdata() is defined only when IIO_TRIGGER is selected.
+
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/accel/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/iio/accel/Kconfig
++++ b/drivers/iio/accel/Kconfig
+@@ -173,6 +173,7 @@ config STK8312
+ config STK8BA50
+ tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
+ depends on I2C
++ depends on IIO_TRIGGER
+ help
+ Say yes here to get support for the Sensortek STK8BA50 3-axis
+ accelerometer.
--- /dev/null
+From d590faf9e8f8509a0a0aa79c38e87fcc6b913248 Mon Sep 17 00:00:00 2001
+From: Lars-Peter Clausen <lars@metafoo.de>
+Date: Fri, 27 Nov 2015 14:55:56 +0100
+Subject: iio: adis_buffer: Fix out-of-bounds memory access
+
+From: Lars-Peter Clausen <lars@metafoo.de>
+
+commit d590faf9e8f8509a0a0aa79c38e87fcc6b913248 upstream.
+
+The SPI tx and rx buffers are both supposed to be scan_bytes amount of
+bytes large and a common allocation is used to allocate both buffers. This
+puts the beginning of the tx buffer scan_bytes bytes after the rx buffer.
+The initialization of the tx buffer pointer is done adding scan_bytes to
+the beginning of the rx buffer, but since the rx buffer is of type __be16
+this will actually add two times as much and the tx buffer ends up pointing
+after the allocated buffer.
+
+Fix this by using scan_count, which is scan_bytes / 2, instead of
+scan_bytes when initializing the tx buffer pointer.
+
+Fixes: aacff892cbd5 ("staging:iio:adis: Preallocate transfer message")
+Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/imu/adis_buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iio/imu/adis_buffer.c
++++ b/drivers/iio/imu/adis_buffer.c
+@@ -43,7 +43,7 @@ int adis_update_scan_mode(struct iio_dev
+ return -ENOMEM;
+
+ rx = adis->buffer;
+- tx = rx + indio_dev->scan_bytes;
++ tx = rx + scan_count;
+
+ spi_message_init(&adis->msg);
+
--- /dev/null
+From 97a249e98a72d6b79fb7350a8dd56b147e9d5bdb Mon Sep 17 00:00:00 2001
+From: Yong Li <sdliyong@gmail.com>
+Date: Wed, 6 Jan 2016 09:09:43 +0800
+Subject: iio: dac: mcp4725: set iio name property in sysfs
+
+From: Yong Li <sdliyong@gmail.com>
+
+commit 97a249e98a72d6b79fb7350a8dd56b147e9d5bdb upstream.
+
+Without this change, the name entity for mcp4725 is missing in
+/sys/bus/iio/devices/iio\:device*/name
+
+With this change, name is reported correctly
+
+Signed-off-by: Yong Li <sdliyong@gmail.com>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/dac/mcp4725.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/iio/dac/mcp4725.c
++++ b/drivers/iio/dac/mcp4725.c
+@@ -300,6 +300,7 @@ static int mcp4725_probe(struct i2c_clie
+ data->client = client;
+
+ indio_dev->dev.parent = &client->dev;
++ indio_dev->name = id->name;
+ indio_dev->info = &mcp4725_info;
+ indio_dev->channels = &mcp4725_channel;
+ indio_dev->num_channels = 1;
--- /dev/null
+From d81dac3c1c5295c61b15293074ac2bd3254e1875 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 26 Jan 2016 12:25:21 +0300
+Subject: iio: inkern: fix a NULL dereference on error
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit d81dac3c1c5295c61b15293074ac2bd3254e1875 upstream.
+
+In twl4030_bci_probe() there are some failure paths where we call
+iio_channel_release() with a NULL pointer. (Apparently, that driver can
+operate without a valid channel pointer). Let's fix it by adding a
+NULL check in iio_channel_release().
+
+Fixes: 2202e1fc5a29 ('drivers: power: twl4030_charger: fix link problems when building as module')
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/inkern.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/iio/inkern.c
++++ b/drivers/iio/inkern.c
+@@ -351,6 +351,8 @@ EXPORT_SYMBOL_GPL(iio_channel_get);
+
+ void iio_channel_release(struct iio_channel *channel)
+ {
++ if (!channel)
++ return;
+ iio_device_put(channel->indio_dev);
+ kfree(channel);
+ }
--- /dev/null
+From fa34e6dd44d7c02c8a8468ce4a52a7506f907bef Mon Sep 17 00:00:00 2001
+From: Gabriele Mazzotta <gabriele.mzt@gmail.com>
+Date: Tue, 12 Jan 2016 16:21:39 +0100
+Subject: iio: light: acpi-als: Report data as processed
+
+From: Gabriele Mazzotta <gabriele.mzt@gmail.com>
+
+commit fa34e6dd44d7c02c8a8468ce4a52a7506f907bef upstream.
+
+As per the ACPI specification (Revision 5.0) [1], the data coming
+from the sensor represent the ambient light illuminance reading
+expressed in lux. So use IIO_CHAN_INFO_PROCESSED to signify that
+the data are pre-processed.
+
+However, to keep backward ABI compatibility, the IIO_CHAN_INFO_RAW
+bit is not removed.
+
+[1] http://www.acpi.info/DOWNLOADS/ACPIspec50.pdf
+
+This issue has also been responsible for at least one userspace bug
+report hence marking what is a small semantic fix really for stable.
+[2] https://github.com/hadess/iio-sensor-proxy/issues/46
+
+Signed-off-by: Gabriele Mazzotta <gabriele.mzt@gmail.com>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/light/acpi-als.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/iio/light/acpi-als.c
++++ b/drivers/iio/light/acpi-als.c
+@@ -54,7 +54,9 @@ static const struct iio_chan_spec acpi_a
+ .realbits = 32,
+ .storagebits = 32,
+ },
+- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
++ /* _RAW is here for backward ABI compatibility */
++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
++ BIT(IIO_CHAN_INFO_PROCESSED),
+ },
+ };
+
+@@ -152,7 +154,7 @@ static int acpi_als_read_raw(struct iio_
+ s32 temp_val;
+ int ret;
+
+- if (mask != IIO_CHAN_INFO_RAW)
++ if ((mask != IIO_CHAN_INFO_PROCESSED) && (mask != IIO_CHAN_INFO_RAW))
+ return -EINVAL;
+
+ /* we support only illumination (_ALI) so far. */
--- /dev/null
+From c08ae18560aaed50fed306a2e11f36ce70130f65 Mon Sep 17 00:00:00 2001
+From: Markus Elfring <elfring@users.sourceforge.net>
+Date: Sat, 19 Dec 2015 14:14:54 +0100
+Subject: iio-light: Use a signed return type for ltr501_match_samp_freq()
+
+From: Markus Elfring <elfring@users.sourceforge.net>
+
+commit c08ae18560aaed50fed306a2e11f36ce70130f65 upstream.
+
+The return type "unsigned int" was used by the ltr501_match_samp_freq()
+function despite the fact that it can return a negative error code.
+Fix this by dropping the "unsigned" type modifier.
+
+This issue was detected by using the Coccinelle software.
+
+Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
+Acked-by: Peter Meerwald-Stadler <pmeerw@pmeerw.net>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/light/ltr501.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iio/light/ltr501.c
++++ b/drivers/iio/light/ltr501.c
+@@ -180,7 +180,7 @@ static const struct ltr501_samp_table lt
+ {500000, 2000000}
+ };
+
+-static unsigned int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
++static int ltr501_match_samp_freq(const struct ltr501_samp_table *tab,
+ int len, int val, int val2)
+ {
+ int i, freq;
--- /dev/null
+From 431386e783a3a6c8b7707bee32d18c353b8688b2 Mon Sep 17 00:00:00 2001
+From: Akinobu Mita <akinobu.mita@gmail.com>
+Date: Thu, 21 Jan 2016 01:07:31 +0900
+Subject: iio: pressure: mpl115: fix temperature offset sign
+
+From: Akinobu Mita <akinobu.mita@gmail.com>
+
+commit 431386e783a3a6c8b7707bee32d18c353b8688b2 upstream.
+
+According to the datasheet, the resolution of temperature sensor is
+-5.35 counts/C. Temperature ADC is 472 counts at 25C.
+(https://www.sparkfun.com/datasheets/Sensors/Pressure/MPL115A1.pdf
+NOTE: This is older revision, but this information is removed from the
+latest datasheet from nxp somehow)
+
+Temp [C] = (Tadc - 472) / -5.35 + 25
+ = (Tadc - 605.750000) * -0.186915888
+
+So the correct offset is -605.750000.
+
+Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
+Acked-by: Peter Meerwald-Stadler <pmeerw@pmeerw.net>
+Signed-off-by: Jonathan Cameron <jic23@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iio/pressure/mpl115.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iio/pressure/mpl115.c
++++ b/drivers/iio/pressure/mpl115.c
+@@ -117,7 +117,7 @@ static int mpl115_read_raw(struct iio_de
+ *val = ret >> 6;
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_OFFSET:
+- *val = 605;
++ *val = -605;
+ *val2 = 750000;
+ return IIO_VAL_INT_PLUS_MICRO;
+ case IIO_CHAN_INFO_SCALE:
--- /dev/null
+From 6544a1df11c48c8413071aac3316792e4678fbfb Mon Sep 17 00:00:00 2001
+From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Date: Mon, 11 Jan 2016 17:35:38 -0800
+Subject: Input: elantech - mark protocols v2 and v3 as semi-mt
+
+From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+
+commit 6544a1df11c48c8413071aac3316792e4678fbfb upstream.
+
+When using a protocol v2 or v3 hardware, elantech uses the function
+elantech_report_semi_mt_data() to report data. These devices are rather
+creepy because if num_finger is 3, (x2,y2) is (0,0). Yes, only one valid
+touch is reported.
+
+Anyway, userspace (libinput) is now confused by these (0,0) touches,
+detects them as palms, and rejects them.
+
+Commit 3c0213d17a09 ("Input: elantech - fix semi-mt protocol for v3 HW")
+was sufficient for xf86-input-synaptics and for libinput before it gained
+palm rejection. Now we need to actually tell libinput that this device is
+a semi-mt one and it should not rely on the actual values of the 2 touches.
+
+Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/input/mouse/elantech.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/input/mouse/elantech.c
++++ b/drivers/input/mouse/elantech.c
+@@ -1222,7 +1222,7 @@ static int elantech_set_input_params(str
+ input_set_abs_params(dev, ABS_TOOL_WIDTH, ETP_WMIN_V2,
+ ETP_WMAX_V2, 0, 0);
+ }
+- input_mt_init_slots(dev, 2, 0);
++ input_mt_init_slots(dev, 2, INPUT_MT_SEMI_MT);
+ input_set_abs_params(dev, ABS_MT_POSITION_X, x_min, x_max, 0, 0);
+ input_set_abs_params(dev, ABS_MT_POSITION_Y, y_min, y_max, 0, 0);
+ break;
--- /dev/null
+From dd0d0d4de582a6a61c032332c91f4f4cb2bab569 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Aur=C3=A9lien=20Francillon?= <aurelien@francillon.net>
+Date: Sat, 2 Jan 2016 20:39:54 -0800
+Subject: Input: i8042 - add Fujitsu Lifebook U745 to the nomux list
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Aurélien Francillon <aurelien@francillon.net>
+
+commit dd0d0d4de582a6a61c032332c91f4f4cb2bab569 upstream.
+
+Without i8042.nomux=1 the Elantech touch pad is not working at all on
+a Fujitsu Lifebook U745. This patch does not seem necessary for all
+U745 (maybe because of different BIOS versions?). However, it was
+verified that the patch does not break those (see opensuse bug 883192:
+https://bugzilla.opensuse.org/show_bug.cgi?id=883192).
+
+Signed-off-by: Aurélien Francillon <aurelien@francillon.net>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/input/serio/i8042-x86ia64io.h
++++ b/drivers/input/serio/i8042-x86ia64io.h
+@@ -258,6 +258,13 @@ static const struct dmi_system_id __init
+ },
+ },
+ {
++ /* Fujitsu Lifebook U745 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U745"),
++ },
++ },
++ {
+ /* Fujitsu T70H */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
--- /dev/null
+From d4f1b06d685d11ebdaccf11c0db1cb3c78736862 Mon Sep 17 00:00:00 2001
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Date: Sat, 16 Jan 2016 10:04:49 -0800
+Subject: Input: vmmouse - fix absolute device registration
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+commit d4f1b06d685d11ebdaccf11c0db1cb3c78736862 upstream.
+
+We should set device's capabilities first, and then register it,
+otherwise various handlers already present in the kernel will not be
+able to connect to the device.
+
+Reported-by: Lauri Kasanen <cand@gmx.com>
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/input/mouse/vmmouse.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/input/mouse/vmmouse.c
++++ b/drivers/input/mouse/vmmouse.c
+@@ -458,8 +458,6 @@ int vmmouse_init(struct psmouse *psmouse
+ priv->abs_dev = abs_dev;
+ psmouse->private = priv;
+
+- input_set_capability(rel_dev, EV_REL, REL_WHEEL);
+-
+ /* Set up and register absolute device */
+ snprintf(priv->phys, sizeof(priv->phys), "%s/input1",
+ psmouse->ps2dev.serio->phys);
+@@ -475,10 +473,6 @@ int vmmouse_init(struct psmouse *psmouse
+ abs_dev->id.version = psmouse->model;
+ abs_dev->dev.parent = &psmouse->ps2dev.serio->dev;
+
+- error = input_register_device(priv->abs_dev);
+- if (error)
+- goto init_fail;
+-
+ /* Set absolute device capabilities */
+ input_set_capability(abs_dev, EV_KEY, BTN_LEFT);
+ input_set_capability(abs_dev, EV_KEY, BTN_RIGHT);
+@@ -488,6 +482,13 @@ int vmmouse_init(struct psmouse *psmouse
+ input_set_abs_params(abs_dev, ABS_X, 0, VMMOUSE_MAX_X, 0, 0);
+ input_set_abs_params(abs_dev, ABS_Y, 0, VMMOUSE_MAX_Y, 0, 0);
+
++ error = input_register_device(priv->abs_dev);
++ if (error)
++ goto init_fail;
++
++ /* Add wheel capability to the relative device */
++ input_set_capability(rel_dev, EV_REL, REL_WHEEL);
++
+ psmouse->protocol_handler = vmmouse_process_byte;
+ psmouse->disconnect = vmmouse_disconnect;
+ psmouse->reconnect = vmmouse_reconnect;
--- /dev/null
+From b1d353ad3d5835b16724653b33c05124e1b5acf1 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 26 Jan 2016 12:24:25 +0300
+Subject: intel_scu_ipcutil: underflow in scu_reg_access()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit b1d353ad3d5835b16724653b33c05124e1b5acf1 upstream.
+
+"count" is controlled by the user and it can be negative. Let's prevent
+that by making it unsigned. You have to have CAP_SYS_RAWIO to call this
+function so the bug is not as serious as it could be.
+
+Fixes: 5369c02d951a ('intel_scu_ipc: Utility driver for intel scu ipc')
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Darren Hart <dvhart@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/platform/x86/intel_scu_ipcutil.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/platform/x86/intel_scu_ipcutil.c
++++ b/drivers/platform/x86/intel_scu_ipcutil.c
+@@ -49,7 +49,7 @@ struct scu_ipc_data {
+
+ static int scu_reg_access(u32 cmd, struct scu_ipc_data *data)
+ {
+- int count = data->count;
++ unsigned int count = data->count;
+
+ if (count == 0 || count == 3 || count > 4)
+ return -EINVAL;
--- /dev/null
+From 9b1a12d29109234d2b9718d04d4d404b7da4e794 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Wed, 20 Jan 2016 22:01:19 +0800
+Subject: iommu/amd: Correct the wrong setting of alias DTE in do_attach
+
+From: Baoquan He <bhe@redhat.com>
+
+commit 9b1a12d29109234d2b9718d04d4d404b7da4e794 upstream.
+
+The commit below sets the alias DTE whenever the DTE of its peripheral
+is set. However, a bug in that code sets the device's own DTE a second
+time instead of the alias DTE; correct it in this patch.
+
+commit e25bfb56ea7f046b71414e02f80f620deb5c6362
+Author: Joerg Roedel <jroedel@suse.de>
+Date: Tue Oct 20 17:33:38 2015 +0200
+
+ iommu/amd: Set alias DTE in do_attach/do_detach
+
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Tested-by: Mark Hounschell <markh@compro.net>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/amd_iommu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iommu/amd_iommu.c
++++ b/drivers/iommu/amd_iommu.c
+@@ -1905,7 +1905,7 @@ static void do_attach(struct iommu_dev_d
+ /* Update device table */
+ set_dte_entry(dev_data->devid, domain, ats);
+ if (alias != dev_data->devid)
+- set_dte_entry(dev_data->devid, domain, ats);
++ set_dte_entry(alias, domain, ats);
+
+ device_flush_dte(dev_data);
+ }
--- /dev/null
+From 46924008273ed03bd11dbb32136e3da4cfe056e1 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <David.Woodhouse@intel.com>
+Date: Mon, 15 Feb 2016 12:42:38 +0000
+Subject: iommu/vt-d: Clear PPR bit to ensure we get more page request interrupts
+
+From: David Woodhouse <David.Woodhouse@intel.com>
+
+commit 46924008273ed03bd11dbb32136e3da4cfe056e1 upstream.
+
+According to the VT-d specification we need to clear the PPR bit in
+the Page Request Status register when handling page requests, or the
+hardware won't generate any more interrupts.
+
+This wasn't actually necessary on SKL/KBL (which may well be the
+subject of a hardware erratum, although it's harmless enough). But
+other implementations do appear to get it right, and we only ever get
+one interrupt unless we clear the PPR bit.
+
+Reported-by: CQ Tang <cq.tang@intel.com>
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/intel-svm.c | 4 ++++
+ include/linux/intel-iommu.h | 3 +++
+ 2 files changed, 7 insertions(+)
+
+--- a/drivers/iommu/intel-svm.c
++++ b/drivers/iommu/intel-svm.c
+@@ -524,6 +524,10 @@ static irqreturn_t prq_event_thread(int
+ struct intel_svm *svm = NULL;
+ int head, tail, handled = 0;
+
++ /* Clear PPR bit before reading head/tail registers, to
++ * ensure that we get a new interrupt if needed. */
++ writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
++
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+--- a/include/linux/intel-iommu.h
++++ b/include/linux/intel-iommu.h
+@@ -235,6 +235,9 @@ static inline void dmar_writeq(void __io
+ /* low 64 bit */
+ #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
+
++/* PRS_REG */
++#define DMA_PRS_PPR ((u32)1)
++
+ #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+ do { \
+ cycles_t start_time = get_cycles(); \
--- /dev/null
+From da972fb13bc5a1baad450c11f9182e4cd0a091f6 Mon Sep 17 00:00:00 2001
+From: Jeremy McNicoll <jmcnicol@redhat.com>
+Date: Thu, 14 Jan 2016 21:33:06 -0800
+Subject: iommu/vt-d: Don't skip PCI devices when disabling IOTLB
+
+From: Jeremy McNicoll <jmcnicol@redhat.com>
+
+commit da972fb13bc5a1baad450c11f9182e4cd0a091f6 upstream.
+
+Fix a simple typo when disabling IOTLB on PCI(e) devices.
+
+Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
+Signed-off-by: Jeremy McNicoll <jmcnicol@redhat.com>
+Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/intel-iommu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iommu/intel-iommu.c
++++ b/drivers/iommu/intel-iommu.c
+@@ -1489,7 +1489,7 @@ static void iommu_disable_dev_iotlb(stru
+ {
+ struct pci_dev *pdev;
+
+- if (dev_is_pci(info->dev))
++ if (!dev_is_pci(info->dev))
+ return;
+
+ pdev = to_pci_dev(info->dev);
--- /dev/null
+From fda3bec12d0979aae3f02ee645913d66fbc8a26e Mon Sep 17 00:00:00 2001
+From: CQ Tang <cq.tang@intel.com>
+Date: Wed, 13 Jan 2016 21:15:03 +0000
+Subject: iommu/vt-d: Fix 64-bit accesses to 32-bit DMAR_GSTS_REG
+
+From: CQ Tang <cq.tang@intel.com>
+
+commit fda3bec12d0979aae3f02ee645913d66fbc8a26e upstream.
+
+This is a 32-bit register. Apparently harmless on real hardware, but
+causing justified warnings in simulation.
+
+Signed-off-by: CQ Tang <cq.tang@intel.com>
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/dmar.c | 2 +-
+ drivers/iommu/intel_irq_remapping.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/iommu/dmar.c
++++ b/drivers/iommu/dmar.c
+@@ -1347,7 +1347,7 @@ void dmar_disable_qi(struct intel_iommu
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+
+- sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
++ sts = readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_QIES))
+ goto end;
+
+--- a/drivers/iommu/intel_irq_remapping.c
++++ b/drivers/iommu/intel_irq_remapping.c
+@@ -629,7 +629,7 @@ static void iommu_disable_irq_remapping(
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+
+- sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
++ sts = readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_IRES))
+ goto end;
+
--- /dev/null
+From e57e58bd390a6843db58560bf7b8341665d2e058 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <David.Woodhouse@intel.com>
+Date: Tue, 12 Jan 2016 19:18:06 +0000
+Subject: iommu/vt-d: Fix mm refcounting to hold mm_count not mm_users
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: David Woodhouse <David.Woodhouse@intel.com>
+
+commit e57e58bd390a6843db58560bf7b8341665d2e058 upstream.
+
+Holding mm_users works OK for graphics, which was the first user of SVM
+with VT-d. However, it works less well for other devices, where we actually
+do a mmap() from the file descriptor to which the SVM PASID state is tied.
+
+In this case on process exit we end up with a recursive reference count:
+ - The MM remains alive until the file is closed and the driver's release()
+ call ends up unbinding the PASID.
+ - The VMA corresponding to the mmap() remains intact until the MM is
+ destroyed.
+ - Thus the file isn't closed, even when exit_files() runs, because the
+ VMA is still holding a reference to it. And the MM remains alive…
+
+To address this issue, we *stop* holding mm_users while the PASID is bound.
+We already hold mm_count by virtue of the MMU notifier, and that can be
+made to be sufficient.
+
+It means that for a period during process exit, the fun part of mmput()
+has happened and exit_mmap() has been called so the MM is basically
+defunct. But the PGD still exists and the PASID is still bound to it.
+
+During this period, we have to be very careful — exit_mmap() doesn't use
+mm->mmap_sem because it doesn't expect anyone else to be touching the MM
+(quite reasonably, since mm_users is zero). So we also need to fix the
+fault handler to just report failure if mm_users is already zero, and to
+temporarily bump mm_users while handling any faults.
+
+Additionally, exit_mmap() calls mmu_notifier_release() *before* it tears
+down the page tables, which is too early for us to flush the IOTLB for
+this PASID. And __mmu_notifier_release() removes every notifier from the
+list, so when exit_mmap() finally *does* tear down the mappings and
+clear the page tables, we don't get notified. So we work around this by
+clearing the PASID table entry in our MMU notifier release() callback.
+That way, the hardware *can't* get any pages back from the page tables
+before they get cleared.
+
+Hardware designers have confirmed that the resulting 'PASID not present'
+faults should be handled just as gracefully as 'page not present' faults,
+the important criterion being that they don't perturb the operation for
+any *other* PASID in the system.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/intel-svm.c | 33 +++++++++++++++++++++++++++------
+ 1 file changed, 27 insertions(+), 6 deletions(-)
+
+--- a/drivers/iommu/intel-svm.c
++++ b/drivers/iommu/intel-svm.c
+@@ -249,12 +249,30 @@ static void intel_flush_pasid_dev(struct
+ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+ {
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
++ struct intel_svm_dev *sdev;
+
++ /* This might end up being called from exit_mmap(), *before* the page
++ * tables are cleared. And __mmu_notifier_release() will delete us from
++ * the list of notifiers so that our invalidate_range() callback doesn't
++ * get called when the page tables are cleared. So we need to protect
++ * against hardware accessing those page tables.
++ *
++ * We do it by clearing the entry in the PASID table and then flushing
++ * the IOTLB and the PASID table caches. This might upset hardware;
++ * perhaps we'll want to point the PASID to a dummy PGD (like the zero
++ * page) so that we end up taking a fault that the hardware really
++ * *has* to handle gracefully without affecting other processes.
++ */
+ svm->iommu->pasid_table[svm->pasid].val = 0;
++ wmb();
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(sdev, &svm->devs, list) {
++ intel_flush_pasid_dev(svm, sdev, svm->pasid);
++ intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
++ }
++ rcu_read_unlock();
+
+- /* There's no need to do any flush because we can't get here if there
+- * are any devices left anyway. */
+- WARN_ON(!list_empty(&svm->devs));
+ }
+
+ static const struct mmu_notifier_ops intel_mmuops = {
+@@ -379,7 +397,6 @@ int intel_svm_bind_mm(struct device *dev
+ goto out;
+ }
+ iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
+- mm = NULL;
+ } else
+ iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+ wmb();
+@@ -442,11 +459,11 @@ int intel_svm_unbind_mm(struct device *d
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+- mmu_notifier_unregister(&svm->notifier, svm->mm);
+
+ idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+ if (svm->mm)
+- mmput(svm->mm);
++ mmu_notifier_unregister(&svm->notifier, svm->mm);
++
+ /* We mandate that no page faults may be outstanding
+ * for the PASID when intel_svm_unbind_mm() is called.
+ * If that is not obeyed, subtle errors will happen.
+@@ -551,6 +568,9 @@ static irqreturn_t prq_event_thread(int
+ * any faults on kernel addresses. */
+ if (!svm->mm)
+ goto bad_req;
++ /* If the mm is already defunct, don't handle faults. */
++ if (!atomic_inc_not_zero(&svm->mm->mm_users))
++ goto bad_req;
+ down_read(&svm->mm->mmap_sem);
+ vma = find_extend_vma(svm->mm, address);
+ if (!vma || address < vma->vm_start)
+@@ -567,6 +587,7 @@ static irqreturn_t prq_event_thread(int
+ result = QI_RESP_SUCCESS;
+ invalid:
+ up_read(&svm->mm->mmap_sem);
++ mmput(svm->mm);
+ bad_req:
+ /* Accounting for major/minor faults? */
+ rcu_read_lock();
--- /dev/null
+From 1ac0b6dec656f3f78d1c3dd216fad84cb4d0a01e Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Wed, 17 Feb 2016 13:11:35 -0800
+Subject: ipc/shm: handle removed segments gracefully in shm_mmap()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 1ac0b6dec656f3f78d1c3dd216fad84cb4d0a01e upstream.
+
+remap_file_pages(2) emulation can reach file which represents removed
+IPC ID as long as a memory segment is mapped. It breaks expectations of
+IPC subsystem.
+
+Test case (rewritten to be more human readable, originally autogenerated
+by syzkaller[1]):
+
+ #define _GNU_SOURCE
+ #include <stdlib.h>
+ #include <sys/ipc.h>
+ #include <sys/mman.h>
+ #include <sys/shm.h>
+
+ #define PAGE_SIZE 4096
+
+ int main()
+ {
+ int id;
+ void *p;
+
+ id = shmget(IPC_PRIVATE, 3 * PAGE_SIZE, 0);
+ p = shmat(id, NULL, 0);
+ shmctl(id, IPC_RMID, NULL);
+ remap_file_pages(p, 3 * PAGE_SIZE, 0, 7, 0);
+
+ return 0;
+ }
+
+The patch changes shm_mmap() and code around shm_lock() to propagate
+locking error back to caller of shm_mmap().
+
+[1] http://github.com/google/syzkaller
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/shm.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 43 insertions(+), 10 deletions(-)
+
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -156,11 +156,12 @@ static inline struct shmid_kernel *shm_l
+ struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+
+ /*
+- * We raced in the idr lookup or with shm_destroy(). Either way, the
+- * ID is busted.
++ * Callers of shm_lock() must validate the status of the returned ipc
++ * object pointer (as returned by ipc_lock()), and error out as
++ * appropriate.
+ */
+- WARN_ON(IS_ERR(ipcp));
+-
++ if (IS_ERR(ipcp))
++ return (void *)ipcp;
+ return container_of(ipcp, struct shmid_kernel, shm_perm);
+ }
+
+@@ -186,18 +187,33 @@ static inline void shm_rmid(struct ipc_n
+ }
+
+
+-/* This is called by fork, once for every shm attach. */
+-static void shm_open(struct vm_area_struct *vma)
++static int __shm_open(struct vm_area_struct *vma)
+ {
+ struct file *file = vma->vm_file;
+ struct shm_file_data *sfd = shm_file_data(file);
+ struct shmid_kernel *shp;
+
+ shp = shm_lock(sfd->ns, sfd->id);
++
++ if (IS_ERR(shp))
++ return PTR_ERR(shp);
++
+ shp->shm_atim = get_seconds();
+ shp->shm_lprid = task_tgid_vnr(current);
+ shp->shm_nattch++;
+ shm_unlock(shp);
++ return 0;
++}
++
++/* This is called by fork, once for every shm attach. */
++static void shm_open(struct vm_area_struct *vma)
++{
++ int err = __shm_open(vma);
++ /*
++ * We raced in the idr lookup or with shm_destroy().
++ * Either way, the ID is busted.
++ */
++ WARN_ON_ONCE(err);
+ }
+
+ /*
+@@ -260,6 +276,14 @@ static void shm_close(struct vm_area_str
+ down_write(&shm_ids(ns).rwsem);
+ /* remove from the list of attaches of the shm segment */
+ shp = shm_lock(ns, sfd->id);
++
++ /*
++ * We raced in the idr lookup or with shm_destroy().
++ * Either way, the ID is busted.
++ */
++ if (WARN_ON_ONCE(IS_ERR(shp)))
++ goto done; /* no-op */
++
+ shp->shm_lprid = task_tgid_vnr(current);
+ shp->shm_dtim = get_seconds();
+ shp->shm_nattch--;
+@@ -267,6 +291,7 @@ static void shm_close(struct vm_area_str
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
++done:
+ up_write(&shm_ids(ns).rwsem);
+ }
+
+@@ -388,17 +413,25 @@ static int shm_mmap(struct file *file, s
+ struct shm_file_data *sfd = shm_file_data(file);
+ int ret;
+
++ /*
++ * In case of remap_file_pages() emulation, the file can represent
++ * removed IPC ID: propagate shm_lock() error to caller.
++ */
++ ret =__shm_open(vma);
++ if (ret)
++ return ret;
++
+ ret = sfd->file->f_op->mmap(sfd->file, vma);
+- if (ret != 0)
++ if (ret) {
++ shm_close(vma);
+ return ret;
++ }
+ sfd->vm_ops = vma->vm_ops;
+ #ifdef CONFIG_MMU
+ WARN_ON(!sfd->vm_ops->fault);
+ #endif
+ vma->vm_ops = &shm_vm_ops;
+- shm_open(vma);
+-
+- return ret;
++ return 0;
+ }
+
+ static int shm_release(struct inode *ino, struct file *file)
--- /dev/null
+From 26a99c19f810b2593410899a5b304b21b47428a6 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+Date: Tue, 19 Jan 2016 16:15:27 -0800
+Subject: iscsi-target: Fix potential dead-lock during node acl delete
+
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+
+commit 26a99c19f810b2593410899a5b304b21b47428a6 upstream.
+
+This patch is an iscsi-target specific bug-fix for a dead-lock
+that can occur during explicit struct se_node_acl->acl_group
+se_session deletion via configfs rmdir(2), when iscsi-target
+time2retain timer is still active.
+
+It changes iscsi-target to obtain se_portal_group->session_lock
+internally using spin_is_locked() to check for the specific
+se_node_acl configfs shutdown rmdir(2) case.
+
+Note this patch is intended for stable, and the subsequent
+v4.5-rc patch converts target_core_tpg.c to use proper
+se_sess->sess_kref reference counting for both se_node_acl
+deletion + se_node_acl->queue_depth se_session restart.
+
+Reported-by: Sagi Grimberg <sagig@mellanox.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Andy Grover <agrover@redhat.com>
+Cc: Mike Christie <michaelc@cs.wisc.edu>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/target/iscsi/iscsi_target_configfs.c | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+--- a/drivers/target/iscsi/iscsi_target_configfs.c
++++ b/drivers/target/iscsi/iscsi_target_configfs.c
+@@ -1593,7 +1593,8 @@ static int lio_tpg_check_prot_fabric_onl
+ }
+
+ /*
+- * Called with spin_lock_bh(struct se_portal_group->session_lock) held..
++ * Called with spin_lock_irq(struct se_portal_group->session_lock) held
++ * or not held.
+ *
+ * Also, this function calls iscsit_inc_session_usage_count() on the
+ * struct iscsi_session in question.
+@@ -1601,19 +1602,32 @@ static int lio_tpg_check_prot_fabric_onl
+ static int lio_tpg_shutdown_session(struct se_session *se_sess)
+ {
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
++ struct se_portal_group *se_tpg = se_sess->se_tpg;
++ bool local_lock = false;
++
++ if (!spin_is_locked(&se_tpg->session_lock)) {
++ spin_lock_irq(&se_tpg->session_lock);
++ local_lock = true;
++ }
+
+ spin_lock(&sess->conn_lock);
+ if (atomic_read(&sess->session_fall_back_to_erl0) ||
+ atomic_read(&sess->session_logout) ||
+ (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+ spin_unlock(&sess->conn_lock);
++ if (local_lock)
++ spin_unlock_irq(&se_tpg->session_lock);
+ return 0;
+ }
+ atomic_set(&sess->session_reinstatement, 1);
+ spin_unlock(&sess->conn_lock);
+
+ iscsit_stop_time2retain_timer(sess);
++ spin_unlock_irq(&se_tpg->session_lock);
++
+ iscsit_stop_session(sess, 1, 1);
++ if (!local_lock)
++ spin_lock_irq(&se_tpg->session_lock);
+
+ return 1;
+ }
--- /dev/null
+From 51cbb5242a41700a3f250ecfb48dcfb7e4375ea4 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Jan 2016 16:54:48 +0000
+Subject: itimers: Handle relative timers with CONFIG_TIME_LOW_RES proper
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 51cbb5242a41700a3f250ecfb48dcfb7e4375ea4 upstream.
+
+As Helge reported for timerfd we have the same issue in itimers. We return
+remaining time larger than the programmed relative time to user space in case
+of CONFIG_TIME_LOW_RES=y. Use the proper function to adjust the extra time
+added in hrtimer_start_range_ns().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Helge Deller <deller@gmx.de>
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: linux-m68k@lists.linux-m68k.org
+Cc: dhowells@redhat.com
+Link: http://lkml.kernel.org/r/20160114164159.528222587@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/itimer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/time/itimer.c
++++ b/kernel/time/itimer.c
+@@ -26,7 +26,7 @@
+ */
+ static struct timeval itimer_get_remtime(struct hrtimer *timer)
+ {
+- ktime_t rem = hrtimer_get_remaining(timer);
++ ktime_t rem = __hrtimer_get_remaining(timer, true);
+
+ /*
+ * Racy but safe: if the itimer expires after the above
--- /dev/null
+From 00cd29b799e3449f0c68b1cc77cd4a5f95b42d17 Mon Sep 17 00:00:00 2001
+From: James Bottomley <James.Bottomley@HansenPartnership.com>
+Date: Wed, 13 Jan 2016 08:10:31 -0800
+Subject: klist: fix starting point removed bug in klist iterators
+
+From: James Bottomley <James.Bottomley@HansenPartnership.com>
+
+commit 00cd29b799e3449f0c68b1cc77cd4a5f95b42d17 upstream.
+
+The starting node for a klist iteration is often passed in from
+somewhere way above the klist infrastructure, meaning there's no
+guarantee the node is still on the list. We've seen this in SCSI where
+we use bus_find_device() to iterate through a list of devices. In the
+face of heavy hotplug activity, the last device returned by
+bus_find_device() can be removed before the next call. This leads to
+
+Dec 3 13:22:02 localhost kernel: WARNING: CPU: 2 PID: 28073 at include/linux/kref.h:47 klist_iter_init_node+0x3d/0x50()
+Dec 3 13:22:02 localhost kernel: Modules linked in: scsi_debug x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel joydev iTCO_wdt dcdbas ipmi_devintf acpi_power_meter iTCO_vendor_support ipmi_si imsghandler pcspkr wmi acpi_cpufreq tpm_tis tpm shpchp lpc_ich mfd_core nfsd nfs_acl lockd grace sunrpc tg3 ptp pps_core
+Dec 3 13:22:02 localhost kernel: CPU: 2 PID: 28073 Comm: cat Not tainted 4.4.0-rc1+ #2
+Dec 3 13:22:02 localhost kernel: Hardware name: Dell Inc. PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013
+Dec 3 13:22:02 localhost kernel: ffffffff81a20e77 ffff880613acfd18 ffffffff81321eef 0000000000000000
+Dec 3 13:22:02 localhost kernel: ffff880613acfd50 ffffffff8107ca52 ffff88061176b198 0000000000000000
+Dec 3 13:22:02 localhost kernel: ffffffff814542b0 ffff880610cfb100 ffff88061176b198 ffff880613acfd60
+Dec 3 13:22:02 localhost kernel: Call Trace:
+Dec 3 13:22:02 localhost kernel: [<ffffffff81321eef>] dump_stack+0x44/0x55
+Dec 3 13:22:02 localhost kernel: [<ffffffff8107ca52>] warn_slowpath_common+0x82/0xc0
+Dec 3 13:22:02 localhost kernel: [<ffffffff814542b0>] ? proc_scsi_show+0x20/0x20
+Dec 3 13:22:02 localhost kernel: [<ffffffff8107cb4a>] warn_slowpath_null+0x1a/0x20
+Dec 3 13:22:02 localhost kernel: [<ffffffff8167225d>] klist_iter_init_node+0x3d/0x50
+Dec 3 13:22:02 localhost kernel: [<ffffffff81421d41>] bus_find_device+0x51/0xb0
+Dec 3 13:22:02 localhost kernel: [<ffffffff814545ad>] scsi_seq_next+0x2d/0x40
+[...]
+
+And an eventual crash. It can actually occur in any hotplug system
+which has a device finder and a starting device.
+
+We can fix this globally by making sure the starting node for
+klist_iter_init_node() is actually a member of the list before using it
+(and by starting from the beginning if it isn't).
+
+Reported-by: Ewan D. Milne <emilne@redhat.com>
+Tested-by: Ewan D. Milne <emilne@redhat.com>
+Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/klist.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/lib/klist.c
++++ b/lib/klist.c
+@@ -282,9 +282,9 @@ void klist_iter_init_node(struct klist *
+ struct klist_node *n)
+ {
+ i->i_klist = k;
+- i->i_cur = n;
+- if (n)
+- kref_get(&n->n_ref);
++ i->i_cur = NULL;
++ if (n && kref_get_unless_zero(&n->n_ref))
++ i->i_cur = n;
+ }
+ EXPORT_SYMBOL_GPL(klist_iter_init_node);
+
--- /dev/null
+From b3aff6ccbb1d25e506b60ccd9c559013903f3464 Mon Sep 17 00:00:00 2001
+From: Andre Przywara <andre.przywara@arm.com>
+Date: Wed, 3 Feb 2016 16:56:51 +0000
+Subject: KVM: arm/arm64: Fix reference to uninitialised VGIC
+
+From: Andre Przywara <andre.przywara@arm.com>
+
+commit b3aff6ccbb1d25e506b60ccd9c559013903f3464 upstream.
+
+Commit 4b4b4512da2a ("arm/arm64: KVM: Rework the arch timer to use
+level-triggered semantics") brought the virtual architected timer
+closer to the VGIC. There is one occasion where we don't properly
+check for the VGIC actually having been initialized before, but
+instead go on to check the active state of some IRQ number.
+If userland hasn't instantiated a virtual GIC, we end up with a
+kernel NULL pointer dereference:
+=========
+Unable to handle kernel NULL pointer dereference at virtual address 00000000
+pgd = ffffffc9745c5000
+[00000000] *pgd=00000009f631e003, *pud=00000009f631e003, *pmd=0000000000000000
+Internal error: Oops: 96000006 [#2] PREEMPT SMP
+Modules linked in:
+CPU: 0 PID: 2144 Comm: kvm_simplest-ar Tainted: G D 4.5.0-rc2+ #1300
+Hardware name: ARM Juno development board (r1) (DT)
+task: ffffffc976da8000 ti: ffffffc976e28000 task.ti: ffffffc976e28000
+PC is at vgic_bitmap_get_irq_val+0x78/0x90
+LR is at kvm_vgic_map_is_active+0xac/0xc8
+pc : [<ffffffc0000b7e28>] lr : [<ffffffc0000b972c>] pstate: 20000145
+....
+=========
+
+Fix this by bailing out early of kvm_timer_flush_hwstate() if we don't
+have a VGIC at all.
+
+Reported-by: Cosmin Gorgovan <cosmin@linux-geek.org>
+Acked-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Andre Przywara <andre.przywara@arm.com>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/arm/arch_timer.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/virt/kvm/arm/arch_timer.c
++++ b/virt/kvm/arm/arch_timer.c
+@@ -143,7 +143,7 @@ static void kvm_timer_update_irq(struct
+ * Check if there was a change in the timer state (should we raise or lower
+ * the line level to the GIC).
+ */
+-static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
++static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
+ {
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+@@ -154,10 +154,12 @@ static void kvm_timer_update_state(struc
+ * until we call this function from kvm_timer_flush_hwstate.
+ */
+ if (!vgic_initialized(vcpu->kvm))
+- return;
++ return -ENODEV;
+
+ if (kvm_timer_should_fire(vcpu) != timer->irq.level)
+ kvm_timer_update_irq(vcpu, !timer->irq.level);
++
++ return 0;
+ }
+
+ /*
+@@ -218,7 +220,8 @@ void kvm_timer_flush_hwstate(struct kvm_
+ bool phys_active;
+ int ret;
+
+- kvm_timer_update_state(vcpu);
++ if (kvm_timer_update_state(vcpu))
++ return;
+
+ /*
+ * If we enter the guest with the virtual input level to the VGIC
--- /dev/null
+From 760a7364f27d974d100118d88190e574626e18a6 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 20 Nov 2015 09:11:45 +0100
+Subject: KVM: PPC: Fix emulation of H_SET_DABR/X on POWER8
+
+From: Thomas Huth <thuth@redhat.com>
+
+commit 760a7364f27d974d100118d88190e574626e18a6 upstream.
+
+In the old DABR register, the BT (Breakpoint Translation) bit
+is bit number 61. In the new DAWRX register, the WT (Watchpoint
+Translation) bit is bit number 59. So to move the DABR-BT bit
+into the position of the DAWRX-WT bit, it has to be shifted by
+two, not only by one. This fixes hardware watchpoints in gdb of
+older guests that only use the H_SET_DABR/X interface instead
+of the new H_SET_MODE interface.
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+Reviewed-by: Laurent Vivier <lvivier@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Paul Mackerras <paulus@samba.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
+ 2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
+- rlwimi r5, r4, 1, DAWRX_WT
++ rlwimi r5, r4, 2, DAWRX_WT
+ clrrdi r4, r4, 3
+ std r4, VCPU_DAWR(r3)
+ std r5, VCPU_DAWRX(r3)
--- /dev/null
+From b4d7f161feb3015d6306e1d35b565c888ff70c9d Mon Sep 17 00:00:00 2001
+From: Greg Kurz <gkurz@linux.vnet.ibm.com>
+Date: Wed, 13 Jan 2016 18:28:17 +0100
+Subject: KVM: PPC: Fix ONE_REG AltiVec support
+
+From: Greg Kurz <gkurz@linux.vnet.ibm.com>
+
+commit b4d7f161feb3015d6306e1d35b565c888ff70c9d upstream.
+
+The get and set operations got exchanged by mistake when moving the
+code from book3s.c to powerpc.c.
+
+Fixes: 3840edc8033ad5b86deee309c1c321ca54257452
+Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
+Signed-off-by: Paul Mackerras <paulus@samba.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/powerpc.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kv
+ r = -ENXIO;
+ break;
+ }
+- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
++ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
++ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+- r = -ENXIO;
+- break;
+- }
+- vcpu->arch.vrsave = set_reg_val(reg->id, val);
++ val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ break;
+ #endif /* CONFIG_ALTIVEC */
+ default:
+@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kv
+ r = -ENXIO;
+ break;
+ }
+- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
++ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
++ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+- val = get_reg_val(reg->id, vcpu->arch.vrsave);
++ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
++ r = -ENXIO;
++ break;
++ }
++ vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ break;
+ #endif /* CONFIG_ALTIVEC */
+ default:
--- /dev/null
+From e07ecd76d4db7bda1e9495395b2110a3fe28845a Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 5 Jan 2016 18:37:23 -0800
+Subject: libnvdimm: fix namespace object confusion in is_uuid_busy()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit e07ecd76d4db7bda1e9495395b2110a3fe28845a upstream.
+
+When btt devices were re-worked to be child devices of regions this
+routine was overlooked. It mistakenly attempts to_nd_namespace_pmem()
+or to_nd_namespace_blk() conversions on btt and pfn devices. By luck to
+date we have happened to be hitting valid memory leading to a uuid
+miscompare, but a recent change to struct nd_namespace_common causes:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+ IP: [<ffffffff814610dc>] memcmp+0xc/0x40
+ [..]
+ Call Trace:
+ [<ffffffffa0028631>] is_uuid_busy+0xc1/0x2a0 [libnvdimm]
+ [<ffffffffa0028570>] ? to_nd_blk_region+0x50/0x50 [libnvdimm]
+ [<ffffffff8158c9c0>] device_for_each_child+0x50/0x90
+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nvdimm/namespace_devs.c | 53 +++++++++++++++++++++++++++++++++++++
+ drivers/nvdimm/region_devs.c | 56 ----------------------------------------
+ 2 files changed, 53 insertions(+), 56 deletions(-)
+
+--- a/drivers/nvdimm/namespace_devs.c
++++ b/drivers/nvdimm/namespace_devs.c
+@@ -77,6 +77,59 @@ static bool is_namespace_io(struct devic
+ return dev ? dev->type == &namespace_io_device_type : false;
+ }
+
++static int is_uuid_busy(struct device *dev, void *data)
++{
++ u8 *uuid1 = data, *uuid2 = NULL;
++
++ if (is_namespace_pmem(dev)) {
++ struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
++
++ uuid2 = nspm->uuid;
++ } else if (is_namespace_blk(dev)) {
++ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
++
++ uuid2 = nsblk->uuid;
++ } else if (is_nd_btt(dev)) {
++ struct nd_btt *nd_btt = to_nd_btt(dev);
++
++ uuid2 = nd_btt->uuid;
++ } else if (is_nd_pfn(dev)) {
++ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
++
++ uuid2 = nd_pfn->uuid;
++ }
++
++ if (uuid2 && memcmp(uuid1, uuid2, NSLABEL_UUID_LEN) == 0)
++ return -EBUSY;
++
++ return 0;
++}
++
++static int is_namespace_uuid_busy(struct device *dev, void *data)
++{
++ if (is_nd_pmem(dev) || is_nd_blk(dev))
++ return device_for_each_child(dev, data, is_uuid_busy);
++ return 0;
++}
++
++/**
++ * nd_is_uuid_unique - verify that no other namespace has @uuid
++ * @dev: any device on a nvdimm_bus
++ * @uuid: uuid to check
++ */
++bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
++{
++ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
++
++ if (!nvdimm_bus)
++ return false;
++ WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
++ if (device_for_each_child(&nvdimm_bus->dev, uuid,
++ is_namespace_uuid_busy) != 0)
++ return false;
++ return true;
++}
++
+ bool pmem_should_map_pages(struct device *dev)
+ {
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+--- a/drivers/nvdimm/region_devs.c
++++ b/drivers/nvdimm/region_devs.c
+@@ -134,62 +134,6 @@ int nd_region_to_nstype(struct nd_region
+ }
+ EXPORT_SYMBOL(nd_region_to_nstype);
+
+-static int is_uuid_busy(struct device *dev, void *data)
+-{
+- struct nd_region *nd_region = to_nd_region(dev->parent);
+- u8 *uuid = data;
+-
+- switch (nd_region_to_nstype(nd_region)) {
+- case ND_DEVICE_NAMESPACE_PMEM: {
+- struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+-
+- if (!nspm->uuid)
+- break;
+- if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0)
+- return -EBUSY;
+- break;
+- }
+- case ND_DEVICE_NAMESPACE_BLK: {
+- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+-
+- if (!nsblk->uuid)
+- break;
+- if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0)
+- return -EBUSY;
+- break;
+- }
+- default:
+- break;
+- }
+-
+- return 0;
+-}
+-
+-static int is_namespace_uuid_busy(struct device *dev, void *data)
+-{
+- if (is_nd_pmem(dev) || is_nd_blk(dev))
+- return device_for_each_child(dev, data, is_uuid_busy);
+- return 0;
+-}
+-
+-/**
+- * nd_is_uuid_unique - verify that no other namespace has @uuid
+- * @dev: any device on a nvdimm_bus
+- * @uuid: uuid to check
+- */
+-bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
+-{
+- struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+-
+- if (!nvdimm_bus)
+- return false;
+- WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+- if (device_for_each_child(&nvdimm_bus->dev, uuid,
+- is_namespace_uuid_busy) != 0)
+- return false;
+- return true;
+-}
+-
+ static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
--- /dev/null
+From 96f859d52bcb1c6ea6f3388d39862bf7143e2f30 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 4 Jan 2016 16:13:21 +1100
+Subject: libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 96f859d52bcb1c6ea6f3388d39862bf7143e2f30 upstream.
+
+Because struct xfs_agfl is 36 bytes long and has a 64-bit integer
+inside it, gcc will quietly round the structure size up to the nearest
+64 bits -- in this case, 40 bytes. This results in the XFS_AGFL_SIZE
+macro returning incorrect results for v5 filesystems on 64-bit
+machines (118 items instead of 119). As a result, a 32-bit xfs_repair
+will see garbage in AGFL item 119 and complain.
+
+Therefore, tell gcc not to pad the structure so that the AGFL size
+calculation is correct.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_format.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_format.h
++++ b/fs/xfs/libxfs/xfs_format.h
+@@ -786,7 +786,7 @@ typedef struct xfs_agfl {
+ __be64 agfl_lsn;
+ __be32 agfl_crc;
+ __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
+-} xfs_agfl_t;
++} __attribute__((packed)) xfs_agfl_t;
+
+ #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
+
--- /dev/null
+From 601f1db653217f205ffa5fb33514b4e1711e56d1 Mon Sep 17 00:00:00 2001
+From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Date: Thu, 14 Jan 2016 15:16:47 -0800
+Subject: m32r: fix m32104ut_defconfig build fail
+
+From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+
+commit 601f1db653217f205ffa5fb33514b4e1711e56d1 upstream.
+
+The build of m32104ut_defconfig for the m32r arch has been failing for a
+long time with the error:
+
+ ERROR: "memory_start" [fs/udf/udf.ko] undefined!
+ ERROR: "memory_end" [fs/udf/udf.ko] undefined!
+ ERROR: "memory_end" [drivers/scsi/sg.ko] undefined!
+ ERROR: "memory_start" [drivers/scsi/sg.ko] undefined!
+ ERROR: "memory_end" [drivers/i2c/i2c-dev.ko] undefined!
+ ERROR: "memory_start" [drivers/i2c/i2c-dev.ko] undefined!
+
+As done in other architectures export the symbols to fix the error.
+
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/m32r/kernel/setup.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/m32r/kernel/setup.c
++++ b/arch/m32r/kernel/setup.c
+@@ -81,7 +81,10 @@ static struct resource code_resource = {
+ };
+
+ unsigned long memory_start;
++EXPORT_SYMBOL(memory_start);
++
+ unsigned long memory_end;
++EXPORT_SYMBOL(memory_end);
+
+ void __init setup_arch(char **);
+ int get_cpuinfo(char *);
--- /dev/null
+From 114bf37e04d839b555b3dc460b5e6ce156f49cf0 Mon Sep 17 00:00:00 2001
+From: Rich Felker <dalias@libc.org>
+Date: Fri, 22 Jan 2016 15:11:05 -0800
+Subject: MAINTAINERS: return arch/sh to maintained state, with new maintainers
+
+From: Rich Felker <dalias@libc.org>
+
+commit 114bf37e04d839b555b3dc460b5e6ce156f49cf0 upstream.
+
+Add Yoshinori Sato and Rich Felker as maintainers for arch/sh
+(SUPERH).
+
+Signed-off-by: Rich Felker <dalias@libc.org>
+Signed-off-by: Yoshinori Sato <ysato@users.sourceforge.jp>
+Acked-by: D. Jeff Dionne <jeff@uClinux.org>
+Acked-by: Rob Landley <rob@landley.net>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Simon Horman <horms+renesas@verge.net.au>
+Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ MAINTAINERS | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -10289,9 +10289,11 @@ S: Maintained
+ F: drivers/net/ethernet/dlink/sundance.c
+
+ SUPERH
++M: Yoshinori Sato <ysato@users.sourceforge.jp>
++M: Rich Felker <dalias@libc.org>
+ L: linux-sh@vger.kernel.org
+ Q: http://patchwork.kernel.org/project/linux-sh/list/
+-S: Orphan
++S: Maintained
+ F: Documentation/sh/
+ F: arch/sh/
+ F: drivers/sh/
--- /dev/null
+From 6611d8d76132f86faa501de9451a89bf23fb2371 Mon Sep 17 00:00:00 2001
+From: Martijn Coenen <maco@google.com>
+Date: Fri, 15 Jan 2016 16:57:49 -0800
+Subject: memcg: only free spare array when readers are done
+
+From: Martijn Coenen <maco@google.com>
+
+commit 6611d8d76132f86faa501de9451a89bf23fb2371 upstream.
+
+A spare array holding mem cgroup threshold events is kept around to make
+sure we can always safely deregister an event and have an array to store
+the new set of events in.
+
+In the scenario where we're going from 1 to 0 registered events, the
+pointer to the primary array containing 1 event is copied to the spare
+slot, and then the spare slot is freed because no events are left.
+However, it is freed before calling synchronize_rcu(), which means
+readers may still be accessing threshold->primary after it is freed.
+
+Fixed by only freeing after synchronize_rcu().
+
+Signed-off-by: Martijn Coenen <maco@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -3522,16 +3522,17 @@ static void __mem_cgroup_usage_unregiste
+ swap_buffers:
+ /* Swap primary and spare array */
+ thresholds->spare = thresholds->primary;
+- /* If all events are unregistered, free the spare array */
+- if (!new) {
+- kfree(thresholds->spare);
+- thresholds->spare = NULL;
+- }
+
+ rcu_assign_pointer(thresholds->primary, new);
+
+ /* To be sure that nobody uses thresholds */
+ synchronize_rcu();
++
++ /* If all events are unregistered, free the spare array */
++ if (!new) {
++ kfree(thresholds->spare);
++ thresholds->spare = NULL;
++ }
+ unlock:
+ mutex_unlock(&memcg->thresholds_lock);
+ }
--- /dev/null
+From 7162a1e87b3e380133dadc7909081bb70d0a7041 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 21 Jan 2016 16:40:27 -0800
+Subject: mm: fix mlock accouting
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 7162a1e87b3e380133dadc7909081bb70d0a7041 upstream.
+
+Tetsuo Handa reported underflow of NR_MLOCK on munlock.
+
+Testcase:
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <sys/mman.h>
+
+ #define BASE ((void *)0x400000000000)
+ #define SIZE (1UL << 21)
+
+ int main(int argc, char *argv[])
+ {
+ void *addr;
+
+ system("grep Mlocked /proc/meminfo");
+ addr = mmap(BASE, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED | MAP_FIXED,
+ -1, 0);
+ if (addr == MAP_FAILED)
+ printf("mmap() failed\n"), exit(1);
+ munmap(addr, SIZE);
+ system("grep Mlocked /proc/meminfo");
+ return 0;
+ }
+
+It happens on munlock_vma_page() due to unfortunate choice of nr_pages
+data type:
+
+ __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+
+For unsigned int nr_pages, implicitly cast to long in
+__mod_zone_page_state(), it becomes something around UINT_MAX.
+
+munlock_vma_page() is usually called for THP, as small pages go through
+pagevec.
+
+Let's make nr_pages signed int.
+
+Similar fixes in 6cdb18ad98a4 ("mm/vmstat: fix overflow in
+mod_zone_page_state()") used `long' type, but `int' here is OK for a
+count of the number of sub-pages in a huge page.
+
+Fixes: ff6a6da60b89 ("mm: accelerate munlock() treatment of THP pages")
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Cc: Michel Lespinasse <walken@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mlock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -172,7 +172,7 @@ static void __munlock_isolation_failed(s
+ */
+ unsigned int munlock_vma_page(struct page *page)
+ {
+- unsigned int nr_pages;
++ int nr_pages;
+ struct zone *zone = page_zone(page);
+
+ /* For try_to_munlock() and to serialize with page migration */
--- /dev/null
+From 48f7df329474b49d83d0dffec1b6186647f11976 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Wed, 17 Feb 2016 13:11:15 -0800
+Subject: mm: fix regression in remap_file_pages() emulation
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 48f7df329474b49d83d0dffec1b6186647f11976 upstream.
+
+Grazvydas Ignotas has reported a regression in remap_file_pages()
+emulation.
+
+Testcase:
+ #define _GNU_SOURCE
+ #include <assert.h>
+ #include <stdlib.h>
+ #include <stdio.h>
+ #include <sys/mman.h>
+
+ #define SIZE (4096 * 3)
+
+ int main(int argc, char **argv)
+ {
+ unsigned long *p;
+ long i;
+
+ p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ return -1;
+ }
+
+ for (i = 0; i < SIZE / 4096; i++)
+ p[i * 4096 / sizeof(*p)] = i;
+
+ if (remap_file_pages(p, 4096, 0, 1, 0)) {
+ perror("remap_file_pages");
+ return -1;
+ }
+
+ if (remap_file_pages(p, 4096 * 2, 0, 1, 0)) {
+ perror("remap_file_pages");
+ return -1;
+ }
+
+ assert(p[0] == 1);
+
+ munmap(p, SIZE);
+
+ return 0;
+ }
+
+The second remap_file_pages() fails with -EINVAL.
+
+The reason is that remap_file_pages() emulation assumes that the target
+vma covers the whole area we want to over-map. That assumption is broken
+by the first remap_file_pages() call: it splits the area into two vmas.
+
+The solution is to check next adjacent vmas, if they map the same file
+with the same flags.
+
+Fixes: c8d78c1823f4 ("mm: replace remap_file_pages() syscall with emulation")
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Grazvydas Ignotas <notasas@gmail.com>
+Tested-by: Grazvydas Ignotas <notasas@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmap.c | 34 +++++++++++++++++++++++++++++-----
+ 1 file changed, 29 insertions(+), 5 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2668,12 +2668,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
+ if (!vma || !(vma->vm_flags & VM_SHARED))
+ goto out;
+
+- if (start < vma->vm_start || start + size > vma->vm_end)
++ if (start < vma->vm_start)
+ goto out;
+
+- if (pgoff == linear_page_index(vma, start)) {
+- ret = 0;
+- goto out;
++ if (start + size > vma->vm_end) {
++ struct vm_area_struct *next;
++
++ for (next = vma->vm_next; next; next = next->vm_next) {
++ /* hole between vmas ? */
++ if (next->vm_start != next->vm_prev->vm_end)
++ goto out;
++
++ if (next->vm_file != vma->vm_file)
++ goto out;
++
++ if (next->vm_flags != vma->vm_flags)
++ goto out;
++
++ if (start + size <= next->vm_end)
++ break;
++ }
++
++ if (!next)
++ goto out;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+@@ -2683,9 +2700,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED) {
++ struct vm_area_struct *tmp;
+ flags |= MAP_LOCKED;
++
+ /* drop PG_Mlocked flag for over-mapped range */
+- munlock_vma_pages_range(vma, start, start + size);
++ for (tmp = vma; tmp->vm_start >= start + size;
++ tmp = tmp->vm_next) {
++ munlock_vma_pages_range(tmp,
++ max(tmp->vm_start, start),
++ min(tmp->vm_end, start + size));
++ }
+ }
+
+ file = get_file(vma->vm_file);
--- /dev/null
+From 12352d3cae2cebe18805a91fab34b534d7444231 Mon Sep 17 00:00:00 2001
+From: Konstantin Khlebnikov <koct9i@gmail.com>
+Date: Fri, 5 Feb 2016 15:36:50 -0800
+Subject: mm: replace vma_lock_anon_vma with anon_vma_lock_read/write
+
+From: Konstantin Khlebnikov <koct9i@gmail.com>
+
+commit 12352d3cae2cebe18805a91fab34b534d7444231 upstream.
+
+Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if
+anon_vma appeared between lock and unlock. We have to check anon_vma
+first or call anon_vma_prepare() to be sure that it's here. There are
+only a few users of these legacy helpers. Let's get rid of them.
+
+This patch fixes anon_vma lock imbalance in validate_mm(). Write lock
+isn't required here, read lock is enough.
+
+It also reorders expand_downwards/expand_upwards: security_mmap_addr() and
+the wrapping-around check don't have to be under the anon vma lock.
+
+Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com
+Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/rmap.h | 14 ------------
+ mm/mmap.c | 55 +++++++++++++++++++++++----------------------------
+ 2 files changed, 25 insertions(+), 44 deletions(-)
+
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -108,20 +108,6 @@ static inline void put_anon_vma(struct a
+ __put_anon_vma(anon_vma);
+ }
+
+-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
+-{
+- struct anon_vma *anon_vma = vma->anon_vma;
+- if (anon_vma)
+- down_write(&anon_vma->root->rwsem);
+-}
+-
+-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
+-{
+- struct anon_vma *anon_vma = vma->anon_vma;
+- if (anon_vma)
+- up_write(&anon_vma->root->rwsem);
+-}
+-
+ static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
+ {
+ down_write(&anon_vma->root->rwsem);
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -441,12 +441,16 @@ static void validate_mm(struct mm_struct
+ struct vm_area_struct *vma = mm->mmap;
+
+ while (vma) {
++ struct anon_vma *anon_vma = vma->anon_vma;
+ struct anon_vma_chain *avc;
+
+- vma_lock_anon_vma(vma);
+- list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+- anon_vma_interval_tree_verify(avc);
+- vma_unlock_anon_vma(vma);
++ if (anon_vma) {
++ anon_vma_lock_read(anon_vma);
++ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
++ anon_vma_interval_tree_verify(avc);
++ anon_vma_unlock_read(anon_vma);
++ }
++
+ highest_address = vma->vm_end;
+ vma = vma->vm_next;
+ i++;
+@@ -2147,32 +2151,27 @@ static int acct_stack_growth(struct vm_a
+ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ {
+ struct mm_struct *mm = vma->vm_mm;
+- int error;
++ int error = 0;
+
+ if (!(vma->vm_flags & VM_GROWSUP))
+ return -EFAULT;
+
+- /*
+- * We must make sure the anon_vma is allocated
+- * so that the anon_vma locking is not a noop.
+- */
++ /* Guard against wrapping around to address 0. */
++ if (address < PAGE_ALIGN(address+4))
++ address = PAGE_ALIGN(address+4);
++ else
++ return -ENOMEM;
++
++ /* We must make sure the anon_vma is allocated. */
+ if (unlikely(anon_vma_prepare(vma)))
+ return -ENOMEM;
+- vma_lock_anon_vma(vma);
+
+ /*
+ * vma->vm_start/vm_end cannot change under us because the caller
+ * is required to hold the mmap_sem in read mode. We need the
+ * anon_vma lock to serialize against concurrent expand_stacks.
+- * Also guard against wrapping around to address 0.
+ */
+- if (address < PAGE_ALIGN(address+4))
+- address = PAGE_ALIGN(address+4);
+- else {
+- vma_unlock_anon_vma(vma);
+- return -ENOMEM;
+- }
+- error = 0;
++ anon_vma_lock_write(vma->anon_vma);
+
+ /* Somebody else might have raced and expanded it already */
+ if (address > vma->vm_end) {
+@@ -2190,7 +2189,7 @@ int expand_upwards(struct vm_area_struct
+ * updates, but we only hold a shared mmap_sem
+ * lock here, so we need to protect against
+ * concurrent vma expansions.
+- * vma_lock_anon_vma() doesn't help here, as
++ * anon_vma_lock_write() doesn't help here, as
+ * we don't guarantee that all growable vmas
+ * in a mm share the same root anon vma.
+ * So, we reuse mm->page_table_lock to guard
+@@ -2214,7 +2213,7 @@ int expand_upwards(struct vm_area_struct
+ }
+ }
+ }
+- vma_unlock_anon_vma(vma);
++ anon_vma_unlock_write(vma->anon_vma);
+ khugepaged_enter_vma_merge(vma, vma->vm_flags);
+ validate_mm(mm);
+ return error;
+@@ -2230,25 +2229,21 @@ int expand_downwards(struct vm_area_stru
+ struct mm_struct *mm = vma->vm_mm;
+ int error;
+
+- /*
+- * We must make sure the anon_vma is allocated
+- * so that the anon_vma locking is not a noop.
+- */
+- if (unlikely(anon_vma_prepare(vma)))
+- return -ENOMEM;
+-
+ address &= PAGE_MASK;
+ error = security_mmap_addr(address);
+ if (error)
+ return error;
+
+- vma_lock_anon_vma(vma);
++ /* We must make sure the anon_vma is allocated. */
++ if (unlikely(anon_vma_prepare(vma)))
++ return -ENOMEM;
+
+ /*
+ * vma->vm_start/vm_end cannot change under us because the caller
+ * is required to hold the mmap_sem in read mode. We need the
+ * anon_vma lock to serialize against concurrent expand_stacks.
+ */
++ anon_vma_lock_write(vma->anon_vma);
+
+ /* Somebody else might have raced and expanded it already */
+ if (address < vma->vm_start) {
+@@ -2266,7 +2261,7 @@ int expand_downwards(struct vm_area_stru
+ * updates, but we only hold a shared mmap_sem
+ * lock here, so we need to protect against
+ * concurrent vma expansions.
+- * vma_lock_anon_vma() doesn't help here, as
++ * anon_vma_lock_write() doesn't help here, as
+ * we don't guarantee that all growable vmas
+ * in a mm share the same root anon vma.
+ * So, we reuse mm->page_table_lock to guard
+@@ -2288,7 +2283,7 @@ int expand_downwards(struct vm_area_stru
+ }
+ }
+ }
+- vma_unlock_anon_vma(vma);
++ anon_vma_unlock_write(vma->anon_vma);
+ khugepaged_enter_vma_merge(vma, vma->vm_flags);
+ validate_mm(mm);
+ return error;
--- /dev/null
+From d96b339f453997f2f08c52da3f41423be48c978f Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Fri, 15 Jan 2016 16:54:03 -0800
+Subject: mm: soft-offline: check return value in second __get_any_page() call
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit d96b339f453997f2f08c52da3f41423be48c978f upstream.
+
+I saw the following BUG_ON triggered in a testcase where a process calls
+madvise(MADV_SOFT_OFFLINE) on thps, along with a background process that
+calls migratepages command repeatedly (doing ping-pong among different
+NUMA nodes) for the first process:
+
+ Soft offlining page 0x60000 at 0x700000600000
+ __get_any_page: 0x60000 free buddy page
+ page:ffffea0001800000 count:0 mapcount:-127 mapping: (null) index:0x1
+ flags: 0x1fffc0000000000()
+ page dumped because: VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0)
+ ------------[ cut here ]------------
+ kernel BUG at /src/linux-dev/include/linux/mm.h:342!
+ invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC
+ Modules linked in: cfg80211 rfkill crc32c_intel serio_raw virtio_balloon i2c_piix4 virtio_blk virtio_net ata_generic pata_acpi
+ CPU: 3 PID: 3035 Comm: test_alloc_gene Tainted: G O 4.4.0-rc8-v4.4-rc8-160107-1501-00000-rc8+ #74
+ Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+ task: ffff88007c63d5c0 ti: ffff88007c210000 task.ti: ffff88007c210000
+ RIP: 0010:[<ffffffff8118998c>] [<ffffffff8118998c>] put_page+0x5c/0x60
+ RSP: 0018:ffff88007c213e00 EFLAGS: 00010246
+ Call Trace:
+ put_hwpoison_page+0x4e/0x80
+ soft_offline_page+0x501/0x520
+ SyS_madvise+0x6bc/0x6f0
+ entry_SYSCALL_64_fastpath+0x12/0x6a
+ Code: 8b fc ff ff 5b 5d c3 48 89 df e8 b0 fa ff ff 48 89 df 31 f6 e8 c6 7d ff ff 5b 5d c3 48 c7 c6 08 54 a2 81 48 89 df e8 a4 c5 01 00 <0f> 0b 66 90 66 66 66 66 90 55 48 89 e5 41 55 41 54 53 48 8b 47
+ RIP [<ffffffff8118998c>] put_page+0x5c/0x60
+ RSP <ffff88007c213e00>
+
+The root cause resides in get_any_page(), which retries getting a refcount
+of the page to be soft-offlined. This function calls
+put_hwpoison_page(), expecting that the target page is put back on the LRU
+list. But it can also be freed to buddy, so the second check needs to
+handle that case.
+
+Fixes: af8fae7c0886 ("mm/memory-failure.c: clean up soft_offline_page()")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Jerome Marchand <jmarchan@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Steve Capper <steve.capper@linaro.org>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1572,7 +1572,7 @@ static int get_any_page(struct page *pag
+ * Did it turn free?
+ */
+ ret = __get_any_page(page, pfn, 0);
+- if (!PageLRU(page)) {
++ if (ret == 1 && !PageLRU(page)) {
+ /* Drop page reference which is from __get_any_page() */
+ put_hwpoison_page(page);
+ pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
--- /dev/null
+From 6a6ac72fd6ea32594b316513e1826c3f6db4cc93 Mon Sep 17 00:00:00 2001
+From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
+Date: Thu, 11 Feb 2016 16:13:09 -0800
+Subject: mm,thp: khugepaged: call pte flush at the time of collapse
+
+From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
+
+commit 6a6ac72fd6ea32594b316513e1826c3f6db4cc93 upstream.
+
+This showed up on ARC when running LMBench bw_mem tests as Overlapping
+TLB Machine Check Exception triggered due to STLB entry (2M pages)
+overlapping some NTLB entry (regular 8K page).
+
+bw_mem 2m touches a large chunk of vaddr creating NTLB entries. In the
+interim khugepaged kicks in, collapsing the contiguous ptes into a
+single pmd. pmdp_collapse_flush()->flush_pmd_tlb_range() is called to
+flush out NTLB entries for the ptes. This for ARC (by design) can only
+shootdown STLB entries (for pmd). The stray NTLB entries cause the
+overlap with the subsequent STLB entry for collapsed page. So make
+pmdp_collapse_flush() call pte flush interface not pmd flush.
+
+Note that originally all thp flush call sites in generic code called
+flush_tlb_range() leaving it to architecture to implement the flush for
+pte and/or pmd. Commit 12ebc1581ad11454 changed this by calling a new
+opt-in API flush_pmd_tlb_range() which made the semantics more explicit
+but failed to distinguish the pte vs pmd flush in generic code, which is
+what this patch fixes.
+
+Note that ARC could be fixed w/o touching the generic pmdp_collapse_flush()
+by defining an ARC version, but that defeats the purpose of the generic
+version, plus semantically this is the right thing to do.
+
+Fixes STAR 9000961194: LMBench on AXS103 triggering duplicate TLB
+exceptions with super pages
+
+Fixes: 12ebc1581ad11454 ("mm,thp: introduce flush_pmd_tlb_range")
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/pgtable-generic.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/mm/pgtable-generic.c
++++ b/mm/pgtable-generic.c
+@@ -210,7 +210,9 @@ pmd_t pmdp_collapse_flush(struct vm_area
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(pmd_trans_huge(*pmdp));
+ pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+- flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
++
++ /* collapse entails shooting down ptes not pmd */
++ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ return pmd;
+ }
+ #endif
--- /dev/null
+From 2e7bac536106236104e9e339531ff0fcdb7b8147 Mon Sep 17 00:00:00 2001
+From: Rusty Russell <rusty@rustcorp.com.au>
+Date: Wed, 3 Feb 2016 16:55:26 +1030
+Subject: module: wrapper for symbol name.
+
+From: Rusty Russell <rusty@rustcorp.com.au>
+
+commit 2e7bac536106236104e9e339531ff0fcdb7b8147 upstream.
+
+This trivial wrapper adds clarity and makes the following patch
+smaller.
+
+Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/module.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -3646,6 +3646,11 @@ static inline int is_arm_mapping_symbol(
+ && (str[2] == '\0' || str[2] == '.');
+ }
+
++static const char *symname(struct module *mod, unsigned int symnum)
++{
++ return mod->strtab + mod->symtab[symnum].st_name;
++}
++
+ static const char *get_ksymbol(struct module *mod,
+ unsigned long addr,
+ unsigned long *size,
+@@ -3668,15 +3673,15 @@ static const char *get_ksymbol(struct mo
+
+ /* We ignore unnamed symbols: they're uninformative
+ * and inserted at a whim. */
++ if (*symname(mod, i) == '\0'
++ || is_arm_mapping_symbol(symname(mod, i)))
++ continue;
++
+ if (mod->symtab[i].st_value <= addr
+- && mod->symtab[i].st_value > mod->symtab[best].st_value
+- && *(mod->strtab + mod->symtab[i].st_name) != '\0'
+- && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
++ && mod->symtab[i].st_value > mod->symtab[best].st_value)
+ best = i;
+ if (mod->symtab[i].st_value > addr
+- && mod->symtab[i].st_value < nextval
+- && *(mod->strtab + mod->symtab[i].st_name) != '\0'
+- && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
++ && mod->symtab[i].st_value < nextval)
+ nextval = mod->symtab[i].st_value;
+ }
+
+@@ -3687,7 +3692,7 @@ static const char *get_ksymbol(struct mo
+ *size = nextval - mod->symtab[best].st_value;
+ if (offset)
+ *offset = addr - mod->symtab[best].st_value;
+- return mod->strtab + mod->symtab[best].st_name;
++ return symname(mod, best);
+ }
+
+ /* For kallsyms to ask for address resolution. NULL means not found. Careful
+@@ -3782,8 +3787,7 @@ int module_get_kallsym(unsigned int symn
+ if (symnum < mod->num_symtab) {
+ *value = mod->symtab[symnum].st_value;
+ *type = mod->symtab[symnum].st_info;
+- strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
+- KSYM_NAME_LEN);
++ strlcpy(name, symname(mod, symnum), KSYM_NAME_LEN);
+ strlcpy(module_name, mod->name, MODULE_NAME_LEN);
+ *exported = is_exported(name, *value, mod);
+ preempt_enable();
+@@ -3800,7 +3804,7 @@ static unsigned long mod_find_symname(st
+ unsigned int i;
+
+ for (i = 0; i < mod->num_symtab; i++)
+- if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
++ if (strcmp(name, symname(mod, i)) == 0 &&
+ mod->symtab[i].st_info != 'U')
+ return mod->symtab[i].st_value;
+ return 0;
+@@ -3844,7 +3848,7 @@ int module_kallsyms_on_each_symbol(int (
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ for (i = 0; i < mod->num_symtab; i++) {
+- ret = fn(data, mod->strtab + mod->symtab[i].st_name,
++ ret = fn(data, symname(mod, i),
+ mod, mod->symtab[i].st_value);
+ if (ret != 0)
+ return ret;
--- /dev/null
+From 4355efbd80482a961cae849281a8ef866e53d55c Mon Sep 17 00:00:00 2001
+From: "Luis R. Rodriguez" <mcgrof@suse.com>
+Date: Wed, 3 Feb 2016 16:55:26 +1030
+Subject: modules: fix modparam async_probe request
+
+From: Luis R. Rodriguez <mcgrof@suse.com>
+
+commit 4355efbd80482a961cae849281a8ef866e53d55c upstream.
+
+Commit f2411da746985 ("driver-core: add driver module
+asynchronous probe support") added async probe support,
+in two forms:
+
+ * in-kernel driver specification annotation
+ * generic async_probe module parameter (modprobe foo async_probe)
+
+To support the generic kernel parameter parse_args() was
+extended via commit ecc8617053e0 ("module: add extra
+argument for parse_params() callback") however commit
+f2411da746985 failed to add the required argument.
+
+This causes a crash whenever the async_probe generic
+module parameter is used. This was overlooked when the
+form in which in-kernel async probe support was reworked
+a bit... Fix this as originally intended.
+
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
+Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> [minimized]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/module.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -3515,7 +3515,7 @@ static int load_module(struct load_info
+
+ /* Module is ready to execute: parsing args may do that. */
+ after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+- -32768, 32767, NULL,
++ -32768, 32767, mod,
+ unknown_module_param_cb);
+ if (IS_ERR(after_dashes)) {
+ err = PTR_ERR(after_dashes);
--- /dev/null
+From ade14a7df796d4e86bd9d181193c883a57b13db0 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Tue, 29 Dec 2015 18:55:19 -0500
+Subject: NFS: Fix attribute cache revalidation
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit ade14a7df796d4e86bd9d181193c883a57b13db0 upstream.
+
+If a NFSv4 client uses the cache_consistency_bitmask in order to
+request only information about the change attribute, timestamps and
+size, then it has not revalidated all attributes, and hence the
+attribute timeout timestamp should not be updated.
+
+Reported-by: Donald Buczek <buczek@molgen.mpg.de>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/inode.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 39 insertions(+), 15 deletions(-)
+
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -1641,6 +1641,7 @@ static int nfs_update_inode(struct inode
+ unsigned long invalid = 0;
+ unsigned long now = jiffies;
+ unsigned long save_cache_validity;
++ bool cache_revalidated = true;
+
+ dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
+ __func__, inode->i_sb->s_id, inode->i_ino,
+@@ -1702,22 +1703,28 @@ static int nfs_update_inode(struct inode
+ nfs_force_lookup_revalidate(inode);
+ inode->i_version = fattr->change_attr;
+ }
+- } else
++ } else {
+ nfsi->cache_validity |= save_cache_validity;
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
+ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+- } else if (server->caps & NFS_CAP_MTIME)
++ } else if (server->caps & NFS_CAP_MTIME) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
+ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+- } else if (server->caps & NFS_CAP_CTIME)
++ } else if (server->caps & NFS_CAP_CTIME) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ /* Check if our cached file size is stale */
+ if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
+@@ -1737,19 +1744,23 @@ static int nfs_update_inode(struct inode
+ (long long)cur_isize,
+ (long long)new_isize);
+ }
+- } else
++ } else {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_PAGECACHE
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+
+ if (fattr->valid & NFS_ATTR_FATTR_ATIME)
+ memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+- else if (server->caps & NFS_CAP_ATIME)
++ else if (server->caps & NFS_CAP_ATIME) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATIME
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_MODE) {
+ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
+@@ -1758,36 +1769,42 @@ static int nfs_update_inode(struct inode
+ inode->i_mode = newmode;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ }
+- } else if (server->caps & NFS_CAP_MODE)
++ } else if (server->caps & NFS_CAP_MODE) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
+ if (!uid_eq(inode->i_uid, fattr->uid)) {
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ inode->i_uid = fattr->uid;
+ }
+- } else if (server->caps & NFS_CAP_OWNER)
++ } else if (server->caps & NFS_CAP_OWNER) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
+ if (!gid_eq(inode->i_gid, fattr->gid)) {
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ inode->i_gid = fattr->gid;
+ }
+- } else if (server->caps & NFS_CAP_OWNER_GROUP)
++ } else if (server->caps & NFS_CAP_OWNER_GROUP) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
+ if (inode->i_nlink != fattr->nlink) {
+@@ -1796,19 +1813,22 @@ static int nfs_update_inode(struct inode
+ invalid |= NFS_INO_INVALID_DATA;
+ set_nlink(inode, fattr->nlink);
+ }
+- } else if (server->caps & NFS_CAP_NLINK)
++ } else if (server->caps & NFS_CAP_NLINK) {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_FORCED);
++ cache_revalidated = false;
++ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
+ /*
+ * report the blocks in 512byte units
+ */
+ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+- }
+- if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
++ } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+ inode->i_blocks = fattr->du.nfs2.blocks;
++ else
++ cache_revalidated = false;
+
+ /* Update attrtimeo value if we're out of the unstable period */
+ if (invalid & NFS_INO_INVALID_ATTR) {
+@@ -1818,9 +1838,13 @@ static int nfs_update_inode(struct inode
+ /* Set barrier to be more recent than all outstanding updates */
+ nfsi->attr_gencount = nfs_inc_attr_generation_counter();
+ } else {
+- if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+- if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
+- nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
++ if (cache_revalidated) {
++ if (!time_in_range_open(now, nfsi->attrtimeo_timestamp,
++ nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
++ nfsi->attrtimeo <<= 1;
++ if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode))
++ nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
++ }
+ nfsi->attrtimeo_timestamp = now;
+ }
+ /* Set the barrier to be more recent than this fattr */
+@@ -1829,7 +1853,7 @@ static int nfs_update_inode(struct inode
+ }
+
+ /* Don't declare attrcache up to date if there were no attrs! */
+- if (fattr->valid != 0)
++ if (cache_revalidated)
+ invalid &= ~NFS_INO_INVALID_ATTR;
+
+ /* Don't invalidate the data if we were to blame */
--- /dev/null
+From 361cad3c89070aeb37560860ea8bfc092d545adc Mon Sep 17 00:00:00 2001
+From: Andrew Elble <aweits@rit.edu>
+Date: Wed, 2 Dec 2015 09:20:57 -0500
+Subject: nfs: Fix race in __update_open_stateid()
+
+From: Andrew Elble <aweits@rit.edu>
+
+commit 361cad3c89070aeb37560860ea8bfc092d545adc upstream.
+
+We've seen this in a packet capture - I've intermixed what I
+think was going on. The fix here is to grab the so_lock sooner.
+
+1964379 -> #1 open (for write) reply seqid=1
+1964393 -> #2 open (for read) reply seqid=2
+
+ __nfs4_close(), state->n_wronly--
+ nfs4_state_set_mode_locked(), changes state->state = [R]
+ state->flags is [RW]
+ state->state is [R], state->n_wronly == 0, state->n_rdonly == 1
+
+1964398 -> #3 open (for write) call -> because close is already running
+1964399 -> downgrade (to read) call seqid=2 (close of #1)
+1964402 -> #3 open (for write) reply seqid=3
+
+ __update_open_stateid()
+ nfs_set_open_stateid_locked(), changes state->flags
+ state->flags is [RW]
+ state->state is [R], state->n_wronly == 0, state->n_rdonly == 1
+ new sequence number is exposed now via nfs4_stateid_copy()
+
+ next step would be update_open_stateflags(), pending so_lock
+
+1964403 -> downgrade reply seqid=2, fails with OLD_STATEID (close of #1)
+
+ nfs4_close_prepare() gets so_lock and recalcs flags -> send close
+
+1964405 -> downgrade (to read) call seqid=3 (close of #1 retry)
+
+ __update_open_stateid() gets so_lock
+ * update_open_stateflags() updates state->n_wronly.
+ nfs4_state_set_mode_locked() updates state->state
+
+ state->flags is [RW]
+ state->state is [RW], state->n_wronly == 1, state->n_rdonly == 1
+
+ * should have suppressed the preceding nfs4_close_prepare() from
+ sending open_downgrade
+
+1964406 -> write call
+1964408 -> downgrade (to read) reply seqid=4 (close of #1 retry)
+
+ nfs_clear_open_stateid_locked()
+ state->flags is [R]
+ state->state is [RW], state->n_wronly == 1, state->n_rdonly == 1
+
+1964409 -> write reply (fails, openmode)
+
+Signed-off-by: Andrew Elble <aweits@rit.edu>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1385,6 +1385,7 @@ static void __update_open_stateid(struct
+ * Protect the call to nfs4_state_set_mode_locked and
+ * serialise the stateid update
+ */
++ spin_lock(&state->owner->so_lock);
+ write_seqlock(&state->seqlock);
+ if (deleg_stateid != NULL) {
+ nfs4_stateid_copy(&state->stateid, deleg_stateid);
+@@ -1393,7 +1394,6 @@ static void __update_open_stateid(struct
+ if (open_stateid != NULL)
+ nfs_set_open_stateid_locked(state, open_stateid, fmode);
+ write_sequnlock(&state->seqlock);
+- spin_lock(&state->owner->so_lock);
+ update_open_stateflags(state, fmode);
+ spin_unlock(&state->owner->so_lock);
+ }
--- /dev/null
+From 5c2ff95e41c9290d16556cd02e35b25d81be8fe0 Mon Sep 17 00:00:00 2001
+From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Date: Tue, 2 Feb 2016 16:57:26 -0800
+Subject: numa: fix /proc/<pid>/numa_maps for hugetlbfs on s390
+
+From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+
+commit 5c2ff95e41c9290d16556cd02e35b25d81be8fe0 upstream.
+
+When working with hugetlbfs ptes (which are actually pmds) it is not valid to
+directly use pte functions like pte_present() because the hardware bit
+layout of pmds and ptes can be different. This is the case on s390.
+Therefore we have to convert the hugetlbfs ptes first into a valid pte
+encoding with huge_ptep_get().
+
+Currently the /proc/<pid>/numa_maps code uses hugetlbfs ptes without
+huge_ptep_get(). On s390 this leads to the following two problems:
+
+1) The pte_present() function returns false (instead of true) for
+ PROT_NONE hugetlb ptes. Therefore PROT_NONE vmas are missing
+ completely in the "numa_maps" output.
+
+2) The pte_dirty() function always returns false for all hugetlb ptes.
+ Therefore these pages are reported as "mapped=xxx" instead of
+ "dirty=xxx".
+
+Therefore use huge_ptep_get() to correctly convert the hugetlb ptes.
+
+Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/task_mmu.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -1473,18 +1473,19 @@ static int gather_pte_stats(pmd_t *pmd,
+ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end, struct mm_walk *walk)
+ {
++ pte_t huge_pte = huge_ptep_get(pte);
+ struct numa_maps *md;
+ struct page *page;
+
+- if (!pte_present(*pte))
++ if (!pte_present(huge_pte))
+ return 0;
+
+- page = pte_page(*pte);
++ page = pte_page(huge_pte);
+ if (!page)
+ return 0;
+
+ md = walk->private;
+- gather_stats(page, md, pte_dirty(*pte), 1);
++ gather_stats(page, md, pte_dirty(huge_pte), 1);
+ return 0;
+ }
+
--- /dev/null
+From 97daf8b97ad6f913a34c82515be64dc9ac08d63e Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <miklos@szeredi.hu>
+Date: Tue, 10 Nov 2015 17:08:41 +0100
+Subject: ovl: allow zero size xattr
+
+From: Miklos Szeredi <miklos@szeredi.hu>
+
+commit 97daf8b97ad6f913a34c82515be64dc9ac08d63e upstream.
+
+When ovl_copy_xattr() encountered a zero size xattr no more xattrs were
+copied and the function returned success. This is clearly not the desired
+behavior.
+
+Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/copy_up.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -54,7 +54,7 @@ int ovl_copy_xattr(struct dentry *old, s
+
+ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+ size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
+- if (size <= 0) {
++ if (size < 0) {
+ error = size;
+ goto out_free_value;
+ }
--- /dev/null
+From 84889d49335627bc770b32787c1ef9ebad1da232 Mon Sep 17 00:00:00 2001
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Mon, 16 Nov 2015 18:44:11 +0300
+Subject: ovl: check dentry positiveness in ovl_cleanup_whiteouts()
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+commit 84889d49335627bc770b32787c1ef9ebad1da232 upstream.
+
+This patch fixes kernel crash at removing directory which contains
+whiteouts from lower layers.
+
+Cache of directory content passed as "list" contains entries from all
+layers, including whiteouts from lower layers. So, lookup in upper dir
+(moved into work at this stage) will return negative entry. Plus this
+cache is filled long before and we can race with external removal.
+
+Example:
+ mkdir -p lower0/dir lower1/dir upper work overlay
+ touch lower0/dir/a lower0/dir/b
+ mknod lower1/dir/a c 0 0
+ mount -t overlay none overlay -o lowerdir=lower1:lower0,upperdir=upper,workdir=work
+ rm -fr overlay/dir
+
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/readdir.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/readdir.c
++++ b/fs/overlayfs/readdir.c
+@@ -571,7 +571,8 @@ void ovl_cleanup_whiteouts(struct dentry
+ (int) PTR_ERR(dentry));
+ continue;
+ }
+- ovl_cleanup(upper->d_inode, dentry);
++ if (dentry->d_inode)
++ ovl_cleanup(upper->d_inode, dentry);
+ dput(dentry);
+ }
+ mutex_unlock(&upper->d_inode->i_mutex);
--- /dev/null
+From ed06e069775ad9236087594a1c1667367e983fb5 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <miklos@szeredi.hu>
+Date: Wed, 9 Dec 2015 16:11:59 +0100
+Subject: ovl: root: copy attr
+
+From: Miklos Szeredi <miklos@szeredi.hu>
+
+commit ed06e069775ad9236087594a1c1667367e983fb5 upstream.
+
+We copy i_uid and i_gid of underlying inode into overlayfs inode. Except
+for the root inode.
+
+Fix this omission.
+
+Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/super.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -1053,6 +1053,9 @@ static int ovl_fill_super(struct super_b
+
+ root_dentry->d_fsdata = oe;
+
++ ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode,
++ root_dentry->d_inode);
++
+ sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+ sb->s_op = &ovl_super_operations;
+ sb->s_root = root_dentry;
--- /dev/null
+From cf9a6784f7c1b5ee2b9159a1246e327c331c5697 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <miklos@szeredi.hu>
+Date: Fri, 11 Dec 2015 16:30:49 +0100
+Subject: ovl: setattr: check permissions before copy-up
+
+From: Miklos Szeredi <miklos@szeredi.hu>
+
+commit cf9a6784f7c1b5ee2b9159a1246e327c331c5697 upstream.
+
+Without this copy-up of a file can be forced, even without actually being
+allowed to do anything on the file.
+
+[Arnd Bergmann] include <linux/pagemap.h> for PAGE_CACHE_SIZE (used by
+MAX_LFS_FILESIZE definition).
+
+Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/inode.c | 13 +++++++++++++
+ fs/overlayfs/super.c | 2 ++
+ 2 files changed, 15 insertions(+)
+
+--- a/fs/overlayfs/inode.c
++++ b/fs/overlayfs/inode.c
+@@ -42,6 +42,19 @@ int ovl_setattr(struct dentry *dentry, s
+ int err;
+ struct dentry *upperdentry;
+
++ /*
++ * Check for permissions before trying to copy-up. This is redundant
++ * since it will be rechecked later by ->setattr() on upper dentry. But
++ * without this, copy-up can be triggered by just about anybody.
++ *
++ * We don't initialize inode->size, which just means that
++ * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
++ * check for a swapfile (which this won't be anyway).
++ */
++ err = inode_change_ok(dentry->d_inode, attr);
++ if (err)
++ return err;
++
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -9,6 +9,7 @@
+
+ #include <linux/fs.h>
+ #include <linux/namei.h>
++#include <linux/pagemap.h>
+ #include <linux/xattr.h>
+ #include <linux/security.h>
+ #include <linux/mount.h>
+@@ -910,6 +911,7 @@ static int ovl_fill_super(struct super_b
+ }
+
+ sb->s_stack_depth = 0;
++ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ if (ufs->config.upperdir) {
+ if (!ufs->config.workdir) {
+ pr_err("overlayfs: missing 'workdir'\n");
--- /dev/null
+From e4ad29fa0d224d05e08b2858e65f112fd8edd4fe Mon Sep 17 00:00:00 2001
+From: Vito Caputo <vito.caputo@coreos.com>
+Date: Sat, 24 Oct 2015 07:19:46 -0500
+Subject: ovl: use a minimal buffer in ovl_copy_xattr
+
+From: Vito Caputo <vito.caputo@coreos.com>
+
+commit e4ad29fa0d224d05e08b2858e65f112fd8edd4fe upstream.
+
+Rather than always allocating the high-order XATTR_SIZE_MAX buffer
+which is costly and prone to failure, only allocate what is needed and
+realloc if necessary.
+
+Fixes https://github.com/coreos/bugs/issues/489
+
+Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/copy_up.c | 39 +++++++++++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 14 deletions(-)
+
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -22,9 +22,9 @@
+
+ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+ {
+- ssize_t list_size, size;
+- char *buf, *name, *value;
+- int error;
++ ssize_t list_size, size, value_size = 0;
++ char *buf, *name, *value = NULL;
++ int uninitialized_var(error);
+
+ if (!old->d_inode->i_op->getxattr ||
+ !new->d_inode->i_op->getxattr)
+@@ -41,29 +41,40 @@ int ovl_copy_xattr(struct dentry *old, s
+ if (!buf)
+ return -ENOMEM;
+
+- error = -ENOMEM;
+- value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
+- if (!value)
+- goto out;
+-
+ list_size = vfs_listxattr(old, buf, list_size);
+ if (list_size <= 0) {
+ error = list_size;
+- goto out_free_value;
++ goto out;
+ }
+
+ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+- size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
++retry:
++ size = vfs_getxattr(old, name, value, value_size);
++ if (size == -ERANGE)
++ size = vfs_getxattr(old, name, NULL, 0);
++
+ if (size < 0) {
+ error = size;
+- goto out_free_value;
++ break;
+ }
++
++ if (size > value_size) {
++ void *new;
++
++ new = krealloc(value, size, GFP_KERNEL);
++ if (!new) {
++ error = -ENOMEM;
++ break;
++ }
++ value = new;
++ value_size = size;
++ goto retry;
++ }
++
+ error = vfs_setxattr(new, name, value, size, 0);
+ if (error)
+- goto out_free_value;
++ break;
+ }
+-
+-out_free_value:
+ kfree(value);
+ out:
+ kfree(buf);
--- /dev/null
+From 3caeaa562733c4836e61086ec07666635006a787 Mon Sep 17 00:00:00 2001
+From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+Date: Mon, 7 Dec 2015 12:25:02 +0530
+Subject: perf kvm record/report: 'unprocessable sample' error while recording/reporting guest data
+
+From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+
+commit 3caeaa562733c4836e61086ec07666635006a787 upstream.
+
+While recording guest samples in host using perf kvm record, it will
+populate unprocessable sample error, though samples will be recorded
+properly. While generating report using perf kvm report, no samples will
+be processed and same error will populate. We have seen this behaviour
+with upstream perf(4.4-rc3) on x86 and ppc64 hardware.
+
+Reason behind this failure is, when it tries to fetch machine from
+rb_tree of machines, it fails. As a part of tracing a bug, we figured
+out that this code was incorrectly refactored in commit 54245fdc3576
+("perf session: Remove wrappers to machines__find").
+
+This patch will change the functionality such that if it can't fetch
+machine in first trial, it will create one node of machine and add that to
+rb_tree. So next time when it tries to fetch same machine from rb_tree,
+it won't fail. Actually it was the case before refactoring of code in
+aforementioned commit.
+
+This patch is generated from acme perf/core branch.
+
+Below I've mentioned an example that demonstrates the behaviour before and
+after applying patch.
+
+Before applying patch:
+[Note: One needs to run guest before recording data in host]
+
+ ravi@ravi-bangoria:~$ ./perf kvm record -a
+ Warning:
+ 5903 unprocessable samples recorded.
+ Do you have a KVM guest running and not using 'perf kvm'?
+ [ perf record: Captured and wrote 1.409 MB perf.data.guest (285 samples) ]
+
+ ravi@ravi-bangoria:~$ ./perf kvm report --stdio
+ Warning:
+ 5903 unprocessable samples recorded.
+ Do you have a KVM guest running and not using 'perf kvm'?
+ # To display the perf.data header info, please use --header/--header-only options.
+ #
+ # Total Lost Samples: 0
+ #
+ # Samples: 285 of event 'cycles'
+ # Event count (approx.): 88715406
+ #
+ # Overhead Command Shared Object Symbol
+ # ........ ....... ............. ......
+ #
+
+ # (For a higher level overview, try: perf report --sort comm,dso)
+ #
+
+After applying patch:
+
+ ravi@ravi-bangoria:~$ ./perf kvm record -a
+ [ perf record: Captured and wrote 1.188 MB perf.data.guest (17 samples) ]
+
+ ravi@ravi-bangoria:~$ ./perf kvm report --stdio
+ # To display the perf.data header info, please use --header/--header-only options.
+ #
+ # Total Lost Samples: 0
+ #
+ # Samples: 17 of event 'cycles'
+ # Event count (approx.): 700746
+ #
+ # Overhead Command Shared Object Symbol
+ # ........ ....... ................ ......................
+ #
+ 34.19% :5758 [unknown] [g] 0xffffffff818682ab
+ 22.79% :5758 [unknown] [g] 0xffffffff812dc7f8
+ 22.79% :5758 [unknown] [g] 0xffffffff818650d0
+ 14.83% :5758 [unknown] [g] 0xffffffff8161a1b6
+ 2.49% :5758 [unknown] [g] 0xffffffff818692bf
+ 0.48% :5758 [unknown] [g] 0xffffffff81869253
+ 0.05% :5758 [unknown] [g] 0xffffffff81869250
+
+Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Fixes: 54245fdc3576 ("perf session: Remove wrappers to machines__find")
+Link: http://lkml.kernel.org/r/1449471302-11283-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/session.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/perf/util/session.c
++++ b/tools/perf/util/session.c
+@@ -972,7 +972,7 @@ static struct machine *machines__find_fo
+
+ machine = machines__find(machines, pid);
+ if (!machine)
+- machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
++ machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
+ return machine;
+ }
+
--- /dev/null
+From ec183d22cc284a7a1e17f0341219d8ec8ca070cc Mon Sep 17 00:00:00 2001
+From: Adrian Hunter <adrian.hunter@intel.com>
+Date: Tue, 26 Jan 2016 14:05:20 +0200
+Subject: perf tools: tracepoint_error() can receive e=NULL, robustify it
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+commit ec183d22cc284a7a1e17f0341219d8ec8ca070cc upstream.
+
+Fixes segmentation fault using, for instance:
+
+ (gdb) run record -I -e intel_pt/tsc=1,noretcomp=1/u /bin/ls
+ Starting program: /home/acme/bin/perf record -I -e intel_pt/tsc=1,noretcomp=1/u /bin/ls
+ Missing separate debuginfos, use: dnf debuginfo-install glibc-2.22-7.fc23.x86_64
+ [Thread debugging using libthread_db enabled]
+ Using host libthread_db library "/lib64/libthread_db.so.1".
+
+ Program received signal SIGSEGV, Segmentation fault.
+ 0x00000000004b9ea5 in tracepoint_error (e=0x0, err=13, sys=0x19b1370 "sched", name=0x19a5d00 "sched_switch") at util/parse-events.c:410
+ (gdb) bt
+ #0 0x00000000004b9ea5 in tracepoint_error (e=0x0, err=13, sys=0x19b1370 "sched", name=0x19a5d00 "sched_switch") at util/parse-events.c:410
+ #1 0x00000000004b9fc5 in add_tracepoint (list=0x19a5d20, idx=0x7fffffffb8c0, sys_name=0x19b1370 "sched", evt_name=0x19a5d00 "sched_switch", err=0x0, head_config=0x0)
+ at util/parse-events.c:433
+ #2 0x00000000004ba334 in add_tracepoint_event (list=0x19a5d20, idx=0x7fffffffb8c0, sys_name=0x19b1370 "sched", evt_name=0x19a5d00 "sched_switch", err=0x0, head_config=0x0)
+ at util/parse-events.c:498
+ #3 0x00000000004bb699 in parse_events_add_tracepoint (list=0x19a5d20, idx=0x7fffffffb8c0, sys=0x19b1370 "sched", event=0x19a5d00 "sched_switch", err=0x0, head_config=0x0)
+ at util/parse-events.c:936
+ #4 0x00000000004f6eda in parse_events_parse (_data=0x7fffffffb8b0, scanner=0x19a49d0) at util/parse-events.y:391
+ #5 0x00000000004bc8e5 in parse_events__scanner (str=0x663ff2 "sched:sched_switch", data=0x7fffffffb8b0, start_token=258) at util/parse-events.c:1361
+ #6 0x00000000004bca57 in parse_events (evlist=0x19a5220, str=0x663ff2 "sched:sched_switch", err=0x0) at util/parse-events.c:1401
+ #7 0x0000000000518d5f in perf_evlist__can_select_event (evlist=0x19a3b90, str=0x663ff2 "sched:sched_switch") at util/record.c:253
+ #8 0x0000000000553c42 in intel_pt_track_switches (evlist=0x19a3b90) at arch/x86/util/intel-pt.c:364
+ #9 0x00000000005549d1 in intel_pt_recording_options (itr=0x19a2c40, evlist=0x19a3b90, opts=0x8edf68 <record+232>) at arch/x86/util/intel-pt.c:664
+ #10 0x000000000051e076 in auxtrace_record__options (itr=0x19a2c40, evlist=0x19a3b90, opts=0x8edf68 <record+232>) at util/auxtrace.c:539
+ #11 0x0000000000433368 in cmd_record (argc=1, argv=0x7fffffffde60, prefix=0x0) at builtin-record.c:1264
+ #12 0x000000000049bec2 in run_builtin (p=0x8fa2a8 <commands+168>, argc=5, argv=0x7fffffffde60) at perf.c:390
+ #13 0x000000000049c12a in handle_internal_command (argc=5, argv=0x7fffffffde60) at perf.c:451
+ #14 0x000000000049c278 in run_argv (argcp=0x7fffffffdcbc, argv=0x7fffffffdcb0) at perf.c:495
+ #15 0x000000000049c60a in main (argc=5, argv=0x7fffffffde60) at perf.c:618
+(gdb)
+
+Intel PT attempts to find the sched:sched_switch tracepoint but that seg
+faults if tracefs is not readable, because the error reporting structure
+is null, as errors are not reported when automatically adding
+tracepoints. Fix by checking before using.
+
+Committer note:
+
+This doesn't take place in a kernel that supports
+perf_event_attr.context_switch, that is the default way that will be
+used for tracking context switches, only in older kernels, like 4.2, in
+a machine with Intel PT (e.g. Broadwell) for non-privileged users.
+
+Further info from a similar patch by Wang:
+
+The error is in tracepoint_error: it assumes the 'e' parameter is valid.
+
+However, there are many situations where parse_events() can be called without
+parse_events_error. See result of
+
+ $ grep 'parse_events(.*NULL)' ./tools/perf/ -r
+
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Tong Zhang <ztong@vt.edu>
+Cc: Wang Nan <wangnan0@huawei.com>
+Fixes: 196581717d85 ("perf tools: Enhance parsing events tracepoint error output")
+Link: http://lkml.kernel.org/r/1453809921-24596-2-git-send-email-adrian.hunter@intel.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/parse-events.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/tools/perf/util/parse-events.c
++++ b/tools/perf/util/parse-events.c
+@@ -399,6 +399,9 @@ static void tracepoint_error(struct pars
+ {
+ char help[BUFSIZ];
+
++ if (!e)
++ return;
++
+ /*
+ * We get error directly from syscall errno ( > 0),
+ * or from encoded pointer's error ( < 0).
--- /dev/null
+From 58a66dba1beac2121d931cda4682ae4d40816af5 Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Mon, 30 Nov 2015 21:39:54 -0800
+Subject: phy: twl4030-usb: Fix unbalanced pm_runtime_enable on module reload
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit 58a66dba1beac2121d931cda4682ae4d40816af5 upstream.
+
+If we reload phy-twl4030-usb, we get a warning about unbalanced
+pm_runtime_enable. Let's fix the issue and also fix idling of the
+device on unload before we attempt to shut it down.
+
+If we don't properly idle the PHY before shutting it down on removal,
+the twl4030 ends up consuming about 62mW of extra power compared to
+running idle with the module loaded.
+
+Cc: Bin Liu <b-liu@ti.com>
+Cc: Felipe Balbi <balbi@ti.com>
+Cc: Kishon Vijay Abraham I <kishon@ti.com>
+Cc: NeilBrown <neil@brown.name>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/phy/phy-twl4030-usb.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/drivers/phy/phy-twl4030-usb.c
++++ b/drivers/phy/phy-twl4030-usb.c
+@@ -715,6 +715,7 @@ static int twl4030_usb_probe(struct plat
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, 2000);
+ pm_runtime_enable(&pdev->dev);
++ pm_runtime_get_sync(&pdev->dev);
+
+ /* Our job is to use irqs and status from the power module
+ * to keep the transceiver disabled when nothing's connected.
+@@ -758,6 +759,13 @@ static int twl4030_usb_remove(struct pla
+ /* set transceiver mode to power on defaults */
+ twl4030_usb_set_mode(twl, -1);
+
++ /* idle ulpi before powering off */
++ if (cable_present(twl->linkstat))
++ pm_runtime_put_noidle(twl->dev);
++ pm_runtime_mark_last_busy(twl->dev);
++ pm_runtime_put_sync_suspend(twl->dev);
++ pm_runtime_disable(twl->dev);
++
+ /* autogate 60MHz ULPI clock,
+ * clear dpll clock request for i2c access,
+ * disable 32KHz
+@@ -772,11 +780,6 @@ static int twl4030_usb_remove(struct pla
+ /* disable complete OTG block */
+ twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB);
+
+- if (cable_present(twl->linkstat))
+- pm_runtime_put_noidle(twl->dev);
+- pm_runtime_mark_last_busy(twl->dev);
+- pm_runtime_put(twl->dev);
+-
+ return 0;
+ }
+
--- /dev/null
+From b241d31ef2f6a289d33dcaa004714b26e06f476f Mon Sep 17 00:00:00 2001
+From: Tony Lindgren <tony@atomide.com>
+Date: Mon, 30 Nov 2015 21:39:53 -0800
+Subject: phy: twl4030-usb: Release usb phy on unload
+
+From: Tony Lindgren <tony@atomide.com>
+
+commit b241d31ef2f6a289d33dcaa004714b26e06f476f upstream.
+
+Otherwise rmmod omap2430; rmmod phy-twl4030-usb; modprobe omap2430
+will try to use a non-existing phy and oops:
+
+Unable to handle kernel paging request at virtual address b6f7c1f0
+...
+[<c048a284>] (devm_usb_get_phy_by_node) from [<bf0758ac>]
+(omap2430_musb_init+0x44/0x2b4 [omap2430])
+[<bf0758ac>] (omap2430_musb_init [omap2430]) from [<bf055ec0>]
+(musb_init_controller+0x194/0x878 [musb_hdrc])
+
+Cc: Bin Liu <b-liu@ti.com>
+Cc: Felipe Balbi <balbi@ti.com>
+Cc: Kishon Vijay Abraham I <kishon@ti.com>
+Cc: NeilBrown <neil@brown.name>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/phy/phy-twl4030-usb.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/phy/phy-twl4030-usb.c
++++ b/drivers/phy/phy-twl4030-usb.c
+@@ -750,6 +750,7 @@ static int twl4030_usb_remove(struct pla
+ struct twl4030_usb *twl = platform_get_drvdata(pdev);
+ int val;
+
++ usb_remove_phy(&twl->phy);
+ pm_runtime_get_sync(twl->dev);
+ cancel_delayed_work(&twl->id_workaround_work);
+ device_remove_file(twl->dev, &dev_attr_vbus);
--- /dev/null
+From 86fb449b07b8215443a30782dca5755d5b8b0577 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Wed, 30 Dec 2015 10:57:01 -0500
+Subject: pNFS/flexfiles: Fix an Oopsable typo in ff_mirror_match_fh()
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 86fb449b07b8215443a30782dca5755d5b8b0577 upstream.
+
+Jeff reports seeing an Oops in ff_layout_alloc_lseg. Turns out
+copy+paste has played cruel tricks on a nested loop.
+
+Reported-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/flexfilelayout/flexfilelayout.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -145,7 +145,7 @@ static bool ff_mirror_match_fh(const str
+ return false;
+ for (i = 0; i < m1->fh_versions_cnt; i++) {
+ bool found_fh = false;
+- for (j = 0; j < m2->fh_versions_cnt; i++) {
++ for (j = 0; j < m2->fh_versions_cnt; j++) {
+ if (nfs_compare_fh(&m1->fh_versions[i],
+ &m2->fh_versions[j]) == 0) {
+ found_fh = true;
--- /dev/null
+From 082fa37d1351a41afc491d44a1d095cb8d919aa2 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Thu, 21 Jan 2016 15:39:40 -0500
+Subject: pNFS/flexfiles: Fix an XDR encoding bug in layoutreturn
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 082fa37d1351a41afc491d44a1d095cb8d919aa2 upstream.
+
+We must not skip encoding the statistics, or the server will see an
+XDR encoding error.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -1859,11 +1859,9 @@ ff_layout_encode_layoutreturn(struct pnf
+ start = xdr_reserve_space(xdr, 4);
+ BUG_ON(!start);
+
+- if (ff_layout_encode_ioerr(flo, xdr, args))
+- goto out;
+-
++ ff_layout_encode_ioerr(flo, xdr, args);
+ ff_layout_encode_iostats(flo, xdr, args);
+-out:
++
+ *start = cpu_to_be32((xdr->p - start - 1) * 4);
+ dprintk("%s: Return\n", __func__);
+ }
--- /dev/null
+From 572c39172684c3711e4a03c9a7380067e2b0661c Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Jan 2016 16:54:47 +0000
+Subject: posix-timers: Handle relative timers with CONFIG_TIME_LOW_RES proper
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 572c39172684c3711e4a03c9a7380067e2b0661c upstream.
+
+As Helge reported for timerfd we have the same issue in posix timers. We
+return remaining time larger than the programmed relative time to user space
+in case of CONFIG_TIME_LOW_RES=y. Use the proper function to adjust the extra
+time added in hrtimer_start_range_ns().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Helge Deller <deller@gmx.de>
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: linux-m68k@lists.linux-m68k.org
+Cc: dhowells@redhat.com
+Link: http://lkml.kernel.org/r/20160114164159.450510905@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/posix-timers.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/time/posix-timers.c
++++ b/kernel/time/posix-timers.c
+@@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr,
+ (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+ timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+
+- remaining = ktime_sub(hrtimer_get_expires(timer), now);
++ remaining = __hrtimer_expires_remaining_adjusted(timer, now);
+ /* Return 0 only, when the timer is expired and not pending */
+ if (remaining.tv64 <= 0) {
+ /*
--- /dev/null
+From 7e56f627768da4e6480986b5145dc3422bc448a5 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Wed, 2 Dec 2015 16:25:32 +1100
+Subject: powerpc/eeh: Fix PE location code
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 7e56f627768da4e6480986b5145dc3422bc448a5 upstream.
+
+In eeh_pe_loc_get(), the PE location code is retrieved from the
+"ibm,loc-code" property of the device node for the bridge of the
+PE's primary bus. It's not correct because the property indicates
+the parent PE's location code.
+
+This reads the correct PE location code from "ibm,io-base-loc-code"
+or "ibm,slot-location-code" property of PE parent bus's device node.
+
+Fixes: 357b2f3dd9b7 ("powerpc/eeh: Dump PE location code")
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Russell Currey <ruscur@russell.cc>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/eeh_pe.c | 33 +++++++++++++++------------------
+ 1 file changed, 15 insertions(+), 18 deletions(-)
+
+--- a/arch/powerpc/kernel/eeh_pe.c
++++ b/arch/powerpc/kernel/eeh_pe.c
+@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *
+ const char *eeh_pe_loc_get(struct eeh_pe *pe)
+ {
+ struct pci_bus *bus = eeh_pe_bus_get(pe);
+- struct device_node *dn = pci_bus_to_OF_node(bus);
++ struct device_node *dn;
+ const char *loc = NULL;
+
+- if (!dn)
+- goto out;
++ while (bus) {
++ dn = pci_bus_to_OF_node(bus);
++ if (!dn) {
++ bus = bus->parent;
++ continue;
++ }
+
+- /* PHB PE or root PE ? */
+- if (pci_is_root_bus(bus)) {
+- loc = of_get_property(dn, "ibm,loc-code", NULL);
+- if (!loc)
++ if (pci_is_root_bus(bus))
+ loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
++ else
++ loc = of_get_property(dn, "ibm,slot-location-code",
++ NULL);
++
+ if (loc)
+- goto out;
++ return loc;
+
+- /* Check the root port */
+- dn = dn->child;
+- if (!dn)
+- goto out;
++ bus = bus->parent;
+ }
+
+- loc = of_get_property(dn, "ibm,loc-code", NULL);
+- if (!loc)
+- loc = of_get_property(dn, "ibm,slot-location-code", NULL);
+-
+-out:
+- return loc ? loc : "N/A";
++ return "N/A";
+ }
+
+ /**
--- /dev/null
+From 05ba75f848647135f063199dc0e9f40fee769724 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Tue, 9 Feb 2016 15:50:21 +1100
+Subject: powerpc/eeh: Fix stale cached primary bus
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 05ba75f848647135f063199dc0e9f40fee769724 upstream.
+
+When PE is created, its primary bus is cached to pe->bus. At later
+point, the cached primary bus is returned from eeh_pe_bus_get().
+However, we could get stale cached primary bus and run into kernel
+crash in one case: full hotplug as part of fenced PHB error recovery
+releases all PCI busses under the PHB at unplugging time and recreates
+them at plugging time. pe->bus is still dereferencing the PCI bus
+that was released.
+
+This adds another PE flag (EEH_PE_PRI_BUS) to represent the validity
+of pe->bus. pe->bus is updated when its first child EEH device is
+online and the flag is set. Before unplugging in full hotplug for
+error recovery, the flag is cleared.
+
+Fixes: 8cdb2833 ("powerpc/eeh: Trace PCI bus from PE")
+Reported-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Reported-by: Pradipta Ghosh <pradghos@in.ibm.com>
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/eeh.h | 1 +
+ arch/powerpc/kernel/eeh_driver.c | 3 +++
+ arch/powerpc/kernel/eeh_pe.c | 2 +-
+ arch/powerpc/platforms/powernv/eeh-powernv.c | 5 ++++-
+ 4 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/include/asm/eeh.h
++++ b/arch/powerpc/include/asm/eeh.h
+@@ -81,6 +81,7 @@ struct pci_dn;
+ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
+ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
+ #define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
++#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
+
+ struct eeh_pe {
+ int type; /* PE type: PHB/Bus/Device */
+--- a/arch/powerpc/kernel/eeh_driver.c
++++ b/arch/powerpc/kernel/eeh_driver.c
+@@ -564,6 +564,7 @@ static int eeh_reset_device(struct eeh_p
+ */
+ eeh_pe_state_mark(pe, EEH_PE_KEEP);
+ if (bus) {
++ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(bus);
+ pci_unlock_rescan_remove();
+@@ -803,6 +804,7 @@ perm_error:
+ * the their PCI config any more.
+ */
+ if (frozen_bus) {
++ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+@@ -886,6 +888,7 @@ static void eeh_handle_special_event(voi
+ continue;
+
+ /* Notify all devices to be down */
++ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ bus = eeh_pe_bus_get(phb_pe);
+ eeh_pe_dev_traverse(pe,
+ eeh_report_failure, NULL);
+--- a/arch/powerpc/kernel/eeh_pe.c
++++ b/arch/powerpc/kernel/eeh_pe.c
+@@ -928,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct ee
+ bus = pe->phb->bus;
+ } else if (pe->type & EEH_PE_BUS ||
+ pe->type & EEH_PE_DEVICE) {
+- if (pe->bus) {
++ if (pe->state & EEH_PE_PRI_BUS) {
+ bus = pe->bus;
+ goto out;
+ }
+--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
++++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
+@@ -444,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn
+ * PCI devices of the PE are expected to be removed prior
+ * to PE reset.
+ */
+- if (!edev->pe->bus)
++ if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
+ edev->pe->bus = pci_find_bus(hose->global_number,
+ pdn->busno);
++ if (edev->pe->bus)
++ edev->pe->state |= EEH_PE_PRI_BUS;
++ }
+
+ /*
+ * Enable EEH explicitly so that we will do EEH check
--- /dev/null
+From f15838e9cac8f78f0cc506529bb9d3b9fa589c1f Mon Sep 17 00:00:00 2001
+From: Andreas Schwab <schwab@linux-m68k.org>
+Date: Fri, 5 Feb 2016 19:50:03 +0100
+Subject: powerpc: Fix dedotify for binutils >= 2.26
+
+From: Andreas Schwab <schwab@linux-m68k.org>
+
+commit f15838e9cac8f78f0cc506529bb9d3b9fa589c1f upstream.
+
+Since binutils 2.26 BFD is doing suffix merging on STRTAB sections. But
+dedotify modifies the symbol names in place, which can also modify
+unrelated symbols with a name that matches a suffix of a dotted name. To
+remove the leading dot of a symbol name we can just increment the pointer
+into the STRTAB section instead.
+
+Backport to all stables to avoid breakage when people update their
+binutils - mpe.
+
+Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/module_64.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/module_64.c
++++ b/arch/powerpc/kernel/module_64.c
+@@ -340,7 +340,7 @@ static void dedotify(Elf64_Sym *syms, un
+ if (name[0] == '.') {
+ if (strcmp(name+1, "TOC.") == 0)
+ syms[i].st_shndx = SHN_ABS;
+- memmove(name, name+1, strlen(name));
++ syms[i].st_name++;
+ }
+ }
+ }
--- /dev/null
+From 6ecad912a0073c768db1491c27ca55ad2d0ee68f Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Wed, 17 Feb 2016 18:26:31 +1100
+Subject: powerpc/ioda: Set "read" permission when "write" is set
+
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+
+commit 6ecad912a0073c768db1491c27ca55ad2d0ee68f upstream.
+
+Quite often drivers set only "write" permission assuming that this
+includes "read" permission as well and this works on plenty of
+platforms. However IODA2 is strict about this and produces an EEH when
+"read" permission is not set and reading happens.
+
+This adds a workaround in the IODA code to always add the "read" bit
+when the "write" bit is set.
+
+Fixes: 10b35b2b7485 ("powerpc/powernv: Do not set "read" flag if direction==DMA_NONE")
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Tested-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/pci.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/powerpc/platforms/powernv/pci.c
++++ b/arch/powerpc/platforms/powernv/pci.c
+@@ -601,6 +601,9 @@ int pnv_tce_build(struct iommu_table *tb
+ u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+ long i;
+
++ if (proto_tce & TCE_PCI_WRITE)
++ proto_tce |= TCE_PCI_READ;
++
+ for (i = 0; i < npages; i++) {
+ unsigned long newtce = proto_tce |
+ ((rpn + i) << tbl->it_page_shift);
+@@ -622,6 +625,9 @@ int pnv_tce_xchg(struct iommu_table *tbl
+
+ BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
++ if (newtce & TCE_PCI_WRITE)
++ newtce |= TCE_PCI_READ;
++
+ oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
+ *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ *direction = iommu_tce_direction(oldtce);
--- /dev/null
+From 1bc74f1ccd457832dc515fc1febe6655985fdcd2 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Tue, 9 Feb 2016 15:50:22 +1100
+Subject: powerpc/powernv: Fix stale PE primary bus
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 1bc74f1ccd457832dc515fc1febe6655985fdcd2 upstream.
+
+When PCI bus is unplugged during full hotplug for EEH recovery,
+the platform PE instance (struct pnv_ioda_pe) isn't released and
+it dereferences the stale PCI bus that has been released. It leads
+to kernel crash when referring to the stale PCI bus.
+
+This fixes the issue by correcting the PE's primary bus when it's
+online at plugging time, in pnv_pci_dma_bus_setup() which is to
+be called by pcibios_fixup_bus().
+
+Reported-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Reported-by: Pradipta Ghosh <pradghos@in.ibm.com>
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/powernv/pci-ioda.c | 1 +
+ arch/powerpc/platforms/powernv/pci.c | 20 ++++++++++++++++++++
+ arch/powerpc/platforms/powernv/pci.h | 1 +
+ 3 files changed, 22 insertions(+)
+
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -3034,6 +3034,7 @@ static void pnv_pci_ioda_shutdown(struct
+
+ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
++ .dma_bus_setup = pnv_pci_dma_bus_setup,
+ #ifdef CONFIG_PCI_MSI
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
+--- a/arch/powerpc/platforms/powernv/pci.c
++++ b/arch/powerpc/platforms/powernv/pci.c
+@@ -762,6 +762,26 @@ void pnv_pci_dma_dev_setup(struct pci_de
+ phb->dma_dev_setup(phb, pdev);
+ }
+
++void pnv_pci_dma_bus_setup(struct pci_bus *bus)
++{
++ struct pci_controller *hose = bus->sysdata;
++ struct pnv_phb *phb = hose->private_data;
++ struct pnv_ioda_pe *pe;
++
++ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
++ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
++ continue;
++
++ if (!pe->pbus)
++ continue;
++
++ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
++ pe->pbus = bus;
++ break;
++ }
++ }
++}
++
+ void pnv_pci_shutdown(void)
+ {
+ struct pci_controller *hose;
+--- a/arch/powerpc/platforms/powernv/pci.h
++++ b/arch/powerpc/platforms/powernv/pci.h
+@@ -235,6 +235,7 @@ extern void pnv_pci_reset_secondary_bus(
+ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
+
+ extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
++extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
+ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
+ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+
--- /dev/null
+From c153693d7eb9eeb28478aa2deaaf0b4e7b5ff5e9 Mon Sep 17 00:00:00 2001
+From: Alan Modra <amodra@gmail.com>
+Date: Fri, 15 Jan 2016 20:52:22 +1100
+Subject: powerpc: Simplify module TOC handling
+
+From: Alan Modra <amodra@gmail.com>
+
+commit c153693d7eb9eeb28478aa2deaaf0b4e7b5ff5e9 upstream.
+
+PowerPC64 uses the symbol .TOC. much as other targets use
+_GLOBAL_OFFSET_TABLE_. It identifies the value of the GOT pointer (or in
+powerpc parlance, the TOC pointer). Global offset tables are generally
+local to an executable or shared library, or in the kernel, module. Thus
+it does not make sense for a module to resolve a relocation against
+.TOC. to the kernel's .TOC. value. A module has its own .TOC., and
+indeed the powerpc64 module relocation processing ignores the kernel
+value of .TOC. and instead calculates a module-local value.
+
+This patch removes code involved in exporting the kernel .TOC., tweaks
+modpost to ignore an undefined .TOC., and the module loader to twiddle
+the section symbol so that .TOC. isn't seen as undefined.
+
+Note that if the kernel was compiled with -msingle-pic-base then ELFv2
+would not have function global entry code setting up r2. In that case
+the module call stubs would need to be modified to set up r2 using the
+kernel .TOC. value, requiring some of this code to be reinstated.
+
+mpe: Furthermore a change in binutils master (not yet released) causes
+the current way we handle the TOC to no longer work when building with
+MODVERSIONS=y and RELOCATABLE=n. The symptom is that modules can not be
+loaded due to there being no version found for TOC.
+
+Signed-off-by: Alan Modra <amodra@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/misc_64.S | 28 ----------------------------
+ arch/powerpc/kernel/module_64.c | 12 +++++++++---
+ scripts/mod/modpost.c | 3 ++-
+ 3 files changed, 11 insertions(+), 32 deletions(-)
+
+--- a/arch/powerpc/kernel/misc_64.S
++++ b/arch/powerpc/kernel/misc_64.S
+@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
+ li r5,0
+ blr /* image->start(physid, image->start, 0); */
+ #endif /* CONFIG_KEXEC */
+-
+-#ifdef CONFIG_MODULES
+-#if defined(_CALL_ELF) && _CALL_ELF == 2
+-
+-#ifdef CONFIG_MODVERSIONS
+-.weak __crc_TOC.
+-.section "___kcrctab+TOC.","a"
+-.globl __kcrctab_TOC.
+-__kcrctab_TOC.:
+- .llong __crc_TOC.
+-#endif
+-
+-/*
+- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
+- * Both modpost and depmod strip the leading . so we do the same here.
+- */
+-.section "__ksymtab_strings","a"
+-__kstrtab_TOC.:
+- .asciz "TOC."
+-
+-.section "___ksymtab+TOC.","a"
+-/* This symbol name is important: it's used by modpost to find exported syms */
+-.globl __ksymtab_TOC.
+-__ksymtab_TOC.:
+- .llong 0 /* .value */
+- .llong __kstrtab_TOC.
+-#endif /* ELFv2 */
+-#endif /* MODULES */
+--- a/arch/powerpc/kernel/module_64.c
++++ b/arch/powerpc/kernel/module_64.c
+@@ -326,7 +326,10 @@ static void dedotify_versions(struct mod
+ }
+ }
+
+-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
++/*
++ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
++ * seem to be defined (value set later).
++ */
+ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+ {
+ unsigned int i;
+@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, un
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF) {
+ char *name = strtab + syms[i].st_name;
+- if (name[0] == '.')
++ if (name[0] == '.') {
++ if (strcmp(name+1, "TOC.") == 0)
++ syms[i].st_shndx = SHN_ABS;
+ memmove(name, name+1, strlen(name));
++ }
+ }
+ }
+ }
+@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shd
+ numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
+
+ for (i = 1; i < numsyms; i++) {
+- if (syms[i].st_shndx == SHN_UNDEF
++ if (syms[i].st_shndx == SHN_ABS
+ && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
+ return &syms[i];
+ }
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -594,7 +594,8 @@ static int ignore_undef_symbol(struct el
+ if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
+ strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
+ strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
+- strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
++ strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
++ strcmp(symname, ".TOC.") == 0)
+ return 1;
+ /* Do not ignore this symbol */
+ return 0;
--- /dev/null
+From ddf1d398e517e660207e2c807f76a90df543a217 Mon Sep 17 00:00:00 2001
+From: Mateusz Guzik <mguzik@redhat.com>
+Date: Wed, 20 Jan 2016 15:01:02 -0800
+Subject: prctl: take mmap sem for writing to protect against others
+
+From: Mateusz Guzik <mguzik@redhat.com>
+
+commit ddf1d398e517e660207e2c807f76a90df543a217 upstream.
+
+An unprivileged user can trigger an oops on a kernel with
+CONFIG_CHECKPOINT_RESTORE.
+
+proc_pid_cmdline_read takes mmap_sem for reading and obtains args + env
+start/end values. These get sanity checked as follows:
+ BUG_ON(arg_start > arg_end);
+ BUG_ON(env_start > env_end);
+
+These can be changed by prctl_set_mm. Turns out it also takes the semaphore for
+reading, effectively rendering it useless. This results in:
+
+ kernel BUG at fs/proc/base.c:240!
+ invalid opcode: 0000 [#1] SMP
+ Modules linked in: virtio_net
+ CPU: 0 PID: 925 Comm: a.out Not tainted 4.4.0-rc8-next-20160105dupa+ #71
+ Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+ task: ffff880077a68000 ti: ffff8800784d0000 task.ti: ffff8800784d0000
+ RIP: proc_pid_cmdline_read+0x520/0x530
+ RSP: 0018:ffff8800784d3db8 EFLAGS: 00010206
+ RAX: ffff880077c5b6b0 RBX: ffff8800784d3f18 RCX: 0000000000000000
+ RDX: 0000000000000002 RSI: 00007f78e8857000 RDI: 0000000000000246
+ RBP: ffff8800784d3e40 R08: 0000000000000008 R09: 0000000000000001
+ R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000050
+ R13: 00007f78e8857800 R14: ffff88006fcef000 R15: ffff880077c5b600
+ FS: 00007f78e884a740(0000) GS:ffff88007b200000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+ CR2: 00007f78e8361770 CR3: 00000000790a5000 CR4: 00000000000006f0
+ Call Trace:
+ __vfs_read+0x37/0x100
+ vfs_read+0x82/0x130
+ SyS_read+0x58/0xd0
+ entry_SYSCALL_64_fastpath+0x12/0x76
+ Code: 4c 8b 7d a8 eb e9 48 8b 9d 78 ff ff ff 4c 8b 7d 90 48 8b 03 48 39 45 a8 0f 87 f0 fe ff ff e9 d1 fe ff ff 4c 8b 7d 90 eb c6 0f 0b <0f> 0b 0f 0b 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00
+ RIP proc_pid_cmdline_read+0x520/0x530
+ ---[ end trace 97882617ae9c6818 ]---
+
+Turns out there are instances where the code just reads aforementioned
+values without locking whatsoever - namely environ_read and get_cmdline.
+
+Interestingly these functions look quite resilient against bogus values,
+but I don't believe this should be relied upon.
+
+The first patch gets rid of the oops bug by grabbing mmap_sem for
+writing.
+
+The second patch is optional and puts locking around aforementioned
+consumers for safety. Consumers of other fields don't seem to benefit
+from similar treatment and are left untouched.
+
+This patch (of 2):
+
+The code was taking the semaphore for reading, which does not protect
+against readers nor concurrent modifications.
+
+The problem could cause sanity checks to fail in procfs's cmdline
+reader, resulting in an OOPS.
+
+Note that some functions perform an unlocked read of various mm fields,
+but they seem to be fine despite possible modification.
+
+Signed-off-by: Mateusz Guzik <mguzik@redhat.com>
+Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Alexey Dobriyan <adobriyan@gmail.com>
+Cc: Jarod Wilson <jarod@redhat.com>
+Cc: Jan Stancek <jstancek@redhat.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Anshuman Khandual <anshuman.linux@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sys.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1853,11 +1853,13 @@ static int prctl_set_mm_map(int opt, con
+ user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
+ }
+
+- if (prctl_map.exe_fd != (u32)-1)
++ if (prctl_map.exe_fd != (u32)-1) {
+ error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
+- down_read(&mm->mmap_sem);
+- if (error)
+- goto out;
++ if (error)
++ return error;
++ }
++
++ down_write(&mm->mmap_sem);
+
+ /*
+ * We don't validate if these members are pointing to
+@@ -1894,10 +1896,8 @@ static int prctl_set_mm_map(int opt, con
+ if (prctl_map.auxv_size)
+ memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
+
+- error = 0;
+-out:
+- up_read(&mm->mmap_sem);
+- return error;
++ up_write(&mm->mmap_sem);
++ return 0;
+ }
+ #endif /* CONFIG_CHECKPOINT_RESTORE */
+
+@@ -1963,7 +1963,7 @@ static int prctl_set_mm(int opt, unsigne
+
+ error = -EINVAL;
+
+- down_read(&mm->mmap_sem);
++ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, addr);
+
+ prctl_map.start_code = mm->start_code;
+@@ -2056,7 +2056,7 @@ static int prctl_set_mm(int opt, unsigne
+
+ error = 0;
+ out:
+- up_read(&mm->mmap_sem);
++ up_write(&mm->mmap_sem);
+ return error;
+ }
+
--- /dev/null
+From caaee6234d05a58c5b4d05e7bf766131b810a657 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jann@thejh.net>
+Date: Wed, 20 Jan 2016 15:00:04 -0800
+Subject: ptrace: use fsuid, fsgid, effective creds for fs access checks
+
+From: Jann Horn <jann@thejh.net>
+
+commit caaee6234d05a58c5b4d05e7bf766131b810a657 upstream.
+
+By checking the effective credentials instead of the real UID / permitted
+capabilities, ensure that the calling process actually intended to use its
+credentials.
+
+To ensure that all ptrace checks use the correct caller credentials (e.g.
+in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
+flag), use two new flags and require one of them to be set.
+
+The problem was that when a privileged task had temporarily dropped its
+privileges, e.g. by calling setreuid(0, user_uid), with the intent to
+perform following syscalls with the credentials of a user, it still passed
+ptrace access checks that the user would not be able to pass.
+
+While an attacker should not be able to convince the privileged task to
+perform a ptrace() syscall, this is a problem because the ptrace access
+check is reused for things in procfs.
+
+In particular, the following somewhat interesting procfs entries only rely
+on ptrace access checks:
+
+ /proc/$pid/stat - uses the check for determining whether pointers
+ should be visible, useful for bypassing ASLR
+ /proc/$pid/maps - also useful for bypassing ASLR
+ /proc/$pid/cwd - useful for gaining access to restricted
+ directories that contain files with lax permissions, e.g. in
+ this scenario:
+ lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
+ drwx------ root root /root
+ drwxr-xr-x root root /root/foobar
+ -rw-r--r-- root root /root/foobar/secret
+
+Therefore, on a system where a root-owned mode 6755 binary changes its
+effective credentials as described and then dumps a user-specified file,
+this could be used by an attacker to reveal the memory layout of root's
+processes or reveal the contents of files he is not allowed to access
+(through /proc/$pid/cwd).
+
+[akpm@linux-foundation.org: fix warning]
+Signed-off-by: Jann Horn <jann@thejh.net>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Casey Schaufler <casey@schaufler-ca.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: James Morris <james.l.morris@oracle.com>
+Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
+Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/array.c | 2 +-
+ fs/proc/base.c | 21 +++++++++++----------
+ fs/proc/namespaces.c | 4 ++--
+ include/linux/ptrace.h | 24 +++++++++++++++++++++++-
+ kernel/events/core.c | 2 +-
+ kernel/futex.c | 2 +-
+ kernel/futex_compat.c | 2 +-
+ kernel/kcmp.c | 4 ++--
+ kernel/ptrace.c | 39 +++++++++++++++++++++++++++++++--------
+ mm/process_vm_access.c | 2 +-
+ security/commoncap.c | 7 ++++++-
+ 11 files changed, 80 insertions(+), 29 deletions(-)
+
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -395,7 +395,7 @@ static int do_task_stat(struct seq_file
+
+ state = *get_task_state(task);
+ vsize = eip = esp = 0;
+- permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
++ permitted = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT);
+ mm = get_task_mm(task);
+ if (mm) {
+ vsize = task_vsize(mm);
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -403,7 +403,7 @@ static const struct file_operations proc
+ static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+ {
+- struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
++ struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (mm && !IS_ERR(mm)) {
+ unsigned int nwords = 0;
+ do {
+@@ -430,7 +430,8 @@ static int proc_pid_wchan(struct seq_fil
+
+ wchan = get_wchan(task);
+
+- if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname))
++ if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
++ && !lookup_symbol_name(wchan, symname))
+ seq_printf(m, "%s", symname);
+ else
+ seq_putc(m, '0');
+@@ -444,7 +445,7 @@ static int lock_trace(struct task_struct
+ int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ if (err)
+ return err;
+- if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
++ if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
+ mutex_unlock(&task->signal->cred_guard_mutex);
+ return -EPERM;
+ }
+@@ -697,7 +698,7 @@ static int proc_fd_access_allowed(struct
+ */
+ task = get_proc_task(inode);
+ if (task) {
+- allowed = ptrace_may_access(task, PTRACE_MODE_READ);
++ allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+ put_task_struct(task);
+ }
+ return allowed;
+@@ -732,7 +733,7 @@ static bool has_pid_permissions(struct p
+ return true;
+ if (in_group_p(pid->pid_gid))
+ return true;
+- return ptrace_may_access(task, PTRACE_MODE_READ);
++ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+ }
+
+
+@@ -809,7 +810,7 @@ struct mm_struct *proc_mem_open(struct i
+ struct mm_struct *mm = ERR_PTR(-ESRCH);
+
+ if (task) {
+- mm = mm_access(task, mode);
++ mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
+ put_task_struct(task);
+
+ if (!IS_ERR_OR_NULL(mm)) {
+@@ -1856,7 +1857,7 @@ static int map_files_d_revalidate(struct
+ if (!task)
+ goto out_notask;
+
+- mm = mm_access(task, PTRACE_MODE_READ);
++ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (IS_ERR_OR_NULL(mm))
+ goto out;
+
+@@ -2007,7 +2008,7 @@ static struct dentry *proc_map_files_loo
+ goto out;
+
+ result = -EACCES;
+- if (!ptrace_may_access(task, PTRACE_MODE_READ))
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ goto out_put_task;
+
+ result = -ENOENT;
+@@ -2060,7 +2061,7 @@ proc_map_files_readdir(struct file *file
+ goto out;
+
+ ret = -EACCES;
+- if (!ptrace_may_access(task, PTRACE_MODE_READ))
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ goto out_put_task;
+
+ ret = 0;
+@@ -2530,7 +2531,7 @@ static int do_io_accounting(struct task_
+ if (result)
+ return result;
+
+- if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
+ result = -EACCES;
+ goto out_unlock;
+ }
+--- a/fs/proc/namespaces.c
++++ b/fs/proc/namespaces.c
+@@ -42,7 +42,7 @@ static const char *proc_ns_follow_link(s
+ if (!task)
+ return error;
+
+- if (ptrace_may_access(task, PTRACE_MODE_READ)) {
++ if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
+ error = ns_get_path(&ns_path, task, ns_ops);
+ if (!error)
+ nd_jump_link(&ns_path);
+@@ -63,7 +63,7 @@ static int proc_ns_readlink(struct dentr
+ if (!task)
+ return res;
+
+- if (ptrace_may_access(task, PTRACE_MODE_READ)) {
++ if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
+ res = ns_get_name(name, sizeof(name), task, ns_ops);
+ if (res >= 0)
+ res = readlink_copy(buffer, buflen, name);
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -57,7 +57,29 @@ extern void exit_ptrace(struct task_stru
+ #define PTRACE_MODE_READ 0x01
+ #define PTRACE_MODE_ATTACH 0x02
+ #define PTRACE_MODE_NOAUDIT 0x04
+-/* Returns true on success, false on denial. */
++#define PTRACE_MODE_FSCREDS 0x08
++#define PTRACE_MODE_REALCREDS 0x10
++
++/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
++#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
++#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
++#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
++#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
++
++/**
++ * ptrace_may_access - check whether the caller is permitted to access
++ * a target task.
++ * @task: target task
++ * @mode: selects type of access and caller credentials
++ *
++ * Returns true on success, false on denial.
++ *
++ * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
++ * be set in @mode to specify whether the access was requested through
++ * a filesystem syscall (should use effective capabilities and fsuid
++ * of the caller) or through an explicit syscall such as
++ * process_vm_writev or ptrace (and should use the real credentials).
++ */
+ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
+
+ static inline int ptrace_reparented(struct task_struct *child)
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -3434,7 +3434,7 @@ find_lively_task_by_vpid(pid_t vpid)
+
+ /* Reuse ptrace permission checks for now. */
+ err = -EACCES;
+- if (!ptrace_may_access(task, PTRACE_MODE_READ))
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+ goto errout;
+
+ return task;
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2881,7 +2881,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pi
+ }
+
+ ret = -EPERM;
+- if (!ptrace_may_access(p, PTRACE_MODE_READ))
++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->robust_list;
+--- a/kernel/futex_compat.c
++++ b/kernel/futex_compat.c
+@@ -155,7 +155,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list,
+ }
+
+ ret = -EPERM;
+- if (!ptrace_may_access(p, PTRACE_MODE_READ))
++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+--- a/kernel/kcmp.c
++++ b/kernel/kcmp.c
+@@ -122,8 +122,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t
+ &task2->signal->cred_guard_mutex);
+ if (ret)
+ goto err;
+- if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+- !ptrace_may_access(task2, PTRACE_MODE_READ)) {
++ if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
++ !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS)) {
+ ret = -EPERM;
+ goto err_unlock;
+ }
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -219,6 +219,14 @@ static int ptrace_has_cap(struct user_na
+ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
+ {
+ const struct cred *cred = current_cred(), *tcred;
++ int dumpable = 0;
++ kuid_t caller_uid;
++ kgid_t caller_gid;
++
++ if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) {
++ WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n");
++ return -EPERM;
++ }
+
+ /* May we inspect the given task?
+ * This check is used both for attaching with ptrace
+@@ -228,18 +236,33 @@ static int __ptrace_may_access(struct ta
+ * because setting up the necessary parent/child relationship
+ * or halting the specified task is impossible.
+ */
+- int dumpable = 0;
++
+ /* Don't let security modules deny introspection */
+ if (same_thread_group(task, current))
+ return 0;
+ rcu_read_lock();
++ if (mode & PTRACE_MODE_FSCREDS) {
++ caller_uid = cred->fsuid;
++ caller_gid = cred->fsgid;
++ } else {
++ /*
++ * Using the euid would make more sense here, but something
++ * in userland might rely on the old behavior, and this
++ * shouldn't be a security problem since
++ * PTRACE_MODE_REALCREDS implies that the caller explicitly
++ * used a syscall that requests access to another process
++ * (and not a filesystem syscall to procfs).
++ */
++ caller_uid = cred->uid;
++ caller_gid = cred->gid;
++ }
+ tcred = __task_cred(task);
+- if (uid_eq(cred->uid, tcred->euid) &&
+- uid_eq(cred->uid, tcred->suid) &&
+- uid_eq(cred->uid, tcred->uid) &&
+- gid_eq(cred->gid, tcred->egid) &&
+- gid_eq(cred->gid, tcred->sgid) &&
+- gid_eq(cred->gid, tcred->gid))
++ if (uid_eq(caller_uid, tcred->euid) &&
++ uid_eq(caller_uid, tcred->suid) &&
++ uid_eq(caller_uid, tcred->uid) &&
++ gid_eq(caller_gid, tcred->egid) &&
++ gid_eq(caller_gid, tcred->sgid) &&
++ gid_eq(caller_gid, tcred->gid))
+ goto ok;
+ if (ptrace_has_cap(tcred->user_ns, mode))
+ goto ok;
+@@ -306,7 +329,7 @@ static int ptrace_attach(struct task_str
+ goto out;
+
+ task_lock(task);
+- retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
++ retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
+ task_unlock(task);
+ if (retval)
+ goto unlock_creds;
+--- a/mm/process_vm_access.c
++++ b/mm/process_vm_access.c
+@@ -194,7 +194,7 @@ static ssize_t process_vm_rw_core(pid_t
+ goto free_proc_pages;
+ }
+
+- mm = mm_access(task, PTRACE_MODE_ATTACH);
++ mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS);
+ if (!mm || IS_ERR(mm)) {
+ rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ /*
+--- a/security/commoncap.c
++++ b/security/commoncap.c
+@@ -137,12 +137,17 @@ int cap_ptrace_access_check(struct task_
+ {
+ int ret = 0;
+ const struct cred *cred, *child_cred;
++ const kernel_cap_t *caller_caps;
+
+ rcu_read_lock();
+ cred = current_cred();
+ child_cred = __task_cred(child);
++ if (mode & PTRACE_MODE_FSCREDS)
++ caller_caps = &cred->cap_effective;
++ else
++ caller_caps = &cred->cap_permitted;
+ if (cred->user_ns == child_cred->user_ns &&
+- cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
++ cap_issubset(child_cred->cap_permitted, *caller_caps))
+ goto out;
+ if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
+ goto out;
--- /dev/null
+From 2831c89f42dcde440cfdccb9fee9f42d54bbc1ef Mon Sep 17 00:00:00 2001
+From: "Herton R. Krzesinski" <herton@redhat.com>
+Date: Mon, 11 Jan 2016 12:07:43 -0200
+Subject: pty: fix possible use after free of tty->driver_data
+
+From: Herton R. Krzesinski <herton@redhat.com>
+
+commit 2831c89f42dcde440cfdccb9fee9f42d54bbc1ef upstream.
+
+This change fixes a bug for a corner case where we have the last
+release from a pty master/slave coming from a previously opened /dev/tty
+file. When this happens, the tty->driver_data can be stale, due to all
+ptmx or pts/N files having already been closed before (and thus the inode
+related to these files, which tty->driver_data points to, being already
+freed/destroyed).
+
+The fix here is to keep a reference on the opened master ptmx inode.
+We maintain the inode referenced until the final pty_unix98_shutdown,
+and only pass this inode to devpts_kill_index.
+
+Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
+Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/pty.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/pty.c
++++ b/drivers/tty/pty.c
+@@ -681,7 +681,14 @@ static void pty_unix98_remove(struct tty
+ /* this is called once with whichever end is closed last */
+ static void pty_unix98_shutdown(struct tty_struct *tty)
+ {
+- devpts_kill_index(tty->driver_data, tty->index);
++ struct inode *ptmx_inode;
++
++ if (tty->driver->subtype == PTY_TYPE_MASTER)
++ ptmx_inode = tty->driver_data;
++ else
++ ptmx_inode = tty->link->driver_data;
++ devpts_kill_index(ptmx_inode, tty->index);
++ iput(ptmx_inode); /* drop reference we acquired at ptmx_open */
+ }
+
+ static const struct tty_operations ptm_unix98_ops = {
+@@ -773,6 +780,15 @@ static int ptmx_open(struct inode *inode
+ set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+ tty->driver_data = inode;
+
++ /*
++ * In the case where all references to ptmx inode are dropped and we
++ * still have /dev/tty opened pointing to the master/slave pair (ptmx
++ * is closed/released before /dev/tty), we must make sure that the inode
++ * is still valid when we call the final pty_unix98_shutdown, thus we
++ * hold an additional reference to the ptmx inode
++ */
++ ihold(inode);
++
+ tty_add_file(tty, filp);
+
+ slave_inode = devpts_pty_new(inode,
--- /dev/null
+From 1f55c718c290616889c04946864a13ef30f64929 Mon Sep 17 00:00:00 2001
+From: "Herton R. Krzesinski" <herton@redhat.com>
+Date: Thu, 14 Jan 2016 17:56:58 -0200
+Subject: pty: make sure super_block is still valid in final /dev/tty close
+
+From: Herton R. Krzesinski <herton@redhat.com>
+
+commit 1f55c718c290616889c04946864a13ef30f64929 upstream.
+
+Considering current pty code and multiple devpts instances, it's possible
+to umount a devpts file system while a program still has /dev/tty opened
+pointing to a previously closed pty pair in that instance. In the case all
+ptmx and pts/N files are closed, umount can be done. If the program closes
+/dev/tty after umount is done, devpts_kill_index will use now an invalid
+super_block, which was already destroyed in the umount operation after
+running ->kill_sb. This is another "use after free" type of issue, but now
+related to the allocated super_block instance.
+
+To avoid the problem (warning at ida_remove and potential crashes) for
+this specific case, I added two functions in devpts which grab additional
+references to the super_block, which pty code now uses so it makes sure
+the super block structure is still valid until pty shutdown is done.
+I also moved the additional inode references to the same functions, which
+also covered similar case with inode being freed before /dev/tty final
+close/shutdown.
+
+Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
+Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/pty.c | 9 ++++++---
+ fs/devpts/inode.c | 20 ++++++++++++++++++++
+ include/linux/devpts_fs.h | 4 ++++
+ 3 files changed, 30 insertions(+), 3 deletions(-)
+
+--- a/drivers/tty/pty.c
++++ b/drivers/tty/pty.c
+@@ -688,7 +688,7 @@ static void pty_unix98_shutdown(struct t
+ else
+ ptmx_inode = tty->link->driver_data;
+ devpts_kill_index(ptmx_inode, tty->index);
+- iput(ptmx_inode); /* drop reference we acquired at ptmx_open */
++ devpts_del_ref(ptmx_inode);
+ }
+
+ static const struct tty_operations ptm_unix98_ops = {
+@@ -785,9 +785,12 @@ static int ptmx_open(struct inode *inode
+ * still have /dev/tty opened pointing to the master/slave pair (ptmx
+ * is closed/released before /dev/tty), we must make sure that the inode
+ * is still valid when we call the final pty_unix98_shutdown, thus we
+- * hold an additional reference to the ptmx inode
++ * hold an additional reference to the ptmx inode. For the same /dev/tty
++ * last close case, we also need to make sure the super_block isn't
++ * destroyed (devpts instance unmounted), before /dev/tty is closed and
++ * on its release devpts_kill_index is called.
+ */
+- ihold(inode);
++ devpts_add_ref(inode);
+
+ tty_add_file(tty, filp);
+
+--- a/fs/devpts/inode.c
++++ b/fs/devpts/inode.c
+@@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptm
+ mutex_unlock(&allocated_ptys_lock);
+ }
+
++/*
++ * pty code needs to hold extra references in case of last /dev/tty close
++ */
++
++void devpts_add_ref(struct inode *ptmx_inode)
++{
++ struct super_block *sb = pts_sb_from_inode(ptmx_inode);
++
++ atomic_inc(&sb->s_active);
++ ihold(ptmx_inode);
++}
++
++void devpts_del_ref(struct inode *ptmx_inode)
++{
++ struct super_block *sb = pts_sb_from_inode(ptmx_inode);
++
++ iput(ptmx_inode);
++ deactivate_super(sb);
++}
++
+ /**
+ * devpts_pty_new -- create a new inode in /dev/pts/
+ * @ptmx_inode: inode of the master
+--- a/include/linux/devpts_fs.h
++++ b/include/linux/devpts_fs.h
+@@ -19,6 +19,8 @@
+
+ int devpts_new_index(struct inode *ptmx_inode);
+ void devpts_kill_index(struct inode *ptmx_inode, int idx);
++void devpts_add_ref(struct inode *ptmx_inode);
++void devpts_del_ref(struct inode *ptmx_inode);
+ /* mknod in devpts */
+ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+ void *priv);
+@@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode
+ /* Dummy stubs in the no-pty case */
+ static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
+ static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
++static inline void devpts_add_ref(struct inode *ptmx_inode) { }
++static inline void devpts_del_ref(struct inode *ptmx_inode) { }
+ static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
+ dev_t device, int index, void *priv)
+ {
--- /dev/null
+From 732042821cfa106b3c20b9780e4c60fee9d68900 Mon Sep 17 00:00:00 2001
+From: Konstantin Khlebnikov <koct9i@gmail.com>
+Date: Fri, 5 Feb 2016 15:37:01 -0800
+Subject: radix-tree: fix oops after radix_tree_iter_retry
+
+From: Konstantin Khlebnikov <koct9i@gmail.com>
+
+commit 732042821cfa106b3c20b9780e4c60fee9d68900 upstream.
+
+Helper radix_tree_iter_retry() resets next_index to the current index.
+In following radix_tree_next_slot current chunk size becomes zero. This
+isn't checked and it tries to dereference null pointer in slot.
+
+Tagged iterator is fine because retry happens only at slot 0 where tag
+bitmask in iter->tags is filled with single bit.
+
+Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup")
+Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
+Cc: Matthew Wilcox <willy@linux.intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Ohad Ben-Cohen <ohad@wizery.com>
+Cc: Jeremiah Mahler <jmmahler@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/radix-tree.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -391,7 +391,7 @@ void **radix_tree_iter_retry(struct radi
+ * @iter: pointer to radix tree iterator
+ * Returns: current chunk size
+ */
+-static __always_inline unsigned
++static __always_inline long
+ radix_tree_chunk_size(struct radix_tree_iter *iter)
+ {
+ return iter->next_index - iter->index;
+@@ -425,9 +425,9 @@ radix_tree_next_slot(void **slot, struct
+ return slot + offset + 1;
+ }
+ } else {
+- unsigned size = radix_tree_chunk_size(iter) - 1;
++ long size = radix_tree_chunk_size(iter);
+
+- while (size--) {
++ while (--size > 0) {
+ slot++;
+ iter->index++;
+ if (likely(*slot))
--- /dev/null
+From 46437f9a554fbe3e110580ca08ab703b59f2f95a Mon Sep 17 00:00:00 2001
+From: Matthew Wilcox <willy@linux.intel.com>
+Date: Tue, 2 Feb 2016 16:57:52 -0800
+Subject: radix-tree: fix race in gang lookup
+
+From: Matthew Wilcox <willy@linux.intel.com>
+
+commit 46437f9a554fbe3e110580ca08ab703b59f2f95a upstream.
+
+If the indirect_ptr bit is set on a slot, that indicates we need to redo
+the lookup. Introduce a new function radix_tree_iter_retry() which
+forces the loop to retry the lookup by setting 'slot' to NULL and
+turning the iterator back to point at the problematic entry.
+
+This is a pretty rare problem to hit at the moment; the lookup has to
+race with a grow of the radix tree from a height of 0. The consequences
+of hitting this race are that gang lookup could return a pointer to a
+radix_tree_node instead of a pointer to whatever the user had inserted
+in the tree.
+
+Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator")
+Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Ohad Ben-Cohen <ohad@wizery.com>
+Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/radix-tree.h | 16 ++++++++++++++++
+ lib/radix-tree.c | 12 ++++++++++--
+ 2 files changed, 26 insertions(+), 2 deletions(-)
+
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -370,6 +370,22 @@ void **radix_tree_next_chunk(struct radi
+ struct radix_tree_iter *iter, unsigned flags);
+
+ /**
++ * radix_tree_iter_retry - retry this chunk of the iteration
++ * @iter: iterator state
++ *
++ * If we iterate over a tree protected only by the RCU lock, a race
++ * against deletion or creation may result in seeing a slot for which
++ * radix_tree_deref_retry() returns true. If so, call this function
++ * and continue the iteration.
++ */
++static inline __must_check
++void **radix_tree_iter_retry(struct radix_tree_iter *iter)
++{
++ iter->next_index = iter->index;
++ return NULL;
++}
++
++/**
+ * radix_tree_chunk_size - get current chunk size
+ *
+ * @iter: pointer to radix tree iterator
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree
+ return 0;
+
+ radix_tree_for_each_slot(slot, root, &iter, first_index) {
+- results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
++ results[ret] = rcu_dereference_raw(*slot);
+ if (!results[ret])
+ continue;
++ if (radix_tree_is_indirect_ptr(results[ret])) {
++ slot = radix_tree_iter_retry(&iter);
++ continue;
++ }
+ if (++ret == max_items)
+ break;
+ }
+@@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_
+ return 0;
+
+ radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
+- results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
++ results[ret] = rcu_dereference_raw(*slot);
+ if (!results[ret])
+ continue;
++ if (radix_tree_is_indirect_ptr(results[ret])) {
++ slot = radix_tree_iter_retry(&iter);
++ continue;
++ }
+ if (++ret == max_items)
+ break;
+ }
--- /dev/null
+From 80ad623edd2d0ccb47d85357ee31c97e6c684e82 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Mon, 25 Jan 2016 11:02:06 +0100
+Subject: Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()"
+
+From: David Sterba <dsterba@suse.com>
+
+commit 80ad623edd2d0ccb47d85357ee31c97e6c684e82 upstream.
+
+This reverts commit 696249132158014d594896df3a81390616069c5c. The
+cleaner thread can block freezing when there's a snapshot cleaning in
+progress and the other threads get suspended first. From the logs
+provided by Martin we're waiting for reading extent pages:
+
+kernel: PM: Syncing filesystems ... done.
+kernel: Freezing user space processes ... (elapsed 0.015 seconds) done.
+kernel: Freezing remaining freezable tasks ...
+kernel: Freezing of tasks failed after 20.003 seconds (1 tasks refusing to freeze, wq_busy=0):
+kernel: btrfs-cleaner D ffff88033dd13bc0 0 152 2 0x00000000
+kernel: ffff88032ebc2e00 ffff88032e750000 ffff88032e74fa50 7fffffffffffffff
+kernel: ffffffff814a58df 0000000000000002 ffffea000934d580 ffffffff814a5451
+kernel: 7fffffffffffffff ffffffff814a6e8f 0000000000000000 0000000000000020
+kernel: Call Trace:
+kernel: [<ffffffff814a58df>] ? bit_wait+0x2c/0x2c
+kernel: [<ffffffff814a5451>] ? schedule+0x6f/0x7c
+kernel: [<ffffffff814a6e8f>] ? schedule_timeout+0x2f/0xd8
+kernel: [<ffffffff81076f94>] ? timekeeping_get_ns+0xa/0x2e
+kernel: [<ffffffff81077603>] ? ktime_get+0x36/0x44
+kernel: [<ffffffff814a4f6c>] ? io_schedule_timeout+0x94/0xf2
+kernel: [<ffffffff814a4f6c>] ? io_schedule_timeout+0x94/0xf2
+kernel: [<ffffffff814a590b>] ? bit_wait_io+0x2c/0x30
+kernel: [<ffffffff814a5694>] ? __wait_on_bit+0x41/0x73
+kernel: [<ffffffff8109eba8>] ? wait_on_page_bit+0x6d/0x72
+kernel: [<ffffffff8105d718>] ? autoremove_wake_function+0x2a/0x2a
+kernel: [<ffffffff811a02d7>] ? read_extent_buffer_pages+0x1bd/0x203
+kernel: [<ffffffff8117d9e9>] ? free_root_pointers+0x4c/0x4c
+kernel: [<ffffffff8117e831>] ? btree_read_extent_buffer_pages.constprop.57+0x5a/0xe9
+kernel: [<ffffffff8117f4f3>] ? read_tree_block+0x2d/0x45
+kernel: [<ffffffff8116782a>] ? read_block_for_search.isra.34+0x22a/0x26b
+kernel: [<ffffffff811656c3>] ? btrfs_set_path_blocking+0x1e/0x4a
+kernel: [<ffffffff8116919b>] ? btrfs_search_slot+0x648/0x736
+kernel: [<ffffffff81170559>] ? btrfs_lookup_extent_info+0xb7/0x2c7
+kernel: [<ffffffff81170ee5>] ? walk_down_proc+0x9c/0x1ae
+kernel: [<ffffffff81171c9d>] ? walk_down_tree+0x40/0xa4
+kernel: [<ffffffff8117375f>] ? btrfs_drop_snapshot+0x2da/0x664
+kernel: [<ffffffff8104ff21>] ? finish_task_switch+0x126/0x167
+kernel: [<ffffffff811850f8>] ? btrfs_clean_one_deleted_snapshot+0xa6/0xb0
+kernel: [<ffffffff8117eaba>] ? cleaner_kthread+0x13e/0x17b
+kernel: [<ffffffff8117e97c>] ? btrfs_item_end+0x33/0x33
+kernel: [<ffffffff8104d256>] ? kthread+0x95/0x9d
+kernel: [<ffffffff8104d1c1>] ? kthread_parkme+0x16/0x16
+kernel: [<ffffffff814a7b5f>] ? ret_from_fork+0x3f/0x70
+kernel: [<ffffffff8104d1c1>] ? kthread_parkme+0x16/0x16
+
+As this affects a released kernel (4.4) we need a minimal fix for
+stable kernels.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=108361
+Reported-by: Martin Ziegler <ziegler@uni-freiburg.de>
+CC: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1762,7 +1762,6 @@ static int cleaner_kthread(void *arg)
+ int again;
+ struct btrfs_trans_handle *trans;
+
+- set_freezable();
+ do {
+ again = 0;
+
--- /dev/null
+From 3e85286e75224fa3f08bdad20e78c8327742634e Mon Sep 17 00:00:00 2001
+From: Dave Chinner <david@fromorbit.com>
+Date: Tue, 19 Jan 2016 08:21:46 +1100
+Subject: Revert "xfs: clear PF_NOFREEZE for xfsaild kthread"
+
+From: Dave Chinner <david@fromorbit.com>
+
+commit 3e85286e75224fa3f08bdad20e78c8327742634e upstream.
+
+This reverts commit 24ba16bb3d499c49974669cd8429c3e4138ab102 as it
+prevents machines from suspending. This regression occurs when the
+xfsaild is idle on entry to suspend, and so there is no activity to
+wake it from its idle sleep and hence see that it is supposed to
+freeze. Hence the freezer times out waiting for it and suspend is
+cancelled.
+
+There is no obvious fix for this short of freezing the filesystem
+properly, so revert this change for now.
+
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_trans_ail.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -497,7 +497,6 @@ xfsaild(
+ long tout = 0; /* milliseconds */
+
+ current->flags |= PF_MEMALLOC;
+- set_freezable();
+
+ while (!kthread_should_stop()) {
+ if (tout && tout <= 20)
--- /dev/null
+From a6835090716a85f2297668ba593bd00e1051e662 Mon Sep 17 00:00:00 2001
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+Date: Tue, 26 Jan 2016 17:50:04 +0200
+Subject: Revert "xhci: don't finish a TD if we get a short-transfer event mid TD"
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+commit a6835090716a85f2297668ba593bd00e1051e662 upstream.
+
+This reverts commit e210c422b6fd ("xhci: don't finish a TD if we get a
+short transfer event mid TD")
+
+Turns out that most host controllers do not follow the xHCI specs and never
+send the second event for the last TRB in the TD if there was a short event
+mid-TD.
+
+Returning the URB directly after the first short-transfer event is far
+better than never returning the URB. (class drivers usually timeout
+after 30sec). For the hosts that do send the second event we will go
+back to treating it as misplaced event and print an error message for it.
+
+The original patch was sent to stable kernels and needs to be reverted from
+there as well.
+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/host/xhci-ring.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2192,10 +2192,6 @@ static int process_bulk_intr_td(struct x
+ }
+ /* Fast path - was this the last TRB in the TD for this URB? */
+ } else if (event_trb == td->last_trb) {
+- if (td->urb_length_set && trb_comp_code == COMP_SHORT_TX)
+- return finish_td(xhci, td, event_trb, event, ep,
+- status, false);
+-
+ if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
+ td->urb->actual_length =
+ td->urb->transfer_buffer_length -
+@@ -2247,12 +2243,6 @@ static int process_bulk_intr_td(struct x
+ td->urb->actual_length +=
+ TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
+ EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
+-
+- if (trb_comp_code == COMP_SHORT_TX) {
+- xhci_dbg(xhci, "mid bulk/intr SP, wait for last TRB event\n");
+- td->urb_length_set = true;
+- return 0;
+- }
+ }
+
+ return finish_td(xhci, td, event_trb, event, ep, status, false);
--- /dev/null
+From 72214a24a7677d4c7501eecc9517ed681b5f2db2 Mon Sep 17 00:00:00 2001
+From: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Date: Thu, 14 Jan 2016 15:16:53 -0800
+Subject: scripts/bloat-o-meter: fix python3 syntax error
+
+From: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+
+commit 72214a24a7677d4c7501eecc9517ed681b5f2db2 upstream.
+
+In Python3+ print is a function so the old syntax is not correct
+anymore:
+
+ $ ./scripts/bloat-o-meter vmlinux.o vmlinux.o.old
+ File "./scripts/bloat-o-meter", line 61
+ print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \
+ ^
+ SyntaxError: invalid syntax
+
+Fix by calling print as a function.
+
+Tested on python 2.7.11, 3.5.1
+
+Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ scripts/bloat-o-meter | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/scripts/bloat-o-meter
++++ b/scripts/bloat-o-meter
+@@ -58,8 +58,8 @@ for name in common:
+ delta.sort()
+ delta.reverse()
+
+-print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \
+- (add, remove, grow, shrink, up, -down, up-down)
+-print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")
++print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \
++ (add, remove, grow, shrink, up, -down, up-down))
++print("%-40s %7s %7s %+7s" % ("function", "old", "new", "delta"))
+ for d, n in delta:
+- if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)
++ if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d))
--- /dev/null
+From 82c43310508eb19eb41fe7862e89afeb74030b84 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Wed, 27 Jan 2016 16:19:13 +0200
+Subject: SCSI: Add Marvell Console to VPD blacklist
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit 82c43310508eb19eb41fe7862e89afeb74030b84 upstream.
+
+I have a Marvell 88SE9230 SATA Controller that has some sort of
+integrated console SCSI device attached to one of the ports.
+
+ ata14: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
+ ata14.00: ATAPI: MARVELL VIRTUALL, 1.09, max UDMA/66
+ ata14.00: configured for UDMA/66
+ scsi 13:0:0:0: Processor Marvell Console 1.01 PQ: 0 ANSI: 5
+
+Sending it a VPD INQUIRY command seems to always fail with the following error:
+
+ ata14.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6
+ ata14.00: irq_stat 0x40000001
+ ata14.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 2 dma 16640 in
+ Inquiry 12 01 00 00 ff 00res 00/00:00:00:00:00/00:00:00:00:00/00 Emask 0x3 (HSM violation)
+ ata14: hard resetting link
+
+This has been a minor annoyance (only error printed on dmesg) until commit
+09e2b0b14690 ("scsi: rescan VPD attributes") added call to scsi_attach_vpd()
+in scsi_rescan_device(). The commit causes the system to splat out
+following errors continuously without ever reaching the UI:
+
+ ata14.00: configured for UDMA/66
+ ata14: EH complete
+ ata14.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6
+ ata14.00: irq_stat 0x40000001
+ ata14.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 6 dma 16640 in
+ Inquiry 12 01 00 00 ff 00res 00/00:00:00:00:00/00:00:00:00:00/00 Emask 0x3 (HSM violation)
+ ata14: hard resetting link
+ ata14: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
+ ata14.00: configured for UDMA/66
+ ata14: EH complete
+ ata14.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6
+ ata14.00: irq_stat 0x40000001
+ ata14.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 7 dma 16640 in
+ Inquiry 12 01 00 00 ff 00res 00/00:00:00:00:00/00:00:00:00:00/00 Emask 0x3 (HSM violation)
+
+Without in-depth understanding of SCSI layer and the Marvell controller,
+I suspect this happens because when the link goes down (because of an
+error) we schedule scsi_rescan_device() which again fails to read VPD
+data... ad infinitum.
+
+Since VPD data cannot be read from the device anyway we prevent the SCSI
+layer from even trying by blacklisting the device. This gets rid of the
+error and the system starts up normally.
+
+[mkp: Widened the match to all revisions of this device]
+
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reported-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reported-by: Alexander Duyck <alexander.duyck@gmail.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/scsi_devinfo.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/scsi/scsi_devinfo.c
++++ b/drivers/scsi/scsi_devinfo.c
+@@ -205,6 +205,7 @@ static struct {
+ {"Intel", "Multi-Flex", NULL, BLIST_NO_RSOC},
+ {"iRiver", "iFP Mass Driver", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
+ {"LASOUND", "CDX7405", "3.10", BLIST_MAX5LUN | BLIST_SINGLELUN},
++ {"Marvell", "Console", NULL, BLIST_SKIP_VPD_PAGES},
+ {"MATSHITA", "PD-1", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"MATSHITA", "DMC-LC5", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
+ {"MATSHITA", "DMC-LC40", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36},
--- /dev/null
+From 9055082fb100cc66e20c048251d05159f5f2cfba Mon Sep 17 00:00:00 2001
+From: Mike Christie <mchristi@redhat.com>
+Date: Thu, 7 Jan 2016 16:34:05 -0600
+Subject: scsi: add Synology to 1024 sector blacklist
+
+From: Mike Christie <mchristi@redhat.com>
+
+commit 9055082fb100cc66e20c048251d05159f5f2cfba upstream.
+
+Another iscsi target that cannot handle large IOs, but does not tell us
+a limit.
+
+The Synology iSCSI targets report:
+
+Block limits VPD page (SBC):
+ Write same no zero (WSNZ): 0
+ Maximum compare and write length: 0 blocks
+ Optimal transfer length granularity: 0 blocks
+ Maximum transfer length: 0 blocks
+ Optimal transfer length: 0 blocks
+ Maximum prefetch length: 0 blocks
+ Maximum unmap LBA count: 0
+ Maximum unmap block descriptor count: 0
+ Optimal unmap granularity: 0
+ Unmap granularity alignment valid: 0
+ Unmap granularity alignment: 0
+ Maximum write same length: 0x0 blocks
+
+and the size of the command it can handle seems to depend on how much
+memory it can allocate at the time. This results in IO errors when
+handling large IOs. This patch just has us use the old 1024 default
+sectors for this target by adding it to the scsi blacklist. We do not
+have good contacts with this vendor, so I have not been able to try to
+fix it on their side.
+
+I have posted this a long while back, but it was not merged. This
+version just fixes it up for merge/patch failures in the original
+version.
+
+Reported-by: Ancoron Luciferis <ancoron.luciferis@googlemail.com>
+Reported-by: Michael Meyers <steltek@tcnnet.com>
+Signed-off-by: Mike Christie <mchristi@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/scsi_devinfo.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/scsi/scsi_devinfo.c
++++ b/drivers/scsi/scsi_devinfo.c
+@@ -227,6 +227,7 @@ static struct {
+ {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
+ {"Promise", "", NULL, BLIST_SPARSELUN},
+ {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
++ {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
+ {"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
+ {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
+ {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
--- /dev/null
+From 13b4389143413a1f18127c07f72c74cad5b563e8 Mon Sep 17 00:00:00 2001
+From: Alan Stern <stern@rowland.harvard.edu>
+Date: Wed, 20 Jan 2016 11:26:01 -0500
+Subject: SCSI: fix crashes in sd and sr runtime PM
+
+From: Alan Stern <stern@rowland.harvard.edu>
+
+commit 13b4389143413a1f18127c07f72c74cad5b563e8 upstream.
+
+Runtime suspend during driver probe and removal can cause problems.
+The driver's runtime_suspend or runtime_resume callbacks may be invoked
+before the driver has finished binding to the device or after the
+driver has unbound from the device.
+
+This problem shows up with the sd and sr drivers, and can cause disk
+or CD/DVD drives to become unusable as a result. The fix is simple.
+The drivers store a pointer to the scsi_disk or scsi_cd structure as
+their private device data when probing is finished, so we simply have
+to be sure to clear the private data during removal and test it during
+runtime suspend/resume.
+
+This fixes <https://bugs.debian.org/801925>.
+
+Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
+Reported-by: Paul Menzel <paul.menzel@giantmonkey.de>
+Reported-by: Erich Schubert <erich@debian.org>
+Reported-by: Alexandre Rossi <alexandre.rossi@gmail.com>
+Tested-by: Paul Menzel <paul.menzel@giantmonkey.de>
+Tested-by: Erich Schubert <erich@debian.org>
+Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sd.c | 7 +++++--
+ drivers/scsi/sr.c | 4 ++++
+ 2 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3268,8 +3268,8 @@ static int sd_suspend_common(struct devi
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+ int ret = 0;
+
+- if (!sdkp)
+- return 0; /* this can happen */
++ if (!sdkp) /* E.g.: runtime suspend following sd_remove() */
++ return 0;
+
+ if (sdkp->WCE && sdkp->media_present) {
+ sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
+@@ -3308,6 +3308,9 @@ static int sd_resume(struct device *dev)
+ {
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
++ if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
++ return 0;
++
+ if (!sdkp->device->manage_start_stop)
+ return 0;
+
+--- a/drivers/scsi/sr.c
++++ b/drivers/scsi/sr.c
+@@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct dev
+ {
+ struct scsi_cd *cd = dev_get_drvdata(dev);
+
++ if (!cd) /* E.g.: runtime suspend following sr_remove() */
++ return 0;
++
+ if (cd->media_present)
+ return -EBUSY;
+ else
+@@ -985,6 +988,7 @@ static int sr_remove(struct device *dev)
+ scsi_autopm_get_device(cd->device);
+
+ del_gendisk(cd->disk);
++ dev_set_drvdata(dev, NULL);
+
+ mutex_lock(&sr_ref_mutex);
+ kref_put(&cd->kref, sr_kref_release);
--- /dev/null
+From 90a88d6ef88edcfc4f644dddc7eef4ea41bccf8b Mon Sep 17 00:00:00 2001
+From: James Bottomley <James.Bottomley@HansenPartnership.com>
+Date: Wed, 10 Feb 2016 08:03:26 -0800
+Subject: scsi: fix soft lockup in scsi_remove_target() on module removal
+
+From: James Bottomley <James.Bottomley@HansenPartnership.com>
+
+commit 90a88d6ef88edcfc4f644dddc7eef4ea41bccf8b upstream.
+
+This softlockup is currently happening:
+
+[ 444.088002] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/1:1:29]
+[ 444.088002] Modules linked in: lpfc(-) qla2x00tgt(O) qla2xxx_scst(O) scst_vdisk(O) scsi_transport_fc libcrc32c scst(O) dlm configfs nfsd lockd grace nfs_acl auth_rpcgss sunrpc ed
+d snd_pcm_oss snd_mixer_oss snd_seq snd_seq_device dm_mod iTCO_wdt snd_hda_codec_realtek snd_hda_codec_generic gpio_ich iTCO_vendor_support ppdev snd_hda_intel snd_hda_codec snd_hda
+_core snd_hwdep tg3 snd_pcm snd_timer libphy lpc_ich parport_pc ptp acpi_cpufreq snd pps_core fjes parport i2c_i801 ehci_pci tpm_tis tpm sr_mod cdrom soundcore floppy hwmon sg 8250_
+fintek pcspkr i915 drm_kms_helper uhci_hcd ehci_hcd drm fb_sys_fops sysimgblt sysfillrect syscopyarea i2c_algo_bit usbcore button video usb_common fan ata_generic ata_piix libata th
+ermal
+[ 444.088002] CPU: 1 PID: 29 Comm: kworker/1:1 Tainted: G O 4.4.0-rc5-2.g1e923a3-default #1
+[ 444.088002] Hardware name: FUJITSU SIEMENS ESPRIMO E /D2164-A1, BIOS 5.00 R1.10.2164.A1 05/08/2006
+[ 444.088002] Workqueue: fc_wq_4 fc_rport_final_delete [scsi_transport_fc]
+[ 444.088002] task: f6266ec0 ti: f6268000 task.ti: f6268000
+[ 444.088002] EIP: 0060:[<c07e7044>] EFLAGS: 00000286 CPU: 1
+[ 444.088002] EIP is at _raw_spin_unlock_irqrestore+0x14/0x20
+[ 444.088002] EAX: 00000286 EBX: f20d3800 ECX: 00000002 EDX: 00000286
+[ 444.088002] ESI: f50ba800 EDI: f2146848 EBP: f6269ec8 ESP: f6269ec8
+[ 444.088002] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
+[ 444.088002] CR0: 8005003b CR2: 08f96600 CR3: 363ae000 CR4: 000006d0
+[ 444.088002] Stack:
+[ 444.088002] f6269eec c066b0f7 00000286 f2146848 f50ba808 f50ba800 f50ba800 f2146a90
+[ 444.088002] f2146848 f6269f08 f8f0a4ed f3141000 f2146800 f2146a90 f619fa00 00000040
+[ 444.088002] f6269f40 c026cb25 00000001 166c6392 00000061 f6757140 f6136340 00000004
+[ 444.088002] Call Trace:
+[ 444.088002] [<c066b0f7>] scsi_remove_target+0x167/0x1c0
+[ 444.088002] [<f8f0a4ed>] fc_rport_final_delete+0x9d/0x1e0 [scsi_transport_fc]
+[ 444.088002] [<c026cb25>] process_one_work+0x155/0x3e0
+[ 444.088002] [<c026cde7>] worker_thread+0x37/0x490
+[ 444.088002] [<c027214b>] kthread+0x9b/0xb0
+[ 444.088002] [<c07e72c1>] ret_from_kernel_thread+0x21/0x40
+
+What appears to be happening is that something has pinned the target
+so it can't go into STARGET_DEL via final release and the loop in
+scsi_remove_target spins endlessly until that happens.
+
+The fix for this soft lockup is to not keep looping over a device that
+we've called remove on but which hasn't gone into DEL state. This
+patch will retain a simplistic memory of the last target and not keep
+looping over it.
+
+Reported-by: Sebastian Herbszt <herbszt@gmx.de>
+Tested-by: Sebastian Herbszt <herbszt@gmx.de>
+Fixes: 40998193560dab6c3ce8d25f4fa58a23e252ef38
+Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/scsi_sysfs.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/scsi_sysfs.c
++++ b/drivers/scsi/scsi_sysfs.c
+@@ -1192,16 +1192,18 @@ static void __scsi_remove_target(struct
+ void scsi_remove_target(struct device *dev)
+ {
+ struct Scsi_Host *shost = dev_to_shost(dev->parent);
+- struct scsi_target *starget;
++ struct scsi_target *starget, *last_target = NULL;
+ unsigned long flags;
+
+ restart:
+ spin_lock_irqsave(shost->host_lock, flags);
+ list_for_each_entry(starget, &shost->__targets, siblings) {
+- if (starget->state == STARGET_DEL)
++ if (starget->state == STARGET_DEL ||
++ starget == last_target)
+ continue;
+ if (starget->dev.parent == dev || &starget->dev == dev) {
+ kref_get(&starget->reap_ref);
++ last_target = starget;
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ __scsi_remove_target(starget);
+ scsi_target_reap(starget);
--- /dev/null
+From d2d06d4fe0f2cc2df9b17fefec96e6e1a1271d91 Mon Sep 17 00:00:00 2001
+From: Hannes Reinecke <hare@suse.de>
+Date: Fri, 22 Jan 2016 15:42:41 +0100
+Subject: scsi_dh_rdac: always retry MODE SELECT on command lock violation
+
+From: Hannes Reinecke <hare@suse.de>
+
+commit d2d06d4fe0f2cc2df9b17fefec96e6e1a1271d91 upstream.
+
+If MODE SELECT returns with sense '05/91/36' (command lock violation)
+it should always be retried without counting the number of retries.
+During an HBA upgrade or similar circumstances one might see a flood
+of MODE SELECT commands from various HBAs, which will easily trigger
+the sense code and exceed the retry count.
+
+Signed-off-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/device_handler/scsi_dh_rdac.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
++++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
+@@ -562,7 +562,7 @@ static int mode_select_handle_sense(stru
+ /*
+ * Command Lock contention
+ */
+- err = SCSI_DH_RETRY;
++ err = SCSI_DH_IMM_RETRY;
+ break;
+ default:
+ break;
+@@ -612,6 +612,8 @@ retry:
+ err = mode_select_handle_sense(sdev, h->sense);
+ if (err == SCSI_DH_RETRY && retry_cnt--)
+ goto retry;
++ if (err == SCSI_DH_IMM_RETRY)
++ goto retry;
+ }
+ if (err == SCSI_DH_OK) {
+ h->state = RDAC_STATE_ACTIVE;
--- /dev/null
+From 6c55d9b98335f7f6bd5f061866ff1633401f3a44 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Fri, 29 Jan 2016 16:49:47 +0200
+Subject: serial: 8250_pci: Add Intel Broadwell ports
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit 6c55d9b98335f7f6bd5f061866ff1633401f3a44 upstream.
+
+Some recent (early 2015) macbooks have Intel Broadwell where LPSS UARTs are
+PCI enumerated instead of ACPI. The LPSS UART block is pretty much the same as
+the one used on Intel Baytrail so we can reuse the existing Baytrail setup code.
+
+Add both Broadwell LPSS UART ports to the list of supported devices.
+
+Signed-off-by: Leif Liddy <leif.liddy@gmail.com>
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/8250/8250_pci.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -1379,6 +1379,9 @@ ce4100_serial_setup(struct serial_privat
+ #define PCI_DEVICE_ID_INTEL_BSW_UART1 0x228a
+ #define PCI_DEVICE_ID_INTEL_BSW_UART2 0x228c
+
++#define PCI_DEVICE_ID_INTEL_BDW_UART1 0x9ce3
++#define PCI_DEVICE_ID_INTEL_BDW_UART2 0x9ce4
++
+ #define BYT_PRV_CLK 0x800
+ #define BYT_PRV_CLK_EN (1 << 0)
+ #define BYT_PRV_CLK_M_VAL_SHIFT 1
+@@ -1461,11 +1464,13 @@ byt_serial_setup(struct serial_private *
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_BYT_UART1:
+ case PCI_DEVICE_ID_INTEL_BSW_UART1:
++ case PCI_DEVICE_ID_INTEL_BDW_UART1:
+ rx_param->src_id = 3;
+ tx_param->dst_id = 2;
+ break;
+ case PCI_DEVICE_ID_INTEL_BYT_UART2:
+ case PCI_DEVICE_ID_INTEL_BSW_UART2:
++ case PCI_DEVICE_ID_INTEL_BDW_UART2:
+ rx_param->src_id = 5;
+ tx_param->dst_id = 4;
+ break;
+@@ -2063,6 +2068,20 @@ static struct pci_serial_quirk pci_seria
+ .subdevice = PCI_ANY_ID,
+ .setup = byt_serial_setup,
+ },
++ {
++ .vendor = PCI_VENDOR_ID_INTEL,
++ .device = PCI_DEVICE_ID_INTEL_BDW_UART1,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ .setup = byt_serial_setup,
++ },
++ {
++ .vendor = PCI_VENDOR_ID_INTEL,
++ .device = PCI_DEVICE_ID_INTEL_BDW_UART2,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ .setup = byt_serial_setup,
++ },
+ /*
+ * ITE
+ */
+@@ -5522,6 +5541,16 @@ static struct pci_device_id serial_pci_t
+ PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+ pbn_byt },
++
++ /* Intel Broadwell */
++ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
++ PCI_ANY_ID, PCI_ANY_ID,
++ PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
++ pbn_byt },
++ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
++ PCI_ANY_ID, PCI_ANY_ID,
++ PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
++ pbn_byt },
+
+ /*
+ * Intel Quark x1000
--- /dev/null
+From 308bbc9ab838d0ace0298268c7970ba9513e2c65 Mon Sep 17 00:00:00 2001
+From: Peter Hurley <peter@hurleysoftware.com>
+Date: Tue, 12 Jan 2016 15:14:46 -0800
+Subject: serial: omap: Prevent DoS using unprivileged ioctl(TIOCSRS485)
+
+From: Peter Hurley <peter@hurleysoftware.com>
+
+commit 308bbc9ab838d0ace0298268c7970ba9513e2c65 upstream.
+
+The omap-serial driver emulates RS485 delays using software timers,
+but neglects to clamp the input values from the unprivileged
+ioctl(TIOCSRS485). Because the software implementation busy-waits,
+malicious userspace could stall the cpu for ~49 days.
+
+Clamp the input values to at most 100ms.
+
+Fixes: 4a0ac0f55b18 ("OMAP: add RS485 support")
+Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/omap-serial.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/tty/serial/omap-serial.c
++++ b/drivers/tty/serial/omap-serial.c
+@@ -1343,7 +1343,7 @@ static inline void serial_omap_add_conso
+
+ /* Enable or disable the rs485 support */
+ static int
+-serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
++serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485)
+ {
+ struct uart_omap_port *up = to_uart_omap_port(port);
+ unsigned int mode;
+@@ -1356,8 +1356,12 @@ serial_omap_config_rs485(struct uart_por
+ up->ier = 0;
+ serial_out(up, UART_IER, 0);
+
++ /* Clamp the delays to [0, 100ms] */
++ rs485->delay_rts_before_send = min(rs485->delay_rts_before_send, 100U);
++ rs485->delay_rts_after_send = min(rs485->delay_rts_after_send, 100U);
++
+ /* store new config */
+- port->rs485 = *rs485conf;
++ port->rs485 = *rs485;
+
+ /*
+ * Just as a precaution, only allow rs485
x86-mm-fix-types-used-in-pgprot-cacheability-flags-translations.patch
x86-mm-pat-avoid-truncation-when-converting-cpa-numpages-to-address.patch
+x86-uaccess-64-make-the-__copy_user_nocache-assembly-code-more-readable.patch
+x86-uaccess-64-handle-the-caching-of-4-byte-nocache-copies-properly-in-__copy_user_nocache.patch
+x86-mm-fix-vmalloc_fault-to-handle-large-pages-properly.patch
+alsa-hda-cancel-probe-work-instead-of-flush-at-remove.patch
+alsa-pcm-fix-rwsem-deadlock-for-non-atomic-pcm-stream.patch
+alsa-seq-fix-leak-of-pool-buffer-at-concurrent-writes.patch
+alsa-seq-fix-double-port-list-deletion.patch
+phy-twl4030-usb-relase-usb-phy-on-unload.patch
+phy-twl4030-usb-fix-unbalanced-pm_runtime_enable-on-module-reload.patch
+staging-speakup-use-tty_ldisc_ref-for-paste-kworker.patch
+pty-fix-possible-use-after-free-of-tty-driver_data.patch
+pty-make-sure-super_block-is-still-valid-in-final-dev-tty-close.patch
+tty-add-support-for-pcie-wch382-2s-multi-io-card.patch
+serial-8250_pci-add-intel-broadwell-ports.patch
+serial-omap-prevent-dos-using-unprivileged-ioctl-tiocsrs485.patch
+ext4-fix-scheduling-in-atomic-on-group-checksum-failure.patch
+ext4-fix-potential-integer-overflow.patch
+ext4-don-t-read-blocks-from-disk-after-extents-being-swapped.patch
+btrfs-handle-invalid-num_stripes-in-sys_array.patch
+btrfs-fix-fitrim-discarding-device-area-reserved-for-boot-loader-s-use.patch
+revert-btrfs-clear-pf_nofreeze-in-cleaner_kthread.patch
+btrfs-properly-set-the-termination-value-of-ctx-pos-in-readdir.patch
+btrfs-fix-invalid-page-accesses-in-extent_same-dedup-ioctl.patch
+btrfs-fix-page-reading-in-extent_same-ioctl-leading-to-csum-errors.patch
+btrfs-fix-hang-on-extent-buffer-lock-caused-by-the-inode_paths-ioctl.patch
+btrfs-fix-direct-io-requests-not-reporting-io-error-to-user-space.patch
+ptrace-use-fsuid-fsgid-effective-creds-for-fs-access-checks.patch
+tools-lib-traceevent-fix-output-of-llu-for-64-bit-values-read-on-32-bit-machines.patch
+perf-tools-tracepoint_error-can-receive-e-null-robustify-it.patch
+tracing-fix-freak-link-error-caused-by-branch-tracer.patch
+tracepoints-do-not-trace-when-cpu-is-offline.patch
+klist-fix-starting-point-removed-bug-in-klist-iterators.patch
+scsi-add-synology-to-1024-sector-blacklist.patch
+iscsi-target-fix-potential-dead-lock-during-node-acl-delete.patch
+scsi-fix-crashes-in-sd-and-sr-runtime-pm.patch
+drivers-scsi-sg.c-mark-vma-as-vm_io-to-prevent-migration.patch
+scsi_dh_rdac-always-retry-mode-select-on-command-lock-violation.patch
+scsi-add-marvell-console-to-vpd-blacklist.patch
+scsi-fix-soft-lockup-in-scsi_remove_target-on-module-removal.patch
+iio-adis_buffer-fix-out-of-bounds-memory-access.patch
+iio-adc-ti_am335x_adc-fix-buffered-mode-by-identifying-as-software-buffer.patch
+iio-light-use-a-signed-return-type-for-ltr501_match_samp_freq.patch
+iio-add-has_iomem-dependency-to-vf610_adc.patch
+iio-add-iio_trigger-dependency-to-stk8ba50.patch
+iio-dac-mcp4725-set-iio-name-property-in-sysfs.patch
+iio-light-acpi-als-report-data-as-processed.patch
+iio-pressure-mpl115-fix-temperature-offset-sign.patch
+iio-inkern-fix-a-null-dereference-on-error.patch
+cifs-ratelimit-kernel-log-messages.patch
+cifs-fix-race-between-call_async-and-reconnect.patch
+cifs_dbg-outputs-an-uninitialized-buffer-in-cifs_readdir.patch
+cifs-fix-erroneous-return-value.patch
+nfs-fix-attribute-cache-revalidation.patch
+pnfs-flexfiles-fix-an-oopsable-typo-in-ff_mirror_match_fh.patch
+nfs-fix-race-in-__update_open_stateid.patch
+pnfs-flexfiles-fix-an-xdr-encoding-bug-in-layoutreturn.patch
+udf-limit-the-maximum-number-of-indirect-extents-in-a-row.patch
+udf-prevent-buffer-overrun-with-multi-byte-characters.patch
+udf-check-output-buffer-length-when-converting-name-to-cs0.patch
+sunrpc-fixup-socket-wait-for-memory.patch
+powerpc-eeh-fix-pe-location-code.patch
+powerpc-simplify-module-toc-handling.patch
+powerpc-fix-dedotify-for-binutils-2.26.patch
+powerpc-eeh-fix-stale-cached-primary-bus.patch
+powerpc-powernv-fix-stale-pe-primary-bus.patch
+powerpc-ioda-set-read-permission-when-write-is-set.patch
+arm-mvebu-remove-duplicated-regulator-definition-in-armada-388-gp.patch
+arm64-mm-avoid-calling-apply_to_page_range-on-empty-range.patch
+arm-8519-1-icst-try-other-dividends-than-1.patch
+arm-8517-1-icst-avoid-arithmetic-overflow-in-icst_hz.patch
+arm-nomadik-fix-up-sd-mmc-dt-settings.patch
+arm-dts-fix-wl12xx-missing-clocks-that-cause-hangs.patch
+arm-dts-fix-omap5-pmic-control-lines-for-rtc-writes.patch
+arm-dts-omap5-board-common-enable-rtc-and-charging-of-backup-battery.patch
+arm-dts-at91-sama5d4-xplained-properly-mux-phy-interrupt.patch
+arm-dts-at91-sama5d4-fix-instance-id-of-dbgu.patch
+arm-dts-at91-sama5d4-xplained-fix-phy0-irq-type.patch
+arm-dts-at91-sama5d4ek-add-phy-address-and-irq-for-macb0.patch
+arm-omap2-fix-wait_dll_lock_timed-for-rodata.patch
+arm-omap2-fix-l2_inv_api_params-for-rodata.patch
+arm-omap2-fix-l2dis_3630-for-rodata.patch
+arm-omap2-fix-save_secure_ram_context-for-rodata.patch
+arm-omap2-fix-ppa_zero_params-and-ppa_por_params-for-rodata.patch
+arm64-dma-mapping-fix-handling-of-devices-registered-before-arch_initcall.patch
+kvm-arm-arm64-fix-reference-to-uninitialised-vgic.patch
+kvm-ppc-fix-emulation-of-h_set_dabr-x-on-power8.patch
+kvm-ppc-fix-one_reg-altivec-support.patch
+perf-kvm-record-report-unprocessable-sample-error-while-recording-reporting-guest-data.patch
+mm-soft-offline-check-return-value-in-second-__get_any_page-call.patch
+libnvdimm-fix-namespace-object-confusion-in-is_uuid_busy.patch
+mm-fix-mlock-accouting.patch
+mm-replace-vma_lock_anon_vma-with-anon_vma_lock_read-write.patch
+mm-fix-regression-in-remap_file_pages-emulation.patch
+input-elantech-mark-protocols-v2-and-v3-as-semi-mt.patch
+input-i8042-add-fujitsu-lifebook-u745-to-the-nomux-list.patch
+string_helpers-fix-precision-loss-for-some-inputs.patch
+input-vmmouse-fix-absolute-device-registration.patch
+iommu-vt-d-don-t-skip-pci-devices-when-disabling-iotlb.patch
+iommu-amd-correct-the-wrong-setting-of-alias-dte-in-do_attach.patch
+iommu-vt-d-fix-mm-refcounting-to-hold-mm_count-not-mm_users.patch
+iommu-vt-d-fix-64-bit-accesses-to-32-bit-dmar_gsts_reg.patch
+iommu-vt-d-clear-ppr-bit-to-ensure-we-get-more-page-request-interrupts.patch
+revert-xhci-don-t-finish-a-td-if-we-get-a-short-transfer-event-mid-td.patch
+xhci-fix-list-corruption-in-urb-dequeue-at-host-removal.patch
+m32r-fix-m32104ut_defconfig-build-fail.patch
+dma-debug-switch-check-from-_text-to-_stext.patch
+scripts-bloat-o-meter-fix-python3-syntax-error.patch
+fs-hugetlbfs-inode.c-fix-bugs-in-hugetlb_vmtruncate_list.patch
+numa-fix-proc-pid-numa_maps-for-hugetlbfs-on-s390.patch
+memcg-only-free-spare-array-when-readers-are-done.patch
+maintainers-return-arch-sh-to-maintained-state-with-new-maintainers.patch
+radix-tree-fix-race-in-gang-lookup.patch
+drivers-hwspinlock-fix-race-between-radix-tree-insertion-and-lookup.patch
+radix-tree-fix-oops-after-radix_tree_iter_retry.patch
+dump_stack-avoid-potential-deadlocks.patch
+mm-thp-khugepaged-call-pte-flush-at-the-time-of-collapse.patch
+intel_scu_ipcutil-underflow-in-scu_reg_access.patch
+ipc-shm-handle-removed-segments-gracefully-in-shm_mmap.patch
+devm_memremap_release-fix-memremap-d-addr-handling.patch
+futex-drop-refcount-if-requeue_pi-acquired-the-rtmutex.patch
+ovl-allow-zero-size-xattr.patch
+ovl-use-a-minimal-buffer-in-ovl_copy_xattr.patch
+ovl-check-dentry-positiveness-in-ovl_cleanup_whiteouts.patch
+ovl-root-copy-attr.patch
+ovl-setattr-check-permissions-before-copy-up.patch
+libxfs-pack-the-agfl-header-structure-so-xfs_agfl_size-is-correct.patch
+xfs-inode-recovery-readahead-can-race-with-inode-buffer-creation.patch
+revert-xfs-clear-pf_nofreeze-for-xfsaild-kthread.patch
+xfs-log-mount-failures-don-t-wait-for-buffers-to-be-released.patch
+prctl-take-mmap-sem-for-writing-to-protect-against-others.patch
+timerfd-handle-relative-timers-with-config_time_low_res-proper.patch
+posix-timers-handle-relative-timers-with-config_time_low_res-proper.patch
+itimers-handle-relative-timers-with-config_time_low_res-proper.patch
+module-wrapper-for-symbol-name.patch
+modules-fix-modparam-async_probe-request.patch
--- /dev/null
+From f4f9edcf9b5289ed96113e79fa65a7bf27ecb096 Mon Sep 17 00:00:00 2001
+From: Peter Hurley <peter@hurleysoftware.com>
+Date: Sun, 10 Jan 2016 22:40:58 -0800
+Subject: staging/speakup: Use tty_ldisc_ref() for paste kworker
+
+From: Peter Hurley <peter@hurleysoftware.com>
+
+commit f4f9edcf9b5289ed96113e79fa65a7bf27ecb096 upstream.
+
+As the function documentation for tty_ldisc_ref_wait() notes, it is
+only callable from a tty file_operations routine; otherwise there
+is no guarantee the ref won't be NULL.
+
+The key difference with the VT's paste_selection() is that it is an ioctl,
+whereas __speakup_paste_selection() is a completely async kworker, kicked
+off from interrupt context.
+
+Fixes: 28a821c30688 ("Staging: speakup: Update __speakup_paste_selection()
+ tty (ab)usage to match vt")
+Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/staging/speakup/selection.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/staging/speakup/selection.c
++++ b/drivers/staging/speakup/selection.c
+@@ -142,7 +142,9 @@ static void __speakup_paste_selection(st
+ struct tty_ldisc *ld;
+ DECLARE_WAITQUEUE(wait, current);
+
+- ld = tty_ldisc_ref_wait(tty);
++ ld = tty_ldisc_ref(tty);
++ if (!ld)
++ goto tty_unref;
+ tty_buffer_lock_exclusive(&vc->port);
+
+ add_wait_queue(&vc->paste_wait, &wait);
+@@ -162,6 +164,7 @@ static void __speakup_paste_selection(st
+
+ tty_buffer_unlock_exclusive(&vc->port);
+ tty_ldisc_deref(ld);
++tty_unref:
+ tty_kref_put(tty);
+ }
+
--- /dev/null
+From 564b026fbd0d28e9f70fb3831293d2922bb7855b Mon Sep 17 00:00:00 2001
+From: James Bottomley <JBottomley@Odin.com>
+Date: Wed, 20 Jan 2016 14:58:29 -0800
+Subject: string_helpers: fix precision loss for some inputs
+
+From: James Bottomley <JBottomley@Odin.com>
+
+commit 564b026fbd0d28e9f70fb3831293d2922bb7855b upstream.
+
+It was noticed that we lose precision in the final calculation for some
+inputs. The most egregious example is size=3000 blk_size=1900 in units
+of 10 should yield 5.70 MB but in fact yields 3.00 MB (oops).
+
+This is because the current algorithm doesn't correctly account for
+all the remainders in the logarithms. Fix this by doing a correct
+calculation in the remainders based on Napier's algorithm.
+
+Additionally, now we have the correct result, we have to account for
+arithmetic rounding because we're printing 3 digits of precision. This
+means that if the fourth digit is five or greater, we have to round up,
+so add a section to ensure correct rounding. Finally account for all
+possible inputs correctly, including zero for block size.
+
+Fixes: b9f28d863594c429e1df35a0474d2663ca28b307
+Signed-off-by: James Bottomley <JBottomley@Odin.com>
+Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/string_helpers.c | 63 ++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 43 insertions(+), 20 deletions(-)
+
+--- a/lib/string_helpers.c
++++ b/lib/string_helpers.c
+@@ -43,50 +43,73 @@ void string_get_size(u64 size, u64 blk_s
+ [STRING_UNITS_10] = 1000,
+ [STRING_UNITS_2] = 1024,
+ };
+- int i, j;
+- u32 remainder = 0, sf_cap, exp;
++ static const unsigned int rounding[] = { 500, 50, 5 };
++ int i = 0, j;
++ u32 remainder = 0, sf_cap;
+ char tmp[8];
+ const char *unit;
+
+ tmp[0] = '\0';
+- i = 0;
+- if (!size)
++
++ if (blk_size == 0)
++ size = 0;
++ if (size == 0)
+ goto out;
+
+- while (blk_size >= divisor[units]) {
+- remainder = do_div(blk_size, divisor[units]);
++ /* This is Napier's algorithm. Reduce the original block size to
++ *
++ * coefficient * divisor[units]^i
++ *
++ * we do the reduction so both coefficients are just under 32 bits so
++ * that multiplying them together won't overflow 64 bits and we keep
++ * as much precision as possible in the numbers.
++ *
++ * Note: it's safe to throw away the remainders here because all the
++ * precision is in the coefficients.
++ */
++ while (blk_size >> 32) {
++ do_div(blk_size, divisor[units]);
+ i++;
+ }
+
+- exp = divisor[units] / (u32)blk_size;
+- /*
+- * size must be strictly greater than exp here to ensure that remainder
+- * is greater than divisor[units] coming out of the if below.
+- */
+- if (size > exp) {
+- remainder = do_div(size, divisor[units]);
+- remainder *= blk_size;
++ while (size >> 32) {
++ do_div(size, divisor[units]);
+ i++;
+- } else {
+- remainder *= size;
+ }
+
++ /* now perform the actual multiplication keeping i as the sum of the
++ * two logarithms */
+ size *= blk_size;
+- size += remainder / divisor[units];
+- remainder %= divisor[units];
+
++ /* and logarithmically reduce it until it's just under the divisor */
+ while (size >= divisor[units]) {
+ remainder = do_div(size, divisor[units]);
+ i++;
+ }
+
++ /* work out in j how many digits of precision we need from the
++ * remainder */
+ sf_cap = size;
+ for (j = 0; sf_cap*10 < 1000; j++)
+ sf_cap *= 10;
+
+- if (j) {
++ if (units == STRING_UNITS_2) {
++ /* express the remainder as a decimal. It's currently the
++ * numerator of a fraction whose denominator is
++ * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
+ remainder *= 1000;
+- remainder /= divisor[units];
++ remainder >>= 10;
++ }
++
++ /* add a 5 to the digit below what will be printed to ensure
++ * an arithmetical round up and carry it through to size */
++ remainder += rounding[j];
++ if (remainder >= 1000) {
++ remainder -= 1000;
++ size += 1;
++ }
++
++ if (j) {
+ snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+ tmp[j+1] = '\0';
+ }
--- /dev/null
+From 13331a551ab4df87f7a027d2cab392da96aba1de Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Wed, 6 Jan 2016 08:57:06 -0500
+Subject: SUNRPC: Fixup socket wait for memory
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 13331a551ab4df87f7a027d2cab392da96aba1de upstream.
+
+We're seeing hangs in the NFS client code, with loops of the form:
+
+ RPC: 30317 xmit incomplete (267368 left of 524448)
+ RPC: 30317 call_status (status -11)
+ RPC: 30317 call_transmit (status 0)
+ RPC: 30317 xprt_prepare_transmit
+ RPC: 30317 xprt_transmit(524448)
+ RPC: xs_tcp_send_request(267368) = -11
+ RPC: 30317 xmit incomplete (267368 left of 524448)
+ RPC: 30317 call_status (status -11)
+ RPC: 30317 call_transmit (status 0)
+ RPC: 30317 xprt_prepare_transmit
+ RPC: 30317 xprt_transmit(524448)
+
+Turns out commit ceb5d58b2170 ("net: fix sock_wake_async() rcu protection")
+moved SOCKWQ_ASYNC_NOSPACE out of sock->flags and into sk->sk_wq->flags,
+however it never tried to fix up the code in net/sunrpc.
+
+The new idiom is to use the flags in the RCU protected struct socket_wq.
+While we're at it, clear out the now redundant places where we set/clear
+SOCKWQ_ASYNC_NOSPACE and SOCK_NOSPACE. In principle, sk_stream_wait_memory()
+is supposed to set these for us, so we only need to clear them in the
+particular case of our ->write_space() callback.
+
+Fixes: ceb5d58b2170 ("net: fix sock_wake_async() rcu protection")
+Cc: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sunrpc/xprtsock.c | 49 +++++++++++++++++++++----------------------------
+ 1 file changed, 21 insertions(+), 28 deletions(-)
+
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *s
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+- clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
+ if (base != 0) {
+ addr = NULL;
+ addrlen = 0;
+@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct r
+ struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
+
+ transport->inet->sk_write_pending--;
+- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
+ }
+
+ /**
+@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *t
+
+ /* Don't race with disconnect */
+ if (xprt_connected(xprt)) {
+- if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
+- /*
+- * Notify TCP that we're limited by the application
+- * window size
+- */
+- set_bit(SOCK_NOSPACE, &transport->sock->flags);
+- sk->sk_write_pending++;
+- /* ...and wait for more buffer space */
+- xprt_wait_for_buffer_space(task, xs_nospace_callback);
+- }
+- } else {
+- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
++ /* wait for more buffer space */
++ sk->sk_write_pending++;
++ xprt_wait_for_buffer_space(task, xs_nospace_callback);
++ } else
+ ret = -ENOTCONN;
+- }
+
+ spin_unlock_bh(&xprt->transport_lock);
+
+@@ -616,9 +605,6 @@ process_status:
+ case -EAGAIN:
+ status = xs_nospace(task);
+ break;
+- default:
+- dprintk("RPC: sendmsg returned unrecognized error %d\n",
+- -status);
+ case -ENETUNREACH:
+ case -ENOBUFS:
+ case -EPIPE:
+@@ -626,7 +612,10 @@ process_status:
+ case -EPERM:
+ /* When the server has died, an ICMP port unreachable message
+ * prompts ECONNREFUSED. */
+- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
++ break;
++ default:
++ dprintk("RPC: sendmsg returned unrecognized error %d\n",
++ -status);
+ }
+
+ return status;
+@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rp
+ case -EAGAIN:
+ status = xs_nospace(task);
+ break;
+- default:
+- dprintk("RPC: sendmsg returned unrecognized error %d\n",
+- -status);
+ case -ECONNRESET:
+ case -ECONNREFUSED:
+ case -ENOTCONN:
+ case -EADDRINUSE:
+ case -ENOBUFS:
+ case -EPIPE:
+- clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
++ break;
++ default:
++ dprintk("RPC: sendmsg returned unrecognized error %d\n",
++ -status);
+ }
+
+ return status;
+@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct s
+
+ static void xs_write_space(struct sock *sk)
+ {
+- struct socket *sock;
++ struct socket_wq *wq;
+ struct rpc_xprt *xprt;
+
+- if (unlikely(!(sock = sk->sk_socket)))
++ if (!sk->sk_socket)
+ return;
+- clear_bit(SOCK_NOSPACE, &sock->flags);
++ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+ if (unlikely(!(xprt = xprt_from_sock(sk))))
+ return;
+- if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
+- return;
++ rcu_read_lock();
++ wq = rcu_dereference(sk->sk_wq);
++ if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
++ goto out;
+
+ xprt_write_space(xprt);
++out:
++ rcu_read_unlock();
+ }
+
+ /**
--- /dev/null
+From b62526ed11a1fe3861ab98d40b7fdab8981d788a Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Jan 2016 16:54:46 +0000
+Subject: timerfd: Handle relative timers with CONFIG_TIME_LOW_RES proper
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit b62526ed11a1fe3861ab98d40b7fdab8981d788a upstream.
+
+Helge reported that a relative timer can return a remaining time larger than
+the programmed relative time on parisc and other architectures which have
+CONFIG_TIME_LOW_RES set. This happens because we add a jiffie to the resulting
+expiry time to prevent short timeouts.
+
+Use the new function hrtimer_expires_remaining_adjusted() to calculate the
+remaining time. It takes that extra added time into account for relative
+timers.
+
+Reported-and-tested-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: John Stultz <john.stultz@linaro.org>
+Cc: linux-m68k@lists.linux-m68k.org
+Cc: dhowells@redhat.com
+Link: http://lkml.kernel.org/r/20160114164159.354500742@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/timerfd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/timerfd.c
++++ b/fs/timerfd.c
+@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(str
+ if (isalarm(ctx))
+ remaining = alarm_expires_remaining(&ctx->t.alarm);
+ else
+- remaining = hrtimer_expires_remaining(&ctx->t.tmr);
++ remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
+
+ return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
+ }
--- /dev/null
+From 32abc2ede536aae52978d6c0a8944eb1df14f460 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Mon, 16 Nov 2015 17:25:16 -0500
+Subject: tools lib traceevent: Fix output of %llu for 64 bit values read on 32 bit machines
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit 32abc2ede536aae52978d6c0a8944eb1df14f460 upstream.
+
+When a long value is read on 32 bit machines for 64 bit output, the
+parsing needs to change "%lu" into "%llu", as the value is read
+natively.
+
+Unfortunately, if "%llu" is already there, the code will add another "l"
+to it and fail to parse it properly.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Link: http://lkml.kernel.org/r/20151116172516.4b79b109@gandalf.local.home
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/lib/traceevent/event-parse.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/tools/lib/traceevent/event-parse.c
++++ b/tools/lib/traceevent/event-parse.c
+@@ -4968,13 +4968,12 @@ static void pretty_print(struct trace_se
+ sizeof(long) != 8) {
+ char *p;
+
+- ls = 2;
+ /* make %l into %ll */
+- p = strchr(format, 'l');
+- if (p)
++ if (ls == 1 && (p = strchr(format, 'l')))
+ memmove(p+1, p, strlen(p)+1);
+ else if (strcmp(format, "%p") == 0)
+ strcpy(format, "0x%llx");
++ ls = 2;
+ }
+ switch (ls) {
+ case -2:
--- /dev/null
+From f37755490fe9bf76f6ba1d8c6591745d3574a6a6 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
+Date: Mon, 15 Feb 2016 12:36:14 -0500
+Subject: tracepoints: Do not trace when cpu is offline
+
+From: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
+
+commit f37755490fe9bf76f6ba1d8c6591745d3574a6a6 upstream.
+
+The tracepoint infrastructure uses RCU sched protection to enable and
+disable tracepoints safely. There are some instances where tracepoints are
+used in infrastructure code (like kfree()) that get called after a CPU is
+going offline, and perhaps when it is coming back online but hasn't been
+registered yet.
+
+This can produce the following warning:
+
+ [ INFO: suspicious RCU usage. ]
+ 4.4.0-00006-g0fe53e8-dirty #34 Tainted: G S
+ -------------------------------
+ include/trace/events/kmem.h:141 suspicious rcu_dereference_check() usage!
+
+ other info that might help us debug this:
+
+ RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1
+ no locks held by swapper/8/0.
+
+ stack backtrace:
+ CPU: 8 PID: 0 Comm: swapper/8 Tainted: G S 4.4.0-00006-g0fe53e8-dirty #34
+ Call Trace:
+ [c0000005b76c78d0] [c0000000008b9540] .dump_stack+0x98/0xd4 (unreliable)
+ [c0000005b76c7950] [c00000000010c898] .lockdep_rcu_suspicious+0x108/0x170
+ [c0000005b76c79e0] [c00000000029adc0] .kfree+0x390/0x440
+ [c0000005b76c7a80] [c000000000055f74] .destroy_context+0x44/0x100
+ [c0000005b76c7b00] [c0000000000934a0] .__mmdrop+0x60/0x150
+ [c0000005b76c7b90] [c0000000000e3ff0] .idle_task_exit+0x130/0x140
+ [c0000005b76c7c20] [c000000000075804] .pseries_mach_cpu_die+0x64/0x310
+ [c0000005b76c7cd0] [c000000000043e7c] .cpu_die+0x3c/0x60
+ [c0000005b76c7d40] [c0000000000188d8] .arch_cpu_idle_dead+0x28/0x40
+ [c0000005b76c7db0] [c000000000101e6c] .cpu_startup_entry+0x50c/0x560
+ [c0000005b76c7ed0] [c000000000043bd8] .start_secondary+0x328/0x360
+ [c0000005b76c7f90] [c000000000008a6c] start_secondary_prolog+0x10/0x14
+
+This warning is not a false positive either. RCU is not protecting code that
+is being executed while the CPU is offline.
+
+Instead of playing "whack-a-mole(TM)" and adding conditional statements to
+the tracepoints we find that are used in this instance, simply add a
+cpu_online() test to the tracepoint code where the tracepoint will be
+ignored if the CPU is offline.
+
+Use of raw_smp_processor_id() is fine, as there should never be a case where
+the tracepoint code goes from running on a CPU that is online and suddenly
+gets migrated to a CPU that is offline.
+
+Link: http://lkml.kernel.org/r/1455387773-4245-1-git-send-email-kda@linux-powerpc.org
+
+Reported-by: Denis Kirjanov <kda@linux-powerpc.org>
+Fixes: 97e1c18e8d17b ("tracing: Kernel Tracepoints")
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/tracepoint.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/include/linux/tracepoint.h
++++ b/include/linux/tracepoint.h
+@@ -14,8 +14,10 @@
+ * See the file COPYING for more details.
+ */
+
++#include <linux/smp.h>
+ #include <linux/errno.h>
+ #include <linux/types.h>
++#include <linux/cpumask.h>
+ #include <linux/rcupdate.h>
+ #include <linux/static_key.h>
+
+@@ -146,6 +148,9 @@ extern void syscall_unregfunc(void);
+ void *it_func; \
+ void *__data; \
+ \
++ if (!cpu_online(raw_smp_processor_id())) \
++ return; \
++ \
+ if (!(cond)) \
+ return; \
+ prercu; \
--- /dev/null
+From b33c8ff4431a343561e2319f17c14286f2aa52e2 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 12 Feb 2016 22:26:42 +0100
+Subject: tracing: Fix freak link error caused by branch tracer
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit b33c8ff4431a343561e2319f17c14286f2aa52e2 upstream.
+
+In my randconfig tests, I came across a bug that involves several
+components:
+
+* gcc-4.9 through at least 5.3
+* CONFIG_GCOV_PROFILE_ALL enabling -fprofile-arcs for all files
+* CONFIG_PROFILE_ALL_BRANCHES overriding every if()
+* The optimized implementation of do_div() that tries to
+  replace a library call with a division by multiplication
+* code in drivers/media/dvb-frontends/zl10353.c doing
+
+ u32 adc_clock = 450560; /* 45.056 MHz */
+ if (state->config.adc_clock)
+ adc_clock = state->config.adc_clock;
+ do_div(value, adc_clock);
+
+In this case, gcc fails to determine whether the divisor
+in do_div() is __builtin_constant_p(). In particular, it
+concludes that __builtin_constant_p(adc_clock) is false, while
+__builtin_constant_p(!!adc_clock) is true.
+
+That in turn throws off both the logic in do_div(), which uses
+__builtin_constant_p() to pick the constant-optimized division,
+and the code in ilog2(), which uses __builtin_constant_p() to
+figure out whether it knows the answer at compile time. The
+result is a link error from failing to find multiple symbols
+that should never have been called based on the
+__builtin_constant_p():
+
+dvb-frontends/zl10353.c:138: undefined reference to `____ilog2_NaN'
+dvb-frontends/zl10353.c:138: undefined reference to `__aeabi_uldivmod'
+ERROR: "____ilog2_NaN" [drivers/media/dvb-frontends/zl10353.ko] undefined!
+ERROR: "__aeabi_uldivmod" [drivers/media/dvb-frontends/zl10353.ko] undefined!
+
+This patch avoids the problem by changing __trace_if() to check
+whether the condition is known at compile-time to be nonzero, rather
+than checking whether it is actually a constant.
+
+I see this one link error in roughly one out of 1600 randconfig builds
+on ARM, and the patch fixes all known instances.
+
+Link: http://lkml.kernel.org/r/1455312410-1058841-1-git-send-email-arnd@arndb.de
+
+Acked-by: Nicolas Pitre <nico@linaro.org>
+Fixes: ab3c9c686e22 ("branch tracer, intel-iommu: fix build with CONFIG_BRANCH_TRACER=y")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/compiler.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -144,7 +144,7 @@ void ftrace_likely_update(struct ftrace_
+ */
+ #define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
+ #define __trace_if(cond) \
+- if (__builtin_constant_p((cond)) ? !!(cond) : \
++ if (__builtin_constant_p(!!(cond)) ? !!(cond) : \
+ ({ \
+ int ______r; \
+ static struct ftrace_branch_data \
--- /dev/null
+From 7dde55787b43a8f2b4021916db38d90c03a2ec64 Mon Sep 17 00:00:00 2001
+From: Jeremy McNicoll <jmcnicol@redhat.com>
+Date: Tue, 2 Feb 2016 13:00:45 -0800
+Subject: tty: Add support for PCIe WCH382 2S multi-IO card
+
+From: Jeremy McNicoll <jmcnicol@redhat.com>
+
+commit 7dde55787b43a8f2b4021916db38d90c03a2ec64 upstream.
+
+WCH382 2S board is a PCIe card with 2 DB9 COM ports detected as
+Serial controller: Device 1c00:3253 (rev 10) (prog-if 05 [16850])
+
+Signed-off-by: Jeremy McNicoll <jmcnicol@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/8250/8250_pci.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -1936,6 +1936,7 @@ pci_wch_ch38x_setup(struct serial_privat
+ #define PCIE_VENDOR_ID_WCH 0x1c00
+ #define PCIE_DEVICE_ID_WCH_CH382_2S1P 0x3250
+ #define PCIE_DEVICE_ID_WCH_CH384_4S 0x3470
++#define PCIE_DEVICE_ID_WCH_CH382_2S 0x3253
+
+ #define PCI_VENDOR_ID_PERICOM 0x12D8
+ #define PCI_DEVICE_ID_PERICOM_PI7C9X7951 0x7951
+@@ -2618,6 +2619,14 @@ static struct pci_serial_quirk pci_seria
+ .subdevice = PCI_ANY_ID,
+ .setup = pci_wch_ch353_setup,
+ },
++ /* WCH CH382 2S card (16850 clone) */
++ {
++ .vendor = PCIE_VENDOR_ID_WCH,
++ .device = PCIE_DEVICE_ID_WCH_CH382_2S,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ .setup = pci_wch_ch38x_setup,
++ },
+ /* WCH CH382 2S1P card (16850 clone) */
+ {
+ .vendor = PCIE_VENDOR_ID_WCH,
+@@ -2936,6 +2945,7 @@ enum pci_board_num_t {
+ pbn_fintek_4,
+ pbn_fintek_8,
+ pbn_fintek_12,
++ pbn_wch382_2,
+ pbn_wch384_4,
+ pbn_pericom_PI7C9X7951,
+ pbn_pericom_PI7C9X7952,
+@@ -3756,6 +3766,13 @@ static struct pciserial_board pci_boards
+ .base_baud = 115200,
+ .first_offset = 0x40,
+ },
++ [pbn_wch382_2] = {
++ .flags = FL_BASE0,
++ .num_ports = 2,
++ .base_baud = 115200,
++ .uart_offset = 8,
++ .first_offset = 0xC0,
++ },
+ [pbn_wch384_4] = {
+ .flags = FL_BASE0,
+ .num_ports = 4,
+@@ -5545,6 +5562,10 @@ static struct pci_device_id serial_pci_t
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0, pbn_b0_bt_2_115200 },
+
++ { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH382_2S,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0, pbn_wch382_2 },
++
+ { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0, pbn_wch384_4 },
--- /dev/null
+From bb00c898ad1ce40c4bb422a8207ae562e9aea7ae Mon Sep 17 00:00:00 2001
+From: Andrew Gabbasov <andrew_gabbasov@mentor.com>
+Date: Thu, 24 Dec 2015 10:25:33 -0600
+Subject: udf: Check output buffer length when converting name to CS0
+
+From: Andrew Gabbasov <andrew_gabbasov@mentor.com>
+
+commit bb00c898ad1ce40c4bb422a8207ae562e9aea7ae upstream.
+
+If a name contains at least some characters with Unicode values
+exceeding single byte, the CS0 output should have 2 bytes per character.
+And if other input characters have single byte Unicode values, then
+the single input byte is converted to 2 output bytes, and the length
+of output becomes larger than the length of input. And if the input
+name is long enough, the output length may exceed the allocated buffer
+length.
+
+All this means that conversion from UTF8 or NLS to CS0 requires
+checking of output length in order to stop when it exceeds the given
+output buffer size.
+
+[JK: Make code return -ENAMETOOLONG instead of silently truncating the
+name]
+
+Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/udf/unicode.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/fs/udf/unicode.c
++++ b/fs/udf/unicode.c
+@@ -177,17 +177,22 @@ int udf_CS0toUTF8(struct ustr *utf_o, co
+ static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
+ {
+ unsigned c, i, max_val, utf_char;
+- int utf_cnt, u_len;
++ int utf_cnt, u_len, u_ch;
+
+ memset(ocu, 0, sizeof(dstring) * length);
+ ocu[0] = 8;
+ max_val = 0xffU;
++ u_ch = 1;
+
+ try_again:
+ u_len = 0U;
+ utf_char = 0U;
+ utf_cnt = 0U;
+ for (i = 0U; i < utf->u_len; i++) {
++ /* Name didn't fit? */
++ if (u_len + 1 + u_ch >= length)
++ return 0;
++
+ c = (uint8_t)utf->u_name[i];
+
+ /* Complete a multi-byte UTF-8 character */
+@@ -229,6 +234,7 @@ try_again:
+ if (max_val == 0xffU) {
+ max_val = 0xffffU;
+ ocu[0] = (uint8_t)0x10U;
++ u_ch = 2;
+ goto try_again;
+ }
+ goto error_out;
+@@ -299,15 +305,19 @@ static int udf_NLStoCS0(struct nls_table
+ int len;
+ unsigned i, max_val;
+ uint16_t uni_char;
+- int u_len;
++ int u_len, u_ch;
+
+ memset(ocu, 0, sizeof(dstring) * length);
+ ocu[0] = 8;
+ max_val = 0xffU;
++ u_ch = 1;
+
+ try_again:
+ u_len = 0U;
+ for (i = 0U; i < uni->u_len; i++) {
++ /* Name didn't fit? */
++ if (u_len + 1 + u_ch >= length)
++ return 0;
+ len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
+ if (!len)
+ continue;
+@@ -320,6 +330,7 @@ try_again:
+ if (uni_char > max_val) {
+ max_val = 0xffffU;
+ ocu[0] = (uint8_t)0x10U;
++ u_ch = 2;
+ goto try_again;
+ }
+
--- /dev/null
+From b0918d9f476a8434b055e362b83fa4fd1d462c3f Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Fri, 11 Dec 2015 15:54:16 +0100
+Subject: udf: limit the maximum number of indirect extents in a row
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit b0918d9f476a8434b055e362b83fa4fd1d462c3f upstream.
+
+udf_next_aext() just follows extent pointers while extents are marked as
+indirect. This can loop forever on a corrupted filesystem. Limit the
+number of indirect extents we are willing to follow in a row.
+
+[JK: Updated changelog, limit, style]
+
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Cc: Jan Kara <jack@suse.com>
+Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/udf/inode.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -2047,14 +2047,29 @@ void udf_write_aext(struct inode *inode,
+ epos->offset += adsize;
+ }
+
++/*
++ * Only 1 indirect extent in a row really makes sense but allow upto 16 in case
++ * someone does some weird stuff.
++ */
++#define UDF_MAX_INDIR_EXTS 16
++
+ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
+ struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
+ {
+ int8_t etype;
++ unsigned int indirections = 0;
+
+ while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) ==
+ (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
+ int block;
++
++ if (++indirections > UDF_MAX_INDIR_EXTS) {
++ udf_err(inode->i_sb,
++ "too many indirect extents in inode %lu\n",
++ inode->i_ino);
++ return -1;
++ }
++
+ epos->block = *eloc;
+ epos->offset = sizeof(struct allocExtDesc);
+ brelse(epos->bh);
--- /dev/null
+From ad402b265ecf6fa22d04043b41444cdfcdf4f52d Mon Sep 17 00:00:00 2001
+From: Andrew Gabbasov <andrew_gabbasov@mentor.com>
+Date: Thu, 24 Dec 2015 10:25:32 -0600
+Subject: udf: Prevent buffer overrun with multi-byte characters
+
+From: Andrew Gabbasov <andrew_gabbasov@mentor.com>
+
+commit ad402b265ecf6fa22d04043b41444cdfcdf4f52d upstream.
+
+udf_CS0toUTF8 function stops the conversion when the output buffer
+length reaches UDF_NAME_LEN-2, which is correct maximum name length,
+but, when checking, it leaves the space for a single byte only,
+while multi-bytes output characters can take more space, causing
+buffer overflow.
+
+Similar error exists in udf_CS0toNLS function, that restricts
+the output length to UDF_NAME_LEN, while actual maximum allowed
+length is UDF_NAME_LEN-2.
+
+In these cases the output can overwrite not only the current buffer
+length field, causing corruption of the name buffer itself, but also
+following allocation structures, causing kernel crash.
+
+Adjust the output length checks in both functions to prevent buffer
+overruns in case of multi-bytes UTF8 or NLS characters.
+
+Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/udf/unicode.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/udf/unicode.c
++++ b/fs/udf/unicode.c
+@@ -128,11 +128,15 @@ int udf_CS0toUTF8(struct ustr *utf_o, co
+ if (c < 0x80U)
+ utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
+ else if (c < 0x800U) {
++ if (utf_o->u_len > (UDF_NAME_LEN - 4))
++ break;
+ utf_o->u_name[utf_o->u_len++] =
+ (uint8_t)(0xc0 | (c >> 6));
+ utf_o->u_name[utf_o->u_len++] =
+ (uint8_t)(0x80 | (c & 0x3f));
+ } else {
++ if (utf_o->u_len > (UDF_NAME_LEN - 5))
++ break;
+ utf_o->u_name[utf_o->u_len++] =
+ (uint8_t)(0xe0 | (c >> 12));
+ utf_o->u_name[utf_o->u_len++] =
+@@ -277,7 +281,7 @@ static int udf_CS0toNLS(struct nls_table
+ c = (c << 8) | ocu[i++];
+
+ len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
+- UDF_NAME_LEN - utf_o->u_len);
++ UDF_NAME_LEN - 2 - utf_o->u_len);
+ /* Valid character? */
+ if (len >= 0)
+ utf_o->u_len += len;
--- /dev/null
+From f4eafd8bcd5229e998aa252627703b8462c3b90f Mon Sep 17 00:00:00 2001
+From: Toshi Kani <toshi.kani@hpe.com>
+Date: Wed, 17 Feb 2016 18:16:54 -0700
+Subject: x86/mm: Fix vmalloc_fault() to handle large pages properly
+
+From: Toshi Kani <toshi.kani@hpe.com>
+
+commit f4eafd8bcd5229e998aa252627703b8462c3b90f upstream.
+
+A kernel page fault oops with the callstack below was observed
+when a read syscall was made to a pmem device after a huge amount
+(>512GB) of vmalloc ranges was allocated by ioremap() on a x86_64
+system:
+
+ BUG: unable to handle kernel paging request at ffff880840000ff8
+ IP: vmalloc_fault+0x1be/0x300
+ PGD c7f03a067 PUD 0
+ Oops: 0000 [#1] SM
+ Call Trace:
+ __do_page_fault+0x285/0x3e0
+ do_page_fault+0x2f/0x80
+ ? put_prev_entity+0x35/0x7a0
+ page_fault+0x28/0x30
+ ? memcpy_erms+0x6/0x10
+ ? schedule+0x35/0x80
+ ? pmem_rw_bytes+0x6a/0x190 [nd_pmem]
+ ? schedule_timeout+0x183/0x240
+ btt_log_read+0x63/0x140 [nd_btt]
+ :
+ ? __symbol_put+0x60/0x60
+ ? kernel_read+0x50/0x80
+ SyS_finit_module+0xb9/0xf0
+ entry_SYSCALL_64_fastpath+0x1a/0xa4
+
+Since v4.1, ioremap() supports large page (pud/pmd) mappings in
+x86_64 and PAE. vmalloc_fault() however assumes that the vmalloc
+range is limited to pte mappings.
+
+vmalloc faults do not normally happen in ioremap'd ranges since
+ioremap() sets up the kernel page tables, which are shared by
+user processes. pgd_ctor() sets the kernel's PGD entries to
+user's during fork(). When allocation of the vmalloc ranges
+crosses a 512GB boundary, ioremap() allocates a new pud table
+and updates the kernel PGD entry to point to it. If a user process's
+PGD entry does not have this update yet, a read/write syscall
+to the range will cause a vmalloc fault, which hits the Oops
+above as it does not handle a large page properly.
+
+The following changes are made to vmalloc_fault():
+
+64-bit:
+
+ - No change for the PGD sync operation as it handles large
+ pages already.
+ - Add pud_huge() and pmd_huge() to the validation code to
+ handle large pages.
+ - Change pud_page_vaddr() to pud_pfn() since an ioremap range
+ is not directly mapped (while the if-statement still works
+ with a bogus addr).
+ - Change pmd_page() to pmd_pfn() since an ioremap range is not
+ backed by struct page (while the if-statement still works
+ with a bogus addr).
+
+32-bit:
+ - No change for the sync operation since the index3 PGD entry
+ covers the entire vmalloc range, which is always valid.
+ (A separate change to sync PGD entry is necessary if this
+ memory layout is changed regardless of the page size.)
+ - Add pmd_huge() to the validation code to handle large pages.
+ This is for completeness since vmalloc_fault() won't happen
+ in ioremap'd ranges as its PGD entry is always valid.
+
+Reported-by: Henning Schild <henning.schild@siemens.com>
+Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
+Acked-by: Borislav Petkov <bp@alien8.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: linux-mm@kvack.org
+Cc: linux-nvdimm@lists.01.org
+Link: http://lkml.kernel.org/r/1455758214-24623-1-git-send-email-toshi.kani@hpe.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/fault.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -287,6 +287,9 @@ static noinline int vmalloc_fault(unsign
+ if (!pmd_k)
+ return -1;
+
++ if (pmd_huge(*pmd_k))
++ return 0;
++
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+@@ -360,8 +363,6 @@ void vmalloc_sync_all(void)
+ * 64-bit:
+ *
+ * Handle a fault on the vmalloc area
+- *
+- * This assumes no large pages in there.
+ */
+ static noinline int vmalloc_fault(unsigned long address)
+ {
+@@ -403,17 +404,23 @@ static noinline int vmalloc_fault(unsign
+ if (pud_none(*pud_ref))
+ return -1;
+
+- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
++ if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
+ BUG();
+
++ if (pud_huge(*pud))
++ return 0;
++
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+
+- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
++ if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
+ BUG();
+
++ if (pmd_huge(*pmd))
++ return 0;
++
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
--- /dev/null
+From a82eee7424525e34e98d821dd059ce14560a1e35 Mon Sep 17 00:00:00 2001
+From: Toshi Kani <toshi.kani@hpe.com>
+Date: Thu, 11 Feb 2016 14:24:17 -0700
+Subject: x86/uaccess/64: Handle the caching of 4-byte nocache copies properly in __copy_user_nocache()
+
+From: Toshi Kani <toshi.kani@hpe.com>
+
+commit a82eee7424525e34e98d821dd059ce14560a1e35 upstream.
+
+Data corruption issues were observed in tests which initiated
+a system crash/reset while accessing BTT devices. This problem
+is reproducible.
+
+The BTT driver calls pmem_rw_bytes() to update data in pmem
+devices. This interface calls __copy_user_nocache(), which
+uses non-temporal stores so that the stores to pmem are
+persistent.
+
+__copy_user_nocache() uses non-temporal stores when a request
+size is 8 bytes or larger (and is aligned by 8 bytes). The
+BTT driver updates the BTT map table, whose entry size is
+4 bytes. Therefore, updates to the map table entries remain
+cached, and are not written to pmem after a crash.
+
+Change __copy_user_nocache() to use non-temporal store when
+a request size is 4 bytes. The change extends the current
+byte-copy path for a less-than-8-bytes request, and does not
+add any overhead to the regular path.
+
+Reported-and-tested-by: Micah Parrish <micah.parrish@hpe.com>
+Reported-and-tested-by: Brian Boylston <brian.boylston@hpe.com>
+Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: Vishal Verma <vishal.l.verma@intel.com>
+Cc: linux-nvdimm@lists.01.org
+Link: http://lkml.kernel.org/r/1455225857-12039-3-git-send-email-toshi.kani@hpe.com
+[ Small readability edits. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/lib/copy_user_64.S | 36 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 32 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -237,13 +237,14 @@ ENDPROC(copy_user_enhanced_fast_string)
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ * - Require 8-byte alignment when size is 8 bytes or larger.
++ * - Require 4-byte alignment when size is 4 bytes.
+ */
+ ENTRY(__copy_user_nocache)
+ ASM_STAC
+
+- /* If size is less than 8 bytes, go to byte copy */
++ /* If size is less than 8 bytes, go to 4-byte copy */
+ cmpl $8,%edx
+- jb .L_1b_cache_copy_entry
++ jb .L_4b_nocache_copy_entry
+
+ /* If destination is not 8-byte aligned, "cache" copy to align it */
+ ALIGN_DESTINATION
+@@ -282,7 +283,7 @@ ENTRY(__copy_user_nocache)
+ movl %edx,%ecx
+ andl $7,%edx
+ shrl $3,%ecx
+- jz .L_1b_cache_copy_entry /* jump if count is 0 */
++ jz .L_4b_nocache_copy_entry /* jump if count is 0 */
+
+ /* Perform 8-byte nocache loop-copy */
+ .L_8b_nocache_copy_loop:
+@@ -294,11 +295,33 @@ ENTRY(__copy_user_nocache)
+ jnz .L_8b_nocache_copy_loop
+
+ /* If no byte left, we're done */
+-.L_1b_cache_copy_entry:
++.L_4b_nocache_copy_entry:
++ andl %edx,%edx
++ jz .L_finish_copy
++
++ /* If destination is not 4-byte aligned, go to byte copy: */
++ movl %edi,%ecx
++ andl $3,%ecx
++ jnz .L_1b_cache_copy_entry
++
++ /* Set 4-byte copy count (1 or 0) and remainder */
++ movl %edx,%ecx
++ andl $3,%edx
++ shrl $2,%ecx
++ jz .L_1b_cache_copy_entry /* jump if count is 0 */
++
++ /* Perform 4-byte nocache copy: */
++30: movl (%rsi),%r8d
++31: movnti %r8d,(%rdi)
++ leaq 4(%rsi),%rsi
++ leaq 4(%rdi),%rdi
++
++ /* If no bytes left, we're done: */
+ andl %edx,%edx
+ jz .L_finish_copy
+
+ /* Perform byte "cache" loop-copy for the remainder */
++.L_1b_cache_copy_entry:
+ movl %edx,%ecx
+ .L_1b_cache_copy_loop:
+ 40: movb (%rsi),%al
+@@ -323,6 +346,9 @@ ENTRY(__copy_user_nocache)
+ .L_fixup_8b_copy:
+ lea (%rdx,%rcx,8),%rdx
+ jmp .L_fixup_handle_tail
++.L_fixup_4b_copy:
++ lea (%rdx,%rcx,4),%rdx
++ jmp .L_fixup_handle_tail
+ .L_fixup_1b_copy:
+ movl %ecx,%edx
+ .L_fixup_handle_tail:
+@@ -348,6 +374,8 @@ ENTRY(__copy_user_nocache)
+ _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(20b,.L_fixup_8b_copy)
+ _ASM_EXTABLE(21b,.L_fixup_8b_copy)
++ _ASM_EXTABLE(30b,.L_fixup_4b_copy)
++ _ASM_EXTABLE(31b,.L_fixup_4b_copy)
+ _ASM_EXTABLE(40b,.L_fixup_1b_copy)
+ _ASM_EXTABLE(41b,.L_fixup_1b_copy)
+ ENDPROC(__copy_user_nocache)
--- /dev/null
+From ee9737c924706aaa72c2ead93e3ad5644681dc1c Mon Sep 17 00:00:00 2001
+From: Toshi Kani <toshi.kani@hpe.com>
+Date: Thu, 11 Feb 2016 14:24:16 -0700
+Subject: x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
+
+From: Toshi Kani <toshi.kani@hpe.com>
+
+commit ee9737c924706aaa72c2ead93e3ad5644681dc1c upstream.
+
+Add comments to __copy_user_nocache() to clarify its procedures
+and alignment requirements.
+
+Also change numeric branch target labels to named local labels.
+
+No code changed:
+
+ arch/x86/lib/copy_user_64.o:
+
+ text data bss dec hex filename
+ 1239 0 0 1239 4d7 copy_user_64.o.before
+ 1239 0 0 1239 4d7 copy_user_64.o.after
+
+ md5:
+ 58bed94c2db98c1ca9a2d46d0680aaae copy_user_64.o.before.asm
+ 58bed94c2db98c1ca9a2d46d0680aaae copy_user_64.o.after.asm
+
+Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof@suse.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toshi Kani <toshi.kani@hp.com>
+Cc: brian.boylston@hpe.com
+Cc: dan.j.williams@intel.com
+Cc: linux-nvdimm@lists.01.org
+Cc: micah.parrish@hpe.com
+Cc: ross.zwisler@linux.intel.com
+Cc: vishal.l.verma@intel.com
+Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe.com
+[ Small readability edits and added object file comparison. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/lib/copy_user_64.S | 114 ++++++++++++++++++++++++++++----------------
+ 1 file changed, 73 insertions(+), 41 deletions(-)
+
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -232,17 +232,30 @@ ENDPROC(copy_user_enhanced_fast_string)
+
+ /*
+ * copy_user_nocache - Uncached memory copy with exception handling
+- * This will force destination/source out of cache for more performance.
++ * This will force destination out of cache for more performance.
++ *
++ * Note: Cached memory copy is used when destination or size is not
++ * naturally aligned. That is:
++ * - Require 8-byte alignment when size is 8 bytes or larger.
+ */
+ ENTRY(__copy_user_nocache)
+ ASM_STAC
++
++ /* If size is less than 8 bytes, go to byte copy */
+ cmpl $8,%edx
+- jb 20f /* less then 8 bytes, go to byte copy loop */
++ jb .L_1b_cache_copy_entry
++
++ /* If destination is not 8-byte aligned, "cache" copy to align it */
+ ALIGN_DESTINATION
++
++ /* Set 4x8-byte copy count and remainder */
+ movl %edx,%ecx
+ andl $63,%edx
+ shrl $6,%ecx
+- jz 17f
++ jz .L_8b_nocache_copy_entry /* jump if count is 0 */
++
++ /* Perform 4x8-byte nocache loop-copy */
++.L_4x8b_nocache_copy_loop:
+ 1: movq (%rsi),%r8
+ 2: movq 1*8(%rsi),%r9
+ 3: movq 2*8(%rsi),%r10
+@@ -262,60 +275,79 @@ ENTRY(__copy_user_nocache)
+ leaq 64(%rsi),%rsi
+ leaq 64(%rdi),%rdi
+ decl %ecx
+- jnz 1b
+-17: movl %edx,%ecx
++ jnz .L_4x8b_nocache_copy_loop
++
++ /* Set 8-byte copy count and remainder */
++.L_8b_nocache_copy_entry:
++ movl %edx,%ecx
+ andl $7,%edx
+ shrl $3,%ecx
+- jz 20f
+-18: movq (%rsi),%r8
+-19: movnti %r8,(%rdi)
++ jz .L_1b_cache_copy_entry /* jump if count is 0 */
++
++ /* Perform 8-byte nocache loop-copy */
++.L_8b_nocache_copy_loop:
++20: movq (%rsi),%r8
++21: movnti %r8,(%rdi)
+ leaq 8(%rsi),%rsi
+ leaq 8(%rdi),%rdi
+ decl %ecx
+- jnz 18b
+-20: andl %edx,%edx
+- jz 23f
++ jnz .L_8b_nocache_copy_loop
++
++ /* If no byte left, we're done */
++.L_1b_cache_copy_entry:
++ andl %edx,%edx
++ jz .L_finish_copy
++
++ /* Perform byte "cache" loop-copy for the remainder */
+ movl %edx,%ecx
+-21: movb (%rsi),%al
+-22: movb %al,(%rdi)
++.L_1b_cache_copy_loop:
++40: movb (%rsi),%al
++41: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+- jnz 21b
+-23: xorl %eax,%eax
++ jnz .L_1b_cache_copy_loop
++
++ /* Finished copying; fence the prior stores */
++.L_finish_copy:
++ xorl %eax,%eax
+ ASM_CLAC
+ sfence
+ ret
+
+ .section .fixup,"ax"
+-30: shll $6,%ecx
++.L_fixup_4x8b_copy:
++ shll $6,%ecx
+ addl %ecx,%edx
+- jmp 60f
+-40: lea (%rdx,%rcx,8),%rdx
+- jmp 60f
+-50: movl %ecx,%edx
+-60: sfence
++ jmp .L_fixup_handle_tail
++.L_fixup_8b_copy:
++ lea (%rdx,%rcx,8),%rdx
++ jmp .L_fixup_handle_tail
++.L_fixup_1b_copy:
++ movl %ecx,%edx
++.L_fixup_handle_tail:
++ sfence
+ jmp copy_user_handle_tail
+ .previous
+
+- _ASM_EXTABLE(1b,30b)
+- _ASM_EXTABLE(2b,30b)
+- _ASM_EXTABLE(3b,30b)
+- _ASM_EXTABLE(4b,30b)
+- _ASM_EXTABLE(5b,30b)
+- _ASM_EXTABLE(6b,30b)
+- _ASM_EXTABLE(7b,30b)
+- _ASM_EXTABLE(8b,30b)
+- _ASM_EXTABLE(9b,30b)
+- _ASM_EXTABLE(10b,30b)
+- _ASM_EXTABLE(11b,30b)
+- _ASM_EXTABLE(12b,30b)
+- _ASM_EXTABLE(13b,30b)
+- _ASM_EXTABLE(14b,30b)
+- _ASM_EXTABLE(15b,30b)
+- _ASM_EXTABLE(16b,30b)
+- _ASM_EXTABLE(18b,40b)
+- _ASM_EXTABLE(19b,40b)
+- _ASM_EXTABLE(21b,50b)
+- _ASM_EXTABLE(22b,50b)
++ _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
++ _ASM_EXTABLE(20b,.L_fixup_8b_copy)
++ _ASM_EXTABLE(21b,.L_fixup_8b_copy)
++ _ASM_EXTABLE(40b,.L_fixup_1b_copy)
++ _ASM_EXTABLE(41b,.L_fixup_1b_copy)
+ ENDPROC(__copy_user_nocache)
--- /dev/null
+From b79f4a1c68bb99152d0785ee4ea3ab4396cdacc6 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 12 Jan 2016 07:03:44 +1100
+Subject: xfs: inode recovery readahead can race with inode buffer creation
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit b79f4a1c68bb99152d0785ee4ea3ab4396cdacc6 upstream.
+
+When we do inode readahead in log recovery, we can do the
+readahead before we've replayed the icreate transaction that stamps
+the buffer with inode cores. The inode readahead verifier catches
+this and marks the buffer as !done to indicate that it doesn't yet
+contain valid inodes.
+
+In adding buffer error notification (i.e. setting b_error = -EIO at
+the same time as we clear the done flag) to such a readahead
+verifier failure, we can then get subsequent inode recovery failing
+with this error:
+
+XFS (dm-0): metadata I/O error: block 0xa00060 ("xlog_recover_do..(read#2)") error 5 numblks 32
+
+This occurs when readahead completion races with icreate item replay
+such as:
+
+ inode readahead
+ find buffer
+ lock buffer
+ submit RA io
+ ....
+ icreate recovery
+ xfs_trans_get_buffer
+ find buffer
+ lock buffer
+ <blocks on RA completion>
+ .....
+ <ra completion>
+ fails verifier
+ clear XBF_DONE
+ set bp->b_error = -EIO
+ release and unlock buffer
+ <icreate gains lock>
+ icreate initialises buffer
+ marks buffer as done
+ adds buffer to delayed write queue
+ releases buffer
+
+At this point, we have an initialised inode buffer that is up to
+date but has an -EIO state registered against it. When we finally
+get to recovering an inode in that buffer:
+
+ inode item recovery
+ xfs_trans_read_buffer
+ find buffer
+ lock buffer
+ sees XBF_DONE is set, returns buffer
+ sees bp->b_error is set
+ fail log recovery!
+
+Essentially, we need xfs_trans_get_buf_map() to clear the error status of
+the buffer when doing a lookup. This function returns uninitialised
+buffers, so the buffer returned can not be in an error state and
+none of the code that uses this function expects b_error to be set
+on return. Indeed, there is an ASSERT(!bp->b_error); in the
+transaction case in xfs_trans_get_buf_map() that would have caught
+this if log recovery used transactions....
+
+This patch firstly changes the inode readahead failure to set -EIO
+on the buffer, and secondly changes xfs_buf_get_map() to never
+return a buffer with an error state set so this first change doesn't
+cause unexpected log recovery failures.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_inode_buf.c | 12 +++++++-----
+ fs/xfs/xfs_buf.c | 7 +++++++
+ 2 files changed, 14 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -62,11 +62,12 @@ xfs_inobp_check(
+ * has not had the inode cores stamped into it. Hence for readahead, the buffer
+ * may be potentially invalid.
+ *
+- * If the readahead buffer is invalid, we don't want to mark it with an error,
+- * but we do want to clear the DONE status of the buffer so that a followup read
+- * will re-read it from disk. This will ensure that we don't get an unnecessary
+- * warnings during log recovery and we don't get unnecssary panics on debug
+- * kernels.
++ * If the readahead buffer is invalid, we need to mark it with an error and
++ * clear the DONE status of the buffer so that a followup read will re-read it
++ * from disk. We don't report the error otherwise to avoid warnings during log
++ * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
++ * because all we want to do is say readahead failed; there is no-one to report
++ * the error to, so this will distinguish it from a non-ra verifier failure.
+ */
+ static void
+ xfs_inode_buf_verify(
+@@ -93,6 +94,7 @@ xfs_inode_buf_verify(
+ XFS_RANDOM_ITOBP_INOTOBP))) {
+ if (readahead) {
+ bp->b_flags &= ~XBF_DONE;
++ xfs_buf_ioerror(bp, -EIO);
+ return;
+ }
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -604,6 +604,13 @@ found:
+ }
+ }
+
++ /*
++ * Clear b_error if this is a lookup from a caller that doesn't expect
++ * valid data to be found in the buffer.
++ */
++ if (!(flags & XBF_READ))
++ xfs_buf_ioerror(bp, 0);
++
+ XFS_STATS_INC(target->bt_mount, xb_get);
+ trace_xfs_buf_get(bp, flags, _RET_IP_);
+ return bp;
--- /dev/null
+From 85bec5460ad8e05e0a8d70fb0f6750eb719ad092 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 19 Jan 2016 08:28:10 +1100
+Subject: xfs: log mount failures don't wait for buffers to be released
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 85bec5460ad8e05e0a8d70fb0f6750eb719ad092 upstream.
+
+Recently I've been seeing xfs/051 fail on 1k block size filesystems.
+Trying to trace the events during the test led to the problem going
+away, indicating that it was a race condition that led to this
+ASSERT failure:
+
+XFS: Assertion failed: atomic_read(&pag->pag_ref) == 0, file: fs/xfs/xfs_mount.c, line: 156
+.....
+[<ffffffff814e1257>] xfs_free_perag+0x87/0xb0
+[<ffffffff814e21b9>] xfs_mountfs+0x4d9/0x900
+[<ffffffff814e5dff>] xfs_fs_fill_super+0x3bf/0x4d0
+[<ffffffff811d8800>] mount_bdev+0x180/0x1b0
+[<ffffffff814e3ff5>] xfs_fs_mount+0x15/0x20
+[<ffffffff811d90a8>] mount_fs+0x38/0x170
+[<ffffffff811f4347>] vfs_kern_mount+0x67/0x120
+[<ffffffff811f7018>] do_mount+0x218/0xd60
+[<ffffffff811f7e5b>] SyS_mount+0x8b/0xd0
+
+When I finally caught it with tracing enabled, I saw that AG 2 had
+an elevated reference count and a buffer was responsible for it. I
+tracked down the specific buffer, and found that it was missing the
+final reference count release that would put it back on the LRU and
+hence be found by xfs_wait_buftarg() calls in the log mount failure
+handling.
+
+The last four traces for the buffer before the assert were (trimmed
+for relevance)
+
+kworker/0:1-5259 xfs_buf_iodone: hold 2 lock 0 flags ASYNC
+kworker/0:1-5259 xfs_buf_ioerror: hold 2 lock 0 error -5
+mount-7163 xfs_buf_lock_done: hold 2 lock 0 flags ASYNC
+mount-7163 xfs_buf_unlock: hold 2 lock 1 flags ASYNC
+
+This is an async write that is completing, so there's nobody waiting
+for it directly. Hence we call xfs_buf_relse() once all the
+processing is complete. That does:
+
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+ xfs_buf_unlock(bp);
+ xfs_buf_rele(bp);
+}
+
+Now, it's clear that mount is waiting on the buffer lock, and that
+it has been released by xfs_buf_relse() and gained by mount. This is
+expected, because at this point the mount process is in
+xfs_buf_delwri_submit() waiting for all the IO it submitted to
+complete.
+
+The mount process, however, is waiting on the lock for the buffer
+because it is in xfs_buf_delwri_submit(). This waits for IO
+completion, but it doesn't wait for the buffer reference owned by
+the IO to go away. The mount process collects all the completions,
+fails the log recovery, and the higher level code then calls
+xfs_wait_buftarg() to free all the remaining buffers in the
+filesystem.
+
+The issue is that on unlocking the buffer, the scheduler has decided
+that the mount process has higher priority than the kworker
+thread that is running the IO completion, and so immediately
+switched contexts to the mount process from the semaphore unlock
+code, hence preventing the kworker thread from finishing the IO
+completion and releasing the IO reference to the buffer.
+
+Hence by the time that xfs_wait_buftarg() is run, the buffer still
+has an active reference and so isn't on the LRU list that the
+function walks to free the remaining buffers. Hence we miss that
+buffer and continue onwards to tear down the mount structures,
+at which time we find a stray reference count on the perag
+structure. On a non-debug kernel, this will be ignored and the
+structure torn down and freed. Hence when the kworker thread is then
+rescheduled and the buffer released and freed, it will access a
+freed perag structure.
+
+The problem here is that when the log mount fails, we still need to
+quiesce the log to ensure that the IO workqueues have returned to
+idle before we run xfs_wait_buftarg(). By synchronising the
+workqueues, we ensure that all IO completions are fully processed,
+not just to the point where buffers have been unlocked. This ensures
+we don't end up in the situation above.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_buf.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1527,6 +1527,16 @@ xfs_wait_buftarg(
+ LIST_HEAD(dispose);
+ int loop = 0;
+
++ /*
++ * We need to flush the buffer workqueue to ensure that all IO
++ * completion processing is 100% done. Just waiting on buffer locks is
++ * not sufficient for async IO as the reference count held over IO is
++ * not released until after the buffer lock is dropped. Hence we need to
++ * ensure here that all reference counts have been dropped before we
++ * start walking the LRU list.
++ */
++ drain_workqueue(btp->bt_mount->m_buf_workqueue);
++
+ /* loop until there is nothing left on the lru list. */
+ while (list_lru_count(&btp->bt_lru)) {
+ list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
--- /dev/null
+From 5c82171167adb8e4ac77b91a42cd49fb211a81a0 Mon Sep 17 00:00:00 2001
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+Date: Tue, 26 Jan 2016 17:50:12 +0200
+Subject: xhci: Fix list corruption in urb dequeue at host removal
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+commit 5c82171167adb8e4ac77b91a42cd49fb211a81a0 upstream.
+
+xhci driver frees data for all devices, both usb2 and usb3 the
+first time usb_remove_hcd() is called, including td_list and xhci_ring
+structures.
+
+When usb_remove_hcd() is called a second time for the second xhci bus it
+will try to dequeue all pending urbs, and touches td_list which is already
+freed for that endpoint.
+
+Reported-by: Joe Lawrence <joe.lawrence@stratus.com>
+Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/host/xhci.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -1549,7 +1549,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd
+ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
+ "HW died, freeing TD.");
+ urb_priv = urb->hcpriv;
+- for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
++ for (i = urb_priv->td_cnt;
++ i < urb_priv->length && xhci->devs[urb->dev->slot_id];
++ i++) {
+ td = urb_priv->td[i];
+ if (!list_empty(&td->td_list))
+ list_del_init(&td->td_list);