From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 7 Nov 2016 16:41:26 +0000 (+0100)
Subject: 4.8-stable patches
X-Git-Tag: v4.4.31~24
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=124f251da641ee20449d12aff220235cebf33748;p=thirdparty%2Fkernel%2Fstable-queue.git

4.8-stable patches

added patches:
	timers-lock-base-for-same-bucket-optimization.patch
	timers-plug-locking-race-vs.-timer-migration.patch
	timers-prevent-base-clock-corruption-when-forwarding.patch
	timers-prevent-base-clock-rewind-when-forwarding-clock.patch
	ubifs-abort-readdir-upon-error.patch
	ubifs-fix-regression-in-ubifs_readdir.patch
---

diff --git a/queue-4.8/series b/queue-4.8/series
index e9883abd403..675240ad475 100644
--- a/queue-4.8/series
+++ b/queue-4.8/series
@@ -40,3 +40,9 @@ powerpc-mm-radix-use-tlbiel-only-if-we-ever-ran-on-the-current-cpu.patch
 powerpc-64-re-fix-race-condition-between-going-idle-and-entering-guest.patch
 powerpc-64-fix-race-condition-in-setting-lock-bit-in-idle-wakeup-code.patch
 x86-microcode-amd-fix-more-fallout-from-config_randomize_memory-y.patch
+timers-prevent-base-clock-rewind-when-forwarding-clock.patch
+timers-prevent-base-clock-corruption-when-forwarding.patch
+timers-plug-locking-race-vs.-timer-migration.patch
+timers-lock-base-for-same-bucket-optimization.patch
+ubifs-abort-readdir-upon-error.patch
+ubifs-fix-regression-in-ubifs_readdir.patch
diff --git a/queue-4.8/timers-lock-base-for-same-bucket-optimization.patch b/queue-4.8/timers-lock-base-for-same-bucket-optimization.patch
new file mode 100644
index 00000000000..4428735ce16
--- /dev/null
+++ b/queue-4.8/timers-lock-base-for-same-bucket-optimization.patch
@@ -0,0 +1,99 @@
+From 4da9152a4308dcbf611cde399c695c359fc9145f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 24 Oct 2016 11:55:10 +0200
+Subject: timers: Lock base for same bucket optimization
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 4da9152a4308dcbf611cde399c695c359fc9145f upstream.
+
+Linus stumbled over the unlocked modification of the timer expiry value in
+mod_timer() which is an optimization for timers which stay in the same
+bucket - due to the bucket granularity - despite their expiry time getting
+updated.
+
+The optimization itself still makes sense even if we take the lock, because
+in case that the bucket stays the same, we avoid the pointless
+queue/enqueue dance.
+
+Make the check and the modification of timer->expires protected by the base
+lock and shuffle the remaining code around so we can keep the lock held
+when we actually have to requeue the timer to a different bucket.
+
+Fixes: f00c0afdfa62 ("timers: Implement optimization for same expiry time in mod_timer()")
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1610241711220.4983@nanos
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timer.c |   28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -965,6 +965,8 @@ __mod_timer(struct timer_list *timer, un
+ 	unsigned long clk = 0, flags;
+ 	int ret = 0;
+ 
++	BUG_ON(!timer->function);
++
+ 	/*
+ 	 * This is a common optimization triggered by the networking code - if
+ 	 * the timer is re-modified to have the same timeout or ends up in the
+@@ -973,13 +975,16 @@ __mod_timer(struct timer_list *timer, un
+ 	if (timer_pending(timer)) {
+ 		if (timer->expires == expires)
+ 			return 1;
++
+ 		/*
+-		 * Take the current timer_jiffies of base, but without holding
+-		 * the lock!
++		 * We lock timer base and calculate the bucket index right
++		 * here. If the timer ends up in the same bucket, then we
++		 * just update the expiry time and avoid the whole
++		 * dequeue/enqueue dance.
+ 		 */
+-		base = get_timer_base(timer->flags);
+-		clk = base->clk;
++		base = lock_timer_base(timer, &flags);
+ 
++		clk = base->clk;
+ 		idx = calc_wheel_index(expires, clk);
+ 
+ 		/*
+@@ -989,14 +994,14 @@ __mod_timer(struct timer_list *timer, un
+ 		 */
+ 		if (idx == timer_get_idx(timer)) {
+ 			timer->expires = expires;
+-			return 1;
++			ret = 1;
++			goto out_unlock;
+ 		}
++	} else {
++		base = lock_timer_base(timer, &flags);
+ 	}
+ 
+ 	timer_stats_timer_set_start_info(timer);
+-	BUG_ON(!timer->function);
+-
+-	base = lock_timer_base(timer, &flags);
+ 
+ 	ret = detach_if_pending(timer, base, false);
+ 	if (!ret && pending_only)
+@@ -1032,9 +1037,10 @@ __mod_timer(struct timer_list *timer, un
+ 	timer->expires = expires;
+ 	/*
+ 	 * If 'idx' was calculated above and the base time did not advance
+-	 * between calculating 'idx' and taking the lock, only enqueue_timer()
+-	 * and trigger_dyntick_cpu() is required. Otherwise we need to
+-	 * (re)calculate the wheel index via internal_add_timer().
++	 * between calculating 'idx' and possibly switching the base, only
++	 * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
++	 * we need to (re)calculate the wheel index via
++	 * internal_add_timer().
+ 	 */
+ 	if (idx != UINT_MAX && clk == base->clk) {
+ 		enqueue_timer(base, timer, idx);
diff --git a/queue-4.8/timers-plug-locking-race-vs.-timer-migration.patch b/queue-4.8/timers-plug-locking-race-vs.-timer-migration.patch
new file mode 100644
index 00000000000..18dd5727fff
--- /dev/null
+++ b/queue-4.8/timers-plug-locking-race-vs.-timer-migration.patch
@@ -0,0 +1,47 @@
+From b831275a3553c32091222ac619cfddd73a5553fb Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 24 Oct 2016 11:41:56 +0200
+Subject: timers: Plug locking race vs. timer migration
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit b831275a3553c32091222ac619cfddd73a5553fb upstream.
+
+Linus noticed that lock_timer_base() lacks a READ_ONCE() for accessing the
+timer flags. As a consequence the compiler is allowed to reload the flags
+between the initial check for TIMER_MIGRATION and the following timer base
+computation and the spin lock of the base.
+
+While this has not been observed (yet), we need to make sure that it never
+happens.
+
+Fixes: 0eeda71bc30d ("timer: Replace timer base by a cpu index")
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1610241711220.4983@nanos
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timer.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -937,7 +937,14 @@ static struct timer_base *lock_timer_bas
+ {
+ 	for (;;) {
+ 		struct timer_base *base;
+-		u32 tf = timer->flags;
++		u32 tf;
++
++		/*
++		 * We need to use READ_ONCE() here, otherwise the compiler
++		 * might re-read @tf between the check for TIMER_MIGRATING
++		 * and spin_lock().
++		 */
++		tf = READ_ONCE(timer->flags);
+ 
+ 		if (!(tf & TIMER_MIGRATING)) {
+ 			base = get_timer_base(tf);
diff --git a/queue-4.8/timers-prevent-base-clock-corruption-when-forwarding.patch b/queue-4.8/timers-prevent-base-clock-corruption-when-forwarding.patch
new file mode 100644
index 00000000000..3cb1fc295df
--- /dev/null
+++ b/queue-4.8/timers-prevent-base-clock-corruption-when-forwarding.patch
@@ -0,0 +1,115 @@
+From 6bad6bccf2d717f652d37e63cf261eaa23466009 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 22 Oct 2016 11:07:37 +0000
+Subject: timers: Prevent base clock corruption when forwarding
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 6bad6bccf2d717f652d37e63cf261eaa23466009 upstream.
+
+When a timer is enqueued we try to forward the timer base clock. This
+mechanism has two issues:
+
+1) Forwarding a remote base unlocked
+
+The forwarding function is called from get_target_base() with the current
+timer base lock held. But if the new target base is a different base than
+the current base (can happen with NOHZ, sigh!) then the forwarding is done
+on an unlocked base. This can lead to corruption of base->clk.
+
+Solution is simple: Invoke the forwarding after the target base is locked.
+
+2) Possible corruption due to jiffies advancing
+
+This is similar to the issue in get_net_timer_interrupt() which was fixed
+in the previous patch. jiffies can advance between check and assignement
+and therefore advancing base->clk beyond the next expiry value.
+
+So we need to read jiffies into a local variable once and do the checks and
+assignment with the local copy.
+
+Fixes: a683f390b93f("timers: Forward the wheel clock whenever possible")
+Reported-by: Ashton Holmes <scoopta@gmail.com>
+Reported-by: Michael Thayer <michael.thayer@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Michal Necasek <michal.necasek@oracle.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: knut.osmundsen@oracle.com
+Cc: stern@rowland.harvard.edu
+Cc: rt@linutronix.de
+Link: http://lkml.kernel.org/r/20161022110552.253640125@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timer.c |   23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -878,7 +878,7 @@ static inline struct timer_base *get_tim
+ 
+ #ifdef CONFIG_NO_HZ_COMMON
+ static inline struct timer_base *
+-__get_target_base(struct timer_base *base, unsigned tflags)
++get_target_base(struct timer_base *base, unsigned tflags)
+ {
+ #ifdef CONFIG_SMP
+ 	if ((tflags & TIMER_PINNED) || !base->migration_enabled)
+@@ -891,25 +891,27 @@ __get_target_base(struct timer_base *bas
+ 
+ static inline void forward_timer_base(struct timer_base *base)
+ {
++	unsigned long jnow = READ_ONCE(jiffies);
++
+ 	/*
+ 	 * We only forward the base when it's idle and we have a delta between
+ 	 * base clock and jiffies.
+ 	 */
+-	if (!base->is_idle || (long) (jiffies - base->clk) < 2)
++	if (!base->is_idle || (long) (jnow - base->clk) < 2)
+ 		return;
+ 
+ 	/*
+ 	 * If the next expiry value is > jiffies, then we fast forward to
+ 	 * jiffies otherwise we forward to the next expiry value.
+ 	 */
+-	if (time_after(base->next_expiry, jiffies))
+-		base->clk = jiffies;
++	if (time_after(base->next_expiry, jnow))
++		base->clk = jnow;
+ 	else
+ 		base->clk = base->next_expiry;
+ }
+ #else
+ static inline struct timer_base *
+-__get_target_base(struct timer_base *base, unsigned tflags)
++get_target_base(struct timer_base *base, unsigned tflags)
+ {
+ 	return get_timer_this_cpu_base(tflags);
+ }
+@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *bas
+ static inline void forward_timer_base(struct timer_base *base) { }
+ #endif
+ 
+-static inline struct timer_base *
+-get_target_base(struct timer_base *base, unsigned tflags)
+-{
+-	struct timer_base *target = __get_target_base(base, tflags);
+-
+-	forward_timer_base(target);
+-	return target;
+-}
+ 
+ /*
+  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
+@@ -1025,6 +1019,9 @@ __mod_timer(struct timer_list *timer, un
+ 		}
+ 	}
+ 
++	/* Try to forward a stale timer base clock */
++	forward_timer_base(base);
++
+ 	timer->expires = expires;
+ 	/*
+ 	 * If 'idx' was calculated above and the base time did not advance
diff --git a/queue-4.8/timers-prevent-base-clock-rewind-when-forwarding-clock.patch b/queue-4.8/timers-prevent-base-clock-rewind-when-forwarding-clock.patch
new file mode 100644
index 00000000000..84faa35e7ed
--- /dev/null
+++ b/queue-4.8/timers-prevent-base-clock-rewind-when-forwarding-clock.patch
@@ -0,0 +1,110 @@
+From 041ad7bc758db259bb960ef795197dd14aab19a6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 22 Oct 2016 11:07:35 +0000
+Subject: timers: Prevent base clock rewind when forwarding clock
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 041ad7bc758db259bb960ef795197dd14aab19a6 upstream.
+
+Ashton and Michael reported, that kernel versions 4.8 and later suffer from
+USB timeouts which are caused by the timer wheel rework.
+
+This is caused by a bug in the base clock forwarding mechanism, which leads
+to timers expiring early. The scenario which leads to this is:
+
+run_timers()
+  while (jiffies >= base->clk) {
+    collect_expired_timers();
+    base->clk++;
+    expire_timers();
+  }
+
+So base->clk = jiffies + 1. Now the cpu goes idle:
+
+idle()
+  get_next_timer_interrupt()
+    nextevt = __next_time_interrupt();
+    if (time_after(nextevt, base->clk))
+       	base->clk = jiffies;
+
+jiffies has not advanced since run_timers(), so this assignment effectively
+decrements base->clk by one.
+
+base->clk is the index into the timer wheel arrays. So let's assume the
+following state after the base->clk increment in run_timers():
+
+ jiffies = 0
+ base->clk = 1
+
+A timer gets enqueued with an expiry delta of 63 ticks (which is the case
+with the USB timeout and HZ=250) so the resulting bucket index is:
+
+  base->clk + delta = 1 + 63 = 64
+
+The timer goes into the first wheel level. The array size is 64 so it ends
+up in bucket 0, which is correct as it takes 63 ticks to advance base->clk
+to index into bucket 0 again.
+
+If the cpu goes idle before jiffies advance, then the bug in the forwarding
+mechanism sets base->clk back to 0, so the next invocation of run_timers()
+at the next tick will index into bucket 0 and therefore expire the timer 62
+ticks too early.
+
+Instead of blindly setting base->clk to jiffies we must make the forwarding
+conditional on jiffies > base->clk, but we cannot use jiffies for this as
+we might run into the following issue:
+
+  if (time_after(jiffies, base->clk) {
+    if (time_after(nextevt, base->clk))
+       base->clk = jiffies;
+
+jiffies can increment between the check and the assigment far enough to
+advance beyond nextevt. So we need to use a stable value for checking.
+
+get_next_timer_interrupt() has the basej argument which is the jiffies
+value snapshot taken in the calling code. So we can just that.
+
+Thanks to Ashton for bisecting and providing trace data!
+
+Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible")
+Reported-by: Ashton Holmes <scoopta@gmail.com>
+Reported-by: Michael Thayer <michael.thayer@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Michal Necasek <michal.necasek@oracle.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: knut.osmundsen@oracle.com
+Cc: stern@rowland.harvard.edu
+Cc: rt@linutronix.de
+Link: http://lkml.kernel.org/r/20161022110552.175308322@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timer.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1510,12 +1510,16 @@ u64 get_next_timer_interrupt(unsigned lo
+ 	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
+ 	base->next_expiry = nextevt;
+ 	/*
+-	 * We have a fresh next event. Check whether we can forward the base:
++	 * We have a fresh next event. Check whether we can forward the
++	 * base. We can only do that when @basej is past base->clk
++	 * otherwise we might rewind base->clk.
+ 	 */
+-	if (time_after(nextevt, jiffies))
+-		base->clk = jiffies;
+-	else if (time_after(nextevt, base->clk))
+-		base->clk = nextevt;
++	if (time_after(basej, base->clk)) {
++		if (time_after(nextevt, basej))
++			base->clk = basej;
++		else if (time_after(nextevt, base->clk))
++			base->clk = nextevt;
++	}
+ 
+ 	if (time_before_eq(nextevt, basej)) {
+ 		expires = basem;
diff --git a/queue-4.8/ubifs-abort-readdir-upon-error.patch b/queue-4.8/ubifs-abort-readdir-upon-error.patch
new file mode 100644
index 00000000000..3159ea12b6e
--- /dev/null
+++ b/queue-4.8/ubifs-abort-readdir-upon-error.patch
@@ -0,0 +1,59 @@
+From c83ed4c9dbb358b9e7707486e167e940d48bfeed Mon Sep 17 00:00:00 2001
+From: Richard Weinberger <richard@nod.at>
+Date: Wed, 19 Oct 2016 12:43:07 +0200
+Subject: ubifs: Abort readdir upon error
+
+From: Richard Weinberger <richard@nod.at>
+
+commit c83ed4c9dbb358b9e7707486e167e940d48bfeed upstream.
+
+If UBIFS is facing an error while walking a directory, it reports this
+error and ubifs_readdir() returns the error code. But the VFS readdir
+logic does not make the getdents system call fail in all cases. When the
+readdir cursor indicates that more entries are present, the system call
+will just return and the libc wrapper will try again since it also
+knows that more entries are present.
+This causes the libc wrapper to busy loop for ever when a directory is
+corrupted on UBIFS.
+A common approach do deal with corrupted directory entries is
+skipping them by setting the cursor to the next entry. On UBIFS this
+approach is not possible since we cannot compute the next directory
+entry cursor position without reading the current entry. So all we can
+do is setting the cursor to the "no more entries" position and make
+getdents exit.
+
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ubifs/dir.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -350,7 +350,7 @@ static unsigned int vfs_dent_type(uint8_
+  */
+ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
+ {
+-	int err;
++	int err = 0;
+ 	struct qstr nm;
+ 	union ubifs_key key;
+ 	struct ubifs_dent_node *dent;
+@@ -452,14 +452,12 @@ out:
+ 	kfree(file->private_data);
+ 	file->private_data = NULL;
+ 
+-	if (err != -ENOENT) {
++	if (err != -ENOENT)
+ 		ubifs_err(c, "cannot find next direntry, error %d", err);
+-		return err;
+-	}
+ 
+ 	/* 2 is a special value indicating that there are no more direntries */
+ 	ctx->pos = 2;
+-	return 0;
++	return err;
+ }
+ 
+ /* Free saved readdir() state when the directory is closed */
diff --git a/queue-4.8/ubifs-fix-regression-in-ubifs_readdir.patch b/queue-4.8/ubifs-fix-regression-in-ubifs_readdir.patch
new file mode 100644
index 00000000000..e4ca8288243
--- /dev/null
+++ b/queue-4.8/ubifs-fix-regression-in-ubifs_readdir.patch
@@ -0,0 +1,42 @@
+From a00052a296e54205cf238c75bd98d17d5d02a6db Mon Sep 17 00:00:00 2001
+From: Richard Weinberger <richard@nod.at>
+Date: Fri, 28 Oct 2016 11:49:03 +0200
+Subject: ubifs: Fix regression in ubifs_readdir()
+
+From: Richard Weinberger <richard@nod.at>
+
+commit a00052a296e54205cf238c75bd98d17d5d02a6db upstream.
+
+Commit c83ed4c9dbb35 ("ubifs: Abort readdir upon error") broke
+overlayfs support because the fix exposed an internal error
+code to VFS.
+
+Reported-by: Peter Rosin <peda@axentia.se>
+Tested-by: Peter Rosin <peda@axentia.se>
+Reported-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
+Tested-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
+Fixes: c83ed4c9dbb35 ("ubifs: Abort readdir upon error")
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ubifs/dir.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -454,6 +454,14 @@ out:
+ 
+ 	if (err != -ENOENT)
+ 		ubifs_err(c, "cannot find next direntry, error %d", err);
++	else
++		/*
++		 * -ENOENT is a non-fatal error in this context, the TNC uses
++		 * it to indicate that the cursor moved past the current directory
++		 * and readdir() has to stop.
++		 */
++		err = 0;
++
+ 
+ 	/* 2 is a special value indicating that there are no more direntries */
+ 	ctx->pos = 2;