git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.0-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 17 Jul 2012 22:49:54 +0000 (15:49 -0700)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 17 Jul 2012 22:49:54 +0000 (15:49 -0700)
added patches:
hrtimer-provide-clock_was_set_delayed.patch
hrtimers-move-lock-held-region-in-hrtimer_interrupt.patch
hrtimer-update-hrtimer-base-offsets-each-hrtimer_interrupt.patch
ntp-correct-tai-offset-during-leap-second.patch
ntp-fix-leap-second-hrtimer-livelock.patch
timekeeping-add-missing-update-call-in-timekeeping_resume.patch
timekeeping-fix-clock_monotonic-inconsistency-during-leapsecond.patch
timekeeping-fix-leapsecond-triggered-load-spike-issue.patch
timekeeping-maintain-ktime_t-based-offsets-for-hrtimers.patch
timekeeping-provide-hrtimer-update-function.patch
time-move-common-updates-to-a-function.patch

12 files changed:
queue-3.0/hrtimer-provide-clock_was_set_delayed.patch [new file with mode: 0644]
queue-3.0/hrtimer-update-hrtimer-base-offsets-each-hrtimer_interrupt.patch [new file with mode: 0644]
queue-3.0/hrtimers-move-lock-held-region-in-hrtimer_interrupt.patch [new file with mode: 0644]
queue-3.0/ntp-correct-tai-offset-during-leap-second.patch [new file with mode: 0644]
queue-3.0/ntp-fix-leap-second-hrtimer-livelock.patch [new file with mode: 0644]
queue-3.0/series
queue-3.0/time-move-common-updates-to-a-function.patch [new file with mode: 0644]
queue-3.0/timekeeping-add-missing-update-call-in-timekeeping_resume.patch [new file with mode: 0644]
queue-3.0/timekeeping-fix-clock_monotonic-inconsistency-during-leapsecond.patch [new file with mode: 0644]
queue-3.0/timekeeping-fix-leapsecond-triggered-load-spike-issue.patch [new file with mode: 0644]
queue-3.0/timekeeping-maintain-ktime_t-based-offsets-for-hrtimers.patch [new file with mode: 0644]
queue-3.0/timekeeping-provide-hrtimer-update-function.patch [new file with mode: 0644]

diff --git a/queue-3.0/hrtimer-provide-clock_was_set_delayed.patch b/queue-3.0/hrtimer-provide-clock_was_set_delayed.patch
new file mode 100644 (file)
index 0000000..ff2fe79
--- /dev/null
@@ -0,0 +1,119 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:25:04 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:52 -0400
+Subject: hrtimer: Provide clock_was_set_delayed()
+To: stable@vger.kernel.org
+Cc: John Stultz <johnstul@us.ibm.com>, Thomas Gleixner <tglx@linutronix.de>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-6-git-send-email-johnstul@us.ibm.com>
+
+From: John Stultz <johnstul@us.ibm.com>
+
+This is a backport of f55a6faa384304c89cfef162768e88374d3312cb
+
+clock_was_set() cannot be called from hard interrupt context because
+it calls on_each_cpu().
+
+For fixing the widely reported leap seconds issue it is necessary to
+call it from hard interrupt context, i.e. the timer tick code, which
+does the timekeeping updates.
+
+Provide a new function which denotes it in the hrtimer cpu base
+structure of the cpu on which it is called and raise the hrtimer
+softirq. We then execute the clock_was_set() notification from
+softirq context in run_hrtimer_softirq(). The hrtimer softirq is
+rarely used, so polling the flag there is not a performance issue.
+
+[ tglx: Made it depend on CONFIG_HIGH_RES_TIMERS. We really should get
+  rid of all this ifdeffery ASAP ]
+
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Reported-by: Jan Engelhardt <jengelh@inai.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Acked-by: Prarit Bhargava <prarit@redhat.com>
+Link: http://lkml.kernel.org/r/1341960205-56738-2-git-send-email-johnstul@us.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hrtimer.h |    9 ++++++++-
+ kernel/hrtimer.c        |   20 ++++++++++++++++++++
+ 2 files changed, 28 insertions(+), 1 deletion(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -165,6 +165,7 @@ enum  hrtimer_base_type {
+  * @lock:             lock protecting the base and associated clock bases
+  *                    and timers
+  * @active_bases:     Bitfield to mark bases with active timers
++ * @clock_was_set:    Indicates that clock was set from irq context.
+  * @expires_next:     absolute time of the next event which was scheduled
+  *                    via clock_set_next_event()
+  * @hres_active:      State of high resolution mode
+@@ -177,7 +178,8 @@ enum  hrtimer_base_type {
+  */
+ struct hrtimer_cpu_base {
+       raw_spinlock_t                  lock;
+-      unsigned long                   active_bases;
++      unsigned int                    active_bases;
++      unsigned int                    clock_was_set;
+ #ifdef CONFIG_HIGH_RES_TIMERS
+       ktime_t                         expires_next;
+       int                             hres_active;
+@@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(vo
+ # define MONOTONIC_RES_NSEC   HIGH_RES_NSEC
+ # define KTIME_MONOTONIC_RES  KTIME_HIGH_RES
++extern void clock_was_set_delayed(void);
++
+ #else
+ # define MONOTONIC_RES_NSEC   LOW_RES_NSEC
+@@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active
+ {
+       return 0;
+ }
++
++static inline void clock_was_set_delayed(void) { }
++
+ #endif
+ extern void clock_was_set(void);
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -717,6 +717,19 @@ static int hrtimer_switch_to_hres(void)
+       return 1;
+ }
++/*
++ * Called from timekeeping code to reprogram the hrtimer interrupt
++ * device. If called from the timer interrupt context we defer it to
++ * softirq context.
++ */
++void clock_was_set_delayed(void)
++{
++      struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
++
++      cpu_base->clock_was_set = 1;
++      __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++}
++
+ #else
+ static inline int hrtimer_hres_active(void) { return 0; }
+@@ -1395,6 +1408,13 @@ void hrtimer_peek_ahead_timers(void)
+ static void run_hrtimer_softirq(struct softirq_action *h)
+ {
++      struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
++
++      if (cpu_base->clock_was_set) {
++              cpu_base->clock_was_set = 0;
++              clock_was_set();
++      }
++
+       hrtimer_peek_ahead_timers();
+ }
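
The mechanism this patch adds is a small defer-to-softirq pattern: the hard-interrupt path only marks a per-CPU flag and raises the hrtimer softirq, and the softirq handler performs the expensive clock_was_set() notification. A minimal userspace sketch of that shape (hypothetical names, plain function calls instead of per-CPU data and a real softirq):

```c
/* Minimal userspace sketch of the defer-to-softirq pattern used above.
 * Hypothetical names; the kernel uses per-CPU hrtimer_cpu_base state and
 * __raise_softirq_irqoff(), not plain function calls. */
#include <stdio.h>

static int clock_was_set_flag;          /* mimics cpu_base->clock_was_set */

static void clock_was_set(void)         /* expensive: not allowed in hard irq */
{
        printf("clock_was_set(): notifying all CPUs\n");
}

static void clock_was_set_delayed(void) /* callable from hard irq context */
{
        clock_was_set_flag = 1;         /* just mark and raise the softirq */
}

static void run_hrtimer_softirq(void)   /* softirq context: safe to notify */
{
        if (clock_was_set_flag) {
                clock_was_set_flag = 0;
                clock_was_set();
        }
}

int main(void)
{
        clock_was_set_delayed();        /* "timer interrupt" defers the work */
        run_hrtimer_softirq();          /* softirq performs it later */
        return 0;
}
```
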
diff --git a/queue-3.0/hrtimer-update-hrtimer-base-offsets-each-hrtimer_interrupt.patch b/queue-3.0/hrtimer-update-hrtimer-base-offsets-each-hrtimer_interrupt.patch
new file mode 100644 (file)
index 0000000..7911937
--- /dev/null
@@ -0,0 +1,127 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:26:57 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:57 -0400
+Subject: hrtimer: Update hrtimer base offsets each hrtimer_interrupt
+To: stable@vger.kernel.org
+Cc: John Stultz <johnstul@us.ibm.com>, Thomas Gleixner <tglx@linutronix.de>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-11-git-send-email-johnstul@us.ibm.com>
+
+From: John Stultz <johnstul@us.ibm.com>
+
+This is a backport of 5baefd6d84163443215f4a99f6a20f054ef11236
+
+The update of the hrtimer base offsets on all cpus cannot be made
+atomically from the timekeeper.lock held and interrupt disabled region
+as smp function calls are not allowed there.
+
+clock_was_set(), which enforces the update on all cpus, is called
+either from preemptible process context in case of do_settimeofday()
+or from the softirq context when the offset modification happened in
+the timer interrupt itself due to a leap second.
+
+In both cases there is a race window for an hrtimer interrupt between
+dropping timekeeper lock, enabling interrupts and clock_was_set()
+issuing the updates. Any interrupt which arrives in that window will
+see the new time but operate on stale offsets.
+
+So we need to make sure that an hrtimer interrupt always sees a
+consistent state of time and offsets.
+
+ktime_get_update_offsets() allows us to get the current monotonic time
+and update the per cpu hrtimer base offsets from hrtimer_interrupt()
+to capture a consistent state of monotonic time and the offsets. The
+function replaces the existing ktime_get() calls in hrtimer_interrupt().
+
+The overhead of the new function vs. ktime_get() is minimal as it just
+adds two store operations.
+
+This ensures that any changes to realtime or boottime offsets are
+noticed and stored into the per-cpu hrtimer base structures, prior to
+any hrtimer expiration and guarantees that timers are not expired early.
+
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Acked-by: Prarit Bhargava <prarit@redhat.com>
+Link: http://lkml.kernel.org/r/1341960205-56738-8-git-send-email-johnstul@us.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/hrtimer.c |   28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprog
+       return 0;
+ }
++static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
++{
++      ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
++      ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
++
++      return ktime_get_update_offsets(offs_real, offs_boot);
++}
++
+ /*
+  * Retrigger next event is called after clock was set
+  *
+@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprog
+ static void retrigger_next_event(void *arg)
+ {
+       struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
+-      struct timespec realtime_offset, xtim, wtm, sleep;
+       if (!hrtimer_hres_active())
+               return;
+-      /* Optimized out for !HIGH_RES */
+-      get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
+-      set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
+-
+-      /* Adjust CLOCK_REALTIME offset */
+       raw_spin_lock(&base->lock);
+-      base->clock_base[HRTIMER_BASE_REALTIME].offset =
+-              timespec_to_ktime(realtime_offset);
+-      base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
+-              timespec_to_ktime(sleep);
+-
++      hrtimer_update_base(base);
+       hrtimer_force_reprogram(base, 0);
+       raw_spin_unlock(&base->lock);
+ }
+@@ -710,7 +708,6 @@ static int hrtimer_switch_to_hres(void)
+               base->clock_base[i].resolution = KTIME_HIGH_RES;
+       tick_setup_sched_timer();
+-
+       /* "Retrigger" the interrupt to get things going */
+       retrigger_next_event(NULL);
+       local_irq_restore(flags);
+@@ -1264,7 +1261,7 @@ void hrtimer_interrupt(struct clock_even
+       dev->next_event.tv64 = KTIME_MAX;
+       raw_spin_lock(&cpu_base->lock);
+-      entry_time = now = ktime_get();
++      entry_time = now = hrtimer_update_base(cpu_base);
+ retry:
+       expires_next.tv64 = KTIME_MAX;
+       /*
+@@ -1342,9 +1339,12 @@ retry:
+        * We need to prevent that we loop forever in the hrtimer
+        * interrupt routine. We give it 3 attempts to avoid
+        * overreacting on some spurious event.
++       *
++       * Acquire base lock for updating the offsets and retrieving
++       * the current time.
+        */
+       raw_spin_lock(&cpu_base->lock);
+-      now = ktime_get();
++      now = hrtimer_update_base(cpu_base);
+       cpu_base->nr_retries++;
+       if (++retries < 3)
+               goto retry;
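
The offsets refreshed by hrtimer_update_base() are just the differences between the clock bases (CLOCK_MONOTONIC vs. CLOCK_REALTIME and CLOCK_BOOTTIME). A rough userspace illustration of what such an offset is, assuming a POSIX clock_gettime(); this only approximates offs_real and is not the kernel's internal bookkeeping:

```c
/* Approximate the CLOCK_MONOTONIC -> CLOCK_REALTIME offset from userspace.
 * Illustration of what an hrtimer base offset represents; the kernel
 * derives offs_real from wall_to_monotonic under xtime_lock. */
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec real, mono;
        long long offs_ns;

        clock_gettime(CLOCK_REALTIME, &real);
        clock_gettime(CLOCK_MONOTONIC, &mono);

        offs_ns = (long long)(real.tv_sec - mono.tv_sec) * 1000000000LL
                + (real.tv_nsec - mono.tv_nsec);

        /* A settimeofday() or a leap second changes this value, which is
         * why hrtimer_interrupt() must refresh its copy before expiring
         * CLOCK_REALTIME timers. */
        printf("monotonic -> realtime offset: %lld ns\n", offs_ns);
        return 0;
}
```
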
diff --git a/queue-3.0/hrtimers-move-lock-held-region-in-hrtimer_interrupt.patch b/queue-3.0/hrtimers-move-lock-held-region-in-hrtimer_interrupt.patch
new file mode 100644 (file)
index 0000000..bf36ed5
--- /dev/null
@@ -0,0 +1,64 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:26:26 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:55 -0400
+Subject: hrtimers: Move lock held region in hrtimer_interrupt()
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, John Stultz <johnstul@us.ibm.com>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-9-git-send-email-johnstul@us.ibm.com>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+This is a backport of 196951e91262fccda81147d2bcf7fdab08668b40
+
+We need to update the base offsets from this code and we need to do
+that under base->lock. Move the lock held region around the
+ktime_get() calls. The ktime_get() calls are going to be replaced with
+a function which gets the time and the offsets atomically.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Acked-by: Prarit Bhargava <prarit@redhat.com>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Link: http://lkml.kernel.org/r/1341960205-56738-6-git-send-email-johnstul@us.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/hrtimer.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -1263,11 +1263,10 @@ void hrtimer_interrupt(struct clock_even
+       cpu_base->nr_events++;
+       dev->next_event.tv64 = KTIME_MAX;
++      raw_spin_lock(&cpu_base->lock);
+       entry_time = now = ktime_get();
+ retry:
+       expires_next.tv64 = KTIME_MAX;
+-
+-      raw_spin_lock(&cpu_base->lock);
+       /*
+        * We set expires_next to KTIME_MAX here with cpu_base->lock
+        * held to prevent that a timer is enqueued in our queue via
+@@ -1344,6 +1343,7 @@ retry:
+        * interrupt routine. We give it 3 attempts to avoid
+        * overreacting on some spurious event.
+        */
++      raw_spin_lock(&cpu_base->lock);
+       now = ktime_get();
+       cpu_base->nr_retries++;
+       if (++retries < 3)
+@@ -1356,6 +1356,7 @@ retry:
+        */
+       cpu_base->nr_hangs++;
+       cpu_base->hang_detected = 1;
++      raw_spin_unlock(&cpu_base->lock);
+       delta = ktime_sub(now, entry_time);
+       if (delta.tv64 > cpu_base->max_hang_time.tv64)
+               cpu_base->max_hang_time = delta;
diff --git a/queue-3.0/ntp-correct-tai-offset-during-leap-second.patch b/queue-3.0/ntp-correct-tai-offset-during-leap-second.patch
new file mode 100644 (file)
index 0000000..def9f55
--- /dev/null
@@ -0,0 +1,45 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:24:12 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:49 -0400
+Subject: ntp: Correct TAI offset during leap second
+To: stable@vger.kernel.org
+Cc: Richard Cochran <richardcochran@gmail.com>, Prarit Bhargava <prarit@redhat.com>, Thomas Gleixner <tglx@linutronix.de>, Linux Kernel <linux-kernel@vger.kernel.org>, John Stultz <john.stultz@linaro.org>
+Message-ID: <1342546438-17534-3-git-send-email-johnstul@us.ibm.com>
+
+
+From: Richard Cochran <richardcochran@gmail.com>
+
+This is a backport of dd48d708ff3e917f6d6b6c2b696c3f18c019feed
+
+When repeating a UTC time value during a leap second (when the UTC
+time should be 23:59:60), the TAI timescale should not stop. The kernel
+NTP code increments the TAI offset one second too late. This patch fixes
+the issue by incrementing the offset during the leap second itself.
+
+Signed-off-by: Richard Cochran <richardcochran@gmail.com>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/ntp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/time/ntp.c
++++ b/kernel/time/ntp.c
+@@ -378,6 +378,7 @@ int second_overflow(unsigned long secs)
+               if (secs % 86400 == 0) {
+                       leap = -1;
+                       time_state = TIME_OOP;
++                      time_tai++;
+                       printk(KERN_NOTICE
+                               "Clock: inserting leap second 23:59:60 UTC\n");
+               }
+@@ -392,7 +393,6 @@ int second_overflow(unsigned long secs)
+               }
+               break;
+       case TIME_OOP:
+-              time_tai++;
+               time_state = TIME_WAIT;
+               break;
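
The effect of moving the time_tai increment is easiest to see by stepping the leap-insert states across a midnight boundary. A standalone, simplified model of the relevant second_overflow() transitions (illustration only, not the kernel function):

```c
/* Simplified model of second_overflow()'s leap-insert path, showing why
 * time_tai must be bumped when entering TIME_OOP (i.e. during 23:59:60)
 * rather than one second later.  Standalone illustration only. */
#include <stdio.h>

enum { TIME_OK, TIME_INS, TIME_OOP, TIME_WAIT };

static int time_state = TIME_INS;       /* STA_INS was set via adjtimex() */
static int time_tai = 34;               /* TAI-UTC before the June 2012 leap */

static int second_overflow(long secs)
{
        int leap = 0;

        switch (time_state) {
        case TIME_INS:
                if (secs % 86400 == 0) {        /* midnight UTC reached */
                        leap = -1;              /* repeat the last second */
                        time_state = TIME_OOP;
                        time_tai++;             /* the fix: bump TAI now */
                }
                break;
        case TIME_OOP:
                time_state = TIME_WAIT;         /* leap second is over */
                break;
        }
        return leap;
}

int main(void)
{
        long secs = 86400;                      /* any midnight UTC boundary */
        printf("leap=%d tai=%d state=%d\n", second_overflow(secs), time_tai, time_state);
        printf("leap=%d tai=%d state=%d\n", second_overflow(secs + 1), time_tai, time_state);
        return 0;
}
```
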
diff --git a/queue-3.0/ntp-fix-leap-second-hrtimer-livelock.patch b/queue-3.0/ntp-fix-leap-second-hrtimer-livelock.patch
new file mode 100644 (file)
index 0000000..ccddbc8
--- /dev/null
@@ -0,0 +1,346 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:23:32 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:48 -0400
+Subject: ntp: Fix leap-second hrtimer livelock
+To: stable@vger.kernel.org
+Cc: John Stultz <john.stultz@linaro.org>, Sasha Levin <levinsasha928@gmail.com>, Thomas Gleixner <tglx@linutronix.de>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-2-git-send-email-johnstul@us.ibm.com>
+
+
+From: John Stultz <john.stultz@linaro.org>
+
+This is a backport of 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d
+
+This should have been backported when it was committed, but I
+mistook the problem as requiring the ntp_lock changes
+that landed in 3.4 in order for it to occur.
+
+Unfortunately the same issue can happen (with only one cpu)
+as follows:
+do_adjtimex()
+ write_seqlock_irq(&xtime_lock);
+  process_adjtimex_modes()
+   process_adj_status()
+    ntp_start_leap_timer()
+     hrtimer_start()
+      hrtimer_reprogram()
+       tick_program_event()
+        clockevents_program_event()
+         ktime_get()
+          seq = read_seqbegin(xtime_lock); [DEADLOCK]
+
+This deadlock will not always occur, as it requires the
+leap_timer to force a hrtimer_reprogram which only happens
+if it's set and there's no sooner timer to expire.
+
+NOTE: This patch, being faithful to the original commit,
+introduces a bug (we don't update wall_to_monotonic),
+which will be resolved by backporting a following fix.
+
+Original commit message below:
+
+Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp
+subsystem has used an hrtimer for triggering the leapsecond
+adjustment. However, this can cause a potential livelock.
+
+Thomas diagnosed this as the following pattern:
+CPU 0                                                    CPU 1
+do_adjtimex()
+  spin_lock_irq(&ntp_lock);
+    process_adjtimex_modes();                           timer_interrupt()
+      process_adj_status();                                do_timer()
+        ntp_start_leap_timer();                             write_lock(&xtime_lock);
+          hrtimer_start();                                  update_wall_time();
+             hrtimer_reprogram();                            ntp_tick_length()
+               tick_program_event()                            spin_lock(&ntp_lock);
+                 clockevents_program_event()
+                  ktime_get()
+                     seq = read_seqbegin(xtime_lock);
+
+This patch tries to avoid the problem by reverting back to not using
+an hrtimer to inject leapseconds, and instead we handle the leapsecond
+processing in the second_overflow() function.
+
+The downside to this change is that on systems that support highres
+timers, the leap second processing will occur on a HZ tick boundary,
+(ie: ~1-10ms, depending on HZ)  after the leap second instead of
+possibly sooner (~34us in my tests w/ x86_64 lapic).
+
+This patch applies on top of tip/timers/core.
+
+CC: Sasha Levin <levinsasha928@gmail.com>
+CC: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Sasha Levin <levinsasha928@gmail.com>
+Diagnosed-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Sasha Levin <levinsasha928@gmail.com>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/timex.h     |    2 
+ kernel/time/ntp.c         |  122 +++++++++++++++-------------------------------
+ kernel/time/timekeeping.c |   18 ++----
+ 3 files changed, 48 insertions(+), 94 deletions(-)
+
+--- a/include/linux/timex.h
++++ b/include/linux/timex.h
+@@ -266,7 +266,7 @@ static inline int ntp_synced(void)
+ /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
+ extern u64 tick_length;
+-extern void second_overflow(void);
++extern int second_overflow(unsigned long secs);
+ extern void update_ntp_one_tick(void);
+ extern int do_adjtimex(struct timex *);
+ extern void hardpps(const struct timespec *, const struct timespec *);
+--- a/kernel/time/ntp.c
++++ b/kernel/time/ntp.c
+@@ -31,8 +31,6 @@ unsigned long                        tick_nsec;
+ u64                           tick_length;
+ static u64                    tick_length_base;
+-static struct hrtimer         leap_timer;
+-
+ #define MAX_TICKADJ           500LL           /* usecs */
+ #define MAX_TICKADJ_SCALED \
+       (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
+@@ -350,60 +348,60 @@ void ntp_clear(void)
+ }
+ /*
+- * Leap second processing. If in leap-insert state at the end of the
+- * day, the system clock is set back one second; if in leap-delete
+- * state, the system clock is set ahead one second.
++ * this routine handles the overflow of the microsecond field
++ *
++ * The tricky bits of code to handle the accurate clock support
++ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
++ * They were originally developed for SUN and DEC kernels.
++ * All the kudos should go to Dave for this stuff.
++ *
++ * Also handles leap second processing, and returns leap offset
+  */
+-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
++int second_overflow(unsigned long secs)
+ {
+-      enum hrtimer_restart res = HRTIMER_NORESTART;
+-
+-      write_seqlock(&xtime_lock);
++      int leap = 0;
++      s64 delta;
++      /*
++       * Leap second processing. If in leap-insert state at the end of the
++       * day, the system clock is set back one second; if in leap-delete
++       * state, the system clock is set ahead one second.
++       */
+       switch (time_state) {
+       case TIME_OK:
++              if (time_status & STA_INS)
++                      time_state = TIME_INS;
++              else if (time_status & STA_DEL)
++                      time_state = TIME_DEL;
+               break;
+       case TIME_INS:
+-              timekeeping_leap_insert(-1);
+-              time_state = TIME_OOP;
+-              printk(KERN_NOTICE
+-                      "Clock: inserting leap second 23:59:60 UTC\n");
+-              hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
+-              res = HRTIMER_RESTART;
++              if (secs % 86400 == 0) {
++                      leap = -1;
++                      time_state = TIME_OOP;
++                      printk(KERN_NOTICE
++                              "Clock: inserting leap second 23:59:60 UTC\n");
++              }
+               break;
+       case TIME_DEL:
+-              timekeeping_leap_insert(1);
+-              time_tai--;
+-              time_state = TIME_WAIT;
+-              printk(KERN_NOTICE
+-                      "Clock: deleting leap second 23:59:59 UTC\n");
++              if ((secs + 1) % 86400 == 0) {
++                      leap = 1;
++                      time_tai--;
++                      time_state = TIME_WAIT;
++                      printk(KERN_NOTICE
++                              "Clock: deleting leap second 23:59:59 UTC\n");
++              }
+               break;
+       case TIME_OOP:
+               time_tai++;
+               time_state = TIME_WAIT;
+-              /* fall through */
++              break;
++
+       case TIME_WAIT:
+               if (!(time_status & (STA_INS | STA_DEL)))
+                       time_state = TIME_OK;
+               break;
+       }
+-      write_sequnlock(&xtime_lock);
+-
+-      return res;
+-}
+-
+-/*
+- * this routine handles the overflow of the microsecond field
+- *
+- * The tricky bits of code to handle the accurate clock support
+- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+- * They were originally developed for SUN and DEC kernels.
+- * All the kudos should go to Dave for this stuff.
+- */
+-void second_overflow(void)
+-{
+-      s64 delta;
+       /* Bump the maxerror field */
+       time_maxerror += MAXFREQ / NSEC_PER_USEC;
+@@ -423,23 +421,25 @@ void second_overflow(void)
+       pps_dec_valid();
+       if (!time_adjust)
+-              return;
++              goto out;
+       if (time_adjust > MAX_TICKADJ) {
+               time_adjust -= MAX_TICKADJ;
+               tick_length += MAX_TICKADJ_SCALED;
+-              return;
++              goto out;
+       }
+       if (time_adjust < -MAX_TICKADJ) {
+               time_adjust += MAX_TICKADJ;
+               tick_length -= MAX_TICKADJ_SCALED;
+-              return;
++              goto out;
+       }
+       tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
+                                                        << NTP_SCALE_SHIFT;
+       time_adjust = 0;
++out:
++      return leap;
+ }
+ #ifdef CONFIG_GENERIC_CMOS_UPDATE
+@@ -501,27 +501,6 @@ static void notify_cmos_timer(void)
+ static inline void notify_cmos_timer(void) { }
+ #endif
+-/*
+- * Start the leap seconds timer:
+- */
+-static inline void ntp_start_leap_timer(struct timespec *ts)
+-{
+-      long now = ts->tv_sec;
+-
+-      if (time_status & STA_INS) {
+-              time_state = TIME_INS;
+-              now += 86400 - now % 86400;
+-              hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
+-
+-              return;
+-      }
+-
+-      if (time_status & STA_DEL) {
+-              time_state = TIME_DEL;
+-              now += 86400 - (now + 1) % 86400;
+-              hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
+-      }
+-}
+ /*
+  * Propagate a new txc->status value into the NTP state:
+@@ -546,22 +525,6 @@ static inline void process_adj_status(st
+       time_status &= STA_RONLY;
+       time_status |= txc->status & ~STA_RONLY;
+-      switch (time_state) {
+-      case TIME_OK:
+-              ntp_start_leap_timer(ts);
+-              break;
+-      case TIME_INS:
+-      case TIME_DEL:
+-              time_state = TIME_OK;
+-              ntp_start_leap_timer(ts);
+-      case TIME_WAIT:
+-              if (!(time_status & (STA_INS | STA_DEL)))
+-                      time_state = TIME_OK;
+-              break;
+-      case TIME_OOP:
+-              hrtimer_restart(&leap_timer);
+-              break;
+-      }
+ }
+ /*
+  * Called with the xtime lock held, so we can access and modify
+@@ -643,9 +606,6 @@ int do_adjtimex(struct timex *txc)
+                   (txc->tick <  900000/USER_HZ ||
+                    txc->tick > 1100000/USER_HZ))
+                       return -EINVAL;
+-
+-              if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
+-                      hrtimer_cancel(&leap_timer);
+       }
+       if (txc->modes & ADJ_SETOFFSET) {
+@@ -967,6 +927,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_se
+ void __init ntp_init(void)
+ {
+       ntp_clear();
+-      hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+-      leap_timer.function = ntp_leap_second;
+ }
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -169,15 +169,6 @@ static struct timespec raw_time;
+ /* flag for if timekeeping is suspended */
+ int __read_mostly timekeeping_suspended;
+-/* must hold xtime_lock */
+-void timekeeping_leap_insert(int leapsecond)
+-{
+-      xtime.tv_sec += leapsecond;
+-      wall_to_monotonic.tv_sec -= leapsecond;
+-      update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+-                      timekeeper.mult);
+-}
+-
+ /**
+  * timekeeping_forward_now - update clock to the current time
+  *
+@@ -828,9 +819,11 @@ static cycle_t logarithmic_accumulation(
+       timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+       while (timekeeper.xtime_nsec >= nsecps) {
++              int leap;
+               timekeeper.xtime_nsec -= nsecps;
+               xtime.tv_sec++;
+-              second_overflow();
++              leap = second_overflow(xtime.tv_sec);
++              xtime.tv_sec += leap;
+       }
+       /* Accumulate raw time */
+@@ -936,9 +929,12 @@ static void update_wall_time(void)
+        * xtime.tv_nsec isn't larger then NSEC_PER_SEC
+        */
+       if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
++              int leap;
+               xtime.tv_nsec -= NSEC_PER_SEC;
+               xtime.tv_sec++;
+-              second_overflow();
++              leap = second_overflow(xtime.tv_sec);
++              xtime.tv_sec += leap;
++
+       }
+       /* check to see if there is a new clocksource to use */
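
The livelock described in the changelog follows directly from seqlock semantics: a reader spins until the sequence count is even again, so a read attempted inside the write-side critical section of the same lock can never complete. A bounded userspace model of that failure mode (hypothetical names, not the kernel's seqlock implementation):

```c
/* Why calling a seqlock reader from inside its own writer livelocks:
 * the reader waits for an even sequence count, but the writer has made
 * it odd and will not release it until the reader returns.  Bounded
 * userspace model of the xtime_lock situation above. */
#include <stdio.h>

static unsigned seq;                    /* seqlock sequence counter */

static unsigned read_seqbegin(void) { return seq; }
static int read_seqretry(unsigned start) { return (start & 1) || start != seq; }

static void write_seqlock(void)   { seq++; }    /* now odd: writer active */
static void write_sequnlock(void) { seq++; }    /* even again */

static long ktime_get_model(void)
{
        unsigned start;
        int tries = 0;

        do {
                start = read_seqbegin();
                /* ... read the time here ... */
                if (++tries > 3) {
                        printf("reader would spin forever (seq=%u)\n", seq);
                        return -1;
                }
        } while (read_seqretry(start));
        return 0;
}

int main(void)
{
        write_seqlock();                /* do_adjtimex() path holds the lock */
        ktime_get_model();              /* hrtimer_start() ends up here: livelock */
        write_sequnlock();
        return 0;
}
```
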
diff --git a/queue-3.0/series b/queue-3.0/series
index a20004a28c5265c68fdb1265893bc75d123799c8..a5cdaa6b37e8f2c1a4d0322718cafe359fca682f 100644 (file)
--- a/queue-3.0/series
@@ -10,3 +10,14 @@ e1000e-correct-link-check-logic-for-82571-serdes.patch
 input-xpad-add-andamiro-pump-it-up-pad.patch
 tcp-drop-syn-fin-messages.patch
 cfg80211-check-iface-combinations-only-when-iface-is-running.patch
+ntp-fix-leap-second-hrtimer-livelock.patch
+ntp-correct-tai-offset-during-leap-second.patch
+timekeeping-fix-clock_monotonic-inconsistency-during-leapsecond.patch
+time-move-common-updates-to-a-function.patch
+hrtimer-provide-clock_was_set_delayed.patch
+timekeeping-fix-leapsecond-triggered-load-spike-issue.patch
+timekeeping-maintain-ktime_t-based-offsets-for-hrtimers.patch
+hrtimers-move-lock-held-region-in-hrtimer_interrupt.patch
+timekeeping-provide-hrtimer-update-function.patch
+hrtimer-update-hrtimer-base-offsets-each-hrtimer_interrupt.patch
+timekeeping-add-missing-update-call-in-timekeeping_resume.patch
diff --git a/queue-3.0/time-move-common-updates-to-a-function.patch b/queue-3.0/time-move-common-updates-to-a-function.patch
new file mode 100644 (file)
index 0000000..8031a76
--- /dev/null
@@ -0,0 +1,99 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:24:46 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:51 -0400
+Subject: time: Move common updates to a function
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Eric Dumazet <eric.dumazet@gmail.com>, Richard Cochran <richardcochran@gmail.com>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>, John Stultz <john.stultz@linaro.org>
+Message-ID: <1342546438-17534-5-git-send-email-johnstul@us.ibm.com>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+This is a backport of cc06268c6a87db156af2daed6e96a936b955cc82
+
+While not a bugfix itself, it allows the following fixes to be
+backported in a more straightforward manner.
+
+CC: Thomas Gleixner <tglx@linutronix.de>
+CC: Eric Dumazet <eric.dumazet@gmail.com>
+CC: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/timekeeping.c |   34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -166,6 +166,19 @@ static struct timespec total_sleep_time;
+  */
+ static struct timespec raw_time;
++/* must hold write on xtime_lock */
++static void timekeeping_update(bool clearntp)
++{
++      if (clearntp) {
++              timekeeper.ntp_error = 0;
++              ntp_clear();
++      }
++      update_vsyscall(&xtime, &wall_to_monotonic,
++                       timekeeper.clock, timekeeper.mult);
++}
++
++
++
+ /* flag for if timekeeping is suspended */
+ int __read_mostly timekeeping_suspended;
+@@ -366,11 +379,7 @@ int do_settimeofday(const struct timespe
+       xtime = *tv;
+-      timekeeper.ntp_error = 0;
+-      ntp_clear();
+-
+-      update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+-                              timekeeper.mult);
++      timekeeping_update(true);
+       write_sequnlock_irqrestore(&xtime_lock, flags);
+@@ -403,11 +412,7 @@ int timekeeping_inject_offset(struct tim
+       xtime = timespec_add(xtime, *ts);
+       wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
+-      timekeeper.ntp_error = 0;
+-      ntp_clear();
+-
+-      update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+-                              timekeeper.mult);
++      timekeeping_update(true);
+       write_sequnlock_irqrestore(&xtime_lock, flags);
+@@ -630,10 +635,7 @@ void timekeeping_inject_sleeptime(struct
+       __timekeeping_inject_sleeptime(delta);
+-      timekeeper.ntp_error = 0;
+-      ntp_clear();
+-      update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+-                              timekeeper.mult);
++      timekeeping_update(true);
+       write_sequnlock_irqrestore(&xtime_lock, flags);
+@@ -938,9 +940,7 @@ static void update_wall_time(void)
+               wall_to_monotonic.tv_sec -= leap;
+       }
+-      /* check to see if there is a new clocksource to use */
+-      update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+-                              timekeeper.mult);
++      timekeeping_update(false);
+ }
+ /**
diff --git a/queue-3.0/timekeeping-add-missing-update-call-in-timekeeping_resume.patch b/queue-3.0/timekeeping-add-missing-update-call-in-timekeeping_resume.patch
new file mode 100644 (file)
index 0000000..39dd05d
--- /dev/null
@@ -0,0 +1,54 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:27:16 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:58 -0400
+Subject: timekeeping: Add missing update call in timekeeping_resume()
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, LKML <linux-kernel@vger.kernel.org>, Linux PM list <linux-pm@vger.kernel.org>, John Stultz <johnstul@us.ibm.com>, Ingo Molnar <mingo@kernel.org>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Prarit Bhargava <prarit@redhat.com>, Linus Torvalds <torvalds@linux-foundation.org>
+Message-ID: <1342546438-17534-12-git-send-email-johnstul@us.ibm.com>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+This is a backport of 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0
+
+The leap second rework unearthed another issue of inconsistent data.
+
+On timekeeping_resume() the timekeeper data is updated, but nothing
+calls timekeeping_update(), so now the update code in the timer
+interrupt sees stale values.
+
+This has been the case before those changes, but then the timer
+interrupt was using stale data as well so this went unnoticed for quite
+some time.
+
+Add the missing update call, so all the data is consistent everywhere.
+
+Reported-by: Andreas Schwab <schwab@linux-m68k.org>
+Reported-and-tested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
+Reported-and-tested-by: Martin Steigerwald <Martin@lichtvoll.de>
+Cc: John Stultz <johnstul@us.ibm.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
+Cc: Prarit Bhargava <prarit@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/timekeeping.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -693,6 +693,7 @@ static void timekeeping_resume(void)
+       timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
+       timekeeper.ntp_error = 0;
+       timekeeping_suspended = 0;
++      timekeeping_update(false);
+       write_sequnlock_irqrestore(&xtime_lock, flags);
+       touch_softlockup_watchdog();
diff --git a/queue-3.0/timekeeping-fix-clock_monotonic-inconsistency-during-leapsecond.patch b/queue-3.0/timekeeping-fix-clock_monotonic-inconsistency-during-leapsecond.patch
new file mode 100644 (file)
index 0000000..d2a1897
--- /dev/null
@@ -0,0 +1,52 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:24:29 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:50 -0400
+Subject: timekeeping: Fix CLOCK_MONOTONIC inconsistency during leapsecond
+To: stable@vger.kernel.org
+Cc: John Stultz <john.stultz@linaro.org>, Thomas Gleixner <tglx@linutronix.de>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>, John Stultz <johnstul@us.ibm.com>
+Message-ID: <1342546438-17534-4-git-send-email-johnstul@us.ibm.com>
+
+
+From: John Stultz <john.stultz@linaro.org>
+
+This is a backport of fad0c66c4bb836d57a5f125ecd38bed653ca863a
+which resolves a bug in the previous commit.
+
+Commit 6b43ae8a61 (ntp: Fix leap-second hrtimer livelock) broke the
+leapsecond update of CLOCK_MONOTONIC. The missing leapsecond update to
+wall_to_monotonic causes discontinuities in CLOCK_MONOTONIC.
+
+Adjust wall_to_monotonic when NTP inserted a leapsecond.
+
+Reported-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Tested-by: Richard Cochran <richardcochran@gmail.com>
+Link: http://lkml.kernel.org/r/1338400497-12420-1-git-send-email-john.stultz@linaro.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/timekeeping.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -824,6 +824,7 @@ static cycle_t logarithmic_accumulation(
+               xtime.tv_sec++;
+               leap = second_overflow(xtime.tv_sec);
+               xtime.tv_sec += leap;
++              wall_to_monotonic.tv_sec -= leap;
+       }
+       /* Accumulate raw time */
+@@ -934,7 +935,7 @@ static void update_wall_time(void)
+               xtime.tv_sec++;
+               leap = second_overflow(xtime.tv_sec);
+               xtime.tv_sec += leap;
+-
++              wall_to_monotonic.tv_sec -= leap;
+       }
+       /* check to see if there is a new clocksource to use */
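
The invariant this fix restores is that CLOCK_MONOTONIC, derived as xtime + wall_to_monotonic, must not move when a leap second steps xtime. A short worked example of the arithmetic (plain C with illustrative values, not kernel code):

```c
/* CLOCK_MONOTONIC is derived as xtime + wall_to_monotonic.  If a leap
 * second steps xtime back by one second, wall_to_monotonic must grow by
 * one second (tv_sec -= leap, with leap == -1) or the sum - and thus
 * CLOCK_MONOTONIC - jumps backwards.  Illustrative values only. */
#include <stdio.h>

int main(void)
{
        long xtime_sec = 1341100800;    /* 2012-07-01 00:00:00 UTC */
        long wtm_sec   = -1341000000;   /* arbitrary example offset */
        int  leap      = -1;            /* leap second inserted */

        long mono_before = xtime_sec + wtm_sec;

        xtime_sec += leap;              /* repeat the last UTC second */

        long mono_broken = xtime_sec + wtm_sec;         /* missing adjustment */
        wtm_sec -= leap;                                /* the fix */
        long mono_fixed  = xtime_sec + wtm_sec;

        printf("monotonic before=%ld broken=%ld fixed=%ld\n",
               mono_before, mono_broken, mono_fixed);
        return 0;
}
```
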
diff --git a/queue-3.0/timekeeping-fix-leapsecond-triggered-load-spike-issue.patch b/queue-3.0/timekeeping-fix-leapsecond-triggered-load-spike-issue.patch
new file mode 100644 (file)
index 0000000..8054431
--- /dev/null
@@ -0,0 +1,64 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:25:50 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:53 -0400
+Subject: timekeeping: Fix leapsecond triggered load spike issue
+To: stable@vger.kernel.org
+Cc: John Stultz <johnstul@us.ibm.com>, Thomas Gleixner <tglx@linutronix.de>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-7-git-send-email-johnstul@us.ibm.com>
+
+From: John Stultz <johnstul@us.ibm.com>
+
+This is a backport of 4873fa070ae84a4115f0b3c9dfabc224f1bc7c51
+
+The timekeeping code misses an update of the hrtimer subsystem after a
+leap second happened. Due to that timers based on CLOCK_REALTIME are
+either expiring a second early or late depending on whether a leap
+second has been inserted or deleted until an operation is initiated
+which causes that update. Unless the update happens by some other
+means this discrepancy between the timekeeping and the hrtimer data
+stays forever and timers are expired either early or late.
+
+The reported immediate workaround - $ date -s "`date`" - is causing a
+call to clock_was_set() which updates the hrtimer data structures.
+See: http://www.sheeri.com/content/mysql-and-leap-second-high-cpu-and-fix
+
+Add the missing clock_was_set() call to update_wall_time() in case of
+a leap second event. The actual update is deferred to softirq context
+as the necessary smp function call cannot be invoked from hard
+interrupt context.
+
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Reported-by: Jan Engelhardt <jengelh@inai.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Acked-by: Prarit Bhargava <prarit@redhat.com>
+Link: http://lkml.kernel.org/r/1341960205-56738-3-git-send-email-johnstul@us.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/timekeeping.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -827,6 +827,8 @@ static cycle_t logarithmic_accumulation(
+               leap = second_overflow(xtime.tv_sec);
+               xtime.tv_sec += leap;
+               wall_to_monotonic.tv_sec -= leap;
++              if (leap)
++                      clock_was_set_delayed();
+       }
+       /* Accumulate raw time */
+@@ -938,6 +940,8 @@ static void update_wall_time(void)
+               leap = second_overflow(xtime.tv_sec);
+               xtime.tv_sec += leap;
+               wall_to_monotonic.tv_sec -= leap;
++              if (leap)
++                      clock_was_set_delayed();
+       }
+       timekeeping_update(false);
diff --git a/queue-3.0/timekeeping-maintain-ktime_t-based-offsets-for-hrtimers.patch b/queue-3.0/timekeeping-maintain-ktime_t-based-offsets-for-hrtimers.patch
new file mode 100644 (file)
index 0000000..0e76927
--- /dev/null
@@ -0,0 +1,102 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:26:11 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:54 -0400
+Subject: timekeeping: Maintain ktime_t based offsets for hrtimers
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, John Stultz <johnstul@us.ibm.com>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-8-git-send-email-johnstul@us.ibm.com>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+This is a backport of 5b9fe759a678e05be4937ddf03d50e950207c1c0
+
+We need to update the hrtimer clock offsets from the hrtimer interrupt
+context. To avoid conversions from timespec to ktime_t maintain a
+ktime_t based representation of those offsets in the timekeeper. This
+puts the conversion overhead into the code which updates the
+underlying offsets and provides fast accessible values in the hrtimer
+interrupt.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Acked-by: Prarit Bhargava <prarit@redhat.com>
+Link: http://lkml.kernel.org/r/1341960205-56738-4-git-send-email-johnstul@us.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/timekeeping.c |   25 ++++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -161,18 +161,34 @@ static struct timespec xtime __attribute
+ static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
+ static struct timespec total_sleep_time;
++/* Offset clock monotonic -> clock realtime */
++static ktime_t offs_real;
++
++/* Offset clock monotonic -> clock boottime */
++static ktime_t offs_boot;
++
+ /*
+  * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
+  */
+ static struct timespec raw_time;
+ /* must hold write on xtime_lock */
++static void update_rt_offset(void)
++{
++      struct timespec tmp, *wtm = &wall_to_monotonic;
++
++      set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
++      offs_real = timespec_to_ktime(tmp);
++}
++
++/* must hold write on xtime_lock */
+ static void timekeeping_update(bool clearntp)
+ {
+       if (clearntp) {
+               timekeeper.ntp_error = 0;
+               ntp_clear();
+       }
++      update_rt_offset();
+       update_vsyscall(&xtime, &wall_to_monotonic,
+                        timekeeper.clock, timekeeper.mult);
+ }
+@@ -587,6 +603,7 @@ void __init timekeeping_init(void)
+       }
+       set_normalized_timespec(&wall_to_monotonic,
+                               -boot.tv_sec, -boot.tv_nsec);
++      update_rt_offset();
+       total_sleep_time.tv_sec = 0;
+       total_sleep_time.tv_nsec = 0;
+       write_sequnlock_irqrestore(&xtime_lock, flags);
+@@ -595,6 +612,12 @@ void __init timekeeping_init(void)
+ /* time in seconds when suspend began */
+ static struct timespec timekeeping_suspend_time;
++static void update_sleep_time(struct timespec t)
++{
++      total_sleep_time = t;
++      offs_boot = timespec_to_ktime(t);
++}
++
+ /**
+  * __timekeeping_inject_sleeptime - Internal function to add sleep interval
+  * @delta: pointer to a timespec delta value
+@@ -606,7 +629,7 @@ static void __timekeeping_inject_sleepti
+ {
+       xtime = timespec_add(xtime, *delta);
+       wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
+-      total_sleep_time = timespec_add(total_sleep_time, *delta);
++      update_sleep_time(timespec_add(total_sleep_time, *delta));
+ }
diff --git a/queue-3.0/timekeeping-provide-hrtimer-update-function.patch b/queue-3.0/timekeeping-provide-hrtimer-update-function.patch
new file mode 100644 (file)
index 0000000..b871229
--- /dev/null
@@ -0,0 +1,91 @@
+From johnstul@us.ibm.com  Tue Jul 17 15:26:41 2012
+From: John Stultz <johnstul@us.ibm.com>
+Date: Tue, 17 Jul 2012 13:33:56 -0400
+Subject: timekeeping: Provide hrtimer update function
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, John Stultz <johnstul@us.ibm.com>, Prarit Bhargava <prarit@redhat.com>, Linux Kernel <linux-kernel@vger.kernel.org>
+Message-ID: <1342546438-17534-10-git-send-email-johnstul@us.ibm.com>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+This is a backport of f6c06abfb3972ad4914cef57d8348fcb2932bc3b
+
+To finally fix the infamous leap second issue and other race windows
+caused by functions which change the offsets between the various time
+bases (CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME) we need a
+function which atomically gets the current monotonic time and updates
+the offsets of CLOCK_REALTIME and CLOCK_BOOTTIME with minimalistic
+overhead. The previous patch which provides ktime_t offsets allows us
+to make this function almost as cheap as ktime_get() which is going to
+be replaced in hrtimer_interrupt().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Acked-by: Prarit Bhargava <prarit@redhat.com>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Link: http://lkml.kernel.org/r/1341960205-56738-7-git-send-email-johnstul@us.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hrtimer.h   |    1 +
+ kernel/time/timekeeping.c |   34 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 35 insertions(+)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -327,6 +327,7 @@ extern ktime_t ktime_get(void);
+ extern ktime_t ktime_get_real(void);
+ extern ktime_t ktime_get_boottime(void);
+ extern ktime_t ktime_get_monotonic_offset(void);
++extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot);
+ DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -1126,6 +1126,40 @@ void get_xtime_and_monotonic_and_sleep_o
+       } while (read_seqretry(&xtime_lock, seq));
+ }
++#ifdef CONFIG_HIGH_RES_TIMERS
++/**
++ * ktime_get_update_offsets - hrtimer helper
++ * @real:     pointer to storage for monotonic -> realtime offset
++ * @boot:     pointer to storage for monotonic -> boottime offset
++ *
++ * Returns current monotonic time and updates the offsets
++ * Called from hrtimer_interrupt() or retrigger_next_event()
++ */
++ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot)
++{
++      ktime_t now;
++      unsigned int seq;
++      u64 secs, nsecs;
++
++      do {
++              seq = read_seqbegin(&xtime_lock);
++
++              secs = xtime.tv_sec;
++              nsecs = xtime.tv_nsec;
++              nsecs += timekeeping_get_ns();
++              /* If arch requires, add in gettimeoffset() */
++              nsecs += arch_gettimeoffset();
++
++              *real = offs_real;
++              *boot = offs_boot;
++      } while (read_seqretry(&xtime_lock, seq));
++
++      now = ktime_add_ns(ktime_set(secs, 0), nsecs);
++      now = ktime_sub(now, *real);
++      return now;
++}
++#endif
++
+ /**
+  * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
+  */
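
The helper added above relies on the usual seqcount read-retry pattern: sample the time and both offsets inside one read section, so a concurrent update forces a retry and callers never see a mixed snapshot. A hedged userspace sketch of that pattern (hypothetical types and names, not the kernel timekeeper):

```c
/* Sketch of the seqcount read-retry pattern behind ktime_get_update_offsets():
 * the time and both offsets are sampled inside one read section, so a
 * concurrent writer forces a retry and the caller never sees a mixed
 * snapshot.  Hypothetical userspace model, not kernel code. */
#include <stdio.h>

struct snapshot { long long mono_ns, offs_real_ns, offs_boot_ns; };

static unsigned seq;                    /* even: stable, odd: write in progress */
static struct snapshot tk = { 0, 34000000000LL, 0 };   /* example values */

static unsigned read_seqbegin(void) { while (seq & 1) { /* spin */ } return seq; }
static int read_seqretry(unsigned s) { return s != seq; }

static struct snapshot get_update_offsets(void)
{
        struct snapshot s;
        unsigned start;

        do {
                start = read_seqbegin();
                s = tk;                 /* time and offsets read together */
        } while (read_seqretry(start));
        return s;
}

int main(void)
{
        struct snapshot s = get_update_offsets();
        printf("mono=%lld offs_real=%lld offs_boot=%lld\n",
               s.mono_ns, s.offs_real_ns, s.offs_boot_ns);
        return 0;
}
```
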