4.9-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Thu, 18 Oct 2018 17:13:46 +0000 (19:13 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Thu, 18 Oct 2018 17:13:46 +0000 (19:13 +0200)
added patches:
ext4-avoid-running-out-of-journal-credits-when-appending-to-an-inline-file.patch
hv-properly-delay-kvp-packets-when-negotiation-is-in-progress.patch
macintosh-rack-meter-convert-cputime64_t-use-to-u64.patch
sched-cputime-convert-kcpustat-to-nsecs.patch
sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch
sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch

queue-4.9/ext4-avoid-running-out-of-journal-credits-when-appending-to-an-inline-file.patch [new file with mode: 0644]
queue-4.9/hv-properly-delay-kvp-packets-when-negotiation-is-in-progress.patch [new file with mode: 0644]
queue-4.9/macintosh-rack-meter-convert-cputime64_t-use-to-u64.patch [new file with mode: 0644]
queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch [new file with mode: 0644]
queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch [new file with mode: 0644]
queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/ext4-avoid-running-out-of-journal-credits-when-appending-to-an-inline-file.patch b/queue-4.9/ext4-avoid-running-out-of-journal-credits-when-appending-to-an-inline-file.patch
new file mode 100644 (file)
index 0000000..9fd3466
--- /dev/null
@@ -0,0 +1,125 @@
+From 8bc1379b82b8e809eef77a9fedbb75c6c297be19 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 16 Jun 2018 23:41:59 -0400
+Subject: ext4: avoid running out of journal credits when appending to an inline file
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 8bc1379b82b8e809eef77a9fedbb75c6c297be19 upstream.
+
+Use a separate journal transaction if it turns out that we need to
+convert an inline file to use a data block.  Otherwise we could end
+up failing due to not having enough journal credits.
+
+This addresses CVE-2018-10883.
+
+https://bugzilla.kernel.org/show_bug.cgi?id=200071
+
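+A minimal sketch of the idea (hypothetical helper names, not the exact
+patch below): stop the caller's handle before converting, so the
+conversion can reserve its own journal credits.
+
+	/* the caller's handle was sized for an inline write only */
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+	ret = write_inline_data(handle, inode);		/* hypothetical */
+	if (ret == -ENOSPC) {
+		ext4_journal_stop(handle);		/* give the credits back */
+		ret = convert_inline_to_extent(inode);	/* runs its own transaction */
+		if (!ret)
+			goto retry_journal;		/* redo as a regular write */
+	}
+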
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+[fengc@google.com: 4.4 and 4.9 backport: adjust context]
+Signed-off-by: Chenbo Feng <fengc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ext4.h   |    3 ---
+ fs/ext4/inline.c |   38 +-------------------------------------
+ fs/ext4/xattr.c  |   18 ++----------------
+ 3 files changed, 3 insertions(+), 56 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3038,9 +3038,6 @@ extern struct buffer_head *ext4_get_firs
+ extern int ext4_inline_data_fiemap(struct inode *inode,
+                                  struct fiemap_extent_info *fieinfo,
+                                  int *has_inline, __u64 start, __u64 len);
+-extern int ext4_try_to_evict_inline_data(handle_t *handle,
+-                                       struct inode *inode,
+-                                       int needed);
+ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
+ extern int ext4_convert_inline_data(struct inode *inode);
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -889,11 +889,11 @@ retry_journal:
+       flags |= AOP_FLAG_NOFS;
+       if (ret == -ENOSPC) {
++              ext4_journal_stop(handle);
+               ret = ext4_da_convert_inline_data_to_extent(mapping,
+                                                           inode,
+                                                           flags,
+                                                           fsdata);
+-              ext4_journal_stop(handle);
+               if (ret == -ENOSPC &&
+                   ext4_should_retry_alloc(inode->i_sb, &retries))
+                       goto retry_journal;
+@@ -1865,42 +1865,6 @@ out:
+       return (error < 0 ? error : 0);
+ }
+-/*
+- * Called during xattr set, and if we can sparse space 'needed',
+- * just create the extent tree evict the data to the outer block.
+- *
+- * We use jbd2 instead of page cache to move data to the 1st block
+- * so that the whole transaction can be committed as a whole and
+- * the data isn't lost because of the delayed page cache write.
+- */
+-int ext4_try_to_evict_inline_data(handle_t *handle,
+-                                struct inode *inode,
+-                                int needed)
+-{
+-      int error;
+-      struct ext4_xattr_entry *entry;
+-      struct ext4_inode *raw_inode;
+-      struct ext4_iloc iloc;
+-
+-      error = ext4_get_inode_loc(inode, &iloc);
+-      if (error)
+-              return error;
+-
+-      raw_inode = ext4_raw_inode(&iloc);
+-      entry = (struct ext4_xattr_entry *)((void *)raw_inode +
+-                                          EXT4_I(inode)->i_inline_off);
+-      if (EXT4_XATTR_LEN(entry->e_name_len) +
+-          EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
+-              error = -ENOSPC;
+-              goto out;
+-      }
+-
+-      error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+-out:
+-      brelse(iloc.bh);
+-      return error;
+-}
+-
+ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
+ {
+       handle_t *handle;
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1086,22 +1086,8 @@ int ext4_xattr_ibody_inline_set(handle_t
+       if (EXT4_I(inode)->i_extra_isize == 0)
+               return -ENOSPC;
+       error = ext4_xattr_set_entry(i, s, inode);
+-      if (error) {
+-              if (error == -ENOSPC &&
+-                  ext4_has_inline_data(inode)) {
+-                      error = ext4_try_to_evict_inline_data(handle, inode,
+-                                      EXT4_XATTR_LEN(strlen(i->name) +
+-                                      EXT4_XATTR_SIZE(i->value_len)));
+-                      if (error)
+-                              return error;
+-                      error = ext4_xattr_ibody_find(inode, i, is);
+-                      if (error)
+-                              return error;
+-                      error = ext4_xattr_set_entry(i, s, inode);
+-              }
+-              if (error)
+-                      return error;
+-      }
++      if (error)
++              return error;
+       header = IHDR(inode, ext4_raw_inode(&is->iloc));
+       if (!IS_LAST_ENTRY(s->first)) {
+               header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
diff --git a/queue-4.9/hv-properly-delay-kvp-packets-when-negotiation-is-in-progress.patch b/queue-4.9/hv-properly-delay-kvp-packets-when-negotiation-is-in-progress.patch
new file mode 100644 (file)
index 0000000..9f4ca85
--- /dev/null
@@ -0,0 +1,94 @@
+From a3ade8cc474d848676278660e65f5af1e9e094d9 Mon Sep 17 00:00:00 2001
+From: Long Li <longli@microsoft.com>
+Date: Sun, 30 Apr 2017 16:21:19 -0700
+Subject: HV: properly delay KVP packets when negotiation is in progress
+
+From: Long Li <longli@microsoft.com>
+
+commit a3ade8cc474d848676278660e65f5af1e9e094d9 upstream.
+
+The host may send multiple negotiation packets
+(due to timeout) before the KVP user-mode daemon
+is connected. We need to defer processing those
+packets until the daemon is negotiated and connected.
+It's okay for the guest to respond
+to all negotiation packets.
+
+In addition, the host may send multiple staged
+KVP requests as soon as negotiation is done.
+We need to properly process those packets using one
+tasklet for exclusive access to the ring buffer.
+
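+A compact sketch of the resulting callback flow (hypothetical names,
+condensed from the diff below):
+
+	void kvp_callback(void *context)
+	{
+		if (daemon_state < HVUTIL_READY) {
+			/* daemon not connected yet: defer, don't drop */
+			if (nego_state == NEGO_NOT_STARTED) {
+				nego_state = NEGO_IN_PROGRESS;
+				schedule_delayed_work(&handshake_work,
+						      HV_UTIL_NEGO_TIMEOUT * HZ);
+			}
+			return;
+		}
+	recheck:
+		vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4,
+				 &recvlen, &requestid);
+		if (recvlen > 0) {
+			handle_kvp_packet(recv_buffer);	/* reply to the host */
+			goto recheck;	/* drain any staged requests */
+		}
+	}
+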
+This patch is based on the work of
+Nick Meier <Nick.Meier@microsoft.com>.
+
+The above is the original changelog of
+a3ade8cc474d ("HV: properly delay KVP packets when negotiation is in progress").
+
+Here I re-worked the original patch because the mainline version
+can't work for the linux-4.4.y branch, on which channel->callback_event
+doesn't exist yet. In the mainline, channel->callback_event was added by:
+631e63a9f346 ("vmbus: change to per channel tasklet"). Here we don't want
+to backport it to v4.4, as it requires extra supporting changes and fixes,
+which are unnecessary for the KVP bug we're trying to resolve.
+
+NOTE: before this patch is used, we should first cherry-pick the 3
+related patches from the mainline.
+
+The background of this backport request: recently Wang Jian reported
+some KVP issues (https://github.com/LIS/lis-next/issues/593),
+e.g. the /var/lib/hyperv/.kvp_pool_* files cannot be updated, and sometimes,
+if the hv_kvp_daemon doesn't start in time, the host may not be able to query
+the VM's IP address via KVP.
+
+Reported-by: Wang Jian <jianjian.wang1@gmail.com>
+Tested-by: Wang Jian <jianjian.wang1@gmail.com>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Long Li <longli@microsoft.com>
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hv/hv_kvp.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/drivers/hv/hv_kvp.c
++++ b/drivers/hv/hv_kvp.c
+@@ -616,21 +616,22 @@ void hv_kvp_onchannelcallback(void *cont
+                    NEGO_IN_PROGRESS,
+                    NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
+-      if (host_negotiatied == NEGO_NOT_STARTED &&
+-          kvp_transaction.state < HVUTIL_READY) {
++      if (kvp_transaction.state < HVUTIL_READY) {
+               /*
+                * If userspace daemon is not connected and host is asking
+                * us to negotiate we need to delay to not lose messages.
+                * This is important for Failover IP setting.
+                */
+-              host_negotiatied = NEGO_IN_PROGRESS;
+-              schedule_delayed_work(&kvp_host_handshake_work,
++              if (host_negotiatied == NEGO_NOT_STARTED) {
++                      host_negotiatied = NEGO_IN_PROGRESS;
++                      schedule_delayed_work(&kvp_host_handshake_work,
+                                     HV_UTIL_NEGO_TIMEOUT * HZ);
++              }
+               return;
+       }
+       if (kvp_transaction.state > HVUTIL_READY)
+               return;
+-
++recheck:
+       vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen,
+                        &requestid);
+@@ -707,6 +708,8 @@ void hv_kvp_onchannelcallback(void *cont
+                                      VM_PKT_DATA_INBAND, 0);
+               host_negotiatied = NEGO_FINISHED;
++
++              goto recheck;
+       }
+ }
diff --git a/queue-4.9/macintosh-rack-meter-convert-cputime64_t-use-to-u64.patch b/queue-4.9/macintosh-rack-meter-convert-cputime64_t-use-to-u64.patch
new file mode 100644 (file)
index 0000000..431dce0
--- /dev/null
@@ -0,0 +1,108 @@
+From 564b733c899f4e12a64946658960fce80cad0b05 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <fweisbec@gmail.com>
+Date: Tue, 31 Jan 2017 04:09:20 +0100
+Subject: macintosh/rack-meter: Convert cputime64_t use to u64
+
+From: Frederic Weisbecker <fweisbec@gmail.com>
+
+commit 564b733c899f4e12a64946658960fce80cad0b05 upstream.
+
+cputime_t is going to be removed and replaced by nsecs units,
+so convert the drivers/macintosh/rack-meter.c use to u64.
+
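+The practical consequence for this driver: tick arithmetic such as
+"load = 9 * busy_ticks / total_ticks" becomes 64-bit nanosecond
+arithmetic, which needs an explicit 64-bit division helper on 32-bit
+targets (illustrative, mirroring the hunk below):
+
+	u64 busy_nsecs = total_nsecs - idle_nsecs;
+	int load = div64_u64(9 * busy_nsecs, total_nsecs);	/* 0..9 scale */
+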
+Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Stanislaw Gruszka <sgruszka@redhat.com>
+Cc: Wanpeng Li <wanpeng.li@hotmail.com>
+Link: http://lkml.kernel.org/r/1485832191-26889-5-git-send-email-fweisbec@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Ivan Delalande <colona@arista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/macintosh/rack-meter.c |   32 ++++++++++++++++----------------
+ 1 file changed, 16 insertions(+), 16 deletions(-)
+
+--- a/drivers/macintosh/rack-meter.c
++++ b/drivers/macintosh/rack-meter.c
+@@ -52,8 +52,8 @@ struct rackmeter_dma {
+ struct rackmeter_cpu {
+       struct delayed_work     sniffer;
+       struct rackmeter        *rm;
+-      cputime64_t             prev_wall;
+-      cputime64_t             prev_idle;
++      u64                     prev_wall;
++      u64                     prev_idle;
+       int                     zero;
+ } ____cacheline_aligned;
+@@ -81,7 +81,7 @@ static int rackmeter_ignore_nice;
+ /* This is copied from cpufreq_ondemand, maybe we should put it in
+  * a common header somewhere
+  */
+-static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
++static inline u64 get_cpu_idle_time(unsigned int cpu)
+ {
+       u64 retval;
+@@ -91,7 +91,7 @@ static inline cputime64_t get_cpu_idle_t
+       if (rackmeter_ignore_nice)
+               retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+-      return nsecs_to_cputime64(retval);
++      return retval;
+ }
+ static void rackmeter_setup_i2s(struct rackmeter *rm)
+@@ -217,23 +217,23 @@ static void rackmeter_do_timer(struct wo
+               container_of(work, struct rackmeter_cpu, sniffer.work);
+       struct rackmeter *rm = rcpu->rm;
+       unsigned int cpu = smp_processor_id();
+-      cputime64_t cur_jiffies, total_idle_ticks;
+-      unsigned int total_ticks, idle_ticks;
++      u64 cur_nsecs, total_idle_nsecs;
++      u64 total_nsecs, idle_nsecs;
+       int i, offset, load, cumm, pause;
+-      cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
+-      total_ticks = (unsigned int) (cur_jiffies - rcpu->prev_wall);
+-      rcpu->prev_wall = cur_jiffies;
+-
+-      total_idle_ticks = get_cpu_idle_time(cpu);
+-      idle_ticks = (unsigned int) (total_idle_ticks - rcpu->prev_idle);
+-      idle_ticks = min(idle_ticks, total_ticks);
+-      rcpu->prev_idle = total_idle_ticks;
++      cur_nsecs = jiffies64_to_nsecs(get_jiffies_64());
++      total_nsecs = cur_nsecs - rcpu->prev_wall;
++      rcpu->prev_wall = cur_nsecs;
++
++      total_idle_nsecs = get_cpu_idle_time(cpu);
++      idle_nsecs = total_idle_nsecs - rcpu->prev_idle;
++      idle_nsecs = min(idle_nsecs, total_nsecs);
++      rcpu->prev_idle = total_idle_nsecs;
+       /* We do a very dumb calculation to update the LEDs for now,
+        * we'll do better once we have actual PWM implemented
+        */
+-      load = (9 * (total_ticks - idle_ticks)) / total_ticks;
++      load = div64_u64(9 * (total_nsecs - idle_nsecs), total_nsecs);
+       offset = cpu << 3;
+       cumm = 0;
+@@ -278,7 +278,7 @@ static void rackmeter_init_cpu_sniffer(s
+                       continue;
+               rcpu = &rm->cpu[cpu];
+               rcpu->prev_idle = get_cpu_idle_time(cpu);
+-              rcpu->prev_wall = jiffies64_to_cputime64(get_jiffies_64());
++              rcpu->prev_wall = jiffies64_to_nsecs(get_jiffies_64());
+               schedule_delayed_work_on(cpu, &rm->cpu[cpu].sniffer,
+                                        msecs_to_jiffies(CPU_SAMPLING_RATE));
+       }
diff --git a/queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch b/queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch
new file mode 100644 (file)
index 0000000..cfa8de6
--- /dev/null
@@ -0,0 +1,368 @@
+From 7fb1327ee9b92fca27662f9b9d60c7c3376d6c69 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <fweisbec@gmail.com>
+Date: Tue, 31 Jan 2017 04:09:19 +0100
+Subject: sched/cputime: Convert kcpustat to nsecs
+
+From: Frederic Weisbecker <fweisbec@gmail.com>
+
+commit 7fb1327ee9b92fca27662f9b9d60c7c3376d6c69 upstream.
+
+Kernel CPU stats are stored in cputime_t which is an architecture
+defined type, and hence a bit opaque and requiring accessors and mutators
+for any operation.
+
+Converting them to nsecs simplifies the code and is one step toward
+the removal of cputime_t in the core code.
+
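+For example, a reader such as /proc/stat can now keep everything in
+nsecs and convert once, at the presentation boundary (sketch based on
+the fs/proc/stat.c hunk below):
+
+	u64 user = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];	/* nsecs */
+	/* convert to clock_t (USER_HZ) only when printing */
+	seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
+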
+Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Stanislaw Gruszka <sgruszka@redhat.com>
+Cc: Wanpeng Li <wanpeng.li@hotmail.com>
+Link: http://lkml.kernel.org/r/1485832191-26889-4-git-send-email-fweisbec@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[colona: minor conflict as 527b0a76f41d ("sched/cpuacct: Avoid %lld seq_printf
+ warning") is missing from v4.9]
+Signed-off-by: Ivan Delalande <colona@arista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/appldata/appldata_os.c   |   16 ++++----
+ drivers/cpufreq/cpufreq.c          |    6 +--
+ drivers/cpufreq/cpufreq_governor.c |    2 -
+ drivers/cpufreq/cpufreq_stats.c    |    1 
+ drivers/macintosh/rack-meter.c     |    2 -
+ fs/proc/stat.c                     |   68 ++++++++++++++++++-------------------
+ fs/proc/uptime.c                   |    7 +--
+ kernel/sched/cpuacct.c             |    2 -
+ kernel/sched/cputime.c             |   22 +++++------
+ 9 files changed, 61 insertions(+), 65 deletions(-)
+
+--- a/arch/s390/appldata/appldata_os.c
++++ b/arch/s390/appldata/appldata_os.c
+@@ -113,21 +113,21 @@ static void appldata_get_os_data(void *d
+       j = 0;
+       for_each_online_cpu(i) {
+               os_data->os_cpu[j].per_cpu_user =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
+               os_data->os_cpu[j].per_cpu_nice =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
+               os_data->os_cpu[j].per_cpu_system =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
+               os_data->os_cpu[j].per_cpu_idle =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
+               os_data->os_cpu[j].per_cpu_irq =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
+               os_data->os_cpu[j].per_cpu_softirq =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
+               os_data->os_cpu[j].per_cpu_iowait =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
+               os_data->os_cpu[j].per_cpu_steal =
+-                      cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
++                      nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
+               os_data->os_cpu[j].cpu_id = i;
+               j++;
+       }
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -132,7 +132,7 @@ static inline u64 get_cpu_idle_time_jiff
+       u64 cur_wall_time;
+       u64 busy_time;
+-      cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
++      cur_wall_time = jiffies64_to_nsecs(get_jiffies_64());
+       busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+       busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+@@ -143,9 +143,9 @@ static inline u64 get_cpu_idle_time_jiff
+       idle_time = cur_wall_time - busy_time;
+       if (wall)
+-              *wall = cputime_to_usecs(cur_wall_time);
++              *wall = div_u64(cur_wall_time, NSEC_PER_USEC);
+-      return cputime_to_usecs(idle_time);
++      return div_u64(idle_time, NSEC_PER_USEC);
+ }
+ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
+--- a/drivers/cpufreq/cpufreq_governor.c
++++ b/drivers/cpufreq/cpufreq_governor.c
+@@ -152,7 +152,7 @@ unsigned int dbs_update(struct cpufreq_p
+               if (ignore_nice) {
+                       u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+-                      idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice);
++                      idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC);
+                       j_cdbs->prev_cpu_nice = cur_nice;
+               }
+--- a/drivers/cpufreq/cpufreq_stats.c
++++ b/drivers/cpufreq/cpufreq_stats.c
+@@ -13,7 +13,6 @@
+ #include <linux/cpufreq.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
+-#include <linux/cputime.h>
+ static DEFINE_SPINLOCK(cpufreq_stats_lock);
+--- a/drivers/macintosh/rack-meter.c
++++ b/drivers/macintosh/rack-meter.c
+@@ -91,7 +91,7 @@ static inline cputime64_t get_cpu_idle_t
+       if (rackmeter_ignore_nice)
+               retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+-      return retval;
++      return nsecs_to_cputime64(retval);
+ }
+ static void rackmeter_setup_i2s(struct rackmeter *rm)
+--- a/fs/proc/stat.c
++++ b/fs/proc/stat.c
+@@ -21,23 +21,23 @@
+ #ifdef arch_idle_time
+-static cputime64_t get_idle_time(int cpu)
++static u64 get_idle_time(int cpu)
+ {
+-      cputime64_t idle;
++      u64 idle;
+       idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+       if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
+-              idle += arch_idle_time(cpu);
++              idle += cputime_to_nsecs(arch_idle_time(cpu));
+       return idle;
+ }
+-static cputime64_t get_iowait_time(int cpu)
++static u64 get_iowait_time(int cpu)
+ {
+-      cputime64_t iowait;
++      u64 iowait;
+       iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+       if (cpu_online(cpu) && nr_iowait_cpu(cpu))
+-              iowait += arch_idle_time(cpu);
++              iowait += cputime_to_nsecs(arch_idle_time(cpu));
+       return iowait;
+ }
+@@ -45,32 +45,32 @@ static cputime64_t get_iowait_time(int c
+ static u64 get_idle_time(int cpu)
+ {
+-      u64 idle, idle_time = -1ULL;
++      u64 idle, idle_usecs = -1ULL;
+       if (cpu_online(cpu))
+-              idle_time = get_cpu_idle_time_us(cpu, NULL);
++              idle_usecs = get_cpu_idle_time_us(cpu, NULL);
+-      if (idle_time == -1ULL)
++      if (idle_usecs == -1ULL)
+               /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
+               idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+       else
+-              idle = usecs_to_cputime64(idle_time);
++              idle = idle_usecs * NSEC_PER_USEC;
+       return idle;
+ }
+ static u64 get_iowait_time(int cpu)
+ {
+-      u64 iowait, iowait_time = -1ULL;
++      u64 iowait, iowait_usecs = -1ULL;
+       if (cpu_online(cpu))
+-              iowait_time = get_cpu_iowait_time_us(cpu, NULL);
++              iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
+-      if (iowait_time == -1ULL)
++      if (iowait_usecs == -1ULL)
+               /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
+               iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+       else
+-              iowait = usecs_to_cputime64(iowait_time);
++              iowait = iowait_usecs * NSEC_PER_USEC;
+       return iowait;
+ }
+@@ -115,16 +115,16 @@ static int show_stat(struct seq_file *p,
+       }
+       sum += arch_irq_stat();
+-      seq_put_decimal_ull(p, "cpu  ", cputime64_to_clock_t(user));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+-      seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
++      seq_put_decimal_ull(p, "cpu  ", nsec_to_clock_t(user));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
++      seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
+       seq_putc(p, '\n');
+       for_each_online_cpu(i) {
+@@ -140,16 +140,16 @@ static int show_stat(struct seq_file *p,
+               guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+               guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+               seq_printf(p, "cpu%d", i);
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+-              seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
++              seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
+               seq_putc(p, '\n');
+       }
+       seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
+--- a/fs/proc/uptime.c
++++ b/fs/proc/uptime.c
+@@ -5,23 +5,20 @@
+ #include <linux/seq_file.h>
+ #include <linux/time.h>
+ #include <linux/kernel_stat.h>
+-#include <linux/cputime.h>
+ static int uptime_proc_show(struct seq_file *m, void *v)
+ {
+       struct timespec uptime;
+       struct timespec idle;
+-      u64 idletime;
+       u64 nsec;
+       u32 rem;
+       int i;
+-      idletime = 0;
++      nsec = 0;
+       for_each_possible_cpu(i)
+-              idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
++              nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+       get_monotonic_boottime(&uptime);
+-      nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
+       idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+       idle.tv_nsec = rem;
+       seq_printf(m, "%lu.%02lu %lu.%02lu\n",
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -297,7 +297,7 @@ static int cpuacct_stats_show(struct seq
+       for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
+               seq_printf(sf, "%s %lld\n",
+                          cpuacct_stat_desc[stat],
+-                         cputime64_to_clock_t(val[stat]));
++                         nsec_to_clock_t(val[stat]));
+       }
+       return 0;
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -75,9 +75,9 @@ static cputime_t irqtime_account_update(
+       u64 *cpustat = kcpustat_this_cpu->cpustat;
+       cputime_t irq_cputime;
+-      irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
++      irq_cputime = nsecs_to_cputime64(irqtime - cpustat[idx]);
+       irq_cputime = min(irq_cputime, maxtime);
+-      cpustat[idx] += irq_cputime;
++      cpustat[idx] += cputime_to_nsecs(irq_cputime);
+       return irq_cputime;
+ }
+@@ -143,7 +143,7 @@ void account_user_time(struct task_struc
+       index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
+       /* Add user time to cpustat. */
+-      task_group_account_field(p, index, (__force u64) cputime);
++      task_group_account_field(p, index, cputime_to_nsecs(cputime));
+       /* Account for user time used */
+       acct_account_cputime(p);
+@@ -168,11 +168,11 @@ static void account_guest_time(struct ta
+       /* Add guest time to cpustat. */
+       if (task_nice(p) > 0) {
+-              cpustat[CPUTIME_NICE] += (__force u64) cputime;
+-              cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
++              cpustat[CPUTIME_NICE] += cputime_to_nsecs(cputime);
++              cpustat[CPUTIME_GUEST_NICE] += cputime_to_nsecs(cputime);
+       } else {
+-              cpustat[CPUTIME_USER] += (__force u64) cputime;
+-              cpustat[CPUTIME_GUEST] += (__force u64) cputime;
++              cpustat[CPUTIME_USER] += cputime_to_nsecs(cputime);
++              cpustat[CPUTIME_GUEST] += cputime_to_nsecs(cputime);
+       }
+ }
+@@ -193,7 +193,7 @@ void __account_system_time(struct task_s
+       account_group_system_time(p, cputime);
+       /* Add system time to cpustat. */
+-      task_group_account_field(p, index, (__force u64) cputime);
++      task_group_account_field(p, index, cputime_to_nsecs(cputime));
+       /* Account for system time used */
+       acct_account_cputime(p);
+@@ -234,7 +234,7 @@ void account_steal_time(cputime_t cputim
+ {
+       u64 *cpustat = kcpustat_this_cpu->cpustat;
+-      cpustat[CPUTIME_STEAL] += (__force u64) cputime;
++      cpustat[CPUTIME_STEAL] += cputime_to_nsecs(cputime);
+ }
+ /*
+@@ -247,9 +247,9 @@ void account_idle_time(cputime_t cputime
+       struct rq *rq = this_rq();
+       if (atomic_read(&rq->nr_iowait) > 0)
+-              cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
++              cpustat[CPUTIME_IOWAIT] += cputime_to_nsecs(cputime);
+       else
+-              cpustat[CPUTIME_IDLE] += (__force u64) cputime;
++              cpustat[CPUTIME_IDLE] += cputime_to_nsecs(cputime);
+ }
+ /*
diff --git a/queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch b/queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch
new file mode 100644 (file)
index 0000000..4a62195
--- /dev/null
@@ -0,0 +1,140 @@
+From 25e2d8c1b9e327ed260edd13169cc22bc7a78bc6 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <fweisbec@gmail.com>
+Date: Tue, 25 Apr 2017 16:10:48 +0200
+Subject: sched/cputime: Fix ksoftirqd cputime accounting regression
+
+From: Frederic Weisbecker <fweisbec@gmail.com>
+
+commit 25e2d8c1b9e327ed260edd13169cc22bc7a78bc6 upstream.
+
+irq_time_read() returns the irqtime minus the ksoftirqd time. This
+is necessary because irq_time_read() is used to subtract the IRQ time
+from the sum_exec_runtime of a task. If we were to include the softirq
+time of ksoftirqd, this task would subtract its own CPU time every time
+it updates ksoftirqd->sum_exec_runtime, which would therefore never
+progress.
+
+But this behaviour got broken by:
+
+  a499a5a14db ("sched/cputime: Increment kcpustat directly on irqtime account")
+
+... which now includes ksoftirqd softirq time in the time returned by
+irq_time_read().
+
+This has resulted in wrong ksoftirqd cputime reported to userspace
+through /proc/stat and thus "top" not showing ksoftirqd when it should
+after intense networking load.
+
+ksoftirqd->stime happens to be correct but it gets scaled down by
+sum_exec_runtime through task_cputime_adjusted().
+
+To fix this, just account the strict IRQ time in a separate counter and
+use it to report the IRQ time.
+
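+On the writer side, one delta now feeds three consumers (condensed from
+the irqtime_account_delta() hunk below):
+
+	u64_stats_update_begin(&irqtime->sync);
+	cpustat[idx]        += delta;	/* what /proc/stat reports */
+	irqtime->total      += delta;	/* strict IRQ time returned by
+					 * irq_time_read(), so ksoftirqd's
+					 * softirq time never feeds back */
+	irqtime->tick_delta += delta;	/* consumed by tick accounting */
+	u64_stats_update_end(&irqtime->sync);
+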
+Reported-and-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stanislaw Gruszka <sgruszka@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Wanpeng Li <wanpeng.li@hotmail.com>
+Link: http://lkml.kernel.org/r/1493129448-5356-1-git-send-email-fweisbec@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Ivan Delalande <colona@arista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/cputime.c |   27 ++++++++++++++++-----------
+ kernel/sched/sched.h   |    9 +++++++--
+ 2 files changed, 23 insertions(+), 13 deletions(-)
+
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -37,6 +37,18 @@ void disable_sched_clock_irqtime(void)
+       sched_clock_irqtime = 0;
+ }
++static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
++                                enum cpu_usage_stat idx)
++{
++      u64 *cpustat = kcpustat_this_cpu->cpustat;
++
++      u64_stats_update_begin(&irqtime->sync);
++      cpustat[idx] += delta;
++      irqtime->total += delta;
++      irqtime->tick_delta += delta;
++      u64_stats_update_end(&irqtime->sync);
++}
++
+ /*
+  * Called before incrementing preempt_count on {soft,}irq_enter
+  * and before decrementing preempt_count on {soft,}irq_exit.
+@@ -44,7 +56,6 @@ void disable_sched_clock_irqtime(void)
+ void irqtime_account_irq(struct task_struct *curr)
+ {
+       struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+-      u64 *cpustat = kcpustat_this_cpu->cpustat;
+       s64 delta;
+       int cpu;
+@@ -55,22 +66,16 @@ void irqtime_account_irq(struct task_str
+       delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
+       irqtime->irq_start_time += delta;
+-      u64_stats_update_begin(&irqtime->sync);
+       /*
+        * We do not account for softirq time from ksoftirqd here.
+        * We want to continue accounting softirq time to ksoftirqd thread
+        * in that case, so as not to confuse scheduler with a special task
+        * that do not consume any time, but still wants to run.
+        */
+-      if (hardirq_count()) {
+-              cpustat[CPUTIME_IRQ] += delta;
+-              irqtime->tick_delta += delta;
+-      } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
+-              cpustat[CPUTIME_SOFTIRQ] += delta;
+-              irqtime->tick_delta += delta;
+-      }
+-
+-      u64_stats_update_end(&irqtime->sync);
++      if (hardirq_count())
++              irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
++      else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
++              irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+ }
+ EXPORT_SYMBOL_GPL(irqtime_account_irq);
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1743,6 +1743,7 @@ static inline void nohz_balance_exit_idl
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ struct irqtime {
++      u64                     total;
+       u64                     tick_delta;
+       u64                     irq_start_time;
+       struct u64_stats_sync   sync;
+@@ -1750,16 +1751,20 @@ struct irqtime {
+ DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
++/*
++ * Returns the irqtime minus the softirq time computed by ksoftirqd.
++ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
++ * and never move forward.
++ */
+ static inline u64 irq_time_read(int cpu)
+ {
+       struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+-      u64 *cpustat = kcpustat_cpu(cpu).cpustat;
+       unsigned int seq;
+       u64 total;
+       do {
+               seq = __u64_stats_fetch_begin(&irqtime->sync);
+-              total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
++              total = irqtime->total;
+       } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+       return total;
diff --git a/queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch b/queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch
new file mode 100644 (file)
index 0000000..5daf4c2
--- /dev/null
@@ -0,0 +1,166 @@
+From a499a5a14dbd1d0315a96fc62a8798059325e9e6 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <fweisbec@gmail.com>
+Date: Tue, 31 Jan 2017 04:09:32 +0100
+Subject: sched/cputime: Increment kcpustat directly on irqtime account
+
+From: Frederic Weisbecker <fweisbec@gmail.com>
+
+commit a499a5a14dbd1d0315a96fc62a8798059325e9e6 upstream.
+
+The irqtime is accounted in nsecs and stored in
+cpu_irq_time.hardirq_time and cpu_irq_time.softirq_time. Once the
+accumulated amount reaches a new jiffy, this one gets accounted to the
+kcpustat.
+
+This was necessary when kcpustat was stored in cputime_t, which could at
+worst have jiffies granularity. But now kcpustat is stored in nsecs
+so this whole discretization game with temporary irqtime storage has
+become unnecessary.
+
+We can now directly account the irqtime to the kcpustat.
+
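+In effect the hot path reduces to the following (condensed from the
+kernel/sched/cputime.c hunk below):
+
+	u64_stats_update_begin(&irqtime->sync);
+	if (hardirq_count()) {
+		cpustat[CPUTIME_IRQ] += delta;		/* nsecs, no buffering */
+		irqtime->tick_delta += delta;
+	} else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
+		cpustat[CPUTIME_SOFTIRQ] += delta;
+		irqtime->tick_delta += delta;
+	}
+	u64_stats_update_end(&irqtime->sync);
+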
+Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Stanislaw Gruszka <sgruszka@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Wanpeng Li <wanpeng.li@hotmail.com>
+Link: http://lkml.kernel.org/r/1485832191-26889-17-git-send-email-fweisbec@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Ivan Delalande <colona@arista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/cputime.c |   50 ++++++++++++++++---------------------------------
+ kernel/sched/sched.h   |    7 +++---
+ 2 files changed, 21 insertions(+), 36 deletions(-)
+
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -44,6 +44,7 @@ void disable_sched_clock_irqtime(void)
+ void irqtime_account_irq(struct task_struct *curr)
+ {
+       struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
++      u64 *cpustat = kcpustat_this_cpu->cpustat;
+       s64 delta;
+       int cpu;
+@@ -61,49 +62,35 @@ void irqtime_account_irq(struct task_str
+        * in that case, so as not to confuse scheduler with a special task
+        * that do not consume any time, but still wants to run.
+        */
+-      if (hardirq_count())
+-              irqtime->hardirq_time += delta;
+-      else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+-              irqtime->softirq_time += delta;
++      if (hardirq_count()) {
++              cpustat[CPUTIME_IRQ] += delta;
++              irqtime->tick_delta += delta;
++      } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
++              cpustat[CPUTIME_SOFTIRQ] += delta;
++              irqtime->tick_delta += delta;
++      }
+       u64_stats_update_end(&irqtime->sync);
+ }
+ EXPORT_SYMBOL_GPL(irqtime_account_irq);
+-static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
++static cputime_t irqtime_tick_accounted(cputime_t maxtime)
+ {
+-      u64 *cpustat = kcpustat_this_cpu->cpustat;
+-      cputime_t irq_cputime;
+-
+-      irq_cputime = nsecs_to_cputime64(irqtime - cpustat[idx]);
+-      irq_cputime = min(irq_cputime, maxtime);
+-      cpustat[idx] += cputime_to_nsecs(irq_cputime);
+-
+-      return irq_cputime;
+-}
++      struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
++      cputime_t delta;
+-static cputime_t irqtime_account_hi_update(cputime_t maxtime)
+-{
+-      return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
+-                                    CPUTIME_IRQ, maxtime);
+-}
++      delta = nsecs_to_cputime(irqtime->tick_delta);
++      delta = min(delta, maxtime);
++      irqtime->tick_delta -= cputime_to_nsecs(delta);
+-static cputime_t irqtime_account_si_update(cputime_t maxtime)
+-{
+-      return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
+-                                    CPUTIME_SOFTIRQ, maxtime);
++      return delta;
+ }
+ #else /* CONFIG_IRQ_TIME_ACCOUNTING */
+ #define sched_clock_irqtime   (0)
+-static cputime_t irqtime_account_hi_update(cputime_t dummy)
+-{
+-      return 0;
+-}
+-
+-static cputime_t irqtime_account_si_update(cputime_t dummy)
++static cputime_t irqtime_tick_accounted(cputime_t dummy)
+ {
+       return 0;
+ }
+@@ -290,10 +277,7 @@ static inline cputime_t account_other_ti
+       accounted = steal_account_process_time(max);
+       if (accounted < max)
+-              accounted += irqtime_account_hi_update(max - accounted);
+-
+-      if (accounted < max)
+-              accounted += irqtime_account_si_update(max - accounted);
++              accounted += irqtime_tick_accounted(max - accounted);
+       return accounted;
+ }
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -4,6 +4,7 @@
+ #include <linux/sched/rt.h>
+ #include <linux/u64_stats_sync.h>
+ #include <linux/sched/deadline.h>
++#include <linux/kernel_stat.h>
+ #include <linux/binfmts.h>
+ #include <linux/mutex.h>
+ #include <linux/spinlock.h>
+@@ -1742,8 +1743,7 @@ static inline void nohz_balance_exit_idl
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ struct irqtime {
+-      u64                     hardirq_time;
+-      u64                     softirq_time;
++      u64                     tick_delta;
+       u64                     irq_start_time;
+       struct u64_stats_sync   sync;
+ };
+@@ -1753,12 +1753,13 @@ DECLARE_PER_CPU(struct irqtime, cpu_irqt
+ static inline u64 irq_time_read(int cpu)
+ {
+       struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
++      u64 *cpustat = kcpustat_cpu(cpu).cpustat;
+       unsigned int seq;
+       u64 total;
+       do {
+               seq = __u64_stats_fetch_begin(&irqtime->sync);
+-              total = irqtime->softirq_time + irqtime->hardirq_time;
++              total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
+       } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+       return total;
diff --git a/queue-4.9/series b/queue-4.9/series
index 748932d1362231274067fa51df935778947c1e8f..f8416299fda34d854085879b600e26d576ad1fd3 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -27,3 +27,9 @@ arc-build-get-rid-of-toolchain-check.patch
 arc-build-don-t-set-cross_compile-in-arch-s-makefile.patch
 hid-quirks-fix-support-for-apple-magic-keyboards.patch
 usb-gadget-serial-fix-oops-when-data-rx-d-after-close.patch
+sched-cputime-convert-kcpustat-to-nsecs.patch
+macintosh-rack-meter-convert-cputime64_t-use-to-u64.patch
+sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch
+sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch
+ext4-avoid-running-out-of-journal-credits-when-appending-to-an-inline-file.patch
+hv-properly-delay-kvp-packets-when-negotiation-is-in-progress.patch