From 67d1df2f6a8bfb7e8801a604b7ca7ef2d8d46efd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Oct 2018 17:07:05 +0200 Subject: [PATCH] 4.9-stable patches added patches: ath10k-fix-scan-crash-due-to-incorrect-length-calculation.patch ebtables-arpreply-add-the-standard-target-sanity-check.patch revert-perf-sync-up-x86-...-cpufeatures.h.patch sched-cputime-convert-kcpustat-to-nsecs.patch sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch x86-fpu-finish-excising-eagerfpu.patch x86-fpu-remove-struct-fpu-counter.patch x86-fpu-remove-use_eager_fpu.patch --- ...-due-to-incorrect-length-calculation.patch | 61 +++ ...add-the-standard-target-sanity-check.patch | 55 +++ ...t-perf-sync-up-x86-...-cpufeatures.h.patch | 30 ++ ...ed-cputime-convert-kcpustat-to-nsecs.patch | 368 ++++++++++++++++++ ...ftirqd-cputime-accounting-regression.patch | 140 +++++++ ...kcpustat-directly-on-irqtime-account.patch | 167 ++++++++ queue-4.9/series | 9 + .../x86-fpu-finish-excising-eagerfpu.patch | 118 ++++++ .../x86-fpu-remove-struct-fpu-counter.patch | 133 +++++++ queue-4.9/x86-fpu-remove-use_eager_fpu.patch | 338 ++++++++++++++++ 10 files changed, 1419 insertions(+) create mode 100644 queue-4.9/ath10k-fix-scan-crash-due-to-incorrect-length-calculation.patch create mode 100644 queue-4.9/ebtables-arpreply-add-the-standard-target-sanity-check.patch create mode 100644 queue-4.9/revert-perf-sync-up-x86-...-cpufeatures.h.patch create mode 100644 queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch create mode 100644 queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch create mode 100644 queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch create mode 100644 queue-4.9/x86-fpu-finish-excising-eagerfpu.patch create mode 100644 queue-4.9/x86-fpu-remove-struct-fpu-counter.patch create mode 100644 queue-4.9/x86-fpu-remove-use_eager_fpu.patch diff --git 
a/queue-4.9/ath10k-fix-scan-crash-due-to-incorrect-length-calculation.patch b/queue-4.9/ath10k-fix-scan-crash-due-to-incorrect-length-calculation.patch new file mode 100644 index 00000000000..c9dbb7d4004 --- /dev/null +++ b/queue-4.9/ath10k-fix-scan-crash-due-to-incorrect-length-calculation.patch @@ -0,0 +1,61 @@ +From c8291988806407e02a01b4b15b4504eafbcc04e0 Mon Sep 17 00:00:00 2001 +From: Zhi Chen +Date: Mon, 18 Jun 2018 17:00:39 +0300 +Subject: ath10k: fix scan crash due to incorrect length calculation + +From: Zhi Chen + +commit c8291988806407e02a01b4b15b4504eafbcc04e0 upstream. + +Length of WMI scan message was not calculated correctly. The allocated +buffer was smaller than what we expected. So WMI message corrupted +skb_info, which is at the end of skb->data. This fix takes TLV header +into account even if the element is zero-length. + +Crash log: + [49.629986] Unhandled kernel unaligned access[#1]: + [49.634932] CPU: 0 PID: 1176 Comm: logd Not tainted 4.4.60 #180 + [49.641040] task: 83051460 ti: 8329c000 task.ti: 8329c000 + [49.646608] $ 0 : 00000000 00000001 80984a80 00000000 + [49.652038] $ 4 : 45259e89 8046d484 8046df30 8024ba70 + [49.657468] $ 8 : 00000000 804cc4c0 00000001 20306320 + [49.662898] $12 : 33322037 000110f2 00000000 31203930 + [49.668327] $16 : 82792b40 80984a80 00000001 804207fc + [49.673757] $20 : 00000000 0000012c 00000040 80470000 + [49.679186] $24 : 00000000 8024af7c + [49.684617] $28 : 8329c000 8329db88 00000001 802c58d0 + [49.690046] Hi : 00000000 + [49.693022] Lo : 453c0000 + [49.696013] epc : 800efae4 put_page+0x0/0x58 + [49.700615] ra : 802c58d0 skb_release_data+0x148/0x1d4 + [49.706184] Status: 1000fc03 KERNEL EXL IE + [49.710531] Cause : 00800010 (ExcCode 04) + [49.714669] BadVA : 45259e89 + [49.717644] PrId : 00019374 (MIPS 24Kc) + +Signed-off-by: Zhi Chen +Signed-off-by: Kalle Valo +Cc: Brian Norris +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/ath/ath10k/wmi-tlv.c | 8 ++++---- + 1 file changed, 4 
insertions(+), 4 deletions(-) + +--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c ++++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c +@@ -1486,10 +1486,10 @@ ath10k_wmi_tlv_op_gen_start_scan(struct + bssid_len = arg->n_bssids * sizeof(struct wmi_mac_addr); + ie_len = roundup(arg->ie_len, 4); + len = (sizeof(*tlv) + sizeof(*cmd)) + +- (arg->n_channels ? sizeof(*tlv) + chan_len : 0) + +- (arg->n_ssids ? sizeof(*tlv) + ssid_len : 0) + +- (arg->n_bssids ? sizeof(*tlv) + bssid_len : 0) + +- (arg->ie_len ? sizeof(*tlv) + ie_len : 0); ++ sizeof(*tlv) + chan_len + ++ sizeof(*tlv) + ssid_len + ++ sizeof(*tlv) + bssid_len + ++ sizeof(*tlv) + ie_len; + + skb = ath10k_wmi_alloc_skb(ar, len); + if (!skb) diff --git a/queue-4.9/ebtables-arpreply-add-the-standard-target-sanity-check.patch b/queue-4.9/ebtables-arpreply-add-the-standard-target-sanity-check.patch new file mode 100644 index 00000000000..3b8c5e7af45 --- /dev/null +++ b/queue-4.9/ebtables-arpreply-add-the-standard-target-sanity-check.patch @@ -0,0 +1,55 @@ +From c953d63548207a085abcb12a15fefc8a11ffdf0a Mon Sep 17 00:00:00 2001 +From: Gao Feng +Date: Tue, 16 May 2017 09:30:18 +0800 +Subject: ebtables: arpreply: Add the standard target sanity check + +From: Gao Feng + +commit c953d63548207a085abcb12a15fefc8a11ffdf0a upstream. + +The info->target comes from userspace and it would be used directly. +So we need to add the sanity check to make sure it is a valid standard +target, although the ebtables tool has already checked it. Kernel needs +to validate anything coming from userspace. + +If the target is set as an evil value, it would break the ebtables +and cause a panic. Because the non-standard target is treated as one +offset. + +Now add one helper function ebt_invalid_target, and we would replace +the macro INVALID_TARGET later. 
+ +Signed-off-by: Gao Feng +Signed-off-by: Pablo Neira Ayuso +Cc: Loic +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/netfilter_bridge/ebtables.h | 5 +++++ + net/bridge/netfilter/ebt_arpreply.c | 3 +++ + 2 files changed, 8 insertions(+) + +--- a/include/linux/netfilter_bridge/ebtables.h ++++ b/include/linux/netfilter_bridge/ebtables.h +@@ -123,4 +123,9 @@ extern unsigned int ebt_do_table(struct + /* True if the target is not a standard target */ + #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) + ++static inline bool ebt_invalid_target(int target) ++{ ++ return (target < -NUM_STANDARD_TARGETS || target >= 0); ++} ++ + #endif +--- a/net/bridge/netfilter/ebt_arpreply.c ++++ b/net/bridge/netfilter/ebt_arpreply.c +@@ -67,6 +67,9 @@ static int ebt_arpreply_tg_check(const s + if (e->ethproto != htons(ETH_P_ARP) || + e->invflags & EBT_IPROTO) + return -EINVAL; ++ if (ebt_invalid_target(info->target)) ++ return -EINVAL; ++ + return 0; + } + diff --git a/queue-4.9/revert-perf-sync-up-x86-...-cpufeatures.h.patch b/queue-4.9/revert-perf-sync-up-x86-...-cpufeatures.h.patch new file mode 100644 index 00000000000..eca0287c669 --- /dev/null +++ b/queue-4.9/revert-perf-sync-up-x86-...-cpufeatures.h.patch @@ -0,0 +1,30 @@ +From 974b7178c3d9e118c17aef103ee43f9b324b1d6e Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Thu, 11 Oct 2018 16:22:49 +0200 +Subject: Revert "perf: sync up x86/.../cpufeatures.h" + +From: Greg Kroah-Hartman + +This reverts commit f09a7b0eead737b33d940bf5c2509ca1441e9590 + +Daniel writes: + Because the modification in this patch actually belongs to + e63650840e8b ("x86/fpu: Finish excising 'eagerfpu'") + +Reported-by: Daniel Sangorrin +Signed-off-by: Greg Kroah-Hartman +--- + tools/arch/x86/include/asm/cpufeatures.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -104,7 +104,7 @@ + #define 
X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ + #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ + #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +-/* free, was #define X86_FEATURE_EAGER_FPU ( 3*32+29) * "eagerfpu" Non lazy FPU restore */ ++#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ + #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ diff --git a/queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch b/queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch new file mode 100644 index 00000000000..a5b9a1b8381 --- /dev/null +++ b/queue-4.9/sched-cputime-convert-kcpustat-to-nsecs.patch @@ -0,0 +1,368 @@ +From 7fb1327ee9b92fca27662f9b9d60c7c3376d6c69 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Tue, 31 Jan 2017 04:09:19 +0100 +Subject: sched/cputime: Convert kcpustat to nsecs + +From: Frederic Weisbecker + +commit 7fb1327ee9b92fca27662f9b9d60c7c3376d6c69 upstream. + +Kernel CPU stats are stored in cputime_t which is an architecture +defined type, and hence a bit opaque and requiring accessors and mutators +for any operation. + +Converting them to nsecs simplifies the code and is one step toward +the removal of cputime_t in the core code. 
+ +Signed-off-by: Frederic Weisbecker +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Michael Ellerman +Cc: Heiko Carstens +Cc: Martin Schwidefsky +Cc: Tony Luck +Cc: Fenghua Yu +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Stanislaw Gruszka +Cc: Wanpeng Li +Link: http://lkml.kernel.org/r/1485832191-26889-4-git-send-email-fweisbec@gmail.com +Signed-off-by: Ingo Molnar +[colona: minor conflict as 527b0a76f41d ("sched/cpuacct: Avoid %lld seq_printf +warning") is missing from v4.9] +Signed-off-by: Ivan Delalande +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/appldata/appldata_os.c | 16 ++++---- + drivers/cpufreq/cpufreq.c | 6 +-- + drivers/cpufreq/cpufreq_governor.c | 2 - + drivers/cpufreq/cpufreq_stats.c | 1 + drivers/macintosh/rack-meter.c | 2 - + fs/proc/stat.c | 68 ++++++++++++++++++------------------- + fs/proc/uptime.c | 7 +-- + kernel/sched/cpuacct.c | 2 - + kernel/sched/cputime.c | 22 +++++------ + 9 files changed, 61 insertions(+), 65 deletions(-) + +--- a/arch/s390/appldata/appldata_os.c ++++ b/arch/s390/appldata/appldata_os.c +@@ -113,21 +113,21 @@ static void appldata_get_os_data(void *d + j = 0; + for_each_online_cpu(i) { + os_data->os_cpu[j].per_cpu_user = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); + os_data->os_cpu[j].per_cpu_nice = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); + os_data->os_cpu[j].per_cpu_system = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); + os_data->os_cpu[j].per_cpu_idle = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); + os_data->os_cpu[j].per_cpu_irq = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); + os_data->os_cpu[j].per_cpu_softirq = +- 
cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); + os_data->os_cpu[j].per_cpu_iowait = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); + os_data->os_cpu[j].per_cpu_steal = +- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); ++ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); + os_data->os_cpu[j].cpu_id = i; + j++; + } +--- a/drivers/cpufreq/cpufreq.c ++++ b/drivers/cpufreq/cpufreq.c +@@ -132,7 +132,7 @@ static inline u64 get_cpu_idle_time_jiff + u64 cur_wall_time; + u64 busy_time; + +- cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); ++ cur_wall_time = jiffies64_to_nsecs(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; +@@ -143,9 +143,9 @@ static inline u64 get_cpu_idle_time_jiff + + idle_time = cur_wall_time - busy_time; + if (wall) +- *wall = cputime_to_usecs(cur_wall_time); ++ *wall = div_u64(cur_wall_time, NSEC_PER_USEC); + +- return cputime_to_usecs(idle_time); ++ return div_u64(idle_time, NSEC_PER_USEC); + } + + u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy) +--- a/drivers/cpufreq/cpufreq_governor.c ++++ b/drivers/cpufreq/cpufreq_governor.c +@@ -152,7 +152,7 @@ unsigned int dbs_update(struct cpufreq_p + if (ignore_nice) { + u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + +- idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice); ++ idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC); + j_cdbs->prev_cpu_nice = cur_nice; + } + +--- a/drivers/cpufreq/cpufreq_stats.c ++++ b/drivers/cpufreq/cpufreq_stats.c +@@ -13,7 +13,6 @@ + #include + #include + #include +-#include + + static DEFINE_SPINLOCK(cpufreq_stats_lock); + +--- a/drivers/macintosh/rack-meter.c ++++ b/drivers/macintosh/rack-meter.c +@@ -91,7 +91,7 @@ static inline cputime64_t get_cpu_idle_t 
+ if (rackmeter_ignore_nice) + retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + +- return retval; ++ return nsecs_to_cputime64(retval); + } + + static void rackmeter_setup_i2s(struct rackmeter *rm) +--- a/fs/proc/stat.c ++++ b/fs/proc/stat.c +@@ -21,23 +21,23 @@ + + #ifdef arch_idle_time + +-static cputime64_t get_idle_time(int cpu) ++static u64 get_idle_time(int cpu) + { +- cputime64_t idle; ++ u64 idle; + + idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; + if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) +- idle += arch_idle_time(cpu); ++ idle += cputime_to_nsecs(arch_idle_time(cpu)); + return idle; + } + +-static cputime64_t get_iowait_time(int cpu) ++static u64 get_iowait_time(int cpu) + { +- cputime64_t iowait; ++ u64 iowait; + + iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; + if (cpu_online(cpu) && nr_iowait_cpu(cpu)) +- iowait += arch_idle_time(cpu); ++ iowait += cputime_to_nsecs(arch_idle_time(cpu)); + return iowait; + } + +@@ -45,32 +45,32 @@ static cputime64_t get_iowait_time(int c + + static u64 get_idle_time(int cpu) + { +- u64 idle, idle_time = -1ULL; ++ u64 idle, idle_usecs = -1ULL; + + if (cpu_online(cpu)) +- idle_time = get_cpu_idle_time_us(cpu, NULL); ++ idle_usecs = get_cpu_idle_time_us(cpu, NULL); + +- if (idle_time == -1ULL) ++ if (idle_usecs == -1ULL) + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ + idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; + else +- idle = usecs_to_cputime64(idle_time); ++ idle = idle_usecs * NSEC_PER_USEC; + + return idle; + } + + static u64 get_iowait_time(int cpu) + { +- u64 iowait, iowait_time = -1ULL; ++ u64 iowait, iowait_usecs = -1ULL; + + if (cpu_online(cpu)) +- iowait_time = get_cpu_iowait_time_us(cpu, NULL); ++ iowait_usecs = get_cpu_iowait_time_us(cpu, NULL); + +- if (iowait_time == -1ULL) ++ if (iowait_usecs == -1ULL) + /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ + iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; + else +- iowait = usecs_to_cputime64(iowait_time); ++ 
iowait = iowait_usecs * NSEC_PER_USEC; + + return iowait; + } +@@ -115,16 +115,16 @@ static int show_stat(struct seq_file *p, + } + sum += arch_irq_stat(); + +- seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); ++ seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); + seq_putc(p, '\n'); + + for_each_online_cpu(i) { +@@ -140,16 +140,16 @@ static int show_stat(struct seq_file *p, + guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; + guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; + seq_printf(p, "cpu%d", i); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); +- seq_put_decimal_ull(p, " ", 
cputime64_to_clock_t(softirq)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); +- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(user)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); ++ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); + seq_putc(p, '\n'); + } + seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); +--- a/fs/proc/uptime.c ++++ b/fs/proc/uptime.c +@@ -5,23 +5,20 @@ + #include + #include + #include +-#include + + static int uptime_proc_show(struct seq_file *m, void *v) + { + struct timespec uptime; + struct timespec idle; +- u64 idletime; + u64 nsec; + u32 rem; + int i; + +- idletime = 0; ++ nsec = 0; + for_each_possible_cpu(i) +- idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; ++ nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; + + get_monotonic_boottime(&uptime); +- nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; + idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + idle.tv_nsec = rem; + seq_printf(m, "%lu.%02lu %lu.%02lu\n", +--- a/kernel/sched/cpuacct.c ++++ b/kernel/sched/cpuacct.c +@@ -297,7 +297,7 @@ static int cpuacct_stats_show(struct seq + for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { + seq_printf(sf, "%s %lld\n", + cpuacct_stat_desc[stat], +- cputime64_to_clock_t(val[stat])); ++ nsec_to_clock_t(val[stat])); + } + + return 0; +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -75,9 +75,9 @@ static cputime_t irqtime_account_update( + u64 
*cpustat = kcpustat_this_cpu->cpustat; + cputime_t irq_cputime; + +- irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx]; ++ irq_cputime = nsecs_to_cputime64(irqtime - cpustat[idx]); + irq_cputime = min(irq_cputime, maxtime); +- cpustat[idx] += irq_cputime; ++ cpustat[idx] += cputime_to_nsecs(irq_cputime); + + return irq_cputime; + } +@@ -143,7 +143,7 @@ void account_user_time(struct task_struc + index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; + + /* Add user time to cpustat. */ +- task_group_account_field(p, index, (__force u64) cputime); ++ task_group_account_field(p, index, cputime_to_nsecs(cputime)); + + /* Account for user time used */ + acct_account_cputime(p); +@@ -168,11 +168,11 @@ static void account_guest_time(struct ta + + /* Add guest time to cpustat. */ + if (task_nice(p) > 0) { +- cpustat[CPUTIME_NICE] += (__force u64) cputime; +- cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; ++ cpustat[CPUTIME_NICE] += cputime_to_nsecs(cputime); ++ cpustat[CPUTIME_GUEST_NICE] += cputime_to_nsecs(cputime); + } else { +- cpustat[CPUTIME_USER] += (__force u64) cputime; +- cpustat[CPUTIME_GUEST] += (__force u64) cputime; ++ cpustat[CPUTIME_USER] += cputime_to_nsecs(cputime); ++ cpustat[CPUTIME_GUEST] += cputime_to_nsecs(cputime); + } + } + +@@ -193,7 +193,7 @@ void __account_system_time(struct task_s + account_group_system_time(p, cputime); + + /* Add system time to cpustat. 
*/ +- task_group_account_field(p, index, (__force u64) cputime); ++ task_group_account_field(p, index, cputime_to_nsecs(cputime)); + + /* Account for system time used */ + acct_account_cputime(p); +@@ -234,7 +234,7 @@ void account_steal_time(cputime_t cputim + { + u64 *cpustat = kcpustat_this_cpu->cpustat; + +- cpustat[CPUTIME_STEAL] += (__force u64) cputime; ++ cpustat[CPUTIME_STEAL] += cputime_to_nsecs(cputime); + } + + /* +@@ -247,9 +247,9 @@ void account_idle_time(cputime_t cputime + struct rq *rq = this_rq(); + + if (atomic_read(&rq->nr_iowait) > 0) +- cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; ++ cpustat[CPUTIME_IOWAIT] += cputime_to_nsecs(cputime); + else +- cpustat[CPUTIME_IDLE] += (__force u64) cputime; ++ cpustat[CPUTIME_IDLE] += cputime_to_nsecs(cputime); + } + + /* diff --git a/queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch b/queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch new file mode 100644 index 00000000000..4a621951a0d --- /dev/null +++ b/queue-4.9/sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch @@ -0,0 +1,140 @@ +From 25e2d8c1b9e327ed260edd13169cc22bc7a78bc6 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Tue, 25 Apr 2017 16:10:48 +0200 +Subject: sched/cputime: Fix ksoftirqd cputime accounting regression + +From: Frederic Weisbecker + +commit 25e2d8c1b9e327ed260edd13169cc22bc7a78bc6 upstream. + +irq_time_read() returns the irqtime minus the ksoftirqd time. This +is necessary because irq_time_read() is used to substract the IRQ time +from the sum_exec_runtime of a task. If we were to include the softirq +time of ksoftirqd, this task would substract its own CPU time everytime +it updates ksoftirqd->sum_exec_runtime which would therefore never +progress. + +But this behaviour got broken by: + + a499a5a14db ("sched/cputime: Increment kcpustat directly on irqtime account") + +... which now includes ksoftirqd softirq time in the time returned by +irq_time_read(). 
+ +This has resulted in wrong ksoftirqd cputime reported to userspace +through /proc/stat and thus "top" not showing ksoftirqd when it should +after intense networking load. + +ksoftirqd->stime happens to be correct but it gets scaled down by +sum_exec_runtime through task_cputime_adjusted(). + +To fix this, just account the strict IRQ time in a separate counter and +use it to report the IRQ time. + +Reported-and-tested-by: Jesper Dangaard Brouer +Signed-off-by: Frederic Weisbecker +Reviewed-by: Rik van Riel +Acked-by: Jesper Dangaard Brouer +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Stanislaw Gruszka +Cc: Thomas Gleixner +Cc: Wanpeng Li +Link: http://lkml.kernel.org/r/1493129448-5356-1-git-send-email-fweisbec@gmail.com +Signed-off-by: Ingo Molnar +Signed-off-by: Ivan Delalande +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/cputime.c | 27 ++++++++++++++++----------- + kernel/sched/sched.h | 9 +++++++-- + 2 files changed, 23 insertions(+), 13 deletions(-) + +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -37,6 +37,18 @@ void disable_sched_clock_irqtime(void) + sched_clock_irqtime = 0; + } + ++static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, ++ enum cpu_usage_stat idx) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ ++ u64_stats_update_begin(&irqtime->sync); ++ cpustat[idx] += delta; ++ irqtime->total += delta; ++ irqtime->tick_delta += delta; ++ u64_stats_update_end(&irqtime->sync); ++} ++ + /* + * Called before incrementing preempt_count on {soft,}irq_enter + * and before decrementing preempt_count on {soft,}irq_exit. 
+@@ -44,7 +56,6 @@ void disable_sched_clock_irqtime(void) + void irqtime_account_irq(struct task_struct *curr) + { + struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); +- u64 *cpustat = kcpustat_this_cpu->cpustat; + s64 delta; + int cpu; + +@@ -55,22 +66,16 @@ void irqtime_account_irq(struct task_str + delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; + irqtime->irq_start_time += delta; + +- u64_stats_update_begin(&irqtime->sync); + /* + * We do not account for softirq time from ksoftirqd here. + * We want to continue accounting softirq time to ksoftirqd thread + * in that case, so as not to confuse scheduler with a special task + * that do not consume any time, but still wants to run. + */ +- if (hardirq_count()) { +- cpustat[CPUTIME_IRQ] += delta; +- irqtime->tick_delta += delta; +- } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) { +- cpustat[CPUTIME_SOFTIRQ] += delta; +- irqtime->tick_delta += delta; +- } +- +- u64_stats_update_end(&irqtime->sync); ++ if (hardirq_count()) ++ irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); ++ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) ++ irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); + } + EXPORT_SYMBOL_GPL(irqtime_account_irq); + +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1743,6 +1743,7 @@ static inline void nohz_balance_exit_idl + + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + struct irqtime { ++ u64 total; + u64 tick_delta; + u64 irq_start_time; + struct u64_stats_sync sync; +@@ -1750,16 +1751,20 @@ struct irqtime { + + DECLARE_PER_CPU(struct irqtime, cpu_irqtime); + ++/* ++ * Returns the irqtime minus the softirq time computed by ksoftirqd. ++ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime ++ * and never move forward. 
++ */ +static inline u64 irq_time_read(int cpu) + { + struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); +- u64 *cpustat = kcpustat_cpu(cpu).cpustat; + unsigned int seq; + u64 total; + + do { + seq = __u64_stats_fetch_begin(&irqtime->sync); +- total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ]; ++ total = irqtime->total; + } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); + + return total; diff --git a/queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch b/queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch new file mode 100644 index 00000000000..e91ec63be39 --- /dev/null +++ b/queue-4.9/sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch @@ -0,0 +1,167 @@ +From a499a5a14dbd1d0315a96fc62a8798059325e9e6 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Tue, 31 Jan 2017 04:09:32 +0100 +Subject: sched/cputime: Increment kcpustat directly on irqtime account + +From: Frederic Weisbecker + +commit a499a5a14dbd1d0315a96fc62a8798059325e9e6 upstream. + +The irqtime is accounted in nsecs and stored in +cpu_irq_time.hardirq_time and cpu_irq_time.softirq_time. Once the +accumulated amount reaches a new jiffy, this one gets accounted to the +kcpustat. + +This was necessary when kcpustat was stored in cputime_t, which could at +worst have jiffies granularity. But now kcpustat is stored in nsecs +so this whole discretization game with temporary irqtime storage has +become unnecessary. + +We can now directly account the irqtime to the kcpustat. 
+ +Signed-off-by: Frederic Weisbecker +Cc: Benjamin Herrenschmidt +Cc: Fenghua Yu +Cc: Heiko Carstens +Cc: Linus Torvalds +Cc: Martin Schwidefsky +Cc: Michael Ellerman +Cc: Paul Mackerras +Cc: Peter Zijlstra +Cc: Rik van Riel +Cc: Stanislaw Gruszka +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: Wanpeng Li +Link: http://lkml.kernel.org/r/1485832191-26889-17-git-send-email-fweisbec@gmail.com +Signed-off-by: Ingo Molnar +Signed-off-by: Ivan Delalande +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/cputime.c | 50 ++++++++++++++++--------------------------------- + kernel/sched/sched.h | 7 +++--- + 2 files changed, 21 insertions(+), 36 deletions(-) + +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -44,6 +44,7 @@ void disable_sched_clock_irqtime(void) + void irqtime_account_irq(struct task_struct *curr) + { + struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); ++ u64 *cpustat = kcpustat_this_cpu->cpustat; + s64 delta; + int cpu; + +@@ -61,49 +62,35 @@ void irqtime_account_irq(struct task_str + * in that case, so as not to confuse scheduler with a special task + * that do not consume any time, but still wants to run. 
+ */ +- if (hardirq_count()) +- irqtime->hardirq_time += delta; +- else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) +- irqtime->softirq_time += delta; ++ if (hardirq_count()) { ++ cpustat[CPUTIME_IRQ] += delta; ++ irqtime->tick_delta += delta; ++ } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) { ++ cpustat[CPUTIME_SOFTIRQ] += delta; ++ irqtime->tick_delta += delta; ++ } + + u64_stats_update_end(&irqtime->sync); + } + EXPORT_SYMBOL_GPL(irqtime_account_irq); + +-static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime) ++static cputime_t irqtime_tick_accounted(cputime_t maxtime) + { +- u64 *cpustat = kcpustat_this_cpu->cpustat; +- cputime_t irq_cputime; +- +- irq_cputime = nsecs_to_cputime64(irqtime - cpustat[idx]); +- irq_cputime = min(irq_cputime, maxtime); +- cpustat[idx] += cputime_to_nsecs(irq_cputime); +- +- return irq_cputime; +-} ++ struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); ++ cputime_t delta; + +-static cputime_t irqtime_account_hi_update(cputime_t maxtime) +-{ +- return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time), +- CPUTIME_IRQ, maxtime); +-} ++ delta = nsecs_to_cputime(irqtime->tick_delta); ++ delta = min(delta, maxtime); ++ irqtime->tick_delta -= cputime_to_nsecs(delta); + +-static cputime_t irqtime_account_si_update(cputime_t maxtime) +-{ +- return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time), +- CPUTIME_SOFTIRQ, maxtime); ++ return delta; + } + + #else /* CONFIG_IRQ_TIME_ACCOUNTING */ + + #define sched_clock_irqtime (0) + +-static cputime_t irqtime_account_hi_update(cputime_t dummy) +-{ +- return 0; +-} +- +-static cputime_t irqtime_account_si_update(cputime_t dummy) ++static cputime_t irqtime_tick_accounted(cputime_t dummy) + { + return 0; + } +@@ -290,10 +277,7 @@ static inline cputime_t account_other_ti + accounted = steal_account_process_time(max); + + if (accounted < max) +- accounted += irqtime_account_hi_update(max - accounted); +- +- if 
(accounted < max) +- accounted += irqtime_account_si_update(max - accounted); ++ accounted += irqtime_tick_accounted(max - accounted); + + return accounted; + } +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1742,8 +1743,7 @@ static inline void nohz_balance_exit_idl + + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + struct irqtime { +- u64 hardirq_time; +- u64 softirq_time; ++ u64 tick_delta; + u64 irq_start_time; + struct u64_stats_sync sync; + }; +@@ -1753,12 +1753,13 @@ DECLARE_PER_CPU(struct irqtime, cpu_irqt + static inline u64 irq_time_read(int cpu) + { + struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); ++ u64 *cpustat = kcpustat_cpu(cpu).cpustat; + unsigned int seq; + u64 total; + + do { + seq = __u64_stats_fetch_begin(&irqtime->sync); +- total = irqtime->softirq_time + irqtime->hardirq_time; ++ total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ]; + } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); + + return total; diff --git a/queue-4.9/series b/queue-4.9/series index 288984f54d6..535785f52b7 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -24,3 +24,12 @@ x86-mm-expand-static-page-table-for-fixmap-space.patch f2fs-fix-invalid-memory-access.patch ucma-fix-a-use-after-free-in-ucma_resolve_ip.patch ubifs-check-for-name-being-null-while-mounting.patch +sched-cputime-convert-kcpustat-to-nsecs.patch +sched-cputime-increment-kcpustat-directly-on-irqtime-account.patch +sched-cputime-fix-ksoftirqd-cputime-accounting-regression.patch +ath10k-fix-scan-crash-due-to-incorrect-length-calculation.patch +ebtables-arpreply-add-the-standard-target-sanity-check.patch +x86-fpu-remove-use_eager_fpu.patch +x86-fpu-remove-struct-fpu-counter.patch +revert-perf-sync-up-x86-...-cpufeatures.h.patch +x86-fpu-finish-excising-eagerfpu.patch diff --git a/queue-4.9/x86-fpu-finish-excising-eagerfpu.patch b/queue-4.9/x86-fpu-finish-excising-eagerfpu.patch new file mode 
100644 index 00000000000..804bd890f55 --- /dev/null +++ b/queue-4.9/x86-fpu-finish-excising-eagerfpu.patch @@ -0,0 +1,118 @@ +From e63650840e8b053aa09ad934877e87e9941ed135 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Mon, 17 Oct 2016 14:40:11 -0700 +Subject: x86/fpu: Finish excising 'eagerfpu' + +From: Andy Lutomirski + +commit e63650840e8b053aa09ad934877e87e9941ed135 upstream. + +Now that eagerfpu= is gone, remove it from the docs and some +comments. Also sync the changes to tools/. + +Signed-off-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: Fenghua Yu +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Quentin Casasnovas +Cc: Rik van Riel +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/cf430dd4481d41280e93ac6cf0def1007a67fc8e.1476740397.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Cc: Daniel Sangorrin +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/kernel-parameters.txt | 6 ------ + arch/x86/include/asm/cpufeatures.h | 1 - + arch/x86/include/asm/fpu/types.h | 23 ----------------------- + arch/x86/mm/pkeys.c | 3 +-- + tools/arch/x86/include/asm/cpufeatures.h | 1 - + 5 files changed, 1 insertion(+), 33 deletions(-) + +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -1084,12 +1084,6 @@ bytes respectively. Such letter suffixes + nopku [X86] Disable Memory Protection Keys CPU feature found + in some Intel CPUs. + +- eagerfpu= [X86] +- on enable eager fpu restore +- off disable eager fpu restore +- auto selects the default scheme, which automatically +- enables eagerfpu restore for xsaveopt. +- + module.async_probe [KNL] + Enable asynchronous probe on this module. 
+ +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -104,7 +104,6 @@ + #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ + #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ + #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +-/* free, was #define X86_FEATURE_EAGER_FPU ( 3*32+29) * "eagerfpu" Non lazy FPU restore */ + #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +--- a/arch/x86/include/asm/fpu/types.h ++++ b/arch/x86/include/asm/fpu/types.h +@@ -329,29 +329,6 @@ struct fpu { + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. +- * +- * After context switches there may be a (short) time period +- * during which the in-FPU hardware registers are unchanged +- * and still perfectly match this state, if the tasks +- * scheduled afterwards are not using the FPU. +- * +- * This is the 'lazy restore' window of optimization, which +- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. +- * +- * We detect whether a subsequent task uses the FPU via setting +- * CR0::TS to 1, which causes any FPU use to raise a #NM fault. +- * +- * During this window, if the task gets scheduled again, we +- * might be able to skip having to do a restore from this +- * memory buffer to the hardware registers - at the cost of +- * incurring the overhead of #NM fault traps. +- * +- * Note that on modern CPUs that support the XSAVEOPT (or other +- * optimized XSAVE instructions), we don't use #NM traps anymore, +- * as the hardware can track whether FPU registers need saving +- * or not. On such CPUs we activate the non-lazy ('eagerfpu') +- * logic, which unconditionally saves/restores all FPU state +- * across context switches. (if FPU state exists.) 
+ */ + union fpregs_state state; + /* +--- a/arch/x86/mm/pkeys.c ++++ b/arch/x86/mm/pkeys.c +@@ -142,8 +142,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | + * Called from the FPU code when creating a fresh set of FPU + * registers. This is called from a very specific context where + * we know the FPU regstiers are safe for use and we can use PKRU +- * directly. The fact that PKRU is only available when we are +- * using eagerfpu mode makes this possible. ++ * directly. + */ + void copy_init_pkru_to_fpregs(void) + { +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -104,7 +104,6 @@ + #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ + #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ + #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ + #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ diff --git a/queue-4.9/x86-fpu-remove-struct-fpu-counter.patch b/queue-4.9/x86-fpu-remove-struct-fpu-counter.patch new file mode 100644 index 00000000000..b8def0a77eb --- /dev/null +++ b/queue-4.9/x86-fpu-remove-struct-fpu-counter.patch @@ -0,0 +1,133 @@ +From 3913cc3507575273beb165a5e027a081913ed507 Mon Sep 17 00:00:00 2001 +From: Rik van Riel +Date: Tue, 4 Oct 2016 20:34:34 -0400 +Subject: x86/fpu: Remove struct fpu::counter + +From: Rik van Riel + +commit 3913cc3507575273beb165a5e027a081913ed507 upstream. + +With the lazy FPU code gone, we no longer use the counter field +in struct fpu for anything. Get rid of it. + +Signed-off-by: Rik van Riel +Reviewed-by: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: Fenghua Yu +Cc: H.
Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Quentin Casasnovas +Cc: Thomas Gleixner +Cc: pbonzini@redhat.com +Link: http://lkml.kernel.org/r/1475627678-20788-6-git-send-email-riel@redhat.com +Signed-off-by: Ingo Molnar +Cc: Daniel Sangorrin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/fpu/internal.h | 3 --- + arch/x86/include/asm/fpu/types.h | 11 ----------- + arch/x86/include/asm/trace/fpu.h | 5 +---- + arch/x86/kernel/fpu/core.c | 3 --- + 4 files changed, 1 insertion(+), 21 deletions(-) + +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -581,16 +581,13 @@ switch_fpu_prepare(struct fpu *old_fpu, + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { +- new_fpu->counter++; + __fpregs_activate(new_fpu); + trace_x86_fpu_regs_activated(new_fpu); + prefetch(&new_fpu->state); + } + } else { +- old_fpu->counter = 0; + old_fpu->last_cpu = -1; + if (fpu.preload) { +- new_fpu->counter++; + if (fpu_want_lazy_restore(new_fpu, cpu)) + fpu.preload = 0; + else +--- a/arch/x86/include/asm/fpu/types.h ++++ b/arch/x86/include/asm/fpu/types.h +@@ -322,17 +322,6 @@ struct fpu { + unsigned char fpregs_active; + + /* +- * @counter: +- * +- * This counter contains the number of consecutive context switches +- * during which the FPU stays used. If this is over a threshold, the +- * lazy FPU restore logic becomes eager, to save the trap overhead. 
+- * This is an unsigned char so that after 256 iterations the counter +- * wraps and the context switch behavior turns lazy again; this is to +- * deal with bursty apps that only use the FPU for a short time: +- */ +- unsigned char counter; +- /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore +--- a/arch/x86/include/asm/trace/fpu.h ++++ b/arch/x86/include/asm/trace/fpu.h +@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu, + __field(struct fpu *, fpu) + __field(bool, fpregs_active) + __field(bool, fpstate_active) +- __field(int, counter) + __field(u64, xfeatures) + __field(u64, xcomp_bv) + ), +@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu, + __entry->fpu = fpu; + __entry->fpregs_active = fpu->fpregs_active; + __entry->fpstate_active = fpu->fpstate_active; +- __entry->counter = fpu->counter; + if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { + __entry->xfeatures = fpu->state.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + } + ), +- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx", ++ TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", + __entry->fpu, + __entry->fpregs_active, + __entry->fpstate_active, +- __entry->counter, + __entry->xfeatures, + __entry->xcomp_bv + ) +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -225,7 +225,6 @@ EXPORT_SYMBOL_GPL(fpstate_init); + + int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) + { +- dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + +@@ -433,7 +432,6 @@ void fpu__restore(struct fpu *fpu) + trace_x86_fpu_before_restore(fpu); + fpregs_activate(fpu); + copy_kernel_to_fpregs(&fpu->state); +- fpu->counter++; + trace_x86_fpu_after_restore(fpu); + kernel_fpu_enable(); + } +@@ -451,7 +449,6 @@ EXPORT_SYMBOL_GPL(fpu__restore); + void fpu__drop(struct fpu *fpu) + { + preempt_disable(); +- fpu->counter = 0; + + if 
(fpu->fpregs_active) { + /* Ignore delayed exceptions from user space */ diff --git a/queue-4.9/x86-fpu-remove-use_eager_fpu.patch b/queue-4.9/x86-fpu-remove-use_eager_fpu.patch new file mode 100644 index 00000000000..942be516322 --- /dev/null +++ b/queue-4.9/x86-fpu-remove-use_eager_fpu.patch @@ -0,0 +1,338 @@ +From c592b57347069abfc0dcad3b3a302cf882602597 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Tue, 4 Oct 2016 20:34:33 -0400 +Subject: x86/fpu: Remove use_eager_fpu() + +From: Andy Lutomirski + +commit c592b57347069abfc0dcad3b3a302cf882602597 upstream. + +This removes all the obvious code paths that depend on lazy FPU mode. +It shouldn't change the generated code at all. + +Signed-off-by: Andy Lutomirski +Signed-off-by: Rik van Riel +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: Fenghua Yu +Cc: H. Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Quentin Casasnovas +Cc: Thomas Gleixner +Cc: pbonzini@redhat.com +Link: http://lkml.kernel.org/r/1475627678-20788-5-git-send-email-riel@redhat.com +Signed-off-by: Ingo Molnar +Signed-off-by: Daniel Sangorrin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/crypto/crc32c-intel_glue.c | 17 ++++------------- + arch/x86/include/asm/fpu/internal.h | 34 +--------------------------------- + arch/x86/kernel/fpu/core.c | 36 ++++-------------------------------- + arch/x86/kernel/fpu/signal.c | 8 +++----- + arch/x86/kernel/fpu/xstate.c | 9 --------- + arch/x86/kvm/cpuid.c | 4 +--- + arch/x86/kvm/x86.c | 10 ---------- + 7 files changed, 13 insertions(+), 105 deletions(-) + +--- a/arch/x86/crypto/crc32c-intel_glue.c ++++ b/arch/x86/crypto/crc32c-intel_glue.c +@@ -48,21 +48,13 @@ + #ifdef CONFIG_X86_64 + /* + * use carryless multiply version of crc32c when buffer +- * size is >= 512 (when eager fpu is enabled) or +- * >= 1024 (when eager fpu is disabled) to account ++ * size is >= 512 to account + * for fpu state save/restore overhead. 
+ */ +-#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512 +-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024 ++#define CRC32C_PCL_BREAKEVEN 512 + + asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, + unsigned int crc_init); +-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU; +-#define set_pcl_breakeven_point() \ +-do { \ +- if (!use_eager_fpu()) \ +- crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \ +-} while (0) + #endif /* CONFIG_X86_64 */ + + static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) +@@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struc + * use faster PCL version if datasize is large enough to + * overcome kernel fpu state save/restore overhead + */ +- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { ++ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + kernel_fpu_begin(); + *crcp = crc_pcl(data, len, *crcp); + kernel_fpu_end(); +@@ -197,7 +189,7 @@ static int crc32c_pcl_intel_update(struc + static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) + { +- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { ++ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + kernel_fpu_begin(); + *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); + kernel_fpu_end(); +@@ -257,7 +249,6 @@ static int __init crc32c_intel_mod_init( + alg.update = crc32c_pcl_intel_update; + alg.finup = crc32c_pcl_intel_finup; + alg.digest = crc32c_pcl_intel_digest; +- set_pcl_breakeven_point(); + } + #endif + return crypto_register_shash(&alg); +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_ + /* + * FPU related CPU feature flag helper routines: + */ +-static __always_inline __pure bool use_eager_fpu(void) +-{ +- return true; +-} +- + static __always_inline __pure bool use_xsaveopt(void) + { + return static_cpu_has(X86_FEATURE_XSAVEOPT); +@@ -501,24 +496,6 @@ static inline 
int fpu_want_lazy_restore( + } + + +-/* +- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' +- * idiom, which is then paired with the sw-flag (fpregs_active) later on: +- */ +- +-static inline void __fpregs_activate_hw(void) +-{ +- if (!use_eager_fpu()) +- clts(); +-} +- +-static inline void __fpregs_deactivate_hw(void) +-{ +- if (!use_eager_fpu()) +- stts(); +-} +- +-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ + static inline void __fpregs_deactivate(struct fpu *fpu) + { + WARN_ON_FPU(!fpu->fpregs_active); +@@ -528,7 +505,6 @@ static inline void __fpregs_deactivate(s + trace_x86_fpu_regs_deactivated(fpu); + } + +-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ + static inline void __fpregs_activate(struct fpu *fpu) + { + WARN_ON_FPU(fpu->fpregs_active); +@@ -554,22 +530,17 @@ static inline int fpregs_active(void) + } + + /* +- * Encapsulate the CR0.TS handling together with the +- * software flag. +- * + * These generally need preemption protection to work, + * do try to avoid using these on their own. + */ + static inline void fpregs_activate(struct fpu *fpu) + { +- __fpregs_activate_hw(); + __fpregs_activate(fpu); + } + + static inline void fpregs_deactivate(struct fpu *fpu) + { + __fpregs_deactivate(fpu); +- __fpregs_deactivate_hw(); + } + + /* +@@ -596,8 +567,7 @@ switch_fpu_prepare(struct fpu *old_fpu, + * or if the past 5 consecutive context-switches used math. 
+ */ + fpu.preload = static_cpu_has(X86_FEATURE_FPU) && +- new_fpu->fpstate_active && +- (use_eager_fpu() || new_fpu->counter > 5); ++ new_fpu->fpstate_active; + + if (old_fpu->fpregs_active) { + if (!copy_fpregs_to_fpstate(old_fpu)) +@@ -615,8 +585,6 @@ switch_fpu_prepare(struct fpu *old_fpu, + __fpregs_activate(new_fpu); + trace_x86_fpu_regs_activated(new_fpu); + prefetch(&new_fpu->state); +- } else { +- __fpregs_deactivate_hw(); + } + } else { + old_fpu->counter = 0; +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -59,27 +59,9 @@ static bool kernel_fpu_disabled(void) + return this_cpu_read(in_kernel_fpu); + } + +-/* +- * Were we in an interrupt that interrupted kernel mode? +- * +- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that +- * pair does nothing at all: the thread must not have fpu (so +- * that we don't try to save the FPU state), and TS must +- * be set (so that the clts/stts pair does nothing that is +- * visible in the interrupted kernel thread). +- * +- * Except for the eagerfpu case when we return true; in the likely case +- * the thread has FPU but we are not going to set/clear TS. 
+- */ + static bool interrupted_kernel_fpu_idle(void) + { +- if (kernel_fpu_disabled()) +- return false; +- +- if (use_eager_fpu()) +- return true; +- +- return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); ++ return !kernel_fpu_disabled(); + } + + /* +@@ -127,7 +109,6 @@ void __kernel_fpu_begin(void) + copy_fpregs_to_fpstate(fpu); + } else { + this_cpu_write(fpu_fpregs_owner_ctx, NULL); +- __fpregs_activate_hw(); + } + } + EXPORT_SYMBOL(__kernel_fpu_begin); +@@ -138,8 +119,6 @@ void __kernel_fpu_end(void) + + if (fpu->fpregs_active) + copy_kernel_to_fpregs(&fpu->state); +- else +- __fpregs_deactivate_hw(); + + kernel_fpu_enable(); + } +@@ -201,10 +180,7 @@ void fpu__save(struct fpu *fpu) + trace_x86_fpu_before_save(fpu); + if (fpu->fpregs_active) { + if (!copy_fpregs_to_fpstate(fpu)) { +- if (use_eager_fpu()) +- copy_kernel_to_fpregs(&fpu->state); +- else +- fpregs_deactivate(fpu); ++ copy_kernel_to_fpregs(&fpu->state); + } + } + trace_x86_fpu_after_save(fpu); +@@ -262,8 +238,7 @@ int fpu__copy(struct fpu *dst_fpu, struc + * Don't let 'init optimized' areas of the XSAVE area + * leak into the child task: + */ +- if (use_eager_fpu()) +- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); ++ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); + + /* + * Save current FPU registers directly into the child +@@ -285,10 +260,7 @@ int fpu__copy(struct fpu *dst_fpu, struc + memcpy(&src_fpu->state, &dst_fpu->state, + fpu_kernel_xstate_size); + +- if (use_eager_fpu()) +- copy_kernel_to_fpregs(&src_fpu->state); +- else +- fpregs_deactivate(src_fpu); ++ copy_kernel_to_fpregs(&src_fpu->state); + } + preempt_enable(); + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -344,11 +344,9 @@ static int __fpu__restore_sig(void __use + } + + fpu->fpstate_active = 1; +- if (use_eager_fpu()) { +- preempt_disable(); +- fpu__restore(fpu); +- preempt_enable(); +- } ++ preempt_disable(); ++ fpu__restore(fpu); ++ preempt_enable(); + + 
return err; + } else { +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -890,15 +890,6 @@ int arch_set_user_pkey_access(struct tas + */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return -EINVAL; +- /* +- * For most XSAVE components, this would be an arduous task: +- * brining fpstate up to date with fpregs, updating fpstate, +- * then re-populating fpregs. But, for components that are +- * never lazily managed, we can just access the fpregs +- * directly. PKRU is never managed lazily, so we can just +- * manipulate it directly. Make sure it stays that way. +- */ +- WARN_ON_ONCE(!use_eager_fpu()); + + /* Set the bits we need in PKRU: */ + if (init_val & PKEY_DISABLE_ACCESS) +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -16,7 +16,6 @@ + #include + #include + #include +-#include /* For use_eager_fpu. Ugh! */ + #include + #include + #include "cpuid.h" +@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vc + if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) + best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + +- if (use_eager_fpu()) +- kvm_x86_ops->fpu_activate(vcpu); ++ kvm_x86_ops->fpu_activate(vcpu); + + /* + * The existing code assumes virtual address is 48-bit in the canonical +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7631,16 +7631,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu * + copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); + __kernel_fpu_end(); + ++vcpu->stat.fpu_reload; +- /* +- * If using eager FPU mode, or if the guest is a frequent user +- * of the FPU, just leave the FPU active for next time. +- * Every 255 times fpu_counter rolls over to 0; a guest that uses +- * the FPU in bursts will revert to loading it on demand. +- */ +- if (!use_eager_fpu()) { +- if (++vcpu->fpu_counter < 5) +- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); +- } + trace_kvm_fpu(0); + } + -- 2.47.2