From: Greg Kroah-Hartman Date: Tue, 16 Oct 2018 14:05:38 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v4.9.134~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bedc95c20542a0782a7c6a632a7b002c72270f8b;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: dcache-account-external-names-as-indirectly-reclaimable-memory.patch i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch mm-don-t-show-nr_indirectly_reclaimable-in-proc-vmstat.patch mm-introduce-nr_indirectly_reclaimable_bytes.patch mm-treat-indirectly-reclaimable-memory-as-available-in-memavailable.patch mm-treat-indirectly-reclaimable-memory-as-free-in-overcommit-logic.patch xhci-don-t-print-a-warning-when-setting-link-state-for-disabled-ports.patch --- diff --git a/queue-4.14/dcache-account-external-names-as-indirectly-reclaimable-memory.patch b/queue-4.14/dcache-account-external-names-as-indirectly-reclaimable-memory.patch new file mode 100644 index 00000000000..dc94475f32b --- /dev/null +++ b/queue-4.14/dcache-account-external-names-as-indirectly-reclaimable-memory.patch @@ -0,0 +1,161 @@ +From f1782c9bc547754f4bd3043fe8cfda53db85f13f Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Tue, 10 Apr 2018 16:27:44 -0700 +Subject: dcache: account external names as indirectly reclaimable memory + +From: Roman Gushchin + +commit f1782c9bc547754f4bd3043fe8cfda53db85f13f upstream. + +I received a report about suspicious growth of unreclaimable slabs on +some machines. I've found that it happens on machines with low memory +pressure, and these unreclaimable slabs are external names attached to +dentries. + +External names are allocated using generic kmalloc() function, so they +are accounted as unreclaimable. But they are held by dentries, which +are reclaimable, and they will be reclaimed under the memory pressure. + +In particular, this breaks MemAvailable calculation, as it doesn't take +unreclaimable slabs into account. 
This leads to a silly situation, when +a machine is almost idle, has no memory pressure and therefore has a big +dentry cache. And the resulting MemAvailable is too low to start a new +workload. + +To address the issue, the NR_INDIRECTLY_RECLAIMABLE_BYTES counter is +used to track the amount of memory, consumed by external names. The +counter is increased in the dentry allocation path, if an external name +structure is allocated; and it's decreased in the dentry freeing path. + +To reproduce the problem I've used the following Python script: + + import os + + for iter in range (0, 10000000): + try: + name = ("/some_long_name_%d" % iter) + "_" * 220 + os.stat(name) + except Exception: + pass + +Without this patch: + $ cat /proc/meminfo | grep MemAvailable + MemAvailable: 7811688 kB + $ python indirect.py + $ cat /proc/meminfo | grep MemAvailable + MemAvailable: 2753052 kB + +With the patch: + $ cat /proc/meminfo | grep MemAvailable + MemAvailable: 7809516 kB + $ python indirect.py + $ cat /proc/meminfo | grep MemAvailable + MemAvailable: 7749144 kB + +[guro@fb.com: fix indirectly reclaimable memory accounting for CONFIG_SLOB] + Link: http://lkml.kernel.org/r/20180312194140.19517-1-guro@fb.com +[guro@fb.com: fix indirectly reclaimable memory accounting] + Link: http://lkml.kernel.org/r/20180313125701.7955-1-guro@fb.com +Link: http://lkml.kernel.org/r/20180305133743.12746-5-guro@fb.com +Signed-off-by: Roman Gushchin +Reviewed-by: Andrew Morton +Cc: Alexander Viro +Cc: Michal Hocko +Cc: Johannes Weiner +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dcache.c | 38 +++++++++++++++++++++++++++++--------- + 1 file changed, 29 insertions(+), 9 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -270,11 +270,25 @@ static void __d_free(struct rcu_head *he + kmem_cache_free(dentry_cache, dentry); + } + ++static void __d_free_external_name(struct rcu_head *head) ++{ ++ struct external_name *name = 
container_of(head, struct external_name, ++ u.head); ++ ++ mod_node_page_state(page_pgdat(virt_to_page(name)), ++ NR_INDIRECTLY_RECLAIMABLE_BYTES, ++ -ksize(name)); ++ ++ kfree(name); ++} ++ + static void __d_free_external(struct rcu_head *head) + { + struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); +- kfree(external_name(dentry)); +- kmem_cache_free(dentry_cache, dentry); ++ ++ __d_free_external_name(&external_name(dentry)->u.head); ++ ++ kmem_cache_free(dentry_cache, dentry); + } + + static inline int dname_external(const struct dentry *dentry) +@@ -305,7 +319,7 @@ void release_dentry_name_snapshot(struct + struct external_name *p; + p = container_of(name->name, struct external_name, name[0]); + if (unlikely(atomic_dec_and_test(&p->u.count))) +- kfree_rcu(p, u.head); ++ call_rcu(&p->u.head, __d_free_external_name); + } + } + EXPORT_SYMBOL(release_dentry_name_snapshot); +@@ -1605,6 +1619,7 @@ EXPORT_SYMBOL(d_invalidate); + + struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) + { ++ struct external_name *ext = NULL; + struct dentry *dentry; + char *dname; + int err; +@@ -1625,14 +1640,13 @@ struct dentry *__d_alloc(struct super_bl + dname = dentry->d_iname; + } else if (name->len > DNAME_INLINE_LEN-1) { + size_t size = offsetof(struct external_name, name[1]); +- struct external_name *p = kmalloc(size + name->len, +- GFP_KERNEL_ACCOUNT); +- if (!p) { ++ ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); ++ if (!ext) { + kmem_cache_free(dentry_cache, dentry); + return NULL; + } +- atomic_set(&p->u.count, 1); +- dname = p->name; ++ atomic_set(&ext->u.count, 1); ++ dname = ext->name; + if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS)) + kasan_unpoison_shadow(dname, + round_up(name->len + 1, sizeof(unsigned long))); +@@ -1675,6 +1689,12 @@ struct dentry *__d_alloc(struct super_bl + } + } + ++ if (unlikely(ext)) { ++ pg_data_t *pgdat = page_pgdat(virt_to_page(ext)); ++ mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES, ++ 
ksize(ext)); ++ } ++ + this_cpu_inc(nr_dentry); + + return dentry; +@@ -2769,7 +2789,7 @@ static void copy_name(struct dentry *den + dentry->d_name.hash_len = target->d_name.hash_len; + } + if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) +- kfree_rcu(old_name, u.head); ++ call_rcu(&old_name->u.head, __d_free_external_name); + } + + static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) diff --git a/queue-4.14/i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch b/queue-4.14/i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch new file mode 100644 index 00000000000..3c444d77a3e --- /dev/null +++ b/queue-4.14/i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch @@ -0,0 +1,60 @@ +From 08d9db00fe0e300d6df976e6c294f974988226dd Mon Sep 17 00:00:00 2001 +From: Edgar Cherkasov +Date: Thu, 27 Sep 2018 11:56:03 +0300 +Subject: i2c: i2c-scmi: fix for i2c_smbus_write_block_data + +From: Edgar Cherkasov + +commit 08d9db00fe0e300d6df976e6c294f974988226dd upstream. + +The i2c-scmi driver crashes when the SMBus Write Block transaction is +executed: + +WARNING: CPU: 9 PID: 2194 at mm/page_alloc.c:3931 __alloc_pages_slowpath+0x9db/0xec0 + Call Trace: + ? get_page_from_freelist+0x49d/0x11f0 + ? alloc_pages_current+0x6a/0xe0 + ? new_slab+0x499/0x690 + __alloc_pages_nodemask+0x265/0x280 + alloc_pages_current+0x6a/0xe0 + kmalloc_order+0x18/0x40 + kmalloc_order_trace+0x24/0xb0 + ? 
acpi_ut_allocate_object_desc_dbg+0x62/0x10c + __kmalloc+0x203/0x220 + acpi_os_allocate_zeroed+0x34/0x36 + acpi_ut_copy_eobject_to_iobject+0x266/0x31e + acpi_evaluate_object+0x166/0x3b2 + acpi_smbus_cmi_access+0x144/0x530 [i2c_scmi] + i2c_smbus_xfer+0xda/0x370 + i2cdev_ioctl_smbus+0x1bd/0x270 + i2cdev_ioctl+0xaa/0x250 + do_vfs_ioctl+0xa4/0x600 + SyS_ioctl+0x79/0x90 + do_syscall_64+0x73/0x130 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 +ACPI Error: Evaluating _SBW: 4 (20170831/smbus_cmi-185) + +This problem occurs because the length of ACPI Buffer object is not +defined/initialized in the code before a corresponding ACPI method is +called. The obvious patch below fixes this issue. + +Signed-off-by: Edgar Cherkasov +Acked-by: Viktor Krasnov +Acked-by: Michael Brunner +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/i2c/busses/i2c-scmi.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/i2c/busses/i2c-scmi.c ++++ b/drivers/i2c/busses/i2c-scmi.c +@@ -152,6 +152,7 @@ acpi_smbus_cmi_access(struct i2c_adapter + mt_params[3].type = ACPI_TYPE_INTEGER; + mt_params[3].integer.value = len; + mt_params[4].type = ACPI_TYPE_BUFFER; ++ mt_params[4].buffer.length = len; + mt_params[4].buffer.pointer = data->block + 1; + } + break; diff --git a/queue-4.14/mm-don-t-show-nr_indirectly_reclaimable-in-proc-vmstat.patch b/queue-4.14/mm-don-t-show-nr_indirectly_reclaimable-in-proc-vmstat.patch new file mode 100644 index 00000000000..b5d66866360 --- /dev/null +++ b/queue-4.14/mm-don-t-show-nr_indirectly_reclaimable-in-proc-vmstat.patch @@ -0,0 +1,52 @@ +From 7aaf7727235870f497eb928f728f7773d6df3b40 Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Fri, 11 May 2018 16:01:53 -0700 +Subject: mm: don't show nr_indirectly_reclaimable in /proc/vmstat + +From: Roman Gushchin + +commit 7aaf7727235870f497eb928f728f7773d6df3b40 upstream. 
+ +Don't show nr_indirectly_reclaimable in /proc/vmstat, because there is +no need to export this vm counter to userspace, and some changes are +expected in reclaimable object accounting, which can alter this counter. + +Link: http://lkml.kernel.org/r/20180425191422.9159-1-guro@fb.com +Signed-off-by: Roman Gushchin +Acked-by: Vlastimil Babka +Reviewed-by: Andrew Morton +Cc: Matthew Wilcox +Cc: Alexander Viro +Cc: Michal Hocko +Cc: Johannes Weiner +Cc: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmstat.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1090,7 +1090,7 @@ const char * const vmstat_text[] = { + "nr_vmscan_immediate_reclaim", + "nr_dirtied", + "nr_written", +- "nr_indirectly_reclaimable", ++ "", /* nr_indirectly_reclaimable */ + + /* enum writeback_stat_item counters */ + "nr_dirty_threshold", +@@ -1673,6 +1673,10 @@ static int vmstat_show(struct seq_file * + unsigned long *l = arg; + unsigned long off = l - (unsigned long *)m->private; + ++ /* Skip hidden vmstat items. */ ++ if (*vmstat_text[off] == '\0') ++ return 0; ++ + seq_puts(m, vmstat_text[off]); + seq_put_decimal_ull(m, " ", *l); + seq_putc(m, '\n'); diff --git a/queue-4.14/mm-introduce-nr_indirectly_reclaimable_bytes.patch b/queue-4.14/mm-introduce-nr_indirectly_reclaimable_bytes.patch new file mode 100644 index 00000000000..301f241689a --- /dev/null +++ b/queue-4.14/mm-introduce-nr_indirectly_reclaimable_bytes.patch @@ -0,0 +1,64 @@ +From eb59254608bc1d42c4c6afdcdce9c0d3ce02b318 Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Tue, 10 Apr 2018 16:27:36 -0700 +Subject: mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES + +From: Roman Gushchin + +commit eb59254608bc1d42c4c6afdcdce9c0d3ce02b318 upstream. + +Patch series "indirectly reclaimable memory", v2. 
+ +This patchset introduces the concept of indirectly reclaimable memory +and applies it to fix the issue where a large number of dentries with +external names can significantly affect the MemAvailable value. + +This patch (of 3): + +Introduce a concept of indirectly reclaimable memory and add the +corresponding memory counter and /proc/vmstat item. + +Indirectly reclaimable memory is any sort of memory, used by the kernel +(except for reclaimable slabs), which is actually reclaimable, i.e. will +be released under memory pressure. + +The counter is in bytes, as it's not always possible to count such +objects in pages. The name contains BYTES by analogy to +NR_KERNEL_STACK_KB. + +Link: http://lkml.kernel.org/r/20180305133743.12746-2-guro@fb.com +Signed-off-by: Roman Gushchin +Reviewed-by: Andrew Morton +Cc: Alexander Viro +Cc: Michal Hocko +Cc: Johannes Weiner +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mmzone.h | 1 + + mm/vmstat.c | 1 + + 2 files changed, 2 insertions(+) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -180,6 +180,7 @@ enum node_stat_item { + NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ + NR_DIRTIED, /* page dirtyings since bootup */ + NR_WRITTEN, /* page writings since bootup */ ++ NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */ + NR_VM_NODE_STAT_ITEMS + }; + +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1090,6 +1090,7 @@ const char * const vmstat_text[] = { + "nr_vmscan_immediate_reclaim", + "nr_dirtied", + "nr_written", ++ "nr_indirectly_reclaimable", + + /* enum writeback_stat_item counters */ + "nr_dirty_threshold", diff --git a/queue-4.14/mm-treat-indirectly-reclaimable-memory-as-available-in-memavailable.patch b/queue-4.14/mm-treat-indirectly-reclaimable-memory-as-available-in-memavailable.patch new file mode 100644 index 00000000000..e58f2d948b9 --- /dev/null +++
b/queue-4.14/mm-treat-indirectly-reclaimable-memory-as-available-in-memavailable.patch @@ -0,0 +1,43 @@ +From 034ebf65c3c21d85b963d39f992258a64a85e3a9 Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Tue, 10 Apr 2018 16:27:40 -0700 +Subject: mm: treat indirectly reclaimable memory as available in MemAvailable + +From: Roman Gushchin + +commit 034ebf65c3c21d85b963d39f992258a64a85e3a9 upstream. + +Adjust /proc/meminfo MemAvailable calculation by adding the amount of +indirectly reclaimable memory (rounded to the PAGE_SIZE). + +Link: http://lkml.kernel.org/r/20180305133743.12746-4-guro@fb.com +Signed-off-by: Roman Gushchin +Reviewed-by: Andrew Morton +Cc: Alexander Viro +Cc: Michal Hocko +Cc: Johannes Weiner +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4557,6 +4557,13 @@ long si_mem_available(void) + min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2, + wmark_low); + ++ /* ++ * Part of the kernel memory, which can be released under memory ++ * pressure. ++ */ ++ available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >> ++ PAGE_SHIFT; ++ + if (available < 0) + available = 0; + return available; diff --git a/queue-4.14/mm-treat-indirectly-reclaimable-memory-as-free-in-overcommit-logic.patch b/queue-4.14/mm-treat-indirectly-reclaimable-memory-as-free-in-overcommit-logic.patch new file mode 100644 index 00000000000..5efba1b7cb7 --- /dev/null +++ b/queue-4.14/mm-treat-indirectly-reclaimable-memory-as-free-in-overcommit-logic.patch @@ -0,0 +1,78 @@ +From d79f7aa496fc94d763f67b833a1f36f4c171176f Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Tue, 10 Apr 2018 16:27:47 -0700 +Subject: mm: treat indirectly reclaimable memory as free in overcommit logic + +From: Roman Gushchin + +commit d79f7aa496fc94d763f67b833a1f36f4c171176f upstream. 
+ +Indirectly reclaimable memory can consume a significant part of total +memory and it's actually reclaimable (it will be released under actual +memory pressure). + +So, the overcommit logic should treat it as free. + +Otherwise, it's possible to cause random system-wide memory allocation +failures by consuming a significant amount of memory by indirectly +reclaimable memory, e.g. dentry external names. + +If overcommit policy GUESS is used, it might be used for a denial of +service attack under some conditions. + +The following program illustrates the approach. It causes the kernel to +allocate an unreclaimable kmalloc-256 chunk for each stat() call, so +that at some point the overcommit logic may start blocking large +allocations system-wide. + + int main() + { + char buf[256]; + unsigned long i; + struct stat statbuf; + + buf[0] = '/'; + for (i = 1; i < sizeof(buf); i++) + buf[i] = '_'; + + for (i = 0; 1; i++) { + sprintf(&buf[248], "%8lu", i); + stat(buf, &statbuf); + } + + return 0; + } + +This patch in combination with related indirectly reclaimable memory +patches closes this issue. + +Link: http://lkml.kernel.org/r/20180313130041.8078-1-guro@fb.com +Signed-off-by: Roman Gushchin +Reviewed-by: Andrew Morton +Cc: Alexander Viro +Cc: Michal Hocko +Cc: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/util.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/util.c ++++ b/mm/util.c +@@ -636,6 +636,13 @@ int __vm_enough_memory(struct mm_struct + free += global_node_page_state(NR_SLAB_RECLAIMABLE); + + /* ++ * Part of the kernel memory, which can be released ++ * under memory pressure. ++ */ ++ free += global_node_page_state( ++ NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT; ++ ++ /* + * Leave reserved pages. The pages are not for anonymous pages.
+ */ + if (free <= totalreserve_pages) diff --git a/queue-4.14/series b/queue-4.14/series index 6f2ddf94c3e..a176e8763b9 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -75,3 +75,10 @@ pinctrl-mcp23s08-fix-irq-and-irqchip-setup-order.patch arm64-perf-reject-stand-alone-chain-events-for-pmuv3.patch mm-thp-fix-call-to-mmu_notifier-in-set_pmd_migration_entry-v2.patch mm-preserve-_page_devmap-across-mprotect-calls.patch +i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch +xhci-don-t-print-a-warning-when-setting-link-state-for-disabled-ports.patch +mm-introduce-nr_indirectly_reclaimable_bytes.patch +mm-treat-indirectly-reclaimable-memory-as-available-in-memavailable.patch +dcache-account-external-names-as-indirectly-reclaimable-memory.patch +mm-treat-indirectly-reclaimable-memory-as-free-in-overcommit-logic.patch +mm-don-t-show-nr_indirectly_reclaimable-in-proc-vmstat.patch diff --git a/queue-4.14/xhci-don-t-print-a-warning-when-setting-link-state-for-disabled-ports.patch b/queue-4.14/xhci-don-t-print-a-warning-when-setting-link-state-for-disabled-ports.patch new file mode 100644 index 00000000000..86623d5ff7c --- /dev/null +++ b/queue-4.14/xhci-don-t-print-a-warning-when-setting-link-state-for-disabled-ports.patch @@ -0,0 +1,61 @@ +From 1208d8a84fdcae6b395c57911cdf907450d30e70 Mon Sep 17 00:00:00 2001 +From: Mathias Nyman +Date: Mon, 12 Feb 2018 14:24:47 +0200 +Subject: xhci: Don't print a warning when setting link state for disabled ports + +From: Mathias Nyman + +commit 1208d8a84fdcae6b395c57911cdf907450d30e70 upstream. + +When disabling a USB3 port the hub driver will set the port link state to +U3 to prevent "ejected" or "safely removed" devices that are still +physically connected from immediately re-enumerating. + +If the device was really unplugged, then error messages were printed +as the hub tries to set the U3 link state for a port that is no longer +enabled. + +xhci-hcd ee000000.usb: Cannot set link state. 
+usb usb8-port1: cannot disable (err = -32) + +Don't print error message in xhci-hub if hub tries to set port link state +for a disabled port. Return -ENODEV instead which also silences hub driver. + +Signed-off-by: Mathias Nyman +Tested-by: Yoshihiro Shimoda +Signed-off-by: Ross Zwisler +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/host/xhci-hub.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/drivers/usb/host/xhci-hub.c ++++ b/drivers/usb/host/xhci-hub.c +@@ -1236,17 +1236,17 @@ int xhci_hub_control(struct usb_hcd *hcd + temp = readl(port_array[wIndex]); + break; + } +- +- /* Software should not attempt to set +- * port link state above '3' (U3) and the port +- * must be enabled. +- */ +- if ((temp & PORT_PE) == 0 || +- (link_state > USB_SS_PORT_LS_U3)) { +- xhci_warn(xhci, "Cannot set link state.\n"); ++ /* Port must be enabled */ ++ if (!(temp & PORT_PE)) { ++ retval = -ENODEV; ++ break; ++ } ++ /* Can't set port link state above '3' (U3) */ ++ if (link_state > USB_SS_PORT_LS_U3) { ++ xhci_warn(xhci, "Cannot set port %d link state %d\n", ++ wIndex, link_state); + goto error; + } +- + if (link_state == USB_SS_PORT_LS_U3) { + slot_id = xhci_find_slot_id_by_port(hcd, xhci, + wIndex + 1);