From: Greg Kroah-Hartman Date: Thu, 23 May 2019 18:02:09 +0000 (+0200) Subject: 5.0-stable patches X-Git-Tag: v5.1.5~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b8704eb5cf6d2827dd86c3c8d5471358da0d056f;p=thirdparty%2Fkernel%2Fstable-queue.git 5.0-stable patches added patches: bpf-add-map_lookup_elem_sys_only-for-lookups-from-syscall-side.patch bpf-lru-avoid-messing-with-eviction-heuristics-upon-syscall-lookup.patch bpf-relax-inode-permission-check-for-retrieving-bpf-program.patch driver-core-postpone-dma-tear-down-until-after-devres-release-for-probe-failure.patch md-raid-raid5-preserve-the-writeback-action-after-the-parity-check.patch revert-don-t-jump-to-compute_result-state-from-check_result-state.patch --- diff --git a/queue-5.0/bpf-add-map_lookup_elem_sys_only-for-lookups-from-syscall-side.patch b/queue-5.0/bpf-add-map_lookup_elem_sys_only-for-lookups-from-syscall-side.patch new file mode 100644 index 00000000000..ce87d2b4225 --- /dev/null +++ b/queue-5.0/bpf-add-map_lookup_elem_sys_only-for-lookups-from-syscall-side.patch @@ -0,0 +1,53 @@ +From c6110222c6f49ea68169f353565eb865488a8619 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Tue, 14 May 2019 01:18:55 +0200 +Subject: bpf: add map_lookup_elem_sys_only for lookups from syscall side + +From: Daniel Borkmann + +commit c6110222c6f49ea68169f353565eb865488a8619 upstream. + +Add a callback map_lookup_elem_sys_only() that map implementations +could use over map_lookup_elem() from system call side in case the +map implementation needs to handle the latter differently than from +the BPF data path. If map_lookup_elem_sys_only() is set, this will +be preferred pick for map lookups out of user space. This hook is +used in a follow-up fix for LRU map, but once development window +opens, we can convert other map types from map_lookup_elem() (here, +the one called upon BPF_MAP_LOOKUP_ELEM cmd is meant) over to use +the callback to simplify and clean up the latter. 
+ +Signed-off-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/bpf.h | 1 + + kernel/bpf/syscall.c | 5 ++++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -35,6 +35,7 @@ struct bpf_map_ops { + void (*map_free)(struct bpf_map *map); + int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); + void (*map_release_uref)(struct bpf_map *map); ++ void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); + + /* funcs callable from userspace and from eBPF programs */ + void *(*map_lookup_elem)(struct bpf_map *map, void *key); +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -738,7 +738,10 @@ static int map_lookup_elem(union bpf_att + err = map->ops->map_peek_elem(map, value); + } else { + rcu_read_lock(); +- ptr = map->ops->map_lookup_elem(map, key); ++ if (map->ops->map_lookup_elem_sys_only) ++ ptr = map->ops->map_lookup_elem_sys_only(map, key); ++ else ++ ptr = map->ops->map_lookup_elem(map, key); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + } else if (!ptr) { diff --git a/queue-5.0/bpf-lru-avoid-messing-with-eviction-heuristics-upon-syscall-lookup.patch b/queue-5.0/bpf-lru-avoid-messing-with-eviction-heuristics-upon-syscall-lookup.patch new file mode 100644 index 00000000000..561b1906fb4 --- /dev/null +++ b/queue-5.0/bpf-lru-avoid-messing-with-eviction-heuristics-upon-syscall-lookup.patch @@ -0,0 +1,106 @@ +From 50b045a8c0ccf44f76640ac3eea8d80ca53979a3 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Tue, 14 May 2019 01:18:56 +0200 +Subject: bpf, lru: avoid messing with eviction heuristics upon syscall lookup + +From: Daniel Borkmann + +commit 50b045a8c0ccf44f76640ac3eea8d80ca53979a3 upstream. 
+ +One of the biggest issues we face right now with picking LRU map over +regular hash table is that a map walk out of user space, for example, +to just dump the existing entries or to remove certain ones, will +completely mess up LRU eviction heuristics and wrong entries such +as just created ones will get evicted instead. The reason for this +is that we mark an entry as "in use" via bpf_lru_node_set_ref() from +system call lookup side as well. Thus upon walk, all entries are +being marked, so information of actual least recently used ones +are "lost". + +In case of Cilium where it can be used (besides others) as a BPF +based connection tracker, this current behavior causes disruption +upon control plane changes that need to walk the map from user space +to evict certain entries. Discussion result from bpfconf [0] was that +we should simply just remove marking from system call side as no +good use case could be found where it's actually needed there. +Therefore this patch removes marking for regular LRU and per-CPU +flavor. If there ever should be a need in future, the behavior could +be selected via map creation flag, but due to mentioned reason we +avoid this here. 
+ + [0] http://vger.kernel.org/bpfconf.html + +Fixes: 29ba732acbee ("bpf: Add BPF_MAP_TYPE_LRU_HASH") +Fixes: 8f8449384ec3 ("bpf: Add BPF_MAP_TYPE_LRU_PERCPU_HASH") +Signed-off-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/bpf/hashtab.c | 23 ++++++++++++++++++----- + 1 file changed, 18 insertions(+), 5 deletions(-) + +--- a/kernel/bpf/hashtab.c ++++ b/kernel/bpf/hashtab.c +@@ -527,18 +527,30 @@ static u32 htab_map_gen_lookup(struct bp + return insn - insn_buf; + } + +-static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) ++static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, ++ void *key, const bool mark) + { + struct htab_elem *l = __htab_map_lookup_elem(map, key); + + if (l) { +- bpf_lru_node_set_ref(&l->lru_node); ++ if (mark) ++ bpf_lru_node_set_ref(&l->lru_node); + return l->key + round_up(map->key_size, 8); + } + + return NULL; + } + ++static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ return __htab_lru_map_lookup_elem(map, key, true); ++} ++ ++static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) ++{ ++ return __htab_lru_map_lookup_elem(map, key, false); ++} ++ + static u32 htab_lru_map_gen_lookup(struct bpf_map *map, + struct bpf_insn *insn_buf) + { +@@ -1215,6 +1227,7 @@ const struct bpf_map_ops htab_lru_map_op + .map_free = htab_map_free, + .map_get_next_key = htab_map_get_next_key, + .map_lookup_elem = htab_lru_map_lookup_elem, ++ .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, + .map_update_elem = htab_lru_map_update_elem, + .map_delete_elem = htab_lru_map_delete_elem, + .map_gen_lookup = htab_lru_map_gen_lookup, +@@ -1246,7 +1259,6 @@ static void *htab_lru_percpu_map_lookup_ + + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) + { +- struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct htab_elem *l; + void __percpu *pptr; + int 
ret = -ENOENT; +@@ -1262,8 +1274,9 @@ int bpf_percpu_hash_copy(struct bpf_map + l = __htab_map_lookup_elem(map, key); + if (!l) + goto out; +- if (htab_is_lru(htab)) +- bpf_lru_node_set_ref(&l->lru_node); ++ /* We do not mark LRU map element here in order to not mess up ++ * eviction heuristics when user space does a map walk. ++ */ + pptr = htab_elem_get_ptr(l, map->key_size); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, diff --git a/queue-5.0/bpf-relax-inode-permission-check-for-retrieving-bpf-program.patch b/queue-5.0/bpf-relax-inode-permission-check-for-retrieving-bpf-program.patch new file mode 100644 index 00000000000..63f3fc48b04 --- /dev/null +++ b/queue-5.0/bpf-relax-inode-permission-check-for-retrieving-bpf-program.patch @@ -0,0 +1,38 @@ +From e547ff3f803e779a3898f1f48447b29f43c54085 Mon Sep 17 00:00:00 2001 +From: Chenbo Feng +Date: Tue, 14 May 2019 19:42:57 -0700 +Subject: bpf: relax inode permission check for retrieving bpf program + +From: Chenbo Feng + +commit e547ff3f803e779a3898f1f48447b29f43c54085 upstream. + +For iptable module to load a bpf program from a pinned location, it +only retrieves a loaded program and cannot change the program content so +requiring a write permission for it might not be necessary. +Also when adding or removing an unrelated iptable rule, it might need to +flush and reload the xt_bpf related rules as well and triggers the inode +permission check. It might be better to remove the write permission +check for the inode so we won't need to grant write access to all the +processes that flush and restore iptables rules. 
+ +Signed-off-by: Chenbo Feng +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/bpf/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/inode.c ++++ b/kernel/bpf/inode.c +@@ -518,7 +518,7 @@ out: + static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) + { + struct bpf_prog *prog; +- int ret = inode_permission(inode, MAY_READ | MAY_WRITE); ++ int ret = inode_permission(inode, MAY_READ); + if (ret) + return ERR_PTR(ret); + diff --git a/queue-5.0/driver-core-postpone-dma-tear-down-until-after-devres-release-for-probe-failure.patch b/queue-5.0/driver-core-postpone-dma-tear-down-until-after-devres-release-for-probe-failure.patch new file mode 100644 index 00000000000..8ce8388e8c4 --- /dev/null +++ b/queue-5.0/driver-core-postpone-dma-tear-down-until-after-devres-release-for-probe-failure.patch @@ -0,0 +1,117 @@ +From 0b777eee88d712256ba8232a9429edb17c4f9ceb Mon Sep 17 00:00:00 2001 +From: John Garry +Date: Thu, 28 Mar 2019 18:08:05 +0800 +Subject: driver core: Postpone DMA tear-down until after devres release for probe failure + +From: John Garry + +commit 0b777eee88d712256ba8232a9429edb17c4f9ceb upstream. + +In commit 376991db4b64 ("driver core: Postpone DMA tear-down until after +devres release"), we changed the ordering of tearing down the device DMA +ops and releasing all the device's resources; this was because the DMA ops +should be maintained until we release the device's managed DMA memories. 
+ +However, we have seen another crash on an arm64 system when a +device driver probe fails: + + hisi_sas_v3_hw 0000:74:02.0: Adding to iommu group 2 + scsi host1: hisi_sas_v3_hw + BUG: Bad page state in process swapper/0 pfn:313f5 + page:ffff7e0000c4fd40 count:1 mapcount:0 + mapping:0000000000000000 index:0x0 + flags: 0xfffe00000001000(reserved) + raw: 0fffe00000001000 ffff7e0000c4fd48 ffff7e0000c4fd48 +0000000000000000 + raw: 0000000000000000 0000000000000000 00000001ffffffff +0000000000000000 + page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set + bad because of flags: 0x1000(reserved) + Modules linked in: + CPU: 49 PID: 1 Comm: swapper/0 Not tainted +5.1.0-rc1-43081-g22d97fd-dirty #1433 + Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI +RC0 - V1.12.01 01/29/2019 + Call trace: + dump_backtrace+0x0/0x118 + show_stack+0x14/0x1c + dump_stack+0xa4/0xc8 + bad_page+0xe4/0x13c + free_pages_check_bad+0x4c/0xc0 + __free_pages_ok+0x30c/0x340 + __free_pages+0x30/0x44 + __dma_direct_free_pages+0x30/0x38 + dma_direct_free+0x24/0x38 + dma_free_attrs+0x9c/0xd8 + dmam_release+0x20/0x28 + release_nodes+0x17c/0x220 + devres_release_all+0x34/0x54 + really_probe+0xc4/0x2c8 + driver_probe_device+0x58/0xfc + device_driver_attach+0x68/0x70 + __driver_attach+0x94/0xdc + bus_for_each_dev+0x5c/0xb4 + driver_attach+0x20/0x28 + bus_add_driver+0x14c/0x200 + driver_register+0x6c/0x124 + __pci_register_driver+0x48/0x50 + sas_v3_pci_driver_init+0x20/0x28 + do_one_initcall+0x40/0x25c + kernel_init_freeable+0x2b8/0x3c0 + kernel_init+0x10/0x100 + ret_from_fork+0x10/0x18 + Disabling lock debugging due to kernel taint + BUG: Bad page state in process swapper/0 pfn:313f6 + page:ffff7e0000c4fd80 count:1 mapcount:0 +mapping:0000000000000000 index:0x0 +[ 89.322983] flags: 0xfffe00000001000(reserved) + raw: 0fffe00000001000 ffff7e0000c4fd88 ffff7e0000c4fd88 +0000000000000000 + raw: 0000000000000000 0000000000000000 00000001ffffffff +0000000000000000 + +The crash occurs for the same reason. 
+ +In this case, on the really_probe() failure path, we are still clearing +the DMA ops prior to releasing the device's managed memories. + +This patch fixes this issue by reordering the DMA ops teardown and the +call to devres_release_all() on the failure path. + +Reported-by: Xiang Chen +Tested-by: Xiang Chen +Signed-off-by: John Garry +Reviewed-by: Robin Murphy +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/dd.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/base/dd.c ++++ b/drivers/base/dd.c +@@ -486,7 +486,7 @@ re_probe: + if (dev->bus->dma_configure) { + ret = dev->bus->dma_configure(dev); + if (ret) +- goto dma_failed; ++ goto probe_failed; + } + + if (driver_sysfs_add(dev)) { +@@ -542,14 +542,13 @@ re_probe: + goto done; + + probe_failed: +- arch_teardown_dma_ops(dev); +-dma_failed: + if (dev->bus) + blocking_notifier_call_chain(&dev->bus->p->bus_notifier, + BUS_NOTIFY_DRIVER_NOT_BOUND, dev); + pinctrl_bind_failed: + device_links_no_driver(dev); + devres_release_all(dev); ++ arch_teardown_dma_ops(dev); + driver_sysfs_remove(dev); + dev->driver = NULL; + dev_set_drvdata(dev, NULL); diff --git a/queue-5.0/md-raid-raid5-preserve-the-writeback-action-after-the-parity-check.patch b/queue-5.0/md-raid-raid5-preserve-the-writeback-action-after-the-parity-check.patch new file mode 100644 index 00000000000..79f3627738d --- /dev/null +++ b/queue-5.0/md-raid-raid5-preserve-the-writeback-action-after-the-parity-check.patch @@ -0,0 +1,52 @@ +From b2176a1dfb518d870ee073445d27055fea64dfb8 Mon Sep 17 00:00:00 2001 +From: Nigel Croxon +Date: Tue, 16 Apr 2019 09:50:09 -0700 +Subject: md/raid: raid5 preserve the writeback action after the parity check + +From: Nigel Croxon + +commit b2176a1dfb518d870ee073445d27055fea64dfb8 upstream. + +The problem is that any 'uptodate' vs 'disks' check is not precise +in this path. 
Put a "WARN_ON(!test_bit(R5_UPTODATE, &dev->flags)" on the +device that might try to kick off writes and then skip the action. +Better to prevent the raid driver from taking unexpected action *and* keep +the system alive vs killing the machine with BUG_ON. + +Note: fixed warning reported by kbuild test robot + +Signed-off-by: Dan Williams +Signed-off-by: Nigel Croxon +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid5.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -4197,7 +4197,7 @@ static void handle_parity_checks6(struct + /* now write out any block on a failed drive, + * or P or Q if they were recomputed + */ +- BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ ++ dev = NULL; + if (s->failed == 2) { + dev = &sh->dev[s->failed_num[1]]; + s->locked++; +@@ -4222,6 +4222,14 @@ static void handle_parity_checks6(struct + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + } ++ if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), ++ "%s: disk%td not up to date\n", ++ mdname(conf->mddev), ++ dev - (struct r5dev *) &sh->dev)) { ++ clear_bit(R5_LOCKED, &dev->flags); ++ clear_bit(R5_Wantwrite, &dev->flags); ++ s->locked--; ++ } + clear_bit(STRIPE_DEGRADED, &sh->state); + + set_bit(STRIPE_INSYNC, &sh->state); diff --git a/queue-5.0/revert-don-t-jump-to-compute_result-state-from-check_result-state.patch b/queue-5.0/revert-don-t-jump-to-compute_result-state-from-check_result-state.patch new file mode 100644 index 00000000000..d1c0ed76216 --- /dev/null +++ b/queue-5.0/revert-don-t-jump-to-compute_result-state-from-check_result-state.patch @@ -0,0 +1,54 @@ +From a25d8c327bb41742dbd59f8c545f59f3b9c39983 Mon Sep 17 00:00:00 2001 +From: Song Liu +Date: Tue, 16 Apr 2019 09:34:21 -0700 +Subject: Revert "Don't jump to compute_result state from check_result state" + +From: Song Liu + +commit a25d8c327bb41742dbd59f8c545f59f3b9c39983 
upstream. + +This reverts commit 4f4fd7c5798bbdd5a03a60f6269cf1177fbd11ef. + +Cc: Dan Williams +Cc: Nigel Croxon +Cc: Xiao Ni +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid5.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -4233,15 +4233,26 @@ static void handle_parity_checks6(struct + case check_state_check_result: + sh->check_state = check_state_idle; + +- if (s->failed > 1) +- break; + /* handle a successful check operation, if parity is correct + * we are done. Otherwise update the mismatch count and repair + * parity if !MD_RECOVERY_CHECK + */ + if (sh->ops.zero_sum_result == 0) { +- /* Any parity checked was correct */ +- set_bit(STRIPE_INSYNC, &sh->state); ++ /* both parities are correct */ ++ if (!s->failed) ++ set_bit(STRIPE_INSYNC, &sh->state); ++ else { ++ /* in contrast to the raid5 case we can validate ++ * parity, but still have a failure to write ++ * back ++ */ ++ sh->check_state = check_state_compute_result; ++ /* Returning at this point means that we may go ++ * off and bring p and/or q uptodate again so ++ * we make sure to check zero_sum_result again ++ * to verify if p or q need writeback ++ */ ++ } + } else { + atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { diff --git a/queue-5.0/series b/queue-5.0/series index d29fd0a1899..55aa2d5580d 100644 --- a/queue-5.0/series +++ b/queue-5.0/series @@ -131,3 +131,9 @@ i2c-designware-ratelimit-transfer-when-suspended-err.patch perf-bench-numa-add-define-for-rusage_thread-if-not-.patch perf-cs-etm-always-allocate-memory-for-cs_etm_queue-.patch perf-x86-intel-fix-race-in-intel_pmu_disable_event.patch +revert-don-t-jump-to-compute_result-state-from-check_result-state.patch +md-raid-raid5-preserve-the-writeback-action-after-the-parity-check.patch 
+driver-core-postpone-dma-tear-down-until-after-devres-release-for-probe-failure.patch +bpf-relax-inode-permission-check-for-retrieving-bpf-program.patch +bpf-add-map_lookup_elem_sys_only-for-lookups-from-syscall-side.patch +bpf-lru-avoid-messing-with-eviction-heuristics-upon-syscall-lookup.patch