--- /dev/null
+From c6110222c6f49ea68169f353565eb865488a8619 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 14 May 2019 01:18:55 +0200
+Subject: bpf: add map_lookup_elem_sys_only for lookups from syscall side
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit c6110222c6f49ea68169f353565eb865488a8619 upstream.
+
+Add a callback map_lookup_elem_sys_only() that map implementations
+can use instead of map_lookup_elem() on the system call side, in case
+the map implementation needs to handle syscall-side lookups
+differently than lookups from the BPF data path. If
+map_lookup_elem_sys_only() is set, it is the preferred pick for map
+lookups out of user space. This hook is used in a follow-up fix for
+the LRU map, but once the next development window opens, we can
+convert other map types over from map_lookup_elem() (meaning the one
+invoked for the BPF_MAP_LOOKUP_ELEM command) to the new callback in
+order to simplify and clean up the syscall-side handling.
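+
+As a hedged illustration of what "lookups from syscall side" means
+here (a minimal user-space sketch, not part of this patch; the
+map_fd/key/value handling is assumed), the snippet below issues the
+BPF_MAP_LOOKUP_ELEM command that, after this change, is dispatched to
+->map_lookup_elem_sys_only() whenever the map type provides it:
+
+  #include <string.h>
+  #include <unistd.h>
+  #include <sys/syscall.h>
+  #include <linux/bpf.h>
+
+  /* Plain bpf(2) syscall lookup -- the user-space path handled by
+   * kernel/bpf/syscall.c:map_lookup_elem() above. */
+  static int sys_map_lookup(int map_fd, const void *key, void *value)
+  {
+          union bpf_attr attr;
+
+          memset(&attr, 0, sizeof(attr));
+          attr.map_fd = map_fd;
+          attr.key    = (__u64)(unsigned long)key;
+          attr.value  = (__u64)(unsigned long)value;
+
+          return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+  }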
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ include/linux/bpf.h | 1 +
+ kernel/bpf/syscall.c | 5 ++++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -34,6 +34,7 @@ struct bpf_map_ops {
+ void (*map_free)(struct bpf_map *map);
+ int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+ void (*map_release_uref)(struct bpf_map *map);
++ void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
+
+ /* funcs callable from userspace and from eBPF programs */
+ void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -721,7 +721,10 @@ static int map_lookup_elem(union bpf_att
+ err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+ } else {
+ rcu_read_lock();
+- ptr = map->ops->map_lookup_elem(map, key);
++ if (map->ops->map_lookup_elem_sys_only)
++ ptr = map->ops->map_lookup_elem_sys_only(map, key);
++ else
++ ptr = map->ops->map_lookup_elem(map, key);
+ if (ptr)
+ memcpy(value, ptr, value_size);
+ rcu_read_unlock();
--- /dev/null
+From 50b045a8c0ccf44f76640ac3eea8d80ca53979a3 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 14 May 2019 01:18:56 +0200
+Subject: bpf, lru: avoid messing with eviction heuristics upon syscall lookup
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 50b045a8c0ccf44f76640ac3eea8d80ca53979a3 upstream.
+
+One of the biggest issues we face right now with picking the LRU map
+over a regular hash table is that a map walk out of user space, for
+example to just dump the existing entries or to remove certain ones,
+will completely mess up the LRU eviction heuristics, and the wrong
+entries, such as just-created ones, will get evicted instead. The
+reason for this is that we mark an entry as "in use" via
+bpf_lru_node_set_ref() from the system call lookup side as well.
+Thus, upon such a walk, all entries get marked, so the information
+about the actual least recently used ones is "lost".
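+
+For illustration only (a hedged user-space sketch using libbpf's
+low-level wrappers; the 4-byte key/value layout is assumed), a dump
+like the following touches every element through the syscall lookup
+path and, before this fix, set the LRU reference bit on each of them:
+
+  #include <stdio.h>
+  #include <bpf/bpf.h>
+
+  /* Walk all entries of an LRU hash map from user space. */
+  static void dump_map(int map_fd)
+  {
+          __u32 cur, next, value;
+          __u32 *prev = NULL;
+
+          while (bpf_map_get_next_key(map_fd, prev, &next) == 0) {
+                  if (bpf_map_lookup_elem(map_fd, &next, &value) == 0)
+                          printf("key %u -> value %u\n", next, value);
+                  cur = next;
+                  prev = &cur;
+          }
+  }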
+
+In the case of Cilium, where the LRU map can be used (among other
+things) as a BPF-based connection tracker, this behavior causes
+disruption upon control plane changes that need to walk the map from
+user space to evict certain entries. The outcome of the discussion at
+bpfconf [0] was that we should simply remove the marking from the
+system call side, as no good use case could be found where it is
+actually needed there. Therefore this patch removes the marking for
+the regular LRU and the per-CPU flavor. Should the need ever arise in
+the future, the behavior could be made selectable via a map creation
+flag, but for the reason mentioned above we avoid that here.
+
+ [0] http://vger.kernel.org/bpfconf.html
+
+Fixes: 29ba732acbee ("bpf: Add BPF_MAP_TYPE_LRU_HASH")
+Fixes: 8f8449384ec3 ("bpf: Add BPF_MAP_TYPE_LRU_PERCPU_HASH")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/hashtab.c | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -518,18 +518,30 @@ static u32 htab_map_gen_lookup(struct bp
+ return insn - insn_buf;
+ }
+
+-static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
++static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
++ void *key, const bool mark)
+ {
+ struct htab_elem *l = __htab_map_lookup_elem(map, key);
+
+ if (l) {
+- bpf_lru_node_set_ref(&l->lru_node);
++ if (mark)
++ bpf_lru_node_set_ref(&l->lru_node);
+ return l->key + round_up(map->key_size, 8);
+ }
+
+ return NULL;
+ }
+
++static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
++{
++ return __htab_lru_map_lookup_elem(map, key, true);
++}
++
++static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
++{
++ return __htab_lru_map_lookup_elem(map, key, false);
++}
++
+ static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+ struct bpf_insn *insn_buf)
+ {
+@@ -1206,6 +1218,7 @@ const struct bpf_map_ops htab_lru_map_op
+ .map_free = htab_map_free,
+ .map_get_next_key = htab_map_get_next_key,
+ .map_lookup_elem = htab_lru_map_lookup_elem,
++ .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
+ .map_update_elem = htab_lru_map_update_elem,
+ .map_delete_elem = htab_lru_map_delete_elem,
+ .map_gen_lookup = htab_lru_map_gen_lookup,
+@@ -1237,7 +1250,6 @@ static void *htab_lru_percpu_map_lookup_
+
+ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+ {
+- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct htab_elem *l;
+ void __percpu *pptr;
+ int ret = -ENOENT;
+@@ -1253,8 +1265,9 @@ int bpf_percpu_hash_copy(struct bpf_map
+ l = __htab_map_lookup_elem(map, key);
+ if (!l)
+ goto out;
+- if (htab_is_lru(htab))
+- bpf_lru_node_set_ref(&l->lru_node);
++ /* We do not mark LRU map element here in order to not mess up
++ * eviction heuristics when user space does a map walk.
++ */
+ pptr = htab_elem_get_ptr(l, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(value + off,
--- /dev/null
+From e547ff3f803e779a3898f1f48447b29f43c54085 Mon Sep 17 00:00:00 2001
+From: Chenbo Feng <fengc@google.com>
+Date: Tue, 14 May 2019 19:42:57 -0700
+Subject: bpf: relax inode permission check for retrieving bpf program
+
+From: Chenbo Feng <fengc@google.com>
+
+commit e547ff3f803e779a3898f1f48447b29f43c54085 upstream.
+
+When the iptables module loads a bpf program from a pinned location,
+it only retrieves an already loaded program and cannot change the
+program content, so requiring write permission for it might not be
+necessary. Also, adding or removing an unrelated iptables rule might
+require flushing and reloading the xt_bpf related rules as well, which
+triggers the inode permission check. It might be better to remove the
+write permission check for the inode so we won't need to grant write
+access to all the processes that flush and restore iptables rules.
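+
+For context, a hedged sketch of the in-kernel consumer this check
+serves (not part of this patch; the pin path and program type below
+are only examples of how xt_bpf resolves a pinned program):
+
+  #include <linux/bpf.h>
+
+  /* Resolve a pinned program by path, read-only.  With this change,
+   * MAY_READ on the pinned inode is sufficient. */
+  static struct bpf_prog *get_pinned_filter(const char *path)
+  {
+          return bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+  }
+
+A typical trigger is an iptables rule along the lines of
+"iptables -m bpf --object-pinned /sys/fs/bpf/<prog> ...", which is
+re-evaluated whenever the ruleset is flushed and restored.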
+
+Signed-off-by: Chenbo Feng <fengc@google.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/inode.c
++++ b/kernel/bpf/inode.c
+@@ -518,7 +518,7 @@ out:
+ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+ {
+ struct bpf_prog *prog;
+- int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
++ int ret = inode_permission(inode, MAY_READ);
+ if (ret)
+ return ERR_PTR(ret);
+
--- /dev/null
+From 0b777eee88d712256ba8232a9429edb17c4f9ceb Mon Sep 17 00:00:00 2001
+From: John Garry <john.garry@huawei.com>
+Date: Thu, 28 Mar 2019 18:08:05 +0800
+Subject: driver core: Postpone DMA tear-down until after devres release for probe failure
+
+From: John Garry <john.garry@huawei.com>
+
+commit 0b777eee88d712256ba8232a9429edb17c4f9ceb upstream.
+
+In commit 376991db4b64 ("driver core: Postpone DMA tear-down until after
+devres release"), we changed the ordering of tearing down the device DMA
+ops and releasing all the device's resources; this was because the DMA ops
+should be maintained until we release the device's managed DMA memories.
+
+However, we have seen another crash on an arm64 system when a
+device driver probe fails:
+
+ hisi_sas_v3_hw 0000:74:02.0: Adding to iommu group 2
+ scsi host1: hisi_sas_v3_hw
+ BUG: Bad page state in process swapper/0 pfn:313f5
+ page:ffff7e0000c4fd40 count:1 mapcount:0 mapping:0000000000000000 index:0x0
+ flags: 0xfffe00000001000(reserved)
+ raw: 0fffe00000001000 ffff7e0000c4fd48 ffff7e0000c4fd48 0000000000000000
+ raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+ page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
+ bad because of flags: 0x1000(reserved)
+ Modules linked in:
+ CPU: 49 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc1-43081-g22d97fd-dirty #1433
+ Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI RC0 - V1.12.01 01/29/2019
+ Call trace:
+ dump_backtrace+0x0/0x118
+ show_stack+0x14/0x1c
+ dump_stack+0xa4/0xc8
+ bad_page+0xe4/0x13c
+ free_pages_check_bad+0x4c/0xc0
+ __free_pages_ok+0x30c/0x340
+ __free_pages+0x30/0x44
+ __dma_direct_free_pages+0x30/0x38
+ dma_direct_free+0x24/0x38
+ dma_free_attrs+0x9c/0xd8
+ dmam_release+0x20/0x28
+ release_nodes+0x17c/0x220
+ devres_release_all+0x34/0x54
+ really_probe+0xc4/0x2c8
+ driver_probe_device+0x58/0xfc
+ device_driver_attach+0x68/0x70
+ __driver_attach+0x94/0xdc
+ bus_for_each_dev+0x5c/0xb4
+ driver_attach+0x20/0x28
+ bus_add_driver+0x14c/0x200
+ driver_register+0x6c/0x124
+ __pci_register_driver+0x48/0x50
+ sas_v3_pci_driver_init+0x20/0x28
+ do_one_initcall+0x40/0x25c
+ kernel_init_freeable+0x2b8/0x3c0
+ kernel_init+0x10/0x100
+ ret_from_fork+0x10/0x18
+ Disabling lock debugging due to kernel taint
+ BUG: Bad page state in process swapper/0 pfn:313f6
+ page:ffff7e0000c4fd80 count:1 mapcount:0 mapping:0000000000000000 index:0x0
+ flags: 0xfffe00000001000(reserved)
+ raw: 0fffe00000001000 ffff7e0000c4fd88 ffff7e0000c4fd88 0000000000000000
+ raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+
+The crash occurs for the same reason.
+
+In this case, on the really_probe() failure path, we are still clearing
+the DMA ops prior to releasing the device's managed memories.
+
+This patch fixes this issue by reordering the DMA ops teardown and the
+call to devres_release_all() on the failure path.
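+
+To make the ordering constraint concrete, here is a hedged,
+hypothetical probe sketch (not from this patch): the dmam_*-managed
+buffer below is only freed from devres_release_all(), and that free
+goes back through the device's DMA ops, which is why dma_deconfigure()
+has to run after devres release on the failure path.
+
+  #include <linux/dma-mapping.h>
+  #include <linux/gfp.h>
+  #include <linux/platform_device.h>
+
+  static int demo_probe(struct platform_device *pdev)
+  {
+          dma_addr_t dma_handle;
+          void *buf;
+
+          /* Managed allocation: released via devres, using DMA ops. */
+          buf = dmam_alloc_coherent(&pdev->dev, 4096, &dma_handle,
+                                    GFP_KERNEL);
+          if (!buf)
+                  return -ENOMEM;
+
+          /* Any later failure unwinds through devres_release_all(). */
+          return -ENODEV;
+  }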
+
+Reported-by: Xiang Chen <chenxiang66@hisilicon.com>
+Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
+Signed-off-by: John Garry <john.garry@huawei.com>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+[jpg: backport to 4.19.x and earlier]
+Signed-off-by: John Garry <john.garry@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/dd.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/base/dd.c
++++ b/drivers/base/dd.c
+@@ -482,7 +482,7 @@ re_probe:
+
+ ret = dma_configure(dev);
+ if (ret)
+- goto dma_failed;
++ goto probe_failed;
+
+ if (driver_sysfs_add(dev)) {
+ printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
+@@ -537,14 +537,13 @@ re_probe:
+ goto done;
+
+ probe_failed:
+- dma_deconfigure(dev);
+-dma_failed:
+ if (dev->bus)
+ blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
+ BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
+ pinctrl_bind_failed:
+ device_links_no_driver(dev);
+ devres_release_all(dev);
++ dma_deconfigure(dev);
+ driver_sysfs_remove(dev);
+ dev->driver = NULL;
+ dev_set_drvdata(dev, NULL);
--- /dev/null
+From b2176a1dfb518d870ee073445d27055fea64dfb8 Mon Sep 17 00:00:00 2001
+From: Nigel Croxon <ncroxon@redhat.com>
+Date: Tue, 16 Apr 2019 09:50:09 -0700
+Subject: md/raid: raid5 preserve the writeback action after the parity check
+
+From: Nigel Croxon <ncroxon@redhat.com>
+
+commit b2176a1dfb518d870ee073445d27055fea64dfb8 upstream.
+
+The problem is that any 'uptodate' vs 'disks' check is not precise
+in this path. Put a "WARN_ON(!test_bit(R5_UPTODATE, &dev->flags))" on
+the device that might try to kick off writes and then skip the action.
+It is better to prevent the raid driver from taking unexpected action
+*and* keep the system alive than to kill the machine with a BUG_ON.
+
+Note: fixed warning reported by kbuild test robot <lkp@intel.com>
+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Nigel Croxon <ncroxon@redhat.com>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid5.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -4185,7 +4185,7 @@ static void handle_parity_checks6(struct
+ /* now write out any block on a failed drive,
+ * or P or Q if they were recomputed
+ */
+- BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
++ dev = NULL;
+ if (s->failed == 2) {
+ dev = &sh->dev[s->failed_num[1]];
+ s->locked++;
+@@ -4210,6 +4210,14 @@ static void handle_parity_checks6(struct
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
++ if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags),
++ "%s: disk%td not up to date\n",
++ mdname(conf->mddev),
++ dev - (struct r5dev *) &sh->dev)) {
++ clear_bit(R5_LOCKED, &dev->flags);
++ clear_bit(R5_Wantwrite, &dev->flags);
++ s->locked--;
++ }
+ clear_bit(STRIPE_DEGRADED, &sh->state);
+
+ set_bit(STRIPE_INSYNC, &sh->state);
--- /dev/null
+From a25d8c327bb41742dbd59f8c545f59f3b9c39983 Mon Sep 17 00:00:00 2001
+From: Song Liu <songliubraving@fb.com>
+Date: Tue, 16 Apr 2019 09:34:21 -0700
+Subject: Revert "Don't jump to compute_result state from check_result state"
+
+From: Song Liu <songliubraving@fb.com>
+
+commit a25d8c327bb41742dbd59f8c545f59f3b9c39983 upstream.
+
+This reverts commit 4f4fd7c5798bbdd5a03a60f6269cf1177fbd11ef.
+
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Nigel Croxon <ncroxon@redhat.com>
+Cc: Xiao Ni <xni@redhat.com>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid5.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -4221,15 +4221,26 @@ static void handle_parity_checks6(struct
+ case check_state_check_result:
+ sh->check_state = check_state_idle;
+
+- if (s->failed > 1)
+- break;
+ /* handle a successful check operation, if parity is correct
+ * we are done. Otherwise update the mismatch count and repair
+ * parity if !MD_RECOVERY_CHECK
+ */
+ if (sh->ops.zero_sum_result == 0) {
+- /* Any parity checked was correct */
+- set_bit(STRIPE_INSYNC, &sh->state);
++ /* both parities are correct */
++ if (!s->failed)
++ set_bit(STRIPE_INSYNC, &sh->state);
++ else {
++ /* in contrast to the raid5 case we can validate
++ * parity, but still have a failure to write
++ * back
++ */
++ sh->check_state = check_state_compute_result;
++ /* Returning at this point means that we may go
++ * off and bring p and/or q uptodate again so
++ * we make sure to check zero_sum_result again
++ * to verify if p or q need writeback
++ */
++ }
+ } else {
+ atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
--- /dev/null
+From 97f8ab14a01b356c19f81374ffeaacab2b59fddf Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Thu, 23 May 2019 19:48:54 +0200
+Subject: Revert "selftests/bpf: skip verifier tests for unsupported program types"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 118d38a3577f7728278f6afda8436af05a6bec7f which is
+commit 8184d44c9a577a2f1842ed6cc844bfd4a9981d8e upstream.
+
+Tommi reports that this patch breaks the build; it's not really
+needed, so let's revert it.
+
+Reported-by: Tommi Rantala <tommi.t.rantala@nokia.com>
+Cc: Stanislav Fomichev <sdf@google.com>
+Cc: Sasha Levin <sashal@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/test_verifier.c | 9 +--------
+ 1 file changed, 1 insertion(+), 8 deletions(-)
+
+--- a/tools/testing/selftests/bpf/test_verifier.c
++++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -32,7 +32,6 @@
+ #include <linux/if_ether.h>
+
+ #include <bpf/bpf.h>
+-#include <bpf/libbpf.h>
+
+ #ifdef HAVE_GENHDR
+ # include "autoconf.h"
+@@ -57,7 +56,6 @@
+
+ #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+ static bool unpriv_disabled = false;
+-static int skips;
+
+ struct bpf_test {
+ const char *descr;
+@@ -12772,11 +12770,6 @@ static void do_test_single(struct bpf_te
+ fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+- if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+- printf("SKIP (unsupported program type %d)\n", prog_type);
+- skips++;
+- goto close_fds;
+- }
+
+ expected_ret = unpriv && test->result_unpriv != UNDEF ?
+ test->result_unpriv : test->result;
+@@ -12912,7 +12905,7 @@ static void get_unpriv_disabled()
+
+ static int do_test(bool unpriv, unsigned int from, unsigned int to)
+ {
+- int i, passes = 0, errors = 0;
++ int i, passes = 0, errors = 0, skips = 0;
+
+ for (i = from; i < to; i++) {
+ struct bpf_test *test = &tests[i];
ufs-fix-braino-in-ufs_get_inode_gid-for-solaris-ufs-.patch
perf-bench-numa-add-define-for-rusage_thread-if-not-.patch
perf-x86-intel-fix-race-in-intel_pmu_disable_event.patch
+revert-don-t-jump-to-compute_result-state-from-check_result-state.patch
+md-raid-raid5-preserve-the-writeback-action-after-the-parity-check.patch
+driver-core-postpone-dma-tear-down-until-after-devres-release-for-probe-failure.patch
+revert-selftests-bpf-skip-verifier-tests-for-unsupported-program-types.patch
+bpf-relax-inode-permission-check-for-retrieving-bpf-program.patch
+bpf-add-map_lookup_elem_sys_only-for-lookups-from-syscall-side.patch
+bpf-lru-avoid-messing-with-eviction-heuristics-upon-syscall-lookup.patch