+From 34b8ab091f9ef57a2bb3c8c8359a0a03a8abf2f9 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 26 Apr 2019 21:48:22 +0200
+Subject: bpf, arm64: use more scalable stadd over ldxr / stxr loop in xadd
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 34b8ab091f9ef57a2bb3c8c8359a0a03a8abf2f9 upstream.
+
+Since the ARMv8.1 supplement introduced LSE atomic instructions back in
+2016, let's add support for STADD and use it in favor of the LDXR / STXR
+loop for the XADD mapping where available. STADD is encoded as an alias
+for LDADD with XZR as the destination register; therefore, add LDADD to
+the instruction encoder along with STADD as a special case, and use it in
+the JIT for CPUs that advertise LSE atomics in the CPUID register. If the
+immediate offset in the BPF XADD insn is 0, use the dst register directly
+instead of a temporary one.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
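+For illustration, a minimal user-space sketch of the two sequences the
+JIT can now choose between for BPF XADD: the pre-LSE exclusive load/store
+retry loop versus a single STADD. The function names, inline-asm
+constraints and .arch_extension directive are assumptions of this sketch,
+not kernel API; a real caller must only take the LSE path when the CPU
+advertises LSE atomics (e.g. HWCAP_ATOMICS), mirroring the
+cpus_have_cap() check in the JIT below.
+
+	#include <stdint.h>
+
+	/* Pre-LSE fallback: LL/SC retry loop, the A64_LDXR/A64_STXR shape. */
+	static void xadd64_llsc(uint64_t *addr, uint64_t val)
+	{
+		uint64_t tmp;
+		uint32_t fail;
+
+		asm volatile(
+		"1:	ldxr	%0, [%2]\n"		/* load-exclusive old value */
+		"	add	%0, %0, %3\n"		/* add in register */
+		"	stxr	%w1, %0, [%2]\n"	/* store-exclusive, may fail */
+		"	cbnz	%w1, 1b\n"		/* loop until it sticks */
+		: "=&r" (tmp), "=&r" (fail)
+		: "r" (addr), "r" (val)
+		: "memory");
+	}
+
+	/* ARMv8.1 LSE: one far atomic; STADD is LDADD with XZR as Rt. */
+	static void xadd64_lse(uint64_t *addr, uint64_t val)
+	{
+		asm volatile(
+		".arch_extension lse\n"
+		"	stadd	%1, [%0]\n"
+		: : "r" (addr), "r" (val)
+		: "memory");
+	}
+
+	int main(void)
+	{
+		uint64_t counter = 0;
+
+		xadd64_llsc(&counter, 5);
+		xadd64_lse(&counter, 7);	/* assumes an LSE-capable CPU */
+		return counter == 12 ? 0 : 1;
+	}
+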
+ arch/arm64/include/asm/insn.h | 8 ++++++++
+ arch/arm64/kernel/insn.c | 40 ++++++++++++++++++++++++++++++++++++++++
+ arch/arm64/net/bpf_jit.h | 4 ++++
+ arch/arm64/net/bpf_jit_comp.c | 28 +++++++++++++++++++---------
+ 4 files changed, 71 insertions(+), 9 deletions(-)
+
+--- a/arch/arm64/include/asm/insn.h
++++ b/arch/arm64/include/asm/insn.h
+@@ -271,6 +271,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0
+ __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
+ __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
+ __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
++__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0xB8200000)
+ __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
+ __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
+ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
+@@ -383,6 +384,13 @@ u32 aarch64_insn_gen_load_store_ex(enum
+ enum aarch64_insn_register state,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
++u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
++ enum aarch64_insn_register address,
++ enum aarch64_insn_register value,
++ enum aarch64_insn_size_type size);
++u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
++ enum aarch64_insn_register value,
++ enum aarch64_insn_size_type size);
+ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int imm, enum aarch64_insn_variant variant,
+--- a/arch/arm64/kernel/insn.c
++++ b/arch/arm64/kernel/insn.c
+@@ -793,6 +793,46 @@ u32 aarch64_insn_gen_load_store_ex(enum
+ state);
+ }
+
++u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
++ enum aarch64_insn_register address,
++ enum aarch64_insn_register value,
++ enum aarch64_insn_size_type size)
++{
++ u32 insn = aarch64_insn_get_ldadd_value();
++
++ switch (size) {
++ case AARCH64_INSN_SIZE_32:
++ case AARCH64_INSN_SIZE_64:
++ break;
++ default:
++ pr_err("%s: unimplemented size encoding %d\n", __func__, size);
++ return AARCH64_BREAK_FAULT;
++ }
++
++ insn = aarch64_insn_encode_ldst_size(size, insn);
++
++ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
++ result);
++
++ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
++ address);
++
++ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
++ value);
++}
++
++u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
++ enum aarch64_insn_register value,
++ enum aarch64_insn_size_type size)
++{
++ /*
++ * STADD is simply encoded as an alias for LDADD with XZR as
++ * the destination register.
++ */
++ return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
++ value, size);
++}
++
+ static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
+ enum aarch64_insn_prfm_target target,
+ enum aarch64_insn_prfm_policy policy,
+--- a/arch/arm64/net/bpf_jit.h
++++ b/arch/arm64/net/bpf_jit.h
+@@ -100,6 +100,10 @@
+ #define A64_STXR(sf, Rt, Rn, Rs) \
+ A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
+
++/* LSE atomics */
++#define A64_STADD(sf, Rn, Rs) \
++ aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
++
+ /* Add/subtract (immediate) */
+ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
+ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -330,7 +330,7 @@ static int build_insn(const struct bpf_i
+ const int i = insn - ctx->prog->insnsi;
+ const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+ const bool isdw = BPF_SIZE(code) == BPF_DW;
+- u8 jmp_cond;
++ u8 jmp_cond, reg;
+ s32 jmp_offset;
+
+ #define check_imm(bits, imm) do { \
+@@ -706,18 +706,28 @@ emit_cond_jmp:
+ break;
+ }
+ break;
++
+ /* STX XADD: lock *(u32 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* STX XADD: lock *(u64 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_DW:
+- emit_a64_mov_i(1, tmp, off, ctx);
+- emit(A64_ADD(1, tmp, tmp, dst), ctx);
+- emit(A64_LDXR(isdw, tmp2, tmp), ctx);
+- emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+- emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
+- jmp_offset = -3;
+- check_imm19(jmp_offset);
+- emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
++ if (!off) {
++ reg = dst;
++ } else {
++ emit_a64_mov_i(1, tmp, off, ctx);
++ emit(A64_ADD(1, tmp, tmp, dst), ctx);
++ reg = tmp;
++ }
++ if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
++ emit(A64_STADD(isdw, reg, src), ctx);
++ } else {
++ emit(A64_LDXR(isdw, tmp2, reg), ctx);
++ emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
++ emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
++ jmp_offset = -3;
++ check_imm19(jmp_offset);
++ emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
++ }
+ break;
+
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+From 257a525fe2e49584842c504a92c27097407f778f Mon Sep 17 00:00:00 2001
+From: Martin KaFai Lau <kafai@fb.com>
+Date: Fri, 31 May 2019 15:29:13 -0700
+Subject: bpf: udp: Avoid calling reuseport's bpf_prog from udp_gro
+
+From: Martin KaFai Lau <kafai@fb.com>
+
+commit 257a525fe2e49584842c504a92c27097407f778f upstream.
+
+When commit a6024562ffd7 ("udp: Add GRO functions to UDP socket")
+added udp[46]_lib_lookup_skb to the udp_gro code path, it broke
+reuseport_select_sock()'s assumption that skb->data points to the
+transport header.
+
+This patch follows the earlier __udp6_lib_err() fix by passing a
+NULL skb to avoid calling the reuseport bpf_prog.
+
+Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket")
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: Martin KaFai Lau <kafai@fb.com>
+Acked-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
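+For context, a simplified user-space sketch of why a NULL skb avoids the
+bpf_prog: selection only consults the program when both a program and an
+skb are present, otherwise it falls back to hash-based selection. The
+helper names and fallback below are assumptions of this sketch, not the
+exact code in net/core/sock_reuseport.c.
+
+	#include <stdint.h>
+
+	typedef uint32_t u32;
+
+	struct sk_buff;			/* opaque in this sketch */
+	struct bpf_prog;		/* opaque in this sketch */
+	struct sock { int id; };
+
+	/* Hypothetical stand-in for running the attached reuseport prog;
+	 * this is where skb->data would be read as a transport header. */
+	static struct sock *run_bpf_selection(struct bpf_prog *prog,
+					      struct sk_buff *skb,
+					      struct sock **socks, u32 nsocks)
+	{
+		return socks[0];
+	}
+
+	/* Map a hash into [0, n) without dividing, reciprocal_scale()-style. */
+	static u32 scale(u32 hash, u32 n)
+	{
+		return (u32)(((uint64_t)hash * n) >> 32);
+	}
+
+	/* A NULL skb means "hash selection only, never run the bpf_prog",
+	 * which is what udp[46]_lib_lookup_skb arranges after this patch. */
+	static struct sock *select_sock(struct bpf_prog *prog,
+					struct sk_buff *skb,
+					struct sock **socks, u32 nsocks,
+					u32 hash)
+	{
+		if (prog && skb)
+			return run_bpf_selection(prog, skb, socks, nsocks);
+		return socks[scale(hash, nsocks)];
+	}
+
+	int main(void)
+	{
+		struct sock a = { 1 }, b = { 2 };
+		struct sock *socks[] = { &a, &b };
+
+		/* No skb available here, so selection is by hash alone. */
+		return select_sock(NULL, NULL, socks, 2, 0xdeadbeef)->id;
+	}
+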
+ net/ipv4/udp.c | 6 +++++-
+ net/ipv6/udp.c | 2 +-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -563,7 +563,11 @@ static inline struct sock *__udp4_lib_lo
+ struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport)
+ {
+- return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table);
++ const struct iphdr *iph = ip_hdr(skb);
++
++ return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
++ iph->daddr, dport, inet_iif(skb),
++ inet_sdif(skb), &udp_table, NULL);
+ }
+ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -308,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct
+
+ return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+- inet6_sdif(skb), &udp_table, skb);
++ inet6_sdif(skb), &udp_table, NULL);
+ }
+ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
+
+From 4ac30c4b3659efac031818c418beb51e630d512d Mon Sep 17 00:00:00 2001
+From: Martin KaFai Lau <kafai@fb.com>
+Date: Fri, 31 May 2019 15:29:11 -0700
+Subject: bpf: udp: ipv6: Avoid running reuseport's bpf_prog from __udp6_lib_err
+
+From: Martin KaFai Lau <kafai@fb.com>
+
+commit 4ac30c4b3659efac031818c418beb51e630d512d upstream.
+
+__udp6_lib_err() may be called when handling an icmpv6 message, for
+example the icmpv6 toobig (type=2). It then calls __udp6_lib_lookup(),
+which may call reuseport_select_sock(), which in turn will call into a
+bpf_prog (if one is attached).
+
+reuseport_select_sock() expects skb->data to point to the transport
+header (the udphdr in this case); run_bpf_filter(), for example, pulls
+the transport header.
+
+However, in the __udp6_lib_err() path, skb->data points to the ipv6hdr
+instead of the udphdr.
+
+One option is to pull and push the ipv6hdr in __udp6_lib_err(). Instead
+of doing this, this patch follows what the original
+commit 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
+did for IPv4 and passes a NULL skb pointer to
+reuseport_select_sock().
+
+Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF")
+Cc: Craig Gallek <kraig@google.com>
+Signed-off-by: Martin KaFai Lau <kafai@fb.com>
+Acked-by: Song Liu <songliubraving@fb.com>
+Acked-by: Craig Gallek <kraig@google.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
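+To make the rejected alternative concrete, pulling and pushing the
+ipv6hdr around the lookup would look roughly like the fragment below.
+This is a kernel-context sketch with error handling elided, shown only
+for the trade-off; the patch deliberately avoids it in favor of the NULL
+skb:
+
+	int offset = skb_transport_offset(skb);
+
+	__skb_pull(skb, offset);	/* skb->data now at the udphdr */
+	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
+			       inet6_iif(skb), 0, udptable, skb);
+	__skb_push(skb, offset);	/* restore skb->data to the ipv6hdr */
+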
+ net/ipv6/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -506,7 +506,7 @@ void __udp6_lib_err(struct sk_buff *skb,
+ struct net *net = dev_net(skb->dev);
+
+ sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
+- inet6_iif(skb), 0, udptable, skb);
++ inet6_iif(skb), 0, udptable, NULL);
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
+From 427503519739e779c0db8afe876c1b33f3ac60ae Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Wed, 10 Apr 2019 11:51:54 +0100
+Subject: futex: Update comments and docs about return values of arch futex code
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 427503519739e779c0db8afe876c1b33f3ac60ae upstream.
+
+The architecture implementations of 'arch_futex_atomic_op_inuser()' and
+'futex_atomic_cmpxchg_inatomic()' are permitted to return only -EFAULT,
+-EAGAIN or -ENOSYS in the case of failure.
+
+Update the comments in the asm-generic/ implementation and also a stray
+reference in the robust futex documentation.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
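+As a quick illustration of the tightened contract, a caller can drive
+these helpers by exactly the three error codes; the retry-on-EAGAIN
+policy in this sketch is an assumption for illustration, not
+kernel/futex.c code:
+
+	static int cmpxchg_user_futex(u32 *uval, u32 __user *uaddr,
+				      u32 oldval, u32 newval)
+	{
+		int ret;
+
+		do {
+			ret = futex_atomic_cmpxchg_inatomic(uval, uaddr,
+							    oldval, newval);
+		} while (ret == -EAGAIN);	/* transient contention */
+
+		/* Remaining cases: 0 on success (*uval holds the prior
+		 * value), -EFAULT if the user access faulted (fault the
+		 * page in, then retry), or -ENOSYS if the architecture
+		 * does not implement the operation. */
+		return ret;
+	}
+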
+ Documentation/robust-futexes.txt | 3 +--
+ include/asm-generic/futex.h | 8 ++++++--
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/Documentation/robust-futexes.txt
++++ b/Documentation/robust-futexes.txt
+@@ -218,5 +218,4 @@ All other architectures should build jus
+ the new syscalls yet.
+
+ Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
+-inline function before writing up the syscalls (that function returns
+--ENOSYS right now).
++inline function before writing up the syscalls.
+--- a/include/asm-generic/futex.h
++++ b/include/asm-generic/futex.h
+@@ -23,7 +23,9 @@
+ *
+ * Return:
+ * 0 - On success
+- * <0 - On error
++ * -EFAULT - User access resulted in a page fault
++ * -EAGAIN - Atomic operation was unable to complete due to contention
++ * -ENOSYS - Operation not supported
+ */
+ static inline int
+ arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+@@ -85,7 +87,9 @@ out_pagefault_enable:
+ *
+ * Return:
+ * 0 - On success
+- * <0 - On error
++ * -EFAULT - User access resulted in a page fault
++ * -EAGAIN - Atomic operation was unable to complete due to contention
++ * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
+ */
+ static inline int
+ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,