From: Greg Kroah-Hartman Date: Wed, 21 Sep 2016 08:05:37 +0000 (+0200) Subject: 4.7-stable patches X-Git-Tag: v4.4.22~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4dc51618f53947e4ea28044a6ef63c9f860b45a3;p=thirdparty%2Fkernel%2Fstable-queue.git 4.7-stable patches added patches: af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch bnxt_en-fix-tx-push-operation-on-arm64.patch bonding-fix-bonding-crash.patch bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch bridge-re-introduce-fix-parsing-of-mldv2-reports.patch ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch ipv6-release-dst-in-ping_v6_sendmsg.patch kcm-fix-a-socket-double-free.patch net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch net-irda-handle-iriap_register_lsap-allocation-failure.patch net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch net-mlx5-fix-pci-error-recovery-flow.patch net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch net-sched-fix-encoding-to-use-real-length.patch net-sctp-always-initialise-sctp_ht_iter-start_fail.patch net-smc91x-fix-smc-accesses.patch revert-af_unix-fix-splice-bind-deadlock.patch revert-phy-irq-cannot-be-shared.patch sctp-fix-overrun-in-sctp_diag_dump_one.patch tcp-cwnd-does-not-increase-in-tcp-yeah.patch tcp-fastopen-avoid-negative-sk_forward_alloc.patch tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch tipc-fix-null-pointer-dereference-in-shutdown.patch tun-fix-transmit-timestamp-support.patch udp-fix-poll-issue-with-zero-sized-packets.patch vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch --- diff --git a/queue-4.7/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch b/queue-4.7/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch new file mode 100644 index 00000000000..b7a820f3e21 --- /dev/null +++ b/queue-4.7/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch @@ -0,0 +1,232 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Linus Torvalds +Date: Thu, 1 Sep 2016 14:43:53 -0700 +Subject: af_unix: split 'u->readlock' into two: 'iolock' and 'bindlock' + +From: Linus Torvalds + + +[ Upstream commit 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc ] + +Right now we use the 'readlock' both for protecting some of the af_unix +IO path and for making the bind be single-threaded. + +The two are independent, but using the same lock makes for a nasty +deadlock due to ordering with regards to filesystem locking. The bind +locking would want to nest outside the VSF pathname locking, but the IO +locking wants to nest inside some of those same locks. + +We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix +splice-bind deadlock") which moved the readlock inside the vfs locks, +but that caused problems with overlayfs that will then call back into +filesystem routines that take the lock in the wrong order anyway. + +Splitting the locks means that we can go back to having the bind lock be +the outermost lock, and we don't have any deadlocks with lock ordering. + +Acked-by: Rainer Weikusat +Acked-by: Al Viro +Signed-off-by: Linus Torvalds +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/af_unix.h | 2 +- + net/unix/af_unix.c | 45 +++++++++++++++++++++++---------------------- + 2 files changed, 24 insertions(+), 23 deletions(-) + +--- a/include/net/af_unix.h ++++ b/include/net/af_unix.h +@@ -52,7 +52,7 @@ struct unix_sock { + struct sock sk; + struct unix_address *addr; + struct path path; +- struct mutex readlock; ++ struct mutex iolock, bindlock; + struct sock *peer; + struct list_head link; + atomic_long_t inflight; +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock + { + struct unix_sock *u = unix_sk(sk); + +- if (mutex_lock_interruptible(&u->readlock)) ++ if (mutex_lock_interruptible(&u->iolock)) + return -EINTR; + + sk->sk_peek_off = val; +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->iolock); + + return 0; + } +@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct + spin_lock_init(&u->lock); + atomic_long_set(&u->inflight, 0); + INIT_LIST_HEAD(&u->link); +- mutex_init(&u->readlock); /* single task reading lock */ ++ mutex_init(&u->iolock); /* single task reading lock */ ++ mutex_init(&u->bindlock); /* single task binding lock */ + init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); + unix_insert_socket(unix_sockets_unbound(sk), sk); +@@ -847,7 +848,7 @@ static int unix_autobind(struct socket * + int err; + unsigned int retries = 0; + +- err = mutex_lock_interruptible(&u->readlock); ++ err = mutex_lock_interruptible(&u->bindlock); + if (err) + return err; + +@@ -894,7 +895,7 @@ retry: + spin_unlock(&unix_table_lock); + err = 0; + +-out: mutex_unlock(&u->readlock); ++out: mutex_unlock(&u->bindlock); + return err; + } + +@@ -1008,7 +1009,7 @@ static int unix_bind(struct socket *sock + goto out; + addr_len = err; + +- err = mutex_lock_interruptible(&u->readlock); ++ err = mutex_lock_interruptible(&u->bindlock); + if (err) + goto out; + +@@ -1062,7 +1063,7 @@ static int unix_bind(struct socket *sock + out_unlock: + spin_unlock(&unix_table_lock); + out_up: +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->bindlock); + out: + return err; + } +@@ -1954,17 +1955,17 @@ static ssize_t unix_stream_sendpage(stru + if (false) { + alloc_skb: + unix_state_unlock(other); +- mutex_unlock(&unix_sk(other)->readlock); ++ mutex_unlock(&unix_sk(other)->iolock); + newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, + &err, 0); + if (!newskb) + goto err; + } + +- /* we must acquire readlock as we modify already present ++ /* we must acquire iolock as we modify already present + * skbs in the sk_receive_queue and mess with skb->len + */ +- err = mutex_lock_interruptible(&unix_sk(other)->readlock); ++ err = mutex_lock_interruptible(&unix_sk(other)->iolock); + if (err) { + err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; + goto err; +@@ -2031,7 +2032,7 @@ alloc_skb: + } + + unix_state_unlock(other); +- mutex_unlock(&unix_sk(other)->readlock); ++ mutex_unlock(&unix_sk(other)->iolock); + + other->sk_data_ready(other); + scm_destroy(&scm); +@@ -2040,7 +2041,7 @@ alloc_skb: + err_state_unlock: + unix_state_unlock(other); + err_unlock: +- mutex_unlock(&unix_sk(other)->readlock); ++ mutex_unlock(&unix_sk(other)->iolock); + err: + kfree_skb(newskb); + if (send_sigpipe && !(flags & MSG_NOSIGNAL)) +@@ -2108,7 +2109,7 @@ static int unix_dgram_recvmsg(struct soc + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { +- mutex_lock(&u->readlock); ++ mutex_lock(&u->iolock); + + skip = sk_peek_offset(sk, flags); + skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, +@@ -2116,14 +2117,14 @@ static int unix_dgram_recvmsg(struct soc + if (skb) + break; + +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->iolock); + + if (err != -EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, &err, &timeo, last)); + +- if (!skb) { /* implies readlock unlocked */ ++ if (!skb) { /* implies iolock unlocked */ + unix_state_lock(sk); + /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ + if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && +@@ -2188,7 +2189,7 @@ static int unix_dgram_recvmsg(struct soc + + out_free: + skb_free_datagram(sk, skb); +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->iolock); + out: + return err; + } +@@ -2283,7 +2284,7 @@ static int unix_stream_read_generic(stru + /* Lock the socket to prevent queue disordering + * while sleeps in memcpy_tomsg + */ +- mutex_lock(&u->readlock); ++ mutex_lock(&u->iolock); + + if (flags & MSG_PEEK) + skip = sk_peek_offset(sk, flags); +@@ -2325,7 +2326,7 @@ again: + break; + } + +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->iolock); + + timeo = unix_stream_data_wait(sk, timeo, last, + last_len); +@@ -2336,7 +2337,7 @@ again: + goto out; + } + +- mutex_lock(&u->readlock); ++ mutex_lock(&u->iolock); + goto redo; + unlock: + unix_state_unlock(sk); +@@ -2439,7 +2440,7 @@ unlock: + } + } while (size); + +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->iolock); + if (state->msg) + scm_recv(sock, state->msg, &scm, flags); + else +@@ -2480,9 +2481,9 @@ static ssize_t skb_unix_socket_splice(st + int ret; + struct unix_sock *u = unix_sk(sk); + +- mutex_unlock(&u->readlock); ++ mutex_unlock(&u->iolock); + ret = splice_to_pipe(pipe, spd); +- mutex_lock(&u->readlock); ++ mutex_lock(&u->iolock); + + return ret; + } diff --git a/queue-4.7/bnxt_en-fix-tx-push-operation-on-arm64.patch b/queue-4.7/bnxt_en-fix-tx-push-operation-on-arm64.patch new file mode 100644 index 00000000000..c98d8f1a637 --- /dev/null +++ b/queue-4.7/bnxt_en-fix-tx-push-operation-on-arm64.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Michael Chan +Date: Mon, 5 Sep 2016 01:57:35 -0400 +Subject: bnxt_en: Fix TX push operation on ARM64. + +From: Michael Chan + + +[ Upstream commit 9d13744bb75078175ab49408f2abb980e4dbccc9 ] + +There is a code path where we are calling __iowrite64_copy() on +an address that is not 64-bit aligned. This causes an exception on +some architectures such as arm64. Fix that code path by using +__iowrite32_copy(). + +Reported-by: JD Zheng +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -293,8 +293,8 @@ static netdev_tx_t bnxt_start_xmit(struc + push_len = (length + sizeof(*tx_push) + 7) / 8; + if (push_len > 16) { + __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16); +- __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1, +- push_len - 16); ++ __iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1, ++ (push_len - 16) << 1); + } else { + __iowrite64_copy(txr->tx_doorbell, tx_push_buf, + push_len); diff --git a/queue-4.7/bonding-fix-bonding-crash.patch b/queue-4.7/bonding-fix-bonding-crash.patch new file mode 100644 index 00000000000..2ad2982a379 --- /dev/null +++ b/queue-4.7/bonding-fix-bonding-crash.patch @@ -0,0 +1,93 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Mahesh Bandewar +Date: Thu, 1 Sep 2016 22:18:34 -0700 +Subject: bonding: Fix bonding crash + +From: Mahesh Bandewar + + +[ Upstream commit 24b27fc4cdf9e10c5e79e5923b6b7c2c5c95096c ] + +Following few steps will crash kernel - + + (a) Create bonding master + > modprobe bonding miimon=50 + (b) Create macvlan bridge on eth2 + > ip link add link eth2 dev mvl0 address aa:0:0:0:0:01 \ + type macvlan + (c) Now try adding eth2 into the bond + > echo +eth2 > /sys/class/net/bond0/bonding/slaves + + +Bonding does lots of things before checking if the device enslaved is +busy or not. + +In this case when the notifier call-chain sends notifications, the +bond_netdev_event() assumes that the rx_handler /rx_handler_data is +registered while the bond_enslave() hasn't progressed far enough to +register rx_handler for the new slave. + +This patch adds a rx_handler check that can be performed right at the +beginning of the enslave code to avoid getting into this situation. + +Signed-off-by: Mahesh Bandewar +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 7 ++++--- + include/linux/netdevice.h | 1 + + net/core/dev.c | 16 ++++++++++++++++ + 3 files changed, 21 insertions(+), 3 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1341,9 +1341,10 @@ int bond_enslave(struct net_device *bond + slave_dev->name); + } + +- /* already enslaved */ +- if (slave_dev->flags & IFF_SLAVE) { +- netdev_dbg(bond_dev, "Error: Device was already enslaved\n"); ++ /* already in-use? */ ++ if (netdev_is_rx_handler_busy(slave_dev)) { ++ netdev_err(bond_dev, ++ "Error: Device is in use and cannot be enslaved\n"); + return -EBUSY; + } + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3225,6 +3225,7 @@ static inline void napi_free_frags(struc + napi->skb = NULL; + } + ++bool netdev_is_rx_handler_busy(struct net_device *dev); + int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler, + void *rx_handler_data); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3979,6 +3979,22 @@ sch_handle_ingress(struct sk_buff *skb, + } + + /** ++ * netdev_is_rx_handler_busy - check if receive handler is registered ++ * @dev: device to check ++ * ++ * Check if a receive handler is already registered for a given device. ++ * Return true if there one. ++ * ++ * The caller must hold the rtnl_mutex. 
++ */ ++bool netdev_is_rx_handler_busy(struct net_device *dev) ++{ ++ ASSERT_RTNL(); ++ return dev && rtnl_dereference(dev->rx_handler); ++} ++EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy); ++ ++/** + * netdev_rx_handler_register - register receive handler + * @dev: device to register a handler for + * @rx_handler: receive handler to register diff --git a/queue-4.7/bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch b/queue-4.7/bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch new file mode 100644 index 00000000000..391d5718a47 --- /dev/null +++ b/queue-4.7/bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch @@ -0,0 +1,74 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Jakub Kicinski +Date: Tue, 2 Aug 2016 16:12:14 +0100 +Subject: bpf: fix method of PTR_TO_PACKET reg id generation + +From: Jakub Kicinski + + +[ Upstream commit 1f415a74b0ca64b5bfacbb12d71ed2ec050a8cfb ] + +Using per-register incrementing ID can lead to +find_good_pkt_pointers() confusing registers which +have completely different values. Consider example: + +0: (bf) r6 = r1 +1: (61) r8 = *(u32 *)(r6 +76) +2: (61) r0 = *(u32 *)(r6 +80) +3: (bf) r7 = r8 +4: (07) r8 += 32 +5: (2d) if r8 > r0 goto pc+9 + R0=pkt_end R1=ctx R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=0,off=32,r=32) R10=fp +6: (bf) r8 = r7 +7: (bf) r9 = r7 +8: (71) r1 = *(u8 *)(r7 +0) +9: (0f) r8 += r1 +10: (71) r1 = *(u8 *)(r7 +1) +11: (0f) r9 += r1 +12: (07) r8 += 32 +13: (2d) if r8 > r0 goto pc+1 + R0=pkt_end R1=inv56 R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=1,off=32,r=32) R9=pkt(id=1,off=0,r=32) R10=fp +14: (71) r1 = *(u8 *)(r9 +16) +15: (b7) r7 = 0 +16: (bf) r0 = r7 +17: (95) exit + +We need to get a UNKNOWN_VALUE with imm to force id +generation so lines 0-5 make r7 a valid packet pointer. +We then read two different bytes from the packet and +add them to copies of the constructed packet pointer. +r8 (line 9) and r9 (line 11) will get the same id of 1, +independently. When either of them is validated (line +13) - find_good_pkt_pointers() will also mark the other +as safe. This leads to access on line 14 being mistakenly +considered safe. + +Fixes: 969bf05eb3ce ("bpf: direct packet access") +Signed-off-by: Jakub Kicinski +Acked-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -194,6 +194,7 @@ struct verifier_env { + struct verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ ++ u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + }; + +@@ -1277,7 +1278,7 @@ add_imm: + /* dst_reg stays as pkt_ptr type and since some positive + * integer value was added to the pointer, increment its 'id' + */ +- dst_reg->id++; ++ dst_reg->id = ++env->id_gen; + + /* something was added to pkt_ptr, set range and off to zero */ + dst_reg->off = 0; diff --git a/queue-4.7/bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch b/queue-4.7/bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch new file mode 100644 index 00000000000..e79664ebe74 --- /dev/null +++ b/queue-4.7/bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch @@ -0,0 +1,207 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Daniel Borkmann +Date: Thu, 11 Aug 2016 21:38:37 +0200 +Subject: bpf: fix write helpers with regards to non-linear parts + +From: Daniel Borkmann + + +[ Upstream commit 0ed661d5a48fa6df0b50ae64d27fe759a3ce42cf ] + +Fix the bpf_try_make_writable() helper and all call sites we have in BPF, +it's currently defect with regards to skbs when the write_len spans into +non-linear parts, no matter if cloned or not. + +There are multiple issues at once. First, using skb_store_bits() is not +correct since even if we have a cloned skb, page frags can still be shared. +To really make them private, we need to pull them in via __pskb_pull_tail() +first, which also gets us a private head via pskb_expand_head() implicitly. + +This is for helpers like bpf_skb_store_bytes(), bpf_l3_csum_replace(), +bpf_l4_csum_replace(). Really, the only thing reasonable and working here +is to call skb_ensure_writable() before any write operation. Meaning, via +pskb_may_pull() it makes sure that parts we want to access are pulled in and +if not does so plus unclones the skb implicitly. If our write_len still fits +the headlen and we're cloned and our header of the clone is not writable, +then we need to make a private copy via pskb_expand_head(). skb_store_bits() +is a bit misleading and only safe to store into non-linear data in different +contexts such as 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can +corrupt kernel memory"). + +For above BPF helper functions, it means after fixed bpf_try_make_writable(), +we've pulled in enough, so that we operate always based on skb->data. Thus, +the call to skb_header_pointer() and skb_store_bits() becomes superfluous. +In bpf_skb_store_bytes(), the len check is unnecessary too since it can +only pass in maximum of BPF stack size, so adding offset is guaranteed to +never overflow. Also bpf_l3/4_csum_replace() helpers must test for proper +offset alignment since they use __sum16 pointer for writing resulting csum. + +The remaining helpers that change skb data not discussed here yet are +bpf_skb_vlan_push(), bpf_skb_vlan_pop() and bpf_skb_change_proto(). The +vlan helpers internally call either skb_ensure_writable() (pop case) and +skb_cow_head() (push case, for head expansion), respectively. Similarly, +bpf_skb_proto_xlat() takes care to not mangle page frags. 
+ +Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") +Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers") +Fixes: 3697649ff29e ("bpf: try harder on clones when writing into skb") +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/filter.c | 70 +++++++++++++----------------------------------------- + 1 file changed, 18 insertions(+), 52 deletions(-) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1353,54 +1353,33 @@ static inline int bpf_try_make_writable( + { + int err; + +- if (!skb_cloned(skb)) +- return 0; +- if (skb_clone_writable(skb, write_len)) +- return 0; +- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +- if (!err) +- bpf_compute_data_end(skb); ++ err = skb_ensure_writable(skb, write_len); ++ bpf_compute_data_end(skb); ++ + return err; + } + + static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) + { +- struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); + struct sk_buff *skb = (struct sk_buff *) (long) r1; +- int offset = (int) r2; ++ unsigned int offset = (unsigned int) r2; + void *from = (void *) (long) r3; + unsigned int len = (unsigned int) r4; + void *ptr; + + if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) + return -EINVAL; +- +- /* bpf verifier guarantees that: +- * 'from' pointer points to bpf program stack +- * 'len' bytes of it were initialized +- * 'len' > 0 +- * 'skb' is a valid pointer to 'struct sk_buff' +- * +- * so check for invalid 'offset' and too large 'len' +- */ +- if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) ++ if (unlikely(offset > 0xffff)) + return -EFAULT; + if (unlikely(bpf_try_make_writable(skb, offset + len))) + return -EFAULT; + +- ptr = skb_header_pointer(skb, offset, len, sp->buff); +- if (unlikely(!ptr)) +- return -EFAULT; +- ++ ptr = skb->data + offset; + if (flags & BPF_F_RECOMPUTE_CSUM) + skb_postpull_rcsum(skb, ptr, len); + + memcpy(ptr, from, len); + +- if (ptr == sp->buff) +- /* skb_store_bits cannot return -EFAULT here */ +- skb_store_bits(skb, offset, ptr, len); +- + if (flags & BPF_F_RECOMPUTE_CSUM) + skb_postpush_rcsum(skb, ptr, len); + if (flags & BPF_F_INVALIDATE_HASH) +@@ -1423,12 +1402,12 @@ static const struct bpf_func_proto bpf_s + static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) + { + const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; +- int offset = (int) r2; ++ unsigned int offset = (unsigned int) r2; + void *to = (void *)(unsigned long) r3; + unsigned int len = (unsigned int) r4; + void *ptr; + +- if (unlikely((u32) offset > 0xffff)) ++ if (unlikely(offset > 0xffff)) + goto err_clear; + + ptr = skb_header_pointer(skb, offset, len, to); +@@ -1456,20 +1435,17 @@ static const struct bpf_func_proto bpf_s + static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) + { + struct sk_buff *skb = (struct sk_buff *) (long) r1; +- int offset = (int) r2; +- __sum16 sum, *ptr; ++ unsigned int offset = (unsigned int) r2; ++ __sum16 *ptr; + + if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) + return -EINVAL; +- if (unlikely((u32) offset > 0xffff)) ++ if (unlikely(offset > 0xffff || offset & 1)) + return -EFAULT; +- if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) +- return -EFAULT; +- +- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); +- if (unlikely(!ptr)) ++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) + return -EFAULT; + ++ ptr = (__sum16 *)(skb->data + offset); + 
switch (flags & BPF_F_HDR_FIELD_MASK) { + case 0: + if (unlikely(from != 0)) +@@ -1487,10 +1463,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u + return -EINVAL; + } + +- if (ptr == &sum) +- /* skb_store_bits guaranteed to not return -EFAULT here */ +- skb_store_bits(skb, offset, ptr, sizeof(sum)); +- + return 0; + } + +@@ -1510,20 +1482,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u + struct sk_buff *skb = (struct sk_buff *) (long) r1; + bool is_pseudo = flags & BPF_F_PSEUDO_HDR; + bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; +- int offset = (int) r2; +- __sum16 sum, *ptr; ++ unsigned int offset = (unsigned int) r2; ++ __sum16 *ptr; + + if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | + BPF_F_HDR_FIELD_MASK))) + return -EINVAL; +- if (unlikely((u32) offset > 0xffff)) ++ if (unlikely(offset > 0xffff || offset & 1)) + return -EFAULT; +- if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) ++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) + return -EFAULT; + +- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); +- if (unlikely(!ptr)) +- return -EFAULT; ++ ptr = (__sum16 *)(skb->data + offset); + if (is_mmzero && !*ptr) + return 0; + +@@ -1546,10 +1516,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u + + if (is_mmzero && !*ptr) + *ptr = CSUM_MANGLED_0; +- if (ptr == &sum) +- /* skb_store_bits guaranteed to not return -EFAULT here */ +- skb_store_bits(skb, offset, ptr, sizeof(sum)); +- + return 0; + } + diff --git a/queue-4.7/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch b/queue-4.7/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch new file mode 100644 index 00000000000..8a4edc5a921 --- /dev/null +++ b/queue-4.7/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch @@ -0,0 +1,38 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Davide Caratti +Date: Wed, 31 Aug 2016 14:16:44 +0200 +Subject: bridge: re-introduce 'fix parsing of MLDv2 reports' + +From: Davide Caratti + + +[ Upstream commit 9264251ee2a55bce8fb93826b3f581fb9eb7e2c2 ] + +commit bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with +INCLUDE and no sources as a leave") seems to have accidentally reverted +commit 47cc84ce0c2f ("bridge: fix parsing of MLDv2 reports"). This +commit brings back a change to br_ip6_multicast_mld2_report() where +parsing of MLDv2 reports stops when the first group is successfully +added to the MDB cache. + +Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") +Signed-off-by: Davide Caratti +Acked-by: Nikolay Aleksandrov +Acked-by: Thadeu Lima de Souza Cascardo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1121,7 +1121,7 @@ static int br_ip6_multicast_mld2_report( + } else { + err = br_ip6_multicast_add_group(br, port, + &grec->grec_mca, vid); +- if (!err) ++ if (err) + break; + } + } diff --git a/queue-4.7/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch b/queue-4.7/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch new file mode 100644 index 00000000000..8c919f89006 --- /dev/null +++ b/queue-4.7/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch @@ -0,0 +1,101 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: David Forster +Date: Wed, 3 Aug 2016 15:13:01 +0100 +Subject: ipv4: panic in leaf_walk_rcu due to stale node pointer + +From: David Forster + + +[ Upstream commit 94d9f1c5906b20053efe375b6d66610bca4b8b64 ] + +Panic occurs when issuing "cat /proc/net/route" whilst +populating FIB with > 1M routes. + +Use of cached node pointer in fib_route_get_idx is unsafe. + + BUG: unable to handle kernel paging request at ffffc90001630024 + IP: [] leaf_walk_rcu+0x10/0xe0 + PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0 + Oops: 0000 [#1] SMP + Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac + snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti + acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd +tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod + CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 + task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000 + RIP: 0010:[] [] leaf_walk_rcu+0x10/0xe0 + RSP: 0018:ffff88011a05fda0 EFLAGS: 00010202 + RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20 + RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950 + RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000 + R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950 + R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480 + FS: 00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0 + Stack: + ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00 + ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000 + ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80 + Call Trace: + [] ? fib_route_seq_start+0x93/0xc0 + [] ? seq_read+0x149/0x380 + [] ? fsnotify+0x3b4/0x500 + [] ? process_echoes+0x70/0x70 + [] ? proc_reg_read+0x47/0x70 + [] ? __vfs_read+0x23/0xd0 + [] ? rw_verify_area+0x52/0xf0 + [] ? vfs_read+0x81/0x120 + [] ? SyS_read+0x42/0xa0 + [] ? entry_SYSCALL_64_fastpath+0x16/0x75 + Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 +a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89 + RIP [] leaf_walk_rcu+0x10/0xe0 + RSP + CR2: ffffc90001630024 + +Signed-off-by: Dave Forster +Acked-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2452,9 +2452,7 @@ struct fib_route_iter { + static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, + loff_t pos) + { +- struct fib_table *tb = iter->main_tb; + struct key_vector *l, **tp = &iter->tnode; +- struct trie *t; + t_key key; + + /* use cache location of next-to-find key */ +@@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_ + pos -= iter->pos; + key = iter->key; + } else { +- t = (struct trie *)tb->tb_data; +- iter->tnode = t->kv; + iter->pos = 0; + key = 0; + } +@@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct + return NULL; + + iter->main_tb = tb; ++ t = (struct trie *)tb->tb_data; ++ iter->tnode = t->kv; + + if (*pos != 0) + return fib_route_get_idx(iter, *pos); + +- t = (struct trie *)tb->tb_data; +- iter->tnode = t->kv; + iter->pos = 0; + iter->key = 0; + diff --git a/queue-4.7/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch b/queue-4.7/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch new file mode 100644 index 00000000000..54fde3cd1ec --- /dev/null +++ b/queue-4.7/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch @@ -0,0 +1,60 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Wei Yongjun +Date: Mon, 5 Sep 2016 16:06:31 +0800 +Subject: ipv6: addrconf: fix dev refcont leak when DAD failed + +From: Wei Yongjun + + +[ Upstream commit 751eb6b6042a596b0080967c1a529a9fe98dac1d ] + +In general, when DAD detected IPv6 duplicate address, ifp->state +will be set to INET6_IFADDR_STATE_ERRDAD and DAD is stopped by a +delayed work, the call tree should be like this: + +ndisc_recv_ns + -> addrconf_dad_failure <- missing ifp put + -> addrconf_mod_dad_work + -> schedule addrconf_dad_work() + -> addrconf_dad_stop() <- missing ifp hold before call it + +addrconf_dad_failure() called with ifp refcont holding but not put. +addrconf_dad_work() call addrconf_dad_stop() without extra holding +refcount. This will not cause any issue normally. + +But the race between addrconf_dad_failure() and addrconf_dad_work() +may cause ifp refcount leak and netdevice can not be unregister, +dmesg show the following messages: + +IPv6: eth0: IPv6 duplicate address fe80::XX:XXXX:XXXX:XX detected! +... +unregister_netdevice: waiting for eth0 to become free. Usage count = 1 + +Cc: stable@vger.kernel.org +Fixes: c15b1ccadb32 ("ipv6: move DAD and addrconf_verify processing +to workqueue") +Signed-off-by: Wei Yongjun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -1906,6 +1906,7 @@ errdad: + spin_unlock_bh(&ifp->lock); + + addrconf_mod_dad_work(ifp, 0); ++ in6_ifa_put(ifp); + } + + /* Join to solicited addr multicast group. 
+@@ -3771,6 +3772,7 @@ static void addrconf_dad_work(struct wor + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { ++ in6_ifa_hold(ifp); + addrconf_dad_stop(ifp, 1); + goto out; + } diff --git a/queue-4.7/ipv6-release-dst-in-ping_v6_sendmsg.patch b/queue-4.7/ipv6-release-dst-in-ping_v6_sendmsg.patch new file mode 100644 index 00000000000..82c681c7209 --- /dev/null +++ b/queue-4.7/ipv6-release-dst-in-ping_v6_sendmsg.patch @@ -0,0 +1,53 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Dave Jones +Date: Fri, 2 Sep 2016 14:39:50 -0400 +Subject: ipv6: release dst in ping_v6_sendmsg + +From: Dave Jones + + +[ Upstream commit 03c2778a938aaba0893f6d6cdc29511d91a79848 ] + +Neither the failure or success paths of ping_v6_sendmsg release +the dst it acquires. This leads to a flood of warnings from +"net/core/dst.c:288 dst_release" on older kernels that +don't have 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 backported. + +That patch optimistically hoped this had been fixed post 3.10, but +it seems at least one case wasn't, where I've seen this triggered +a lot from machines doing unprivileged icmp sockets. + +Cc: Martin Lau +Signed-off-by: Dave Jones +Acked-by: Martin KaFai Lau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ping.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -122,8 +122,10 @@ static int ping_v6_sendmsg(struct sock * + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); +- if (!np) +- return -EBADF; ++ if (!np) { ++ err = -EBADF; ++ goto dst_err_out; ++ } + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; +@@ -160,6 +162,9 @@ static int ping_v6_sendmsg(struct sock * + } + release_sock(sk); + ++dst_err_out: ++ dst_release(dst); ++ + if (err) + return err; + diff --git a/queue-4.7/kcm-fix-a-socket-double-free.patch b/queue-4.7/kcm-fix-a-socket-double-free.patch new file mode 100644 index 00000000000..350ff1d727e --- /dev/null +++ b/queue-4.7/kcm-fix-a-socket-double-free.patch @@ -0,0 +1,58 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: WANG Cong +Date: Sun, 28 Aug 2016 21:28:26 -0700 +Subject: kcm: fix a socket double free + +From: WANG Cong + + +[ Upstream commit c0338aff2260ea6c092806312dbb154cec07a242 ] + +Dmitry reported a double free on kcm socket, which could +be easily reproduced by: + + #include + #include + + int main() + { + int fd = syscall(SYS_socket, 0x29ul, 0x5ul, 0x0ul, 0, 0, 0); + syscall(SYS_ioctl, fd, 0x89e2ul, 0x20a98000ul, 0, 0, 0); + return 0; + } + +This is because on the error path, after we install +the new socket file, we call sock_release() to clean +up the socket, which leaves the fd pointing to a freed +socket. Fix this by calling sys_close() on that fd +directly. + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Reported-by: Dmitry Vyukov +Cc: Tom Herbert +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/kcm/kcmsock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2035,7 +2036,7 @@ static int kcm_ioctl(struct socket *sock + if (copy_to_user((void __user *)arg, &info, + sizeof(info))) { + err = -EFAULT; +- sock_release(newsock); ++ sys_close(info.fd); + } + } + diff --git a/queue-4.7/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch b/queue-4.7/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch new file mode 100644 index 00000000000..d471433890f --- /dev/null +++ b/queue-4.7/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch @@ -0,0 +1,47 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Florian Fainelli +Date: Wed, 24 Aug 2016 11:01:20 -0700 +Subject: net: dsa: bcm_sf2: Fix race condition while unmasking interrupts + +From: Florian Fainelli + + +[ Upstream commit 4f101c47791cdcb831b3ef1f831b1cc51e4fe03c ] + +We kept shadow copies of which interrupt sources we have enabled and +disabled, but due to an order bug in how intrl2_mask_clear was defined, +we could run into the following scenario: + +CPU0 CPU1 +intrl2_1_mask_clear(..) +sets INTRL2_CPU_MASK_CLEAR + bcm_sf2_switch_1_isr + read INTRL2_CPU_STATUS and masks with stale + irq1_mask value +updates irq1_mask value + +Which would make us loop again and again trying to process and interrupt +we are not clearing since our copy of whether it was enabled before +still indicates it was not. Fix this by updating the shadow copy first, +and then unasking at the HW level. + +Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/dsa/bcm_sf2.h ++++ b/drivers/net/dsa/bcm_sf2.h +@@ -189,8 +189,8 @@ static inline void name##_writeq(struct + static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \ + u32 mask) \ + { \ +- intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ + priv->irq##which##_mask &= ~(mask); \ ++ intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ + } \ + static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \ + u32 mask) \ diff --git a/queue-4.7/net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch b/queue-4.7/net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch new file mode 100644 index 00000000000..1d87db633e9 --- /dev/null +++ b/queue-4.7/net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch @@ -0,0 +1,48 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Mike Manning +Date: Fri, 12 Aug 2016 12:02:38 +0100 +Subject: net: ipv6: Do not keep IPv6 addresses when IPv6 is disabled + +From: Mike Manning + + +[ Upstream commit bc561632dddd5af0c4444d919f01cbf6d553aa0a ] + +If IPv6 is disabled when the option is set to keep IPv6 +addresses on link down, userspace is unaware of this as +there is no such indication via netlink. The solution is to +remove the IPv6 addresses in this case, which results in +netlink messages indicating removal of addresses in the +usual manner. This fix also makes the behavior consistent +with the case of having IPv6 disabled first, which stops +IPv6 addresses from being added. 
+ +Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") +Signed-off-by: Mike Manning +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3469,7 +3469,7 @@ static int addrconf_ifdown(struct net_de + /* combine the user config with event to determine if permanent + * addresses are to be removed from address hash table + */ +- keep_addr = !(how || _keep_addr <= 0); ++ keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6); + + /* Step 2: clear hash table */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) { +@@ -3525,7 +3525,7 @@ restart: + /* re-combine the user config with event to determine if permanent + * addresses are to be removed from the interface list + */ +- keep_addr = (!how && _keep_addr > 0); ++ keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); + + INIT_LIST_HEAD(&del_list); + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { diff --git a/queue-4.7/net-irda-handle-iriap_register_lsap-allocation-failure.patch b/queue-4.7/net-irda-handle-iriap_register_lsap-allocation-failure.patch new file mode 100644 index 00000000000..83a3a008027 --- /dev/null +++ b/queue-4.7/net-irda-handle-iriap_register_lsap-allocation-failure.patch @@ -0,0 +1,74 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Vegard Nossum +Date: Fri, 12 Aug 2016 10:29:13 +0200 +Subject: net/irda: handle iriap_register_lsap() allocation failure + +From: Vegard Nossum + + +[ Upstream commit 5ba092efc7ddff040777ae7162f1d195f513571b ] + +If iriap_register_lsap() fails to allocate memory, self->lsap is +set to NULL. However, none of the callers handle the failure and +irlmp_connect_request() will happily dereference it: + + iriap_register_lsap: Unable to allocated LSAP! + ================================================================================ + UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2 + member access within null pointer of type 'struct lsap_cb' + CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org + 04/01/2014 + 0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3 + ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880 + ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a + Call Trace: + [] dump_stack+0xac/0xfc + [] ubsan_epilogue+0xd/0x8a + [] __ubsan_handle_type_mismatch+0x157/0x411 + [] irlmp_connect_request+0x7ac/0x970 + [] iriap_connect_request+0xa0/0x160 + [] state_s_disconnect+0x88/0xd0 + [] iriap_do_client_event+0x94/0x120 + [] iriap_getvaluebyclass_request+0x3e0/0x6d0 + [] irda_find_lsap_sel+0x1eb/0x630 + [] irda_connect+0x828/0x12d0 + [] SYSC_connect+0x22b/0x340 + [] SyS_connect+0x9/0x10 + [] do_syscall_64+0x1b3/0x4b0 + [] entry_SYSCALL64_slow_path+0x25/0x25 + ================================================================================ + +The bug seems to have been around since forever. + +There's more problems with missing error checks in iriap_init() (and +indeed all of irda_init()), but that's a bigger problem that needs +very careful review and testing. This patch will fix the most serious +bug (as it's easily reached from unprivileged userspace). + +I have tested my patch with a reproducer. + +Signed-off-by: Vegard Nossum +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/iriap.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/irda/iriap.c ++++ b/net/irda/iriap.c +@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_s + + self->magic = IAS_MAGIC; + self->mode = mode; +- if (mode == IAS_CLIENT) +- iriap_register_lsap(self, slsap_sel, mode); ++ if (mode == IAS_CLIENT) { ++ if (iriap_register_lsap(self, slsap_sel, mode)) { ++ kfree(self); ++ return NULL; ++ } ++ } + + self->confirm = callback; + self->priv = priv; diff --git a/queue-4.7/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch b/queue-4.7/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch new file mode 100644 index 00000000000..bc8b3b8f5c7 --- /dev/null +++ b/queue-4.7/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch @@ -0,0 +1,259 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Paul Blakey +Date: Thu, 18 Aug 2016 21:09:05 +0300 +Subject: net/mlx5: Added missing check of msg length in verifying its signature + +From: Paul Blakey + + +[ Upstream commit 2c0f8ce1b584a4d7b8ff53140d21dfed99834940 ] + +Set and verify signature calculates the signature for each of the +mailbox nodes, even for those that are unused (from cache). Added +a missing length check to set and verify only those which are used. + +While here, also moved the setting of msg's nodes token to where we +already go over them. This saves a pass because checksum is disabled, +and the only useful thing remaining that set signature does is setting +the token. + +Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB +adapters') +Signed-off-by: Paul Blakey + +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 83 ++++++++++++++++---------- + 1 file changed, 53 insertions(+), 30 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst( + return cmd->cmd_buf + (idx << cmd->log_stride); + } + +-static u8 xor8_buf(void *buf, int len) ++static u8 xor8_buf(void *buf, size_t offset, int len) + { + u8 *ptr = buf; + u8 sum = 0; + int i; ++ int end = len + offset; + +- for (i = 0; i < len; i++) ++ for (i = offset; i < end; i++) + sum ^= ptr[i]; + + return sum; +@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len) + + static int verify_block_sig(struct mlx5_cmd_prot_block *block) + { +- if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) ++ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); ++ int xor_len = sizeof(*block) - sizeof(block->data) - 1; ++ ++ if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) + return -EINVAL; + +- if (xor8_buf(block, sizeof(*block)) != 0xff) ++ if (xor8_buf(block, 0, sizeof(*block)) != 0xff) + return -EINVAL; + + return 0; + } + +-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, +- int csum) ++static void calc_block_sig(struct mlx5_cmd_prot_block *block) + { +- block->token = token; +- if (csum) { +- block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - +- sizeof(block->data) - 2); +- block->sig = ~xor8_buf(block, sizeof(*block) - 1); +- } ++ int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; ++ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); ++ ++ block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); ++ block->sig = 
~xor8_buf(block, 0, sizeof(*block) - 1); + } + +-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) ++static void calc_chain_sig(struct mlx5_cmd_msg *msg) + { + struct mlx5_cmd_mailbox *next = msg->next; ++ int size = msg->len; ++ int blen = size - min_t(int, sizeof(msg->first.data), size); ++ int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) ++ / MLX5_CMD_DATA_BLOCK_SIZE; ++ int i = 0; + +- while (next) { +- calc_block_sig(next->buf, token, csum); ++ for (i = 0; i < n && next; i++) { ++ calc_block_sig(next->buf); + next = next->next; + } + } + + static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) + { +- ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); +- calc_chain_sig(ent->in, ent->token, csum); +- calc_chain_sig(ent->out, ent->token, csum); ++ ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); ++ if (csum) { ++ calc_chain_sig(ent->in); ++ calc_chain_sig(ent->out); ++ } + } + + static void poll_timeout(struct mlx5_cmd_work_ent *ent) +@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_ + struct mlx5_cmd_mailbox *next = ent->out->next; + int err; + u8 sig; ++ int size = ent->out->len; ++ int blen = size - min_t(int, sizeof(ent->out->first.data), size); ++ int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) ++ / MLX5_CMD_DATA_BLOCK_SIZE; ++ int i = 0; + +- sig = xor8_buf(ent->lay, sizeof(*ent->lay)); ++ sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (sig != 0xff) + return -EINVAL; + +- while (next) { ++ for (i = 0; i < n && next; i++) { + err = verify_block_sig(next->buf); + if (err) + return err; +@@ -656,7 +670,6 @@ static void cmd_work_handler(struct work + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + } + +- ent->token = alloc_token(cmd); + cmd->ent_arr[ent->idx] = ent; + lay = get_inst(cmd, ent->idx); + ent->lay = lay; +@@ -766,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_ou + static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t callback, +- void *context, int page_queue, u8 *status) ++ void *context, int page_queue, u8 *status, ++ u8 token) + { + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; +@@ -783,6 +797,8 @@ static int mlx5_cmd_invoke(struct mlx5_c + if (IS_ERR(ent)) + return PTR_ERR(ent); + ++ ent->token = token; ++ + if (!callback) + init_completion(&ent->done); + +@@ -854,7 +870,8 @@ static const struct file_operations fops + .write = dbg_write, + }; + +-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) ++static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, ++ u8 token) + { + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; +@@ -880,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_ + memcpy(block->data, from, copy); + from += copy; + size -= copy; ++ block->token = token; + next = next->next; + } + +@@ -949,7 +967,8 @@ static void free_cmd_box(struct mlx5_cor + } + + static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, +- gfp_t flags, int size) ++ gfp_t flags, int size, ++ u8 token) + { + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; +@@ -978,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_c + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? 
tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); ++ block->token = token; + head = tmp; + } + msg->next = head; +@@ -1352,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(st + } + + if (IS_ERR(msg)) +- msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); ++ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); + + return msg; + } +@@ -1377,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev + int err; + u8 status = 0; + u32 drv_synd; ++ u8 token; + + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { +@@ -1395,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev + return err; + } + +- err = mlx5_copy_to_msg(inb, in, in_size); ++ token = alloc_token(&dev->cmd); ++ ++ err = mlx5_copy_to_msg(inb, in, in_size, token); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + +- outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); ++ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, +- pages_queue, &status); ++ pages_queue, &status, token); + if (err) + goto out_out; + +@@ -1476,7 +1499,7 @@ static int create_msg_cache(struct mlx5_ + INIT_LIST_HEAD(&cmd->cache.med.head); + + for (i = 0; i < NUM_LONG_LISTS; i++) { +- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); ++ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; +@@ -1486,7 +1509,7 @@ static int create_msg_cache(struct mlx5_ + } + + for (i = 0; i < NUM_MED_LISTS; i++) { +- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); ++ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; diff --git a/queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch b/queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch new file mode 100644 index 00000000000..38d986493ee --- /dev/null +++ b/queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch @@ -0,0 +1,117 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Mohamad Haj Yahia +Date: Thu, 18 Aug 2016 21:09:04 +0300 +Subject: net/mlx5: Fix pci error recovery flow + +From: Mohamad Haj Yahia + + +[ Upstream commit 1061c90f524963a0a90e7d2f9a6bfa666458af51 ] + +When PCI error is detected we should save the state of the pci prior to +disabling it. + +Also when receiving pci slot reset call we need to verify that the +device is responsive. + +Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core +driver') +Signed-off-by: Mohamad Haj Yahia + +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 59 ++++++++++++------------- + 1 file changed, 29 insertions(+), 30 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1392,36 +1392,12 @@ static pci_ers_result_t mlx5_pci_err_det + dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_enter_error_state(dev); + mlx5_unload_one(dev, priv); ++ pci_save_state(pdev); + mlx5_pci_disable_device(dev); + return state == pci_channel_io_perm_failure ? 
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + } + +-static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) +-{ +- struct mlx5_core_dev *dev = pci_get_drvdata(pdev); +- int err = 0; +- +- dev_info(&pdev->dev, "%s was called\n", __func__); +- +- err = mlx5_pci_enable_device(dev); +- if (err) { +- dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" +- , __func__, err); +- return PCI_ERS_RESULT_DISCONNECT; +- } +- pci_set_master(pdev); +- pci_set_power_state(pdev, PCI_D0); +- pci_restore_state(pdev); +- +- return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +-} +- +-void mlx5_disable_device(struct mlx5_core_dev *dev) +-{ +- mlx5_pci_err_detected(dev->pdev, 0); +-} +- + /* wait for the device to show vital signs by waiting + * for the health counter to start counting. + */ +@@ -1449,21 +1425,44 @@ static int wait_vital(struct pci_dev *pd + return -ETIMEDOUT; + } + +-static void mlx5_pci_resume(struct pci_dev *pdev) ++static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) + { + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); +- struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + +- pci_save_state(pdev); +- err = wait_vital(pdev); ++ err = mlx5_pci_enable_device(dev); + if (err) { ++ dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" ++ , __func__, err); ++ return PCI_ERS_RESULT_DISCONNECT; ++ } ++ ++ pci_set_master(pdev); ++ pci_restore_state(pdev); ++ ++ if (wait_vital(pdev)) { + dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); +- return; ++ return PCI_ERS_RESULT_DISCONNECT; + } + ++ return PCI_ERS_RESULT_RECOVERED; ++} ++ ++void mlx5_disable_device(struct mlx5_core_dev *dev) ++{ ++ mlx5_pci_err_detected(dev->pdev, 0); ++} ++ ++static void mlx5_pci_resume(struct pci_dev *pdev) ++{ ++ struct mlx5_core_dev *dev = pci_get_drvdata(pdev); ++ struct mlx5_priv *priv = &dev->priv; ++ int err; ++ ++ dev_info(&pdev->dev, "%s was called\n", __func__); ++ + err = mlx5_load_one(dev, priv); + if (err) + dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" diff --git a/queue-4.7/net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch b/queue-4.7/net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch new file mode 100644 index 00000000000..1b8423d1330 --- /dev/null +++ b/queue-4.7/net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch @@ -0,0 +1,68 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Gal Pressman +Date: Wed, 7 Sep 2016 19:08:01 +0300 +Subject: net/mlx5e: Fix parsing of vlan packets when updating lro header + +From: Gal Pressman + + +[ Upstream commit cd17d230dd060a12f7451c0caeedb3fd5158eaf9 ] + +Currently vlan tagged packets were not parsed correctly +and assumed to be regular IPv4/IPv6 packets. +We should check for 802.1Q/802.1ad tags and update the lro header +accordingly. +This fixes the use case where LRO is on and rxvlan is off +(vlan stripping is off). + +Fixes: e586b3b0baee ('net/mlx5: Ethernet Datapath files') +Signed-off-by: Gal Pressman +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 22 +++++++++++++++------- + 1 file changed, 15 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -648,24 +648,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq + static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) + { +- struct ethhdr *eth = (struct ethhdr *)(skb->data); +- struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); +- struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN); ++ struct ethhdr *eth = (struct ethhdr *)(skb->data); ++ struct iphdr *ipv4; ++ struct ipv6hdr *ipv6; + struct tcphdr *tcp; ++ int network_depth = 0; ++ __be16 proto; ++ u16 tot_len; + + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || + (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); + +- u16 tot_len = cqe_bcnt - ETH_HLEN; ++ skb->mac_len = ETH_HLEN; ++ proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); + +- if (eth->h_proto == htons(ETH_P_IP)) { +- tcp = (struct tcphdr *)(skb->data + ETH_HLEN + ++ ipv4 = (struct iphdr *)(skb->data + network_depth); ++ ipv6 = (struct ipv6hdr *)(skb->data + network_depth); ++ tot_len = cqe_bcnt - network_depth; ++ ++ if (proto == htons(ETH_P_IP)) { ++ tcp = (struct tcphdr *)(skb->data + network_depth + + sizeof(struct iphdr)); + ipv6 = NULL; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } else { +- tcp = (struct tcphdr *)(skb->data + ETH_HLEN + ++ tcp = (struct tcphdr *)(skb->data + network_depth + + sizeof(struct ipv6hdr)); + ipv4 = NULL; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; diff --git a/queue-4.7/net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch b/queue-4.7/net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch new file mode 100644 index 00000000000..fed4e60d542 --- /dev/null +++ b/queue-4.7/net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch @@ -0,0 +1,34 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Hadar Hen Zion +Date: Thu, 18 Aug 2016 21:09:07 +0300 +Subject: net/mlx5e: Use correct flow dissector key on flower offloading + +From: Hadar Hen Zion + + +[ Upstream commit 1dbd0d373ac338903d27fab5204b13122cc5accd ] + +The wrong key is used when extracting the address type field set by +the flower offload code. We have to use the control key and not the +basic key, fix that. + +Fixes: e3a2b7ed018e ('net/mlx5e: Support offload cls_flower with drop action') +Signed-off-by: Hadar Hen Zion +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -150,7 +150,7 @@ static int parse_cls_flower(struct mlx5e + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, +- FLOW_DISSECTOR_KEY_BASIC, ++ FLOW_DISSECTOR_KEY_CONTROL, + f->key); + addr_type = key->addr_type; + } diff --git a/queue-4.7/net-sched-fix-encoding-to-use-real-length.patch b/queue-4.7/net-sched-fix-encoding-to-use-real-length.patch new file mode 100644 index 00000000000..718a66a0ffb --- /dev/null +++ b/queue-4.7/net-sched-fix-encoding-to-use-real-length.patch @@ -0,0 +1,87 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Jamal Hadi Salim +Date: Mon, 22 Aug 2016 07:10:20 -0400 +Subject: net sched: fix encoding to use real length + +From: Jamal Hadi Salim + + +[ Upstream commit 28a10c426e81afc88514bca8e73affccf850fdf6 ] + +Encoding of the metadata was using the padded length as opposed to +the real length of the data which is a bug per specification. +This has not been an issue todate because all metadatum specified +so far has been 32 bit where aligned and data length are the same width. +This also includes a bug fix for validating the length of a u16 field. +But since there is no metadata of size u16 yes we are fine to include it +here. + +While at it get rid of magic numbers. + +Fixes: ef6980b6becb ("net sched: introduce IFE action") +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -52,7 +52,7 @@ int ife_tlv_meta_encode(void *skbdata, u + u32 *tlv = (u32 *)(skbdata); + u16 totlen = nla_total_size(dlen); /*alignment + hdr */ + char *dptr = (char *)tlv + NLA_HDRLEN; +- u32 htlv = attrtype << 16 | totlen; ++ u32 htlv = attrtype << 16 | dlen; + + *tlv = htonl(htlv); + memset(dptr, 0, totlen - NLA_HDRLEN); +@@ -134,7 +134,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen); + + int ife_validate_meta_u32(void *val, int len) + { +- if (len == 4) ++ if (len == sizeof(u32)) + return 0; + + return -EINVAL; +@@ -143,8 +143,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32) + + int ife_validate_meta_u16(void *val, int len) + { +- /* length will include padding */ +- if (len == NLA_ALIGN(2)) ++ /* length will not include padding */ ++ if (len == sizeof(u16)) + return 0; + + return -EINVAL; +@@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff + u8 *tlvdata = (u8 *)tlv; + u16 mtype = tlv->type; + u16 mlen = tlv->len; ++ u16 alen; + + mtype = ntohs(mtype); + mlen = ntohs(mlen); ++ alen = NLA_ALIGN(mlen); + +- if (find_decode_metaid(skb, ife, mtype, (mlen - 4), +- (void *)(tlvdata + 4))) { ++ if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN), ++ (void *)(tlvdata + NLA_HDRLEN))) { + /* abuse overlimits to count when we receive metadata + * but dont have an ops for it + */ +@@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff + ife->tcf_qstats.overlimits++; + } + +- tlvdata += mlen; +- ifehdrln -= mlen; ++ tlvdata += alen; ++ ifehdrln -= alen; + tlv = (struct meta_tlvhdr *)tlvdata; + } + diff --git a/queue-4.7/net-sctp-always-initialise-sctp_ht_iter-start_fail.patch 
b/queue-4.7/net-sctp-always-initialise-sctp_ht_iter-start_fail.patch new file mode 100644 index 00000000000..afe93313eb2 --- /dev/null +++ b/queue-4.7/net-sctp-always-initialise-sctp_ht_iter-start_fail.patch @@ -0,0 +1,77 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Vegard Nossum +Date: Fri, 12 Aug 2016 09:50:51 +0200 +Subject: net/sctp: always initialise sctp_ht_iter::start_fail + +From: Vegard Nossum + + +[ Upstream commit 54236ab09e9696a27baaae693c288920a26e8588 ] + +sctp_transport_seq_start() does not currently clear iter->start_fail on +success, but relies on it being zero when it is allocated (by +seq_open_net()). + +This can be a problem in the following sequence: + + open() // allocates iter (and implicitly sets iter->start_fail = 0) + read() + - iter->start() // fails and sets iter->start_fail = 1 + - iter->stop() // doesn't call sctp_transport_walk_stop() (correct) + read() again + - iter->start() // succeeds, but doesn't change iter->start_fail + - iter->stop() // doesn't call sctp_transport_walk_stop() (wrong) + +We should initialize sctp_ht_iter::start_fail to zero if ->start() +succeeds, otherwise it's possible that we leave an old value of 1 there, +which will cause ->stop() to not call sctp_transport_walk_stop(), which +causes all sorts of problems like not calling rcu_read_unlock() (and +preempt_enable()), eventually leading to more warnings like this: + + BUG: sleeping function called from invalid context at mm/slab.h:388 + in_atomic(): 0, irqs_disabled(): 0, pid: 16551, name: trinity-c2 + Preemption disabled at:[] rhashtable_walk_start+0x46/0x150 + + [] preempt_count_add+0x1fb/0x280 + [] _raw_spin_lock+0x12/0x40 + [] rhashtable_walk_start+0x46/0x150 + [] sctp_transport_walk_start+0x2f/0x60 + [] sctp_transport_seq_start+0x4d/0x150 + [] traverse+0x170/0x850 + [] seq_read+0x7cc/0x1180 + [] proc_reg_read+0xbc/0x180 + [] do_loop_readv_writev+0x134/0x210 + [] do_readv_writev+0x565/0x660 + [] vfs_readv+0x67/0xa0 + [] do_preadv+0x126/0x170 + [] SyS_preadv+0xc/0x10 + [] do_syscall_64+0x19c/0x410 + [] return_from_SYSCALL_64+0x0/0x6a + [] 0xffffffffffffffff + +Notice that this is a subtly different stacktrace from the one in commit +5fc382d875 ("net/sctp: terminate rhashtable walk correctly"). + +Cc: Xin Long +Cc: Herbert Xu +Cc: Eric W. Biederman +Cc: Marcelo Ricardo Leitner +Signed-off-by: Vegard Nossum +Acked-By: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/proc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sctp/proc.c ++++ b/net/sctp/proc.c +@@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(st + return ERR_PTR(err); + } + ++ iter->start_fail = 0; + return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); + } + diff --git a/queue-4.7/net-smc91x-fix-smc-accesses.patch b/queue-4.7/net-smc91x-fix-smc-accesses.patch new file mode 100644 index 00000000000..0e6dc09e8ef --- /dev/null +++ b/queue-4.7/net-smc91x-fix-smc-accesses.patch @@ -0,0 +1,263 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Russell King +Date: Sat, 27 Aug 2016 17:33:03 +0100 +Subject: net: smc91x: fix SMC accesses + +From: Russell King + + +[ Upstream commit 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb ] + +Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM +machines") broke some ARM platforms through several mistakes. 
Firstly, +the access size must correspond to the following rule: + +(a) at least one of 16-bit or 8-bit access size must be supported +(b) 32-bit accesses are optional, and may be enabled in addition to + the above. + +Secondly, it provides no emulation of 16-bit accesses, instead blindly +making 16-bit accesses even when the platform specifies that only 8-bit +is supported. + +Reorganise smc91x.h so we can make use of the existing 16-bit access +emulation already provided - if 16-bit accesses are supported, use +16-bit accesses directly, otherwise if 8-bit accesses are supported, +use the provided 16-bit access emulation. If neither, BUG(). This +exactly reflects the driver behaviour prior to the commit being fixed. + +Since the conversion incorrectly cut down the available access sizes on +several platforms, we also need to go through every platform and fix up +the overly-restrictive access size: Arnd assumed that if a platform can +perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access +size needed to be specified - not so, all available access sizes must +be specified. + +This likely fixes some performance regressions in doing this: if a +platform does not support 8-bit accesses, 8-bit accesses have been +emulated by performing a 16-bit read-modify-write access. + +Tested on the Intel Assabet/Neponset platform, which supports only 8-bit +accesses, which was broken by the original commit. + +Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") +Signed-off-by: Russell King +Tested-by: Robert Jarzmik +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/mach-pxa/idp.c | 3 - + arch/arm/mach-pxa/xcep.c | 3 - + arch/arm/mach-realview/core.c | 3 - + arch/arm/mach-sa1100/pleb.c | 2 + arch/blackfin/mach-bf561/boards/cm_bf561.c | 3 - + arch/blackfin/mach-bf561/boards/ezkit.c | 3 - + drivers/net/ethernet/smsc/smc91x.c | 7 +++ + drivers/net/ethernet/smsc/smc91x.h | 65 ++++++++++++++++++++--------- + include/linux/smc91x.h | 10 ++++ + 9 files changed, 73 insertions(+), 26 deletions(-) + +--- a/arch/arm/mach-pxa/idp.c ++++ b/arch/arm/mach-pxa/idp.c +@@ -83,7 +83,8 @@ static struct resource smc91x_resources[ + }; + + static struct smc91x_platdata smc91x_platdata = { +- .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_USE_DMA | SMC91X_NOWAIT, + }; + + static struct platform_device smc91x_device = { +--- a/arch/arm/mach-pxa/xcep.c ++++ b/arch/arm/mach-pxa/xcep.c +@@ -120,7 +120,8 @@ static struct resource smc91x_resources[ + }; + + static struct smc91x_platdata xcep_smc91x_info = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT | SMC91X_USE_DMA, + }; + + static struct platform_device smc91x_device = { +--- a/arch/arm/mach-realview/core.c ++++ b/arch/arm/mach-realview/core.c +@@ -93,7 +93,8 @@ static struct smsc911x_platform_config s + }; + + static struct smc91x_platdata smc91x_platdata = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT, + }; + + static struct platform_device realview_eth_device = { +--- a/arch/arm/mach-sa1100/pleb.c ++++ b/arch/arm/mach-sa1100/pleb.c +@@ -45,7 +45,7 @@ static struct resource smc91x_resources[ + }; + + static struct smc91x_platdata smc91x_platdata = { +- .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_16BIT | 
SMC91X_USE_8BIT | SMC91X_NOWAIT, + }; + + static struct platform_device smc91x_device = { +--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c ++++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c +@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb + #include + + static struct smc91x_platdata smc91x_info = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT, + .leda = RPC_LED_100_10, + .ledb = RPC_LED_TX_RX, + }; +--- a/arch/blackfin/mach-bf561/boards/ezkit.c ++++ b/arch/blackfin/mach-bf561/boards/ezkit.c +@@ -134,7 +134,8 @@ static struct platform_device net2272_bf + #include + + static struct smc91x_platdata smc91x_info = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT, + .leda = RPC_LED_100_10, + .ledb = RPC_LED_TX_RX, + }; +--- a/drivers/net/ethernet/smsc/smc91x.c ++++ b/drivers/net/ethernet/smsc/smc91x.c +@@ -2269,6 +2269,13 @@ static int smc_drv_probe(struct platform + if (pd) { + memcpy(&lp->cfg, pd, sizeof(lp->cfg)); + lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags); ++ ++ if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) { ++ dev_err(&pdev->dev, ++ "at least one of 8-bit or 16-bit access support is required.\n"); ++ ret = -ENXIO; ++ goto out_free_netdev; ++ } + } + + #if IS_BUILTIN(CONFIG_OF) +--- a/drivers/net/ethernet/smsc/smc91x.h ++++ b/drivers/net/ethernet/smsc/smc91x.h +@@ -37,6 +37,27 @@ + #include + + /* ++ * Any 16-bit access is performed with two 8-bit accesses if the hardware ++ * can't do it directly. Most registers are 16-bit so those are mandatory. ++ */ ++#define SMC_outw_b(x, a, r) \ ++ do { \ ++ unsigned int __val16 = (x); \ ++ unsigned int __reg = (r); \ ++ SMC_outb(__val16, a, __reg); \ ++ SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \ ++ } while (0) ++ ++#define SMC_inw_b(a, r) \ ++ ({ \ ++ unsigned int __val16; \ ++ unsigned int __reg = r; \ ++ __val16 = SMC_inb(a, __reg); \ ++ __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \ ++ __val16; \ ++ }) ++ ++/* + * Define your architecture specific bus configuration parameters here. + */ + +@@ -55,10 +76,30 @@ + #define SMC_IO_SHIFT (lp->io_shift) + + #define SMC_inb(a, r) readb((a) + (r)) +-#define SMC_inw(a, r) readw((a) + (r)) ++#define SMC_inw(a, r) \ ++ ({ \ ++ unsigned int __smc_r = r; \ ++ SMC_16BIT(lp) ? readw((a) + __smc_r) : \ ++ SMC_8BIT(lp) ? 
SMC_inw_b(a, __smc_r) : \ ++ ({ BUG(); 0; }); \ ++ }) ++ + #define SMC_inl(a, r) readl((a) + (r)) + #define SMC_outb(v, a, r) writeb(v, (a) + (r)) ++#define SMC_outw(v, a, r) \ ++ do { \ ++ unsigned int __v = v, __smc_r = r; \ ++ if (SMC_16BIT(lp)) \ ++ __SMC_outw(__v, a, __smc_r); \ ++ else if (SMC_8BIT(lp)) \ ++ SMC_outw_b(__v, a, __smc_r); \ ++ else \ ++ BUG(); \ ++ } while (0) ++ + #define SMC_outl(v, a, r) writel(v, (a) + (r)) ++#define SMC_insb(a, r, p, l) readsb((a) + (r), p, l) ++#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, l) + #define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) + #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) + #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) +@@ -66,7 +107,7 @@ + #define SMC_IRQ_FLAGS (-1) /* from resource */ + + /* We actually can't write halfwords properly if not word aligned */ +-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) ++static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) + { + if ((machine_is_mainstone() || machine_is_stargate2() || + machine_is_pxa_idp()) && reg & 2) { +@@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, s + + #if ! SMC_CAN_USE_16BIT + +-/* +- * Any 16-bit access is performed with two 8-bit accesses if the hardware +- * can't do it directly. Most registers are 16-bit so those are mandatory. +- */ +-#define SMC_outw(x, ioaddr, reg) \ +- do { \ +- unsigned int __val16 = (x); \ +- SMC_outb( __val16, ioaddr, reg ); \ +- SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ +- } while (0) +-#define SMC_inw(ioaddr, reg) \ +- ({ \ +- unsigned int __val16; \ +- __val16 = SMC_inb( ioaddr, reg ); \ +- __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ +- __val16; \ +- }) +- ++#define SMC_outw(x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg) ++#define SMC_inw(ioaddr, reg) SMC_inw_b(ioaddr, reg) + #define SMC_insw(a, r, p, l) BUG() + #define SMC_outsw(a, r, p, l) BUG() + +--- a/include/linux/smc91x.h ++++ b/include/linux/smc91x.h +@@ -1,6 +1,16 @@ + #ifndef __SMC91X_H__ + #define __SMC91X_H__ + ++/* ++ * These bits define which access sizes a platform can support, rather ++ * than the maximal access size. So, if your platform can do 16-bit ++ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both ++ * SMC91X_USE_16BIT and SMC91X_USE_32BIT. ++ * ++ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or ++ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is ++ * an invalid configuration. ++ */ + #define SMC91X_USE_8BIT (1 << 0) + #define SMC91X_USE_16BIT (1 << 1) + #define SMC91X_USE_32BIT (1 << 2) diff --git a/queue-4.7/revert-af_unix-fix-splice-bind-deadlock.patch b/queue-4.7/revert-af_unix-fix-splice-bind-deadlock.patch new file mode 100644 index 00000000000..a5c4c6b1c25 --- /dev/null +++ b/queue-4.7/revert-af_unix-fix-splice-bind-deadlock.patch @@ -0,0 +1,161 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Linus Torvalds +Date: Thu, 1 Sep 2016 14:56:49 -0700 +Subject: Revert "af_unix: Fix splice-bind deadlock" + +From: Linus Torvalds + + +[ Upstream commit 38f7bd94a97b542de86a2be9229289717e33a7a4 ] + +This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1. + +It turns out that it just replaces one deadlock with another one: we can +still get the wrong lock ordering with the readlock due to overlayfs +calling back into the filesystem layer and still taking the vfs locks +after the readlock. 
+ +The proper solution ends up being to just split the readlock into two +pieces: the bind lock (taken *outside* the vfs locks) and the IO lock +(taken *inside* the filesystem locks). The two locks are independent +anyway. + +Signed-off-by: Linus Torvalds +Reviewed-by: Shmulik Ladkani +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 68 +++++++++++++++++++++-------------------------------- + 1 file changed, 27 insertions(+), 41 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -953,20 +953,32 @@ fail: + return NULL; + } + +-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode, +- struct path *res) ++static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) + { +- int err; +- +- err = security_path_mknod(path, dentry, mode, 0); ++ struct dentry *dentry; ++ struct path path; ++ int err = 0; ++ /* ++ * Get the parent directory, calculate the hash for last ++ * component. ++ */ ++ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); ++ err = PTR_ERR(dentry); ++ if (IS_ERR(dentry)) ++ return err; ++ ++ /* ++ * All right, let's create it. ++ */ ++ err = security_path_mknod(&path, dentry, mode, 0); + if (!err) { +- err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); ++ err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); + if (!err) { +- res->mnt = mntget(path->mnt); ++ res->mnt = mntget(path.mnt); + res->dentry = dget(dentry); + } + } +- ++ done_path_create(&path, dentry); + return err; + } + +@@ -977,12 +989,10 @@ static int unix_bind(struct socket *sock + struct unix_sock *u = unix_sk(sk); + struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + char *sun_path = sunaddr->sun_path; +- int err, name_err; ++ int err; + unsigned int hash; + struct unix_address *addr; + struct hlist_head *list; +- struct path path; +- struct dentry *dentry; + + err = -EINVAL; + if (sunaddr->sun_family != AF_UNIX) +@@ -998,34 +1008,14 @@ static int unix_bind(struct socket *sock + goto out; + addr_len = err; + +- name_err = 0; +- dentry = NULL; +- if (sun_path[0]) { +- /* Get the parent directory, calculate the hash for last +- * component. +- */ +- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); +- +- if (IS_ERR(dentry)) { +- /* delay report until after 'already bound' check */ +- name_err = PTR_ERR(dentry); +- dentry = NULL; +- } +- } +- + err = mutex_lock_interruptible(&u->readlock); + if (err) +- goto out_path; ++ goto out; + + err = -EINVAL; + if (u->addr) + goto out_up; + +- if (name_err) { +- err = name_err == -EEXIST ? 
-EADDRINUSE : name_err; +- goto out_up; +- } +- + err = -ENOMEM; + addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); + if (!addr) +@@ -1036,11 +1026,11 @@ static int unix_bind(struct socket *sock + addr->hash = hash ^ sk->sk_type; + atomic_set(&addr->refcnt, 1); + +- if (dentry) { +- struct path u_path; ++ if (sun_path[0]) { ++ struct path path; + umode_t mode = S_IFSOCK | + (SOCK_INODE(sock)->i_mode & ~current_umask()); +- err = unix_mknod(dentry, &path, mode, &u_path); ++ err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; +@@ -1048,9 +1038,9 @@ static int unix_bind(struct socket *sock + goto out_up; + } + addr->hash = UNIX_HASH_SIZE; +- hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); ++ hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); + spin_lock(&unix_table_lock); +- u->path = u_path; ++ u->path = path; + list = &unix_socket_table[hash]; + } else { + spin_lock(&unix_table_lock); +@@ -1073,10 +1063,6 @@ out_unlock: + spin_unlock(&unix_table_lock); + out_up: + mutex_unlock(&u->readlock); +-out_path: +- if (dentry) +- done_path_create(&path, dentry); +- + out: + return err; + } diff --git a/queue-4.7/revert-phy-irq-cannot-be-shared.patch b/queue-4.7/revert-phy-irq-cannot-be-shared.patch new file mode 100644 index 00000000000..f4795a3a5f9 --- /dev/null +++ b/queue-4.7/revert-phy-irq-cannot-be-shared.patch @@ -0,0 +1,45 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Xander Huff +Date: Wed, 24 Aug 2016 16:47:53 -0500 +Subject: Revert "phy: IRQ cannot be shared" + +From: Xander Huff + + +[ Upstream commit c3e70edd7c2eed6acd234627a6007627f5c76e8e ] + +This reverts: + commit 33c133cc7598 ("phy: IRQ cannot be shared") + +On hardware with multiple PHY devices hooked up to the same IRQ line, allow +them to share it. + +Sergei Shtylyov says: + "I'm not sure now what was the reason I concluded that the IRQ sharing + was impossible... most probably I thought that the kernel IRQ handling + code exited the loop over the IRQ actions once IRQ_HANDLED was returned + -- which is obviously not so in reality..." + +Signed-off-by: Xander Huff +Signed-off-by: Nathan Sullivan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -722,8 +722,10 @@ phy_err: + int phy_start_interrupts(struct phy_device *phydev) + { + atomic_set(&phydev->irq_disable, 0); +- if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", +- phydev) < 0) { ++ if (request_irq(phydev->irq, phy_interrupt, ++ IRQF_SHARED, ++ "phy_interrupt", ++ phydev) < 0) { + pr_warn("%s: Can't get IRQ %d (PHY)\n", + phydev->mdio.bus->name, phydev->irq); + phydev->irq = PHY_POLL; diff --git a/queue-4.7/sctp-fix-overrun-in-sctp_diag_dump_one.patch b/queue-4.7/sctp-fix-overrun-in-sctp_diag_dump_one.patch new file mode 100644 index 00000000000..543de17d110 --- /dev/null +++ b/queue-4.7/sctp-fix-overrun-in-sctp_diag_dump_one.patch @@ -0,0 +1,43 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Lance Richardson +Date: Tue, 23 Aug 2016 11:40:52 -0400 +Subject: sctp: fix overrun in sctp_diag_dump_one() + +From: Lance Richardson + + +[ Upstream commit 232cb53a45965f8789fbf0a9a1962f8c67ab1a3c ] + +The function sctp_diag_dump_one() currently performs a memcpy() +of 64 bytes from a 16 byte field into another 16 byte field. 
Fix +by using correct size, use sizeof to obtain correct size instead +of using a hard-coded constant. + +Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") +Signed-off-by: Lance Richardson +Reviewed-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sctp_diag.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/sctp/sctp_diag.c ++++ b/net/sctp/sctp_diag.c +@@ -418,11 +418,13 @@ static int sctp_diag_dump_one(struct sk_ + paddr.v4.sin_family = AF_INET; + } else { + laddr.v6.sin6_port = req->id.idiag_sport; +- memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64); ++ memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, ++ sizeof(laddr.v6.sin6_addr)); + laddr.v6.sin6_family = AF_INET6; + + paddr.v6.sin6_port = req->id.idiag_dport; +- memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64); ++ memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, ++ sizeof(paddr.v6.sin6_addr)); + paddr.v6.sin6_family = AF_INET6; + } + diff --git a/queue-4.7/series b/queue-4.7/series index dd097a9fe9e..d7bf1f6b74c 100644 --- a/queue-4.7/series +++ b/queue-4.7/series @@ -2,3 +2,34 @@ clocksource-drivers-sun4i-clear-interrupts-after-stopping-timer-in-probe-functio mips-kvm-check-for-pfn-noslot-case.patch fscrypto-require-write-access-to-mount-to-set-encryption-policy.patch drm-msm-protect-against-faults-from-copy_from_user-in-submit-ioctl.patch +bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch +ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch +vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch +bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch +net-irda-handle-iriap_register_lsap-allocation-failure.patch +net-sctp-always-initialise-sctp_ht_iter-start_fail.patch +net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch +tipc-fix-null-pointer-dereference-in-shutdown.patch +tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch +net-mlx5-fix-pci-error-recovery-flow.patch +net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch +net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch +net-sched-fix-encoding-to-use-real-length.patch +udp-fix-poll-issue-with-zero-sized-packets.patch +tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch +sctp-fix-overrun-in-sctp_diag_dump_one.patch +tun-fix-transmit-timestamp-support.patch +net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch +revert-phy-irq-cannot-be-shared.patch +net-smc91x-fix-smc-accesses.patch +bridge-re-introduce-fix-parsing-of-mldv2-reports.patch +kcm-fix-a-socket-double-free.patch +bonding-fix-bonding-crash.patch +revert-af_unix-fix-splice-bind-deadlock.patch +af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch +ipv6-release-dst-in-ping_v6_sendmsg.patch +bnxt_en-fix-tx-push-operation-on-arm64.patch +ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch +tcp-fastopen-avoid-negative-sk_forward_alloc.patch +net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch +tcp-cwnd-does-not-increase-in-tcp-yeah.patch diff --git a/queue-4.7/tcp-cwnd-does-not-increase-in-tcp-yeah.patch b/queue-4.7/tcp-cwnd-does-not-increase-in-tcp-yeah.patch new file mode 100644 index 00000000000..c4b189a6aca --- /dev/null +++ b/queue-4.7/tcp-cwnd-does-not-increase-in-tcp-yeah.patch @@ -0,0 +1,38 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Artem Germanov +Date: Wed, 7 Sep 2016 10:49:36 -0700 +Subject: tcp: cwnd does not increase in TCP YeAH + +From: Artem Germanov + + +[ 
Upstream commit db7196a0d0984b933ccf2cd6a60e26abf466e8a3 ] + +Commit 76174004a0f19785a328f40388e87e982bbf69b9 +(tcp: do not slow start when cwnd equals ssthresh ) +introduced regression in TCP YeAH. Using 100ms delay 1% loss virtual +ethernet link kernel 4.2 shows bandwidth ~500KB/s for single TCP +connection and kernel 4.3 and above (including 4.8-rc4) shows bandwidth +~100KB/s. + That is caused by stalled cwnd when cwnd equals ssthresh. This patch +fixes it by proper increasing cwnd in this case. + +Signed-off-by: Artem Germanov +Acked-by: Dmitry Adamushko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_yeah.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_yeah.c ++++ b/net/ipv4/tcp_yeah.c +@@ -76,7 +76,7 @@ static void tcp_yeah_cong_avoid(struct s + if (!tcp_is_cwnd_limited(sk)) + return; + +- if (tp->snd_cwnd <= tp->snd_ssthresh) ++ if (tcp_in_slow_start(tp)) + tcp_slow_start(tp, acked); + + else if (!yeah->doing_reno_now) { diff --git a/queue-4.7/tcp-fastopen-avoid-negative-sk_forward_alloc.patch b/queue-4.7/tcp-fastopen-avoid-negative-sk_forward_alloc.patch new file mode 100644 index 00000000000..b32a340a2f0 --- /dev/null +++ b/queue-4.7/tcp-fastopen-avoid-negative-sk_forward_alloc.patch @@ -0,0 +1,39 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Eric Dumazet +Date: Wed, 7 Sep 2016 08:34:11 -0700 +Subject: tcp: fastopen: avoid negative sk_forward_alloc + +From: Eric Dumazet + + +[ Upstream commit 76061f631c2ea4ab9c4d66f3a96ecc5737f5aaf7 ] + +When DATA and/or FIN are carried in a SYN/ACK message or SYN message, +we append an skb in socket receive queue, but we forget to call +sk_forced_mem_schedule(). + +Effect is that the socket has a negative sk->sk_forward_alloc as long as +the message is not read by the application. + +Josh Hunt fixed a similar issue in commit d22e15371811 ("tcp: fix tcp +fin memory accounting") + +Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") +Signed-off-by: Eric Dumazet +Reviewed-by: Josh Hunt +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_fastopen.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -150,6 +150,7 @@ void tcp_fastopen_add_skb(struct sock *s + tp->segs_in = 0; + tcp_segs_in(tp, skb); + __skb_pull(skb, tcp_hdrlen(skb)); ++ sk_forced_mem_schedule(sk, skb->truesize); + skb_set_owner_r(skb, sk); + + TCP_SKB_CB(skb)->seq++; diff --git a/queue-4.7/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch b/queue-4.7/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch new file mode 100644 index 00000000000..61fedff6574 --- /dev/null +++ b/queue-4.7/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch @@ -0,0 +1,53 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Eric Dumazet +Date: Wed, 17 Aug 2016 05:56:26 -0700 +Subject: tcp: fix use after free in tcp_xmit_retransmit_queue() + +From: Eric Dumazet + + +[ Upstream commit bb1fceca22492109be12640d49f5ea5a544c6bb4 ] + +When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the +tail of the write queue using tcp_add_write_queue_tail() + +Then it attempts to copy user data into this fresh skb. + +If the copy fails, we undo the work and remove the fresh skb. + +Unfortunately, this undo lacks the change done to tp->highest_sack and +we can leave a dangling pointer (to a freed skb) + +Later, tcp_xmit_retransmit_queue() can dereference this pointer and +access freed memory. 
For regular kernels where memory is not unmapped, +this might cause SACK bugs because tcp_highest_sack_seq() is buggy, +returning garbage instead of tp->snd_nxt, but with various debug +features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel. + +This bug was found by Marco Grassi thanks to syzkaller. + +Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb") +Reported-by: Marco Grassi +Signed-off-by: Eric Dumazet +Cc: Ilpo Järvinen +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Reviewed-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1522,6 +1522,8 @@ static inline void tcp_check_send_head(s + { + if (sk->sk_send_head == skb_unlinked) + sk->sk_send_head = NULL; ++ if (tcp_sk(sk)->highest_sack == skb_unlinked) ++ tcp_sk(sk)->highest_sack = NULL; + } + + static inline void tcp_init_send_head(struct sock *sk) diff --git a/queue-4.7/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch b/queue-4.7/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch new file mode 100644 index 00000000000..e45b5bde976 --- /dev/null +++ b/queue-4.7/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch @@ -0,0 +1,79 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Eric Dumazet +Date: Mon, 22 Aug 2016 11:31:10 -0700 +Subject: tcp: properly scale window in tcp_v[46]_reqsk_send_ack() + +From: Eric Dumazet + + +[ Upstream commit 20a2b49fc538540819a0c552877086548cff8d8d ] + +When sending an ack in SYN_RECV state, we must scale the offered +window if wscale option was negotiated and accepted. + +Tested: + Following packetdrill test demonstrates the issue : + +0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + +// Establish a connection. ++0 < S 0:0(0) win 20000 ++0 > S. 0:0(0) ack 1 win 28960 + ++0 < . 1:11(10) ack 1 win 156 +// check that window is properly scaled ! ++0 > . 1:1(0) ack 1 win 226 + +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 8 +++++++- + net/ipv6/tcp_ipv6.c | 8 +++++++- + 2 files changed, 14 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + ++ /* RFC 7323 2.3 ++ * The window field (SEG.WND) of every outgoing segment, with the ++ * exception of segments, MUST be right-shifted by ++ * Rcv.Wind.Shift bits: ++ */ + tcp_v4_send_ack(sock_net(sk), skb, seq, +- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, ++ tcp_rsk(req)->rcv_nxt, ++ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_time_stamp, + req->ts_recent, + 0, +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -937,9 +937,15 @@ static void tcp_v6_reqsk_send_ack(const + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ ++ /* RFC 7323 2.3 ++ * The window field (SEG.WND) of every outgoing segment, with the ++ * exception of segments, MUST be right-shifted by ++ * Rcv.Wind.Shift bits: ++ */ + tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, +- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, ++ tcp_rsk(req)->rcv_nxt, ++ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + 0, 0); diff --git a/queue-4.7/tipc-fix-null-pointer-dereference-in-shutdown.patch b/queue-4.7/tipc-fix-null-pointer-dereference-in-shutdown.patch new file mode 100644 index 00000000000..8c98caf2e47 --- /dev/null +++ b/queue-4.7/tipc-fix-null-pointer-dereference-in-shutdown.patch @@ -0,0 +1,68 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Vegard Nossum +Date: Sat, 23 Jul 2016 08:15:04 +0200 +Subject: tipc: fix NULL pointer dereference in shutdown() + +From: Vegard Nossum + + +[ Upstream commit d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd ] + +tipc_msg_create() can return a NULL skb and if so, we shouldn't try to +call tipc_node_xmit_skb() on it. + + general protection fault: 0000 [#1] PREEMPT SMP KASAN + CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000 + RIP: 0010:[] [] tipc_node_xmit_skb+0x6b/0x140 + RSP: 0018:ffff8800595bfce8 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0 + RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580 + RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000 + R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f + R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000 + FS: 00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0 + DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602 + Stack: + 0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208 + ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018 + 0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611 + Call Trace: + [] tipc_shutdown+0x553/0x880 + [] SyS_shutdown+0x14b/0x170 + [] do_syscall_64+0x19c/0x410 + [] entry_SYSCALL64_slow_path+0x25/0x25 + Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00 + RIP [] tipc_node_xmit_skb+0x6b/0x140 + RSP + ---[ end trace 57b0484e351e71f1 ]--- + +I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure +userspace is equipped to handle that. Anyway, this is better than a GPF +and looks somewhat consistent with other tipc_msg_create() callers. + +Signed-off-by: Vegard Nossum +Acked-by: Ying Xue +Acked-by: Jon Maloy +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/socket.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -2180,7 +2180,8 @@ restart: + TIPC_CONN_MSG, SHORT_H_SIZE, + 0, dnode, onode, dport, oport, + TIPC_CONN_SHUTDOWN); +- tipc_node_xmit_skb(net, skb, dnode, tsk->portid); ++ if (skb) ++ tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + } + tsk->connected = 0; + sock->state = SS_DISCONNECTING; diff --git a/queue-4.7/tun-fix-transmit-timestamp-support.patch b/queue-4.7/tun-fix-transmit-timestamp-support.patch new file mode 100644 index 00000000000..323f23bd134 --- /dev/null +++ b/queue-4.7/tun-fix-transmit-timestamp-support.patch @@ -0,0 +1,56 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Soheil Hassas Yeganeh +Date: Tue, 23 Aug 2016 18:22:33 -0400 +Subject: tun: fix transmit timestamp support + +From: Soheil Hassas Yeganeh + + +[ Upstream commit 7b996243fab46092fb3a29c773c54be8152366e4 ] + +Instead of using sock_tx_timestamp, use skb_tx_timestamp to record +software transmit timestamp of a packet. + +sock_tx_timestamp resets and overrides the tx_flags of the skb. +The function is intended to be called from within the protocol +layer when creating the skb, not from a device driver. This is +inconsistent with other drivers and will cause issues for TCP. + +In TCP, we intend to sample the timestamps for the last byte +for each sendmsg/sendpage. For that reason, tcp_sendmsg calls +tcp_tx_timestamp only with the last skb that it generates. +For example, if a 128KB message is split into two 64KB packets +we want to sample the SND timestamp of the last packet. The current +code in the tun driver, however, will result in sampling the SND +timestamp for both packets. + +Also, when the last packet is split into smaller packets for +retranmission (see tcp_fragment), the tun driver will record +timestamps for all of the retransmitted packets and not only the +last packet. + +Fixes: eda297729171 (tun: Support software transmit time stamping.) +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Francis Yan +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -878,11 +878,7 @@ static netdev_tx_t tun_net_xmit(struct s + if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + goto drop; + +- if (skb->sk && sk_fullsock(skb->sk)) { +- sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags, +- &skb_shinfo(skb)->tx_flags); +- sw_tx_timestamp(skb); +- } ++ skb_tx_timestamp(skb); + + /* Orphan the skb - required as we might hang on to it + * for indefinite time. diff --git a/queue-4.7/udp-fix-poll-issue-with-zero-sized-packets.patch b/queue-4.7/udp-fix-poll-issue-with-zero-sized-packets.patch new file mode 100644 index 00000000000..4c9e3cc760b --- /dev/null +++ b/queue-4.7/udp-fix-poll-issue-with-zero-sized-packets.patch @@ -0,0 +1,75 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Eric Dumazet +Date: Tue, 23 Aug 2016 13:59:33 -0700 +Subject: udp: fix poll() issue with zero sized packets + +From: Eric Dumazet + + +[ Upstream commit e83c6744e81abc93a20d0eb3b7f504a176a6126a ] + +Laura tracked poll() [and friends] regression caused by commit +e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") + +udp_poll() needs to know if there is a valid packet in receive queue, +even if its payload length is 0. 
+ +Change first_packet_length() to return an signed int, and use -1 +as the indication of an empty queue. + +Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") +Reported-by: Laura Abbott +Signed-off-by: Eric Dumazet +Tested-by: Laura Abbott +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1182,13 +1182,13 @@ out: + * @sk: socket + * + * Drops all bad checksum frames, until a valid one is found. +- * Returns the length of found skb, or 0 if none is found. ++ * Returns the length of found skb, or -1 if none is found. + */ +-static unsigned int first_packet_length(struct sock *sk) ++static int first_packet_length(struct sock *sk) + { + struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff *skb; +- unsigned int res; ++ int res; + + __skb_queue_head_init(&list_kill); + +@@ -1203,7 +1203,7 @@ static unsigned int first_packet_length( + __skb_unlink(skb, rcvq); + __skb_queue_tail(&list_kill, skb); + } +- res = skb ? skb->len : 0; ++ res = skb ? skb->len : -1; + spin_unlock_bh(&rcvq->lock); + + if (!skb_queue_empty(&list_kill)) { +@@ -1232,7 +1232,7 @@ int udp_ioctl(struct sock *sk, int cmd, + + case SIOCINQ: + { +- unsigned int amount = first_packet_length(sk); ++ int amount = max_t(int, 0, first_packet_length(sk)); + + return put_user(amount, (int __user *)arg); + } +@@ -2184,7 +2184,7 @@ unsigned int udp_poll(struct file *file, + + /* Check for false positives due to checksum errors */ + if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && +- !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) ++ !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) + mask &= ~(POLLIN | POLLRDNORM); + + return mask; diff --git a/queue-4.7/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch b/queue-4.7/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch new file mode 100644 index 00000000000..44691080d5a --- /dev/null +++ b/queue-4.7/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch @@ -0,0 +1,159 @@ +From foo@baz Wed Sep 21 10:05:18 CEST 2016 +From: Lance Richardson +Date: Tue, 9 Aug 2016 15:29:42 -0400 +Subject: vti: flush x-netns xfrm cache when vti interface is removed + +From: Lance Richardson + + +[ Upstream commit a5d0dc810abf3d6b241777467ee1d6efb02575fc ] + +When executing the script included below, the netns delete operation +hangs with the following message (repeated at 10 second intervals): + + kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1 + +This occurs because a reference to the lo interface in the "secure" netns +is still held by a dst entry in the xfrm bundle cache in the init netns. + +Address this problem by garbage collecting the tunnel netns flow cache +when a cross-namespace vti interface receives a NETDEV_DOWN notification. + +A more detailed description of the problem scenario (referencing commands +in the script below): + +(1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 + + The vti_test interface is created in the init namespace. vti_tunnel_init() + attaches a struct ip_tunnel to the vti interface's netdev_priv(dev), + setting the tunnel net to &init_net. + +(2) ip link set vti_test netns secure + + The vti_test interface is moved to the "secure" netns. Note that + the associated struct ip_tunnel still has tunnel->net set to &init_net. 
+ +(3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 + + The first packet sent using the vti device causes xfrm_lookup() to be + called as follows: + + dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0); + + Note that tunnel->net is the init namespace, while skb_dst(skb) references + the vti_test interface in the "secure" namespace. The returned dst + references an interface in the init namespace. + + Also note that the first parameter to xfrm_lookup() determines which flow + cache is used to store the computed xfrm bundle, so after xfrm_lookup() + returns there will be a cached bundle in the init namespace flow cache + with a dst referencing a device in the "secure" namespace. + +(4) ip netns del secure + + Kernel begins to delete the "secure" namespace. At some point the + vti_test interface is deleted, at which point dst_ifdown() changes + the dst->dev in the cached xfrm bundle flow from vti_test to lo (still + in the "secure" namespace however). + Since nothing has happened to cause the init namespace's flow cache + to be garbage collected, this dst remains attached to the flow cache, + so the kernel loops waiting for the last reference to lo to go away. + + +ip link add br1 type bridge +ip link set dev br1 up +ip addr add dev br1 1.1.1.1/8 + +ip netns add secure +ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 +ip link set vti_test netns secure +ip netns exec secure ip link set vti_test up +ip netns exec secure ip link s lo up +ip netns exec secure ip addr add dev lo 192.168.100.1/24 +ip netns exec secure ip route add 192.168.200.0/24 dev vti_test +ip xfrm policy flush +ip xfrm state flush +ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \ + proto esp mode tunnel mark 1 +ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \ + proto esp mode tunnel mark 1 +ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \ + mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 +ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \ + mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 + +ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 + +ip netns del secure + + +Reported-by: Hangbin Liu +Reported-by: Jan Tluka +Signed-off-by: Lance Richardson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops + .get_link_net = ip_tunnel_get_link_net, + }; + ++static bool is_vti_tunnel(const struct net_device *dev) ++{ ++ return dev->netdev_ops == &vti_netdev_ops; ++} ++ ++static int vti_device_event(struct notifier_block *unused, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct ip_tunnel *tunnel = netdev_priv(dev); ++ ++ if (!is_vti_tunnel(dev)) ++ return NOTIFY_DONE; ++ ++ switch (event) { ++ case NETDEV_DOWN: ++ if (!net_eq(tunnel->net, dev_net(dev))) ++ xfrm_garbage_collect(tunnel->net); ++ break; ++ } ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block vti_notifier_block __read_mostly = { ++ .notifier_call = vti_device_event, ++}; ++ + static int __init vti_init(void) + { + const char *msg; +@@ -564,6 +591,8 @@ static int __init vti_init(void) + + pr_info("IPv4 over IPsec tunneling driver\n"); + ++ register_netdevice_notifier(&vti_notifier_block); ++ + msg = "tunnel device"; + err = register_pernet_device(&vti_net_ops); + if (err < 0) +@@ -596,6 +625,7 @@ xfrm_proto_ah_failed: + xfrm_proto_esp_failed: + unregister_pernet_device(&vti_net_ops); + pernet_dev_failed: ++ unregister_netdevice_notifier(&vti_notifier_block); + pr_err("vti init: failed to register %s\n", msg); + return err; + } +@@ -607,6 +637,7 @@ static void __exit vti_fini(void) + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti_net_ops); ++ unregister_netdevice_notifier(&vti_notifier_block); + } + + module_init(vti_init);
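
For reference, a minimal userspace sketch of the zero-length-datagram case that the
"udp: fix poll() issue with zero sized packets" change above is concerned with. It is
not part of any patch in this series; the loopback address, the kernel-assigned port
and the one-second poll timeout are arbitrary choices for illustration. On a kernel
carrying the fix, poll() reports the empty datagram as readable; with the regression
described above it does not.

/* build: cc -o udp_zero_poll udp_zero_poll.c */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	socklen_t alen = sizeof(addr);
	int rx = socket(AF_INET, SOCK_DGRAM, 0);
	int tx = socket(AF_INET, SOCK_DGRAM, 0);
	struct pollfd pfd = { .events = POLLIN };

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = 0;	/* let the kernel choose the receive port */

	if (rx < 0 || tx < 0 ||
	    bind(rx, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    getsockname(rx, (struct sockaddr *)&addr, &alen) < 0) {
		perror("setup");
		return 1;
	}

	/* queue a single zero-length datagram on the receive socket */
	if (sendto(tx, "", 0, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("sendto");
		return 1;
	}

	pfd.fd = rx;
	if (poll(&pfd, 1, 1000) == 1 && (pfd.revents & POLLIN))
		printf("zero-length datagram reported readable\n");
	else
		printf("poll() did not report the datagram (regression behaviour)\n");

	close(tx);
	close(rx);
	return 0;
}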