git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author    Sasha Levin <sashal@kernel.org>
          Sun, 10 Mar 2024 02:31:48 +0000 (21:31 -0500)
committer Sasha Levin <sashal@kernel.org>
          Sun, 10 Mar 2024 02:31:48 +0000 (21:31 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
49 files changed:
queue-5.10/bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch [new file with mode: 0644]
queue-5.10/drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch [new file with mode: 0644]
queue-5.10/drivers-hv-vmbus-drop-error-message-when-no-request-.patch [new file with mode: 0644]
queue-5.10/exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch [new file with mode: 0644]
queue-5.10/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch [new file with mode: 0644]
queue-5.10/ext4-convert-to-exclusive-lock-while-inserting-delal.patch [new file with mode: 0644]
queue-5.10/ext4-make-ext4_es_insert_extent-return-void.patch [new file with mode: 0644]
queue-5.10/ext4-refactor-ext4_da_map_blocks.patch [new file with mode: 0644]
queue-5.10/getrusage-add-the-signal_struct-sig-local-variable.patch [new file with mode: 0644]
queue-5.10/getrusage-move-thread_group_cputime_adjusted-outside.patch [new file with mode: 0644]
queue-5.10/getrusage-use-__for_each_thread.patch [new file with mode: 0644]
queue-5.10/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch [new file with mode: 0644]
queue-5.10/hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch [new file with mode: 0644]
queue-5.10/hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch [new file with mode: 0644]
queue-5.10/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch [new file with mode: 0644]
queue-5.10/hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch [new file with mode: 0644]
queue-5.10/hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch [new file with mode: 0644]
queue-5.10/hv_netvsc-wait-for-completion-on-request-switch_data.patch [new file with mode: 0644]
queue-5.10/lsm-fix-default-return-value-of-the-socket_getpeerse.patch [new file with mode: 0644]
queue-5.10/lsm-make-security_socket_getpeersec_stream-sockptr_t.patch [new file with mode: 0644]
queue-5.10/mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch [new file with mode: 0644]
queue-5.10/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch [new file with mode: 0644]
queue-5.10/net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch [new file with mode: 0644]
queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch [new file with mode: 0644]
queue-5.10/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch [new file with mode: 0644]
queue-5.10/nfsd-fix-release_lockowner.patch [new file with mode: 0644]
queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch [new file with mode: 0644]
queue-5.10/regmap-add-bulk-read-write-callbacks-into-regmap_con.patch [new file with mode: 0644]
queue-5.10/regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch [new file with mode: 0644]
queue-5.10/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch [new file with mode: 0644]
queue-5.10/selftests-mm-switch-to-bash-from-sh.patch [new file with mode: 0644]
queue-5.10/serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch [new file with mode: 0644]
queue-5.10/serial-max310x-fix-io-data-corruption-in-batched-ope.patch [new file with mode: 0644]
queue-5.10/serial-max310x-implement-i2c-support.patch [new file with mode: 0644]
queue-5.10/serial-max310x-make-accessing-revision-id-interface-.patch [new file with mode: 0644]
queue-5.10/serial-max310x-make-use-of-device-properties.patch [new file with mode: 0644]
queue-5.10/serial-max310x-prevent-infinite-while-loop-in-port-s.patch [new file with mode: 0644]
queue-5.10/serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch [new file with mode: 0644]
queue-5.10/serial-max310x-unprepare-and-disable-clock-in-error-.patch [new file with mode: 0644]
queue-5.10/serial-max310x-use-a-separate-regmap-for-each-port.patch [new file with mode: 0644]
queue-5.10/serial-max310x-use-devm_clk_get_optional-to-get-the-.patch [new file with mode: 0644]
queue-5.10/serial-max310x-use-regmap-methods-for-spi-batch-oper.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch [new file with mode: 0644]
queue-5.10/um-fix-adding-no-pie-for-clang.patch [new file with mode: 0644]
queue-5.10/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch [new file with mode: 0644]
queue-5.10/xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch [new file with mode: 0644]
queue-5.10/xhci-process-isoc-td-properly-when-there-was-a-trans.patch [new file with mode: 0644]
queue-5.10/xhci-remove-extra-loop-in-interrupt-context.patch [new file with mode: 0644]

diff --git a/queue-5.10/bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch b/queue-5.10/bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch
new file mode 100644 (file)
index 0000000..9d7e066
--- /dev/null
@@ -0,0 +1,236 @@
+From a18447e359ab34c6f720c82fa9bb7785051c94fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 17:28:02 -0700
+Subject: bpf: net: Change sk_getsockopt() to take the sockptr_t argument
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit 4ff09db1b79b98b4a2a7511571c640b76cab3beb ]
+
+This patch changes sk_getsockopt() to take the sockptr_t argument
+such that it can be used by bpf_getsockopt(SOL_SOCKET) in a
+latter patch.
+
+security_socket_getpeersec_stream() is not changed.  It stays
+with the __user ptr (optval.user and optlen.user) to avoid changes
+to other security hooks.  bpf_getsockopt(SOL_SOCKET) also does not
+support SO_PEERSEC.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://lore.kernel.org/r/20220902002802.2888419-1-kafai@fb.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/filter.h  |  3 +--
+ include/linux/sockptr.h |  5 +++++
+ net/core/filter.c       |  5 ++---
+ net/core/sock.c         | 43 +++++++++++++++++++++++------------------
+ 4 files changed, 32 insertions(+), 24 deletions(-)
+
+diff --git a/include/linux/filter.h b/include/linux/filter.h
+index bc6ce4b202a80..cd56e53bd42e2 100644
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -892,8 +892,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
+ void sk_reuseport_prog_free(struct bpf_prog *prog);
+ int sk_detach_filter(struct sock *sk);
+-int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
+-                unsigned int len);
++int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
+ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
+ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
+diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
+index ea193414298b7..38862819e77a1 100644
+--- a/include/linux/sockptr.h
++++ b/include/linux/sockptr.h
+@@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
+       return 0;
+ }
++static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
++{
++      return copy_to_sockptr_offset(dst, 0, src, size);
++}
++
+ static inline void *memdup_sockptr(sockptr_t src, size_t len)
+ {
+       void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 6cfc8fb0562a2..49e4d1535cc82 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -9903,8 +9903,7 @@ int sk_detach_filter(struct sock *sk)
+ }
+ EXPORT_SYMBOL_GPL(sk_detach_filter);
+-int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
+-                unsigned int len)
++int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
+ {
+       struct sock_fprog_kern *fprog;
+       struct sk_filter *filter;
+@@ -9935,7 +9934,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
+               goto out;
+       ret = -EFAULT;
+-      if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
++      if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
+               goto out;
+       /* Instead of bytes, the API requests to return the number
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 95559d088a169..42da46965b16f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -644,8 +644,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
+       return ret;
+ }
+-static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+-                              int __user *optlen, int len)
++static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
++                              sockptr_t optlen, int len)
+ {
+       int ret = -ENOPROTOOPT;
+ #ifdef CONFIG_NETDEVICES
+@@ -668,12 +668,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+       len = strlen(devname) + 1;
+       ret = -EFAULT;
+-      if (copy_to_user(optval, devname, len))
++      if (copy_to_sockptr(optval, devname, len))
+               goto out;
+ zero:
+       ret = -EFAULT;
+-      if (put_user(len, optlen))
++      if (copy_to_sockptr(optlen, &len, sizeof(int)))
+               goto out;
+       ret = 0;
+@@ -1281,20 +1281,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
+       }
+ }
+-static int groups_to_user(gid_t __user *dst, const struct group_info *src)
++static int groups_to_user(sockptr_t dst, const struct group_info *src)
+ {
+       struct user_namespace *user_ns = current_user_ns();
+       int i;
+-      for (i = 0; i < src->ngroups; i++)
+-              if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
++      for (i = 0; i < src->ngroups; i++) {
++              gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
++
++              if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
+                       return -EFAULT;
++      }
+       return 0;
+ }
+ static int sk_getsockopt(struct sock *sk, int level, int optname,
+-                       char __user *optval, int __user *optlen)
++                       sockptr_t optval, sockptr_t optlen)
+ {
+       struct socket *sock = sk->sk_socket;
+@@ -1312,7 +1315,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+       int lv = sizeof(int);
+       int len;
+-      if (get_user(len, optlen))
++      if (copy_from_sockptr(&len, optlen, sizeof(int)))
+               return -EFAULT;
+       if (len < 0)
+               return -EINVAL;
+@@ -1445,7 +1448,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+               cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+               spin_unlock(&sk->sk_peer_lock);
+-              if (copy_to_user(optval, &peercred, len))
++              if (copy_to_sockptr(optval, &peercred, len))
+                       return -EFAULT;
+               goto lenout;
+       }
+@@ -1463,11 +1466,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+               if (len < n * sizeof(gid_t)) {
+                       len = n * sizeof(gid_t);
+                       put_cred(cred);
+-                      return put_user(len, optlen) ? -EFAULT : -ERANGE;
++                      return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
+               }
+               len = n * sizeof(gid_t);
+-              ret = groups_to_user((gid_t __user *)optval, cred->group_info);
++              ret = groups_to_user(optval, cred->group_info);
+               put_cred(cred);
+               if (ret)
+                       return ret;
+@@ -1483,7 +1486,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+                       return -ENOTCONN;
+               if (lv < len)
+                       return -EINVAL;
+-              if (copy_to_user(optval, address, len))
++              if (copy_to_sockptr(optval, address, len))
+                       return -EFAULT;
+               goto lenout;
+       }
+@@ -1500,7 +1503,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case SO_PEERSEC:
+-              return security_socket_getpeersec_stream(sock, optval, optlen, len);
++              return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
+       case SO_MARK:
+               v.val = sk->sk_mark;
+@@ -1528,7 +1531,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+               return sock_getbindtodevice(sk, optval, optlen, len);
+       case SO_GET_FILTER:
+-              len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
++              len = sk_get_filter(sk, optval, len);
+               if (len < 0)
+                       return len;
+@@ -1575,7 +1578,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+               sk_get_meminfo(sk, meminfo);
+               len = min_t(unsigned int, len, sizeof(meminfo));
+-              if (copy_to_user(optval, &meminfo, len))
++              if (copy_to_sockptr(optval, &meminfo, len))
+                       return -EFAULT;
+               goto lenout;
+@@ -1625,10 +1628,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+       if (len > lv)
+               len = lv;
+-      if (copy_to_user(optval, &v, len))
++      if (copy_to_sockptr(optval, &v, len))
+               return -EFAULT;
+ lenout:
+-      if (put_user(len, optlen))
++      if (copy_to_sockptr(optlen, &len, sizeof(int)))
+               return -EFAULT;
+       return 0;
+ }
+@@ -1636,7 +1639,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ int sock_getsockopt(struct socket *sock, int level, int optname,
+                   char __user *optval, int __user *optlen)
+ {
+-      return sk_getsockopt(sock->sk, level, optname, optval, optlen);
++      return sk_getsockopt(sock->sk, level, optname,
++                           USER_SOCKPTR(optval),
++                           USER_SOCKPTR(optlen));
+ }
+ /*
+-- 
+2.43.0
+
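The sockptr_t change above exists so one implementation can serve both user-space callers and in-kernel callers such as bpf_getsockopt(SOL_SOCKET). As a rough illustration only, a getsockopt-style helper written against that interface might look like the sketch below; the helper name and the option value are made up, and only the copy_from_sockptr()/copy_to_sockptr()/USER_SOCKPTR() calls are the real interface touched by the patch.

/* Illustrative sketch, not part of this queue: a getsockopt-style helper
 * written against sockptr_t, so the same body works for user pointers
 * (USER_SOCKPTR) and kernel pointers (KERNEL_SOCKPTR) alike.
 */
static int example_get_int_opt(int value, sockptr_t optval, sockptr_t optlen)
{
	int len;

	if (copy_from_sockptr(&len, optlen, sizeof(int)))
		return -EFAULT;
	if (len < (int)sizeof(int))
		return -EINVAL;

	len = sizeof(int);
	if (copy_to_sockptr(optval, &value, len))
		return -EFAULT;
	if (copy_to_sockptr(optlen, &len, sizeof(int)))
		return -EFAULT;
	return 0;
}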
diff --git a/queue-5.10/drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch b/queue-5.10/drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch
new file mode 100644 (file)
index 0000000..21b4000
--- /dev/null
@@ -0,0 +1,407 @@
+From f01b7ce60d0b2730d8de1f8f49a16ed9fe64c76e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Nov 2020 11:04:00 +0100
+Subject: Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus
+ hardening
+
+From: Andres Beltran <lkmlabelt@gmail.com>
+
+[ Upstream commit e8b7db38449ac5b950a3f00519171c4be3e226ff ]
+
+Currently, VMbus drivers use pointers into guest memory as request IDs
+for interactions with Hyper-V. To be more robust in the face of errors
+or malicious behavior from a compromised Hyper-V, avoid exposing
+guest memory addresses to Hyper-V. Also avoid Hyper-V giving back a
+bad request ID that is then treated as the address of a guest data
+structure with no validation. Instead, encapsulate these memory
+addresses and provide small integers as request IDs.
+
+Signed-off-by: Andres Beltran <lkmlabelt@gmail.com>
+Co-developed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Reviewed-by: Wei Liu <wei.liu@kernel.org>
+Link: https://lore.kernel.org/r/20201109100402.8946-2-parri.andrea@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hv/channel.c      | 174 ++++++++++++++++++++++++++++++++++++--
+ drivers/hv/hyperv_vmbus.h |   3 +-
+ drivers/hv/ring_buffer.c  |  29 ++++++-
+ include/linux/hyperv.h    |  22 +++++
+ 4 files changed, 219 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
+index f064fa6ef181a..a59ab2f3d68e1 100644
+--- a/drivers/hv/channel.c
++++ b/drivers/hv/channel.c
+@@ -503,6 +503,70 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
+ }
+ EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
++/**
++ * request_arr_init - Allocates memory for the requestor array. Each slot
++ * keeps track of the next available slot in the array. Initially, each
++ * slot points to the next one (as in a Linked List). The last slot
++ * does not point to anything, so its value is U64_MAX by default.
++ * @size The size of the array
++ */
++static u64 *request_arr_init(u32 size)
++{
++      int i;
++      u64 *req_arr;
++
++      req_arr = kcalloc(size, sizeof(u64), GFP_KERNEL);
++      if (!req_arr)
++              return NULL;
++
++      for (i = 0; i < size - 1; i++)
++              req_arr[i] = i + 1;
++
++      /* Last slot (no more available slots) */
++      req_arr[i] = U64_MAX;
++
++      return req_arr;
++}
++
++/*
++ * vmbus_alloc_requestor - Initializes @rqstor's fields.
++ * Index 0 is the first free slot
++ * @size: Size of the requestor array
++ */
++static int vmbus_alloc_requestor(struct vmbus_requestor *rqstor, u32 size)
++{
++      u64 *rqst_arr;
++      unsigned long *bitmap;
++
++      rqst_arr = request_arr_init(size);
++      if (!rqst_arr)
++              return -ENOMEM;
++
++      bitmap = bitmap_zalloc(size, GFP_KERNEL);
++      if (!bitmap) {
++              kfree(rqst_arr);
++              return -ENOMEM;
++      }
++
++      rqstor->req_arr = rqst_arr;
++      rqstor->req_bitmap = bitmap;
++      rqstor->size = size;
++      rqstor->next_request_id = 0;
++      spin_lock_init(&rqstor->req_lock);
++
++      return 0;
++}
++
++/*
++ * vmbus_free_requestor - Frees memory allocated for @rqstor
++ * @rqstor: Pointer to the requestor struct
++ */
++static void vmbus_free_requestor(struct vmbus_requestor *rqstor)
++{
++      kfree(rqstor->req_arr);
++      bitmap_free(rqstor->req_bitmap);
++}
++
+ static int __vmbus_open(struct vmbus_channel *newchannel,
+                      void *userdata, u32 userdatalen,
+                      void (*onchannelcallback)(void *context), void *context)
+@@ -523,6 +587,12 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
+       if (newchannel->state != CHANNEL_OPEN_STATE)
+               return -EINVAL;
++      /* Create and init requestor */
++      if (newchannel->rqstor_size) {
++              if (vmbus_alloc_requestor(&newchannel->requestor, newchannel->rqstor_size))
++                      return -ENOMEM;
++      }
++
+       newchannel->state = CHANNEL_OPENING_STATE;
+       newchannel->onchannel_callback = onchannelcallback;
+       newchannel->channel_callback_context = context;
+@@ -626,6 +696,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
+ error_clean_ring:
+       hv_ringbuffer_cleanup(&newchannel->outbound);
+       hv_ringbuffer_cleanup(&newchannel->inbound);
++      vmbus_free_requestor(&newchannel->requestor);
+       newchannel->state = CHANNEL_OPEN_STATE;
+       return err;
+ }
+@@ -808,6 +879,9 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
+               channel->ringbuffer_gpadlhandle = 0;
+       }
++      if (!ret)
++              vmbus_free_requestor(&channel->requestor);
++
+       return ret;
+ }
+@@ -888,7 +962,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+       /* in 8-bytes granularity */
+       desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
+       desc.len8 = (u16)(packetlen_aligned >> 3);
+-      desc.trans_id = requestid;
++      desc.trans_id = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
+       bufferlist[0].iov_base = &desc;
+       bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
+@@ -897,7 +971,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+       bufferlist[2].iov_base = &aligned_data;
+       bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+-      return hv_ringbuffer_write(channel, bufferlist, num_vecs);
++      return hv_ringbuffer_write(channel, bufferlist, num_vecs, requestid);
+ }
+ EXPORT_SYMBOL(vmbus_sendpacket);
+@@ -939,7 +1013,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+       desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+       desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
+       desc.length8 = (u16)(packetlen_aligned >> 3);
+-      desc.transactionid = requestid;
++      desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
+       desc.reserved = 0;
+       desc.rangecount = pagecount;
+@@ -956,7 +1030,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+       bufferlist[2].iov_base = &aligned_data;
+       bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+-      return hv_ringbuffer_write(channel, bufferlist, 3);
++      return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
+ }
+ EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
+@@ -983,7 +1057,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
+       desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+       desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
+       desc->length8 = (u16)(packetlen_aligned >> 3);
+-      desc->transactionid = requestid;
++      desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
+       desc->reserved = 0;
+       desc->rangecount = 1;
+@@ -994,7 +1068,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
+       bufferlist[2].iov_base = &aligned_data;
+       bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+-      return hv_ringbuffer_write(channel, bufferlist, 3);
++      return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
+ }
+ EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
+@@ -1042,3 +1116,91 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
+                                 buffer_actual_len, requestid, true);
+ }
+ EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
++
++/*
++ * vmbus_next_request_id - Returns a new request id. It is also
++ * the index at which the guest memory address is stored.
++ * Uses a spin lock to avoid race conditions.
++ * @rqstor: Pointer to the requestor struct
++ * @rqst_add: Guest memory address to be stored in the array
++ */
++u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr)
++{
++      unsigned long flags;
++      u64 current_id;
++      const struct vmbus_channel *channel =
++              container_of(rqstor, const struct vmbus_channel, requestor);
++
++      /* Check rqstor has been initialized */
++      if (!channel->rqstor_size)
++              return VMBUS_NO_RQSTOR;
++
++      spin_lock_irqsave(&rqstor->req_lock, flags);
++      current_id = rqstor->next_request_id;
++
++      /* Requestor array is full */
++      if (current_id >= rqstor->size) {
++              spin_unlock_irqrestore(&rqstor->req_lock, flags);
++              return VMBUS_RQST_ERROR;
++      }
++
++      rqstor->next_request_id = rqstor->req_arr[current_id];
++      rqstor->req_arr[current_id] = rqst_addr;
++
++      /* The already held spin lock provides atomicity */
++      bitmap_set(rqstor->req_bitmap, current_id, 1);
++
++      spin_unlock_irqrestore(&rqstor->req_lock, flags);
++
++      /*
++       * Cannot return an ID of 0, which is reserved for an unsolicited
++       * message from Hyper-V.
++       */
++      return current_id + 1;
++}
++EXPORT_SYMBOL_GPL(vmbus_next_request_id);
++
++/*
++ * vmbus_request_addr - Returns the memory address stored at @trans_id
++ * in @rqstor. Uses a spin lock to avoid race conditions.
++ * @rqstor: Pointer to the requestor struct
++ * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's
++ * next request id.
++ */
++u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id)
++{
++      unsigned long flags;
++      u64 req_addr;
++      const struct vmbus_channel *channel =
++              container_of(rqstor, const struct vmbus_channel, requestor);
++
++      /* Check rqstor has been initialized */
++      if (!channel->rqstor_size)
++              return VMBUS_NO_RQSTOR;
++
++      /* Hyper-V can send an unsolicited message with ID of 0 */
++      if (!trans_id)
++              return trans_id;
++
++      spin_lock_irqsave(&rqstor->req_lock, flags);
++
++      /* Data corresponding to trans_id is stored at trans_id - 1 */
++      trans_id--;
++
++      /* Invalid trans_id */
++      if (trans_id >= rqstor->size || !test_bit(trans_id, rqstor->req_bitmap)) {
++              spin_unlock_irqrestore(&rqstor->req_lock, flags);
++              return VMBUS_RQST_ERROR;
++      }
++
++      req_addr = rqstor->req_arr[trans_id];
++      rqstor->req_arr[trans_id] = rqstor->next_request_id;
++      rqstor->next_request_id = trans_id;
++
++      /* The already held spin lock provides atomicity */
++      bitmap_clear(rqstor->req_bitmap, trans_id, 1);
++
++      spin_unlock_irqrestore(&rqstor->req_lock, flags);
++      return req_addr;
++}
++EXPORT_SYMBOL_GPL(vmbus_request_addr);
+diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
+index 7845fa5de79e9..601660bca5d47 100644
+--- a/drivers/hv/hyperv_vmbus.h
++++ b/drivers/hv/hyperv_vmbus.h
+@@ -180,7 +180,8 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
+ int hv_ringbuffer_write(struct vmbus_channel *channel,
+-                      const struct kvec *kv_list, u32 kv_count);
++                      const struct kvec *kv_list, u32 kv_count,
++                      u64 requestid);
+ int hv_ringbuffer_read(struct vmbus_channel *channel,
+                      void *buffer, u32 buflen, u32 *buffer_actual_len,
+diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
+index 7ed6fad3fa8ff..a0ba6ac487368 100644
+--- a/drivers/hv/ring_buffer.c
++++ b/drivers/hv/ring_buffer.c
+@@ -261,7 +261,8 @@ EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy);
+ /* Write to the ring buffer. */
+ int hv_ringbuffer_write(struct vmbus_channel *channel,
+-                      const struct kvec *kv_list, u32 kv_count)
++                      const struct kvec *kv_list, u32 kv_count,
++                      u64 requestid)
+ {
+       int i;
+       u32 bytes_avail_towrite;
+@@ -271,6 +272,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+       u64 prev_indices;
+       unsigned long flags;
+       struct hv_ring_buffer_info *outring_info = &channel->outbound;
++      struct vmpacket_descriptor *desc = kv_list[0].iov_base;
++      u64 rqst_id = VMBUS_NO_RQSTOR;
+       if (channel->rescind)
+               return -ENODEV;
+@@ -313,6 +316,23 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+                                                    kv_list[i].iov_len);
+       }
++      /*
++       * Allocate the request ID after the data has been copied into the
++       * ring buffer.  Once this request ID is allocated, the completion
++       * path could find the data and free it.
++       */
++
++      if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
++              rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
++              if (rqst_id == VMBUS_RQST_ERROR) {
++                      spin_unlock_irqrestore(&outring_info->ring_lock, flags);
++                      pr_err("No request id available\n");
++                      return -EAGAIN;
++              }
++      }
++      desc = hv_get_ring_buffer(outring_info) + old_write;
++      desc->trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? requestid : rqst_id;
++
+       /* Set previous packet start */
+       prev_indices = hv_get_ring_bufferindices(outring_info);
+@@ -332,8 +352,13 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+       hv_signal_on_write(old_write, channel);
+-      if (channel->rescind)
++      if (channel->rescind) {
++              if (rqst_id != VMBUS_NO_RQSTOR) {
++                      /* Reclaim request ID to avoid leak of IDs */
++                      vmbus_request_addr(&channel->requestor, rqst_id);
++              }
+               return -ENODEV;
++      }
+       return 0;
+ }
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index eada4d8d65879..4cb65a79d92f6 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -764,6 +764,22 @@ enum vmbus_device_type {
+       HV_UNKNOWN,
+ };
++/*
++ * Provides request ids for VMBus. Encapsulates guest memory
++ * addresses and stores the next available slot in req_arr
++ * to generate new ids in constant time.
++ */
++struct vmbus_requestor {
++      u64 *req_arr;
++      unsigned long *req_bitmap; /* is a given slot available? */
++      u32 size;
++      u64 next_request_id;
++      spinlock_t req_lock; /* provides atomicity */
++};
++
++#define VMBUS_NO_RQSTOR U64_MAX
++#define VMBUS_RQST_ERROR (U64_MAX - 1)
++
+ struct vmbus_device {
+       u16  dev_type;
+       guid_t guid;
+@@ -988,8 +1004,14 @@ struct vmbus_channel {
+       u32 fuzz_testing_interrupt_delay;
+       u32 fuzz_testing_message_delay;
++      /* request/transaction ids for VMBus */
++      struct vmbus_requestor requestor;
++      u32 rqstor_size;
+ };
++u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
++u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id);
++
+ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
+ {
+       return !!(c->offermsg.offer.chn_flags &
+-- 
+2.43.0
+
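The requestor added above is in essence a fixed-size free list: every unused slot stores the index of the next free slot, so handing out a request ID and reclaiming it are both constant time. A stripped-down, standalone sketch of that scheme follows; the names are generic placeholders, and the spin lock, bitmap and reserved-zero handling of the real driver code are deliberately left out.

#include <stdint.h>
#include <stdlib.h>

struct id_table {
	uint64_t *slots;     /* free-list link, or the stored address */
	uint64_t next_free;  /* index of the first free slot */
	uint32_t size;
};

static int id_table_init(struct id_table *t, uint32_t size)
{
	uint32_t i;

	if (!size)
		return -1;
	t->slots = calloc(size, sizeof(*t->slots));
	if (!t->slots)
		return -1;
	for (i = 0; i + 1 < size; i++)
		t->slots[i] = i + 1;          /* each free slot points at the next */
	t->slots[size - 1] = UINT64_MAX;      /* end of the free list */
	t->next_free = 0;
	t->size = size;
	return 0;
}

static int64_t id_alloc(struct id_table *t, uint64_t addr)
{
	uint64_t id = t->next_free;

	if (id >= t->size)
		return -1;                    /* table full */
	t->next_free = t->slots[id];          /* unlink the slot */
	t->slots[id] = addr;                  /* remember the caller's address */
	return (int64_t)id;
}

static uint64_t id_release(struct id_table *t, uint64_t id)
{
	uint64_t addr = t->slots[id];

	t->slots[id] = t->next_free;          /* push the slot back on the list */
	t->next_free = id;
	return addr;
}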
diff --git a/queue-5.10/drivers-hv-vmbus-drop-error-message-when-no-request-.patch b/queue-5.10/drivers-hv-vmbus-drop-error-message-when-no-request-.patch
new file mode 100644 (file)
index 0000000..f443a16
--- /dev/null
@@ -0,0 +1,40 @@
+From fa8553d3a820c2eb5fe34180444ba299b6087594 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Mar 2021 20:13:48 +0100
+Subject: Drivers: hv: vmbus: Drop error message when 'No request id available'
+
+From: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+
+[ Upstream commit 0c85c54bf7faeb80c6b76901ed77d93acef0207d ]
+
+Running out of request IDs on a channel essentially produces the same
+effect as running out of space in the ring buffer, in that -EAGAIN is
+returned.  The error message in hv_ringbuffer_write() should either be
+dropped (since we don't output a message when the ring buffer is full)
+or be made conditional/debug-only.
+
+Suggested-by: Michael Kelley <mikelley@microsoft.com>
+Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Fixes: e8b7db38449ac ("Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening")
+Link: https://lore.kernel.org/r/20210301191348.196485-1-parri.andrea@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hv/ring_buffer.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
+index a0ba6ac487368..a49cc69c56af0 100644
+--- a/drivers/hv/ring_buffer.c
++++ b/drivers/hv/ring_buffer.c
+@@ -326,7 +326,6 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+               rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
+               if (rqst_id == VMBUS_RQST_ERROR) {
+                       spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+-                      pr_err("No request id available\n");
+                       return -EAGAIN;
+               }
+       }
+-- 
+2.43.0
+
diff --git a/queue-5.10/exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch b/queue-5.10/exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch
new file mode 100644 (file)
index 0000000..790e1bb
--- /dev/null
@@ -0,0 +1,33 @@
+From 9cf15c9f4363b09b25014512da81a180f3e5e6c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Aug 2022 10:43:42 +0200
+Subject: exit: Fix typo in comment: s/sub-theads/sub-threads
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit dcca34754a3f5290406403b8066e3b15dda9f4bf ]
+
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Stable-dep-of: c1be35a16b2f ("exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/exit.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index bacdaf980933b..c41bdc0a7f06b 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1105,7 +1105,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+                * p->signal fields because the whole thread group is dead
+                * and nobody can change them.
+                *
+-               * psig->stats_lock also protects us from our sub-theads
++               * psig->stats_lock also protects us from our sub-threads
+                * which can reap other children at the same time. Until
+                * we change k_getrusage()-like users to rely on this lock
+                * we have to take ->siglock as well.
+-- 
+2.43.0
+
diff --git a/queue-5.10/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch b/queue-5.10/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch
new file mode 100644 (file)
index 0000000..a9f4daa
--- /dev/null
@@ -0,0 +1,65 @@
+From 2b40d3238f9ea8a9cf9a61ce0cbec94ef5d2ced8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 16:34:00 +0100
+Subject: exit: wait_task_zombie: kill the no longer necessary
+ spin_lock_irq(siglock)
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 ]
+
+After the recent changes nobody use siglock to read the values protected
+by stats_lock, we can kill spin_lock_irq(&current->sighand->siglock) and
+update the comment.
+
+With this patch only __exit_signal() and thread_group_start_cputime() take
+stats_lock under siglock.
+
+Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/exit.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index c41bdc0a7f06b..8f25abdd5fa7d 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1106,17 +1106,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+                * and nobody can change them.
+                *
+                * psig->stats_lock also protects us from our sub-threads
+-               * which can reap other children at the same time. Until
+-               * we change k_getrusage()-like users to rely on this lock
+-               * we have to take ->siglock as well.
++               * which can reap other children at the same time.
+                *
+                * We use thread_group_cputime_adjusted() to get times for
+                * the thread group, which consolidates times for all threads
+                * in the group including the group leader.
+                */
+               thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+-              spin_lock_irq(&current->sighand->siglock);
+-              write_seqlock(&psig->stats_lock);
++              write_seqlock_irq(&psig->stats_lock);
+               psig->cutime += tgutime + sig->cutime;
+               psig->cstime += tgstime + sig->cstime;
+               psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
+@@ -1139,8 +1136,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+                       psig->cmaxrss = maxrss;
+               task_io_accounting_add(&psig->ioac, &p->ioac);
+               task_io_accounting_add(&psig->ioac, &sig->ioac);
+-              write_sequnlock(&psig->stats_lock);
+-              spin_unlock_irq(&current->sighand->siglock);
++              write_sequnlock_irq(&psig->stats_lock);
+       }
+       if (wo->wo_rusage)
+-- 
+2.43.0
+
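For background on the change above: stats_lock is a seqlock, so writers exclude each other and bump a sequence count, while readers run locklessly and retry if the count moved under them. Once every reader of these fields goes through that sequence count (or takes stats_lock itself), holding siglock around the update adds nothing. The sketch below is a generic seqlock usage example, not the scheduler code itself.

#include <linux/seqlock.h>
#include <linux/types.h>

static DEFINE_SEQLOCK(example_stats_lock);
static u64 stat_a, stat_b;

/* Writer side: exclusive among writers, bumps the sequence count. */
static void example_stats_update(u64 a, u64 b)
{
	write_seqlock_irq(&example_stats_lock);
	stat_a = a;
	stat_b = b;
	write_sequnlock_irq(&example_stats_lock);
}

/* Reader side: lockless; retry if a writer slipped in meanwhile. */
static void example_stats_read(u64 *a, u64 *b)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&example_stats_lock);
		*a = stat_a;
		*b = stat_b;
	} while (read_seqretry(&example_stats_lock, seq));
}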
diff --git a/queue-5.10/ext4-convert-to-exclusive-lock-while-inserting-delal.patch b/queue-5.10/ext4-convert-to-exclusive-lock-while-inserting-delal.patch
new file mode 100644 (file)
index 0000000..890a1d7
--- /dev/null
@@ -0,0 +1,113 @@
+From b26ac4d68bbe68214213c82814d4c7acd12c6a64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 09:58:01 +0800
+Subject: ext4: convert to exclusive lock while inserting delalloc extents
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit acf795dc161f3cf481db20f05db4250714e375e5 ]
+
+ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem
+when inserting delalloc extents, it could be raced by another querying
+path of ext4_map_blocks() without i_rwsem, .e.g buffered read path.
+Suppose we buffered read a file containing just a hole, and without any
+cached extents tree, then it is raced by another delayed buffered write
+to the same area or the near area belongs to the same hole, and the new
+delalloc extent could be overwritten to a hole extent.
+
+ pread()                           pwrite()
+  filemap_read_folio()
+   ext4_mpage_readpages()
+    ext4_map_blocks()
+     down_read(i_data_sem)
+     ext4_ext_determine_hole()
+     //find hole
+     ext4_ext_put_gap_in_cache()
+      ext4_es_find_extent_range()
+      //no delalloc extent
+                                    ext4_da_map_blocks()
+                                     down_read(i_data_sem)
+                                     ext4_insert_delayed_block()
+                                     //insert delalloc extent
+      ext4_es_insert_extent()
+      //overwrite delalloc extent to hole
+
+This race could lead to inconsistent delalloc extents tree and
+incorrect reserved space counter. Fix this by converting to hold
+i_data_sem in exclusive mode when adding a new delalloc extent in
+ext4_da_map_blocks().
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Suggested-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 25 +++++++++++--------------
+ 1 file changed, 11 insertions(+), 14 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 64162470a7e6c..8b48ed351c4b9 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1728,10 +1728,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+       /* Lookup extent status tree firstly */
+       if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+-              if (ext4_es_is_hole(&es)) {
+-                      down_read(&EXT4_I(inode)->i_data_sem);
++              if (ext4_es_is_hole(&es))
+                       goto add_delayed;
+-              }
+               /*
+                * Delayed extent could be allocated by fallocate.
+@@ -1773,8 +1771,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+               retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+       else
+               retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+-      if (retval < 0)
+-              goto out_unlock;
++      if (retval < 0) {
++              up_read(&EXT4_I(inode)->i_data_sem);
++              return retval;
++      }
+       if (retval > 0) {
+               unsigned int status;
+@@ -1790,24 +1790,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+                               EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+               ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+                                     map->m_pblk, status);
+-              goto out_unlock;
++              up_read(&EXT4_I(inode)->i_data_sem);
++              return retval;
+       }
++      up_read(&EXT4_I(inode)->i_data_sem);
+ add_delayed:
+-      /*
+-       * XXX: __block_prepare_write() unmaps passed block,
+-       * is it OK?
+-       */
++      down_write(&EXT4_I(inode)->i_data_sem);
+       retval = ext4_insert_delayed_block(inode, map->m_lblk);
++      up_write(&EXT4_I(inode)->i_data_sem);
+       if (retval)
+-              goto out_unlock;
++              return retval;
+       map_bh(bh, inode->i_sb, invalid_block);
+       set_buffer_new(bh);
+       set_buffer_delay(bh);
+-
+-out_unlock:
+-      up_read((&EXT4_I(inode)->i_data_sem));
+       return retval;
+ }
+-- 
+2.43.0
+
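The fix above boils down to a standard rwsem pattern: lookups stay under the shared i_data_sem, but the semaphore is released and retaken exclusively before a new delalloc extent is inserted into the shared extent status tree, so a racing reader can no longer cache a stale hole over it. A rough sketch of that pattern follows; lookup_cached_extent() and insert_delayed_extent() are placeholder names, not the real ext4 helpers.

/* Rough sketch of the read-for-lookup / write-for-insert pattern above.
 * The two helpers are placeholders, not the real ext4 functions.
 */
static int map_delalloc_block(struct inode *inode, ext4_lblk_t lblk)
{
	int err = 0;

	down_read(&EXT4_I(inode)->i_data_sem);       /* shared: lookup only */
	if (lookup_cached_extent(inode, lblk)) {
		up_read(&EXT4_I(inode)->i_data_sem);
		return 0;
	}
	up_read(&EXT4_I(inode)->i_data_sem);

	down_write(&EXT4_I(inode)->i_data_sem);      /* exclusive: insert */
	err = insert_delayed_extent(inode, lblk);
	up_write(&EXT4_I(inode)->i_data_sem);

	return err;
}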
diff --git a/queue-5.10/ext4-make-ext4_es_insert_extent-return-void.patch b/queue-5.10/ext4-make-ext4_es_insert_extent-return-void.patch
new file mode 100644 (file)
index 0000000..91789fe
--- /dev/null
@@ -0,0 +1,158 @@
+From 5ad8f4e0d1ca2d96d546f15fefdd9659c3535249 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Apr 2023 11:38:45 +0800
+Subject: ext4: make ext4_es_insert_extent() return void
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 6c120399cde6b1b5cf65ce403765c579fb3d3e50 ]
+
+Now ext4_es_insert_extent() never return error, so make it return void.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230424033846.4732-12-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/extents.c        |  5 +++--
+ fs/ext4/extents_status.c | 14 ++++++--------
+ fs/ext4/extents_status.h |  6 +++---
+ fs/ext4/inode.c          | 21 ++++++---------------
+ 4 files changed, 18 insertions(+), 28 deletions(-)
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 68aa8760cb465..9e12592727914 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -3107,8 +3107,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
+       if (ee_len == 0)
+               return 0;
+-      return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+-                                   EXTENT_STATUS_WRITTEN);
++      ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
++                            EXTENT_STATUS_WRITTEN);
++      return 0;
+ }
+ /* FIXME!! we need to try to merge to left or right after zero-out  */
+diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
+index cccbdfd49a86b..f37e62546745b 100644
+--- a/fs/ext4/extents_status.c
++++ b/fs/ext4/extents_status.c
+@@ -846,12 +846,10 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
+ /*
+  * ext4_es_insert_extent() adds information to an inode's extent
+  * status tree.
+- *
+- * Return 0 on success, error code on failure.
+  */
+-int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+-                        ext4_lblk_t len, ext4_fsblk_t pblk,
+-                        unsigned int status)
++void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
++                         ext4_lblk_t len, ext4_fsblk_t pblk,
++                         unsigned int status)
+ {
+       struct extent_status newes;
+       ext4_lblk_t end = lblk + len - 1;
+@@ -863,13 +861,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+       bool revise_pending = false;
+       if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+-              return 0;
++              return;
+       es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
+                lblk, len, pblk, status, inode->i_ino);
+       if (!len)
+-              return 0;
++              return;
+       BUG_ON(end < lblk);
+@@ -938,7 +936,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+               goto retry;
+       ext4_es_print_tree(inode);
+-      return 0;
++      return;
+ }
+ /*
+diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
+index 4ec30a7982605..481ec4381bee6 100644
+--- a/fs/ext4/extents_status.h
++++ b/fs/ext4/extents_status.h
+@@ -127,9 +127,9 @@ extern int __init ext4_init_es(void);
+ extern void ext4_exit_es(void);
+ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
+-extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+-                               ext4_lblk_t len, ext4_fsblk_t pblk,
+-                               unsigned int status);
++extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
++                                ext4_lblk_t len, ext4_fsblk_t pblk,
++                                unsigned int status);
+ extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+                                ext4_lblk_t len, ext4_fsblk_t pblk,
+                                unsigned int status);
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 365c4d3a434ab..ab2a7f9902887 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -589,10 +589,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                   ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+                                      map->m_lblk + map->m_len - 1))
+                       status |= EXTENT_STATUS_DELAYED;
+-              ret = ext4_es_insert_extent(inode, map->m_lblk,
+-                                          map->m_len, map->m_pblk, status);
+-              if (ret < 0)
+-                      retval = ret;
++              ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++                                    map->m_pblk, status);
+       }
+       up_read((&EXT4_I(inode)->i_data_sem));
+@@ -701,12 +699,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                   ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+                                      map->m_lblk + map->m_len - 1))
+                       status |= EXTENT_STATUS_DELAYED;
+-              ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+-                                          map->m_pblk, status);
+-              if (ret < 0) {
+-                      retval = ret;
+-                      goto out_sem;
+-              }
++              ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++                                    map->m_pblk, status);
+       }
+ out_sem:
+@@ -1800,7 +1794,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+       } else if (retval > 0) {
+-              int ret;
+               unsigned int status;
+               if (unlikely(retval != map->m_len)) {
+@@ -1813,10 +1806,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+               status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+                               EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+-              ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+-                                          map->m_pblk, status);
+-              if (ret != 0)
+-                      retval = ret;
++              ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++                                    map->m_pblk, status);
+       }
+ out_unlock:
+-- 
+2.43.0
+
diff --git a/queue-5.10/ext4-refactor-ext4_da_map_blocks.patch b/queue-5.10/ext4-refactor-ext4_da_map_blocks.patch
new file mode 100644 (file)
index 0000000..522ae1d
--- /dev/null
@@ -0,0 +1,93 @@
+From b636ced6288f229dff9b524a82aa30d7cfa5e8f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 09:58:00 +0800
+Subject: ext4: refactor ext4_da_map_blocks()
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 ]
+
+Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary
+parameters and branches, no logic changes.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 39 +++++++++++++++++----------------------
+ 1 file changed, 17 insertions(+), 22 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index ab2a7f9902887..64162470a7e6c 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1729,7 +1729,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+       /* Lookup extent status tree firstly */
+       if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+               if (ext4_es_is_hole(&es)) {
+-                      retval = 0;
+                       down_read(&EXT4_I(inode)->i_data_sem);
+                       goto add_delayed;
+               }
+@@ -1774,26 +1773,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+               retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+       else
+               retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+-
+-add_delayed:
+-      if (retval == 0) {
+-              int ret;
+-
+-              /*
+-               * XXX: __block_prepare_write() unmaps passed block,
+-               * is it OK?
+-               */
+-
+-              ret = ext4_insert_delayed_block(inode, map->m_lblk);
+-              if (ret != 0) {
+-                      retval = ret;
+-                      goto out_unlock;
+-              }
+-
+-              map_bh(bh, inode->i_sb, invalid_block);
+-              set_buffer_new(bh);
+-              set_buffer_delay(bh);
+-      } else if (retval > 0) {
++      if (retval < 0)
++              goto out_unlock;
++      if (retval > 0) {
+               unsigned int status;
+               if (unlikely(retval != map->m_len)) {
+@@ -1808,11 +1790,24 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+                               EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+               ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+                                     map->m_pblk, status);
++              goto out_unlock;
+       }
++add_delayed:
++      /*
++       * XXX: __block_prepare_write() unmaps passed block,
++       * is it OK?
++       */
++      retval = ext4_insert_delayed_block(inode, map->m_lblk);
++      if (retval)
++              goto out_unlock;
++
++      map_bh(bh, inode->i_sb, invalid_block);
++      set_buffer_new(bh);
++      set_buffer_delay(bh);
++
+ out_unlock:
+       up_read((&EXT4_I(inode)->i_data_sem));
+-
+       return retval;
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/getrusage-add-the-signal_struct-sig-local-variable.patch b/queue-5.10/getrusage-add-the-signal_struct-sig-local-variable.patch
new file mode 100644 (file)
index 0000000..34eef5f
--- /dev/null
@@ -0,0 +1,93 @@
+From 2278229f5e1d78b6ef0a2938f41ed309435ff40d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:25:54 +0200
+Subject: getrusage: add the "signal_struct *sig" local variable
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ]
+
+No functional changes, cleanup/preparation.
+
+Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 37 +++++++++++++++++++------------------
+ 1 file changed, 19 insertions(+), 18 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index bff14910b9262..8a53d858d7375 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1737,6 +1737,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       unsigned long flags;
+       u64 tgutime, tgstime, utime, stime;
+       unsigned long maxrss = 0;
++      struct signal_struct *sig = p->signal;
+       memset((char *)r, 0, sizeof (*r));
+       utime = stime = 0;
+@@ -1744,7 +1745,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       if (who == RUSAGE_THREAD) {
+               task_cputime_adjusted(current, &utime, &stime);
+               accumulate_thread_rusage(p, r);
+-              maxrss = p->signal->maxrss;
++              maxrss = sig->maxrss;
+               goto out;
+       }
+@@ -1754,15 +1755,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       switch (who) {
+       case RUSAGE_BOTH:
+       case RUSAGE_CHILDREN:
+-              utime = p->signal->cutime;
+-              stime = p->signal->cstime;
+-              r->ru_nvcsw = p->signal->cnvcsw;
+-              r->ru_nivcsw = p->signal->cnivcsw;
+-              r->ru_minflt = p->signal->cmin_flt;
+-              r->ru_majflt = p->signal->cmaj_flt;
+-              r->ru_inblock = p->signal->cinblock;
+-              r->ru_oublock = p->signal->coublock;
+-              maxrss = p->signal->cmaxrss;
++              utime = sig->cutime;
++              stime = sig->cstime;
++              r->ru_nvcsw = sig->cnvcsw;
++              r->ru_nivcsw = sig->cnivcsw;
++              r->ru_minflt = sig->cmin_flt;
++              r->ru_majflt = sig->cmaj_flt;
++              r->ru_inblock = sig->cinblock;
++              r->ru_oublock = sig->coublock;
++              maxrss = sig->cmaxrss;
+               if (who == RUSAGE_CHILDREN)
+                       break;
+@@ -1772,14 +1773,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+               utime += tgutime;
+               stime += tgstime;
+-              r->ru_nvcsw += p->signal->nvcsw;
+-              r->ru_nivcsw += p->signal->nivcsw;
+-              r->ru_minflt += p->signal->min_flt;
+-              r->ru_majflt += p->signal->maj_flt;
+-              r->ru_inblock += p->signal->inblock;
+-              r->ru_oublock += p->signal->oublock;
+-              if (maxrss < p->signal->maxrss)
+-                      maxrss = p->signal->maxrss;
++              r->ru_nvcsw += sig->nvcsw;
++              r->ru_nivcsw += sig->nivcsw;
++              r->ru_minflt += sig->min_flt;
++              r->ru_majflt += sig->maj_flt;
++              r->ru_inblock += sig->inblock;
++              r->ru_oublock += sig->oublock;
++              if (maxrss < sig->maxrss)
++                      maxrss = sig->maxrss;
+               t = p;
+               do {
+                       accumulate_thread_rusage(t, r);
+-- 
+2.43.0
+
diff --git a/queue-5.10/getrusage-move-thread_group_cputime_adjusted-outside.patch b/queue-5.10/getrusage-move-thread_group_cputime_adjusted-outside.patch
new file mode 100644 (file)
index 0000000..5072d8e
--- /dev/null
@@ -0,0 +1,111 @@
+From fdc881c5376b0354fa1c63267602f1fe09e16092 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:50 +0100
+Subject: getrusage: move thread_group_cputime_adjusted() outside of
+ lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ]
+
+Patch series "getrusage: use sig->stats_lock", v2.
+
+This patch (of 2):
+
+thread_group_cputime() does its own locking, we can safely shift
+thread_group_cputime_adjusted() which does another for_each_thread loop
+outside of ->siglock protected section.
+
+This is also preparation for the next patch which changes getrusage() to
+use stats_lock instead of siglock, thread_group_cputime() takes the same
+lock.  With the current implementation recursive read_seqbegin_or_lock()
+is fine, thread_group_cputime() can't enter the slow mode if the caller
+holds stats_lock, yet this looks more safe and better performance-wise.
+
+Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com
+Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 8a53d858d7375..26c8783bd0757 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1736,17 +1736,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       struct task_struct *t;
+       unsigned long flags;
+       u64 tgutime, tgstime, utime, stime;
+-      unsigned long maxrss = 0;
++      unsigned long maxrss;
++      struct mm_struct *mm;
+       struct signal_struct *sig = p->signal;
+-      memset((char *)r, 0, sizeof (*r));
++      memset(r, 0, sizeof(*r));
+       utime = stime = 0;
++      maxrss = 0;
+       if (who == RUSAGE_THREAD) {
+               task_cputime_adjusted(current, &utime, &stime);
+               accumulate_thread_rusage(p, r);
+               maxrss = sig->maxrss;
+-              goto out;
++              goto out_thread;
+       }
+       if (!lock_task_sighand(p, &flags))
+@@ -1770,9 +1772,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               fallthrough;
+       case RUSAGE_SELF:
+-              thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+-              utime += tgutime;
+-              stime += tgstime;
+               r->ru_nvcsw += sig->nvcsw;
+               r->ru_nivcsw += sig->nivcsw;
+               r->ru_minflt += sig->min_flt;
+@@ -1792,19 +1791,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       }
+       unlock_task_sighand(p, &flags);
+-out:
+-      r->ru_utime = ns_to_kernel_old_timeval(utime);
+-      r->ru_stime = ns_to_kernel_old_timeval(stime);
++      if (who == RUSAGE_CHILDREN)
++              goto out_children;
+-      if (who != RUSAGE_CHILDREN) {
+-              struct mm_struct *mm = get_task_mm(p);
++      thread_group_cputime_adjusted(p, &tgutime, &tgstime);
++      utime += tgutime;
++      stime += tgstime;
+-              if (mm) {
+-                      setmax_mm_hiwater_rss(&maxrss, mm);
+-                      mmput(mm);
+-              }
++out_thread:
++      mm = get_task_mm(p);
++      if (mm) {
++              setmax_mm_hiwater_rss(&maxrss, mm);
++              mmput(mm);
+       }
++
++out_children:
+       r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
++      r->ru_utime = ns_to_kernel_old_timeval(utime);
++      r->ru_stime = ns_to_kernel_old_timeval(stime);
+ }
+ SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
+-- 
+2.43.0
+
diff --git a/queue-5.10/getrusage-use-__for_each_thread.patch b/queue-5.10/getrusage-use-__for_each_thread.patch
new file mode 100644 (file)
index 0000000..7c0d2bc
--- /dev/null
@@ -0,0 +1,43 @@
+From d4552b3a4300a1e6456d4a94e2501a8ab77c6b12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:26:29 +0200
+Subject: getrusage: use __for_each_thread()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ]
+
+do/while_each_thread should be avoided when possible.
+
+Plus, this change allows us to avoid lock_task_sighand(); we can use RCU
+and/or sig->stats_lock instead.
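
In kernel terms the patch swaps one iteration idiom for the other; a condensed
before/after sketch (kernel context, mirroring the hunk below):

	/* old: do/while_each_thread, run under lock_task_sighand() in getrusage() */
	t = p;
	do {
		accumulate_thread_rusage(t, r);
	} while_each_thread(p, t);

	/* new: __for_each_thread() walks sig->thread_head and can run under RCU */
	__for_each_thread(sig, t)
		accumulate_thread_rusage(t, r);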
+
+Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 26c8783bd0757..f1ae8fa627145 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1780,10 +1780,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               r->ru_oublock += sig->oublock;
+               if (maxrss < sig->maxrss)
+                       maxrss = sig->maxrss;
+-              t = p;
+-              do {
++              __for_each_thread(sig, t)
+                       accumulate_thread_rusage(t, r);
+-              } while_each_thread(p, t);
+               break;
+       default:
+-- 
+2.43.0
+
diff --git a/queue-5.10/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch b/queue-5.10/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch
new file mode 100644 (file)
index 0000000..5c26477
--- /dev/null
@@ -0,0 +1,92 @@
+From 130977df9b7119b933e5ca62435814492925b7dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:53 +0100
+Subject: getrusage: use sig->stats_lock rather than lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ]
+
+lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
+getrusage() at the same time and the process has NR_THREADS threads,
+spin_lock_irq() will spin with irqs disabled for O(NR_CPUS * NR_THREADS) time.
+
+Change getrusage() to use sig->stats_lock, which was specifically designed
+for this type of use. This way it runs lockless in the likely case.
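
The conversion follows the usual seqcount retry pattern around sig->stats_lock;
a condensed sketch (kernel context, mirroring the hunks below):

	unsigned int seq = 0;
	unsigned long flags;
retry:
	/* re-zero the accumulators on every pass */
	flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
	/* ... read the sig->* counters ... */
	if (need_seqretry(&sig->stats_lock, seq)) {
		seq = 1;	/* a second pass takes the lock instead of spinning */
		goto retry;
	}
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);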
+
+TODO:
+       - Change do_task_stat() to use sig->stats_lock too, then we can
+         remove spin_lock_irq(siglock) in wait_task_zombie().
+
+       - Turn sig->stats_lock into seqcount_rwlock_t, this way the
+         readers in the slow mode won't exclude each other. See
+         https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/
+
+       - stats_lock has to disable irqs because ->siglock can be taken
+         in irq context, it would be very nice to change __exit_signal()
+         to avoid the siglock->stats_lock dependency.
+
+Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index f1ae8fa627145..efc213ae4c5ad 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1739,7 +1739,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+       unsigned long maxrss;
+       struct mm_struct *mm;
+       struct signal_struct *sig = p->signal;
++      unsigned int seq = 0;
++retry:
+       memset(r, 0, sizeof(*r));
+       utime = stime = 0;
+       maxrss = 0;
+@@ -1751,8 +1753,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               goto out_thread;
+       }
+-      if (!lock_task_sighand(p, &flags))
+-              return;
++      flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+       switch (who) {
+       case RUSAGE_BOTH:
+@@ -1780,14 +1781,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+               r->ru_oublock += sig->oublock;
+               if (maxrss < sig->maxrss)
+                       maxrss = sig->maxrss;
++
++              rcu_read_lock();
+               __for_each_thread(sig, t)
+                       accumulate_thread_rusage(t, r);
++              rcu_read_unlock();
++
+               break;
+       default:
+               BUG();
+       }
+-      unlock_task_sighand(p, &flags);
++
++      if (need_seqretry(&sig->stats_lock, seq)) {
++              seq = 1;
++              goto retry;
++      }
++      done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+       if (who == RUSAGE_CHILDREN)
+               goto out_children;
+-- 
+2.43.0
+
diff --git a/queue-5.10/hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch b/queue-5.10/hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch
new file mode 100644 (file)
index 0000000..efbd579
--- /dev/null
@@ -0,0 +1,60 @@
+From d716ca09e4a57e4809e656010300dcdb7f40d4cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Apr 2021 18:12:35 -0700
+Subject: hv_netvsc: Make netvsc/VF binding check both MAC and serial number
+
+From: Dexuan Cui <decui@microsoft.com>
+
+[ Upstream commit 64ff412ad41fe3a5bf759ff4844dc1382176485c ]
+
+Currently the netvsc/VF binding logic only checks the PCI serial number.
+
+The Microsoft Azure Network Adapter (MANA) supports multiple net_device
+interfaces (each such interface is called a "vPort", and has its unique
+MAC address) which are backed by the same VF PCI device, so the binding
+logic should check both the MAC address and the PCI serial number.
+
+The change should not break any other existing VF drivers, because the
+Hyper-V NIC SR-IOV implementation requires that the netvsc network
+interface and the VF network interface have the same MAC address.
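
Condensed, the lookup in get_netvsc_byslot() now has to pass two checks before
a device is treated as the matching synthetic NIC (sketch of the hunk below):

	/* same PCI serial number ... */
	if (ndev_ctx->vf_serial != serial)
		continue;

	/* ... and the same permanent MAC address as the VF */
	ndev = hv_get_drvdata(ndev_ctx->device_ctx);
	if (ndev->addr_len != vf_netdev->addr_len ||
	    memcmp(ndev->perm_addr, vf_netdev->perm_addr, ndev->addr_len) != 0)
		continue;

	return ndev;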
+
+Co-developed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Co-developed-by: Shachar Raindel <shacharr@microsoft.com>
+Signed-off-by: Shachar Raindel <shacharr@microsoft.com>
+Acked-by: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 057b1a9dde153..9ec1633b89b48 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2310,8 +2310,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
+               if (!ndev_ctx->vf_alloc)
+                       continue;
+-              if (ndev_ctx->vf_serial == serial)
+-                      return hv_get_drvdata(ndev_ctx->device_ctx);
++              if (ndev_ctx->vf_serial != serial)
++                      continue;
++
++              ndev = hv_get_drvdata(ndev_ctx->device_ctx);
++              if (ndev->addr_len != vf_netdev->addr_len ||
++                  memcmp(ndev->perm_addr, vf_netdev->perm_addr,
++                         ndev->addr_len) != 0)
++                      continue;
++
++              return ndev;
++
+       }
+       /* Fallback path to check synthetic vf with help of mac addr.
+-- 
+2.43.0
+
diff --git a/queue-5.10/hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch b/queue-5.10/hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch
new file mode 100644 (file)
index 0000000..c69442e
--- /dev/null
@@ -0,0 +1,60 @@
+From 08cd6ba213b3786688214a60972f77235cc64410 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jan 2021 16:53:43 -0800
+Subject: hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove
+
+From: Long Li <longli@microsoft.com>
+
+[ Upstream commit 34b06a2eee44d469f2e2c013a83e6dac3aff6411 ]
+
+On VF hot remove, NETDEV_GOING_DOWN is sent to notify that the VF is about
+to go down. At this time, the VF is still sending/receiving traffic and we
+request the VSP to switch datapath.
+
+On completion, the datapath is switched to synthetic and we can proceed
+with VF hot remove.
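
In the netvsc notifier this amounts to forwarding the event to
netvsc_vf_changed() and treating a VF that is about to disappear as already
down; a condensed sketch of the hunks below:

	bool vf_is_up = false;

	if (event != NETDEV_GOING_DOWN)
		vf_is_up = netif_running(vf_netdev);
	/* vf_is_up == false here triggers the switch back to the synthetic path */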
+
+Signed-off-by: Long Li <longli@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 57a5ec098e7e0..057b1a9dde153 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2411,12 +2411,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
+  * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
+  * interface, there is only the CHANGE event and no UP or DOWN event.
+  */
+-static int netvsc_vf_changed(struct net_device *vf_netdev)
++static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
+ {
+       struct net_device_context *net_device_ctx;
+       struct netvsc_device *netvsc_dev;
+       struct net_device *ndev;
+-      bool vf_is_up = netif_running(vf_netdev);
++      bool vf_is_up = false;
++
++      if (event != NETDEV_GOING_DOWN)
++              vf_is_up = netif_running(vf_netdev);
+       ndev = get_netvsc_byref(vf_netdev);
+       if (!ndev)
+@@ -2762,7 +2765,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
+       case NETDEV_UP:
+       case NETDEV_DOWN:
+       case NETDEV_CHANGE:
+-              return netvsc_vf_changed(event_dev);
++      case NETDEV_GOING_DOWN:
++              return netvsc_vf_changed(event_dev, event);
+       default:
+               return NOTIFY_DONE;
+       }
+-- 
+2.43.0
+
diff --git a/queue-5.10/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch b/queue-5.10/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch
new file mode 100644 (file)
index 0000000..e0491cf
--- /dev/null
@@ -0,0 +1,184 @@
+From 7bd309d035dcf27d9ac09dda05158de11370e3c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Feb 2024 20:40:38 -0800
+Subject: hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed
+
+From: Shradha Gupta <shradhagupta@linux.microsoft.com>
+
+[ Upstream commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 ]
+
+If the hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER
+handler cannot register the VF successfully because the register call is
+received before netvsc_probe has finished. This is because
+register_netdevice_notifier() is registered very early (even before
+vmbus_driver_register()).
+To fix this, we try to register each such matching VF (if it is visible
+as a netdevice) at the end of netvsc_probe.
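
Condensed, the catch-up added at the end of netvsc_probe() walks the existing
netdevices and registers the one that maps to this NetVSC instance (sketch of
the hunk below):

	for_each_netdev(dev_net(net), vf_netdev) {
		if (check_dev_is_matching_vf(vf_netdev) != 0)
			continue;
		if (net != get_netvsc_byslot(vf_netdev))
			continue;
		netvsc_prepare_bonding(vf_netdev);
		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
		__netvsc_vf_setup(net, vf_netdev);
		break;
	}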
+
+Cc: stable@vger.kernel.org
+Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register")
+Suggested-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++--------
+ 1 file changed, 62 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index e8efcc6a0b05a..0fc0f9cb3f34b 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -44,6 +44,10 @@
+ #define LINKCHANGE_INT (2 * HZ)
+ #define VF_TAKEOVER_INT (HZ / 10)
++/* Macros to define the context of vf registration */
++#define VF_REG_IN_PROBE               1
++#define VF_REG_IN_NOTIFIER    2
++
+ static unsigned int ring_size __ro_after_init = 128;
+ module_param(ring_size, uint, 0444);
+ MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+@@ -2194,7 +2198,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
+ }
+ static int netvsc_vf_join(struct net_device *vf_netdev,
+-                        struct net_device *ndev)
++                        struct net_device *ndev, int context)
+ {
+       struct net_device_context *ndev_ctx = netdev_priv(ndev);
+       int ret;
+@@ -2217,7 +2221,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
+               goto upper_link_failed;
+       }
+-      schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
++      /* If this registration is called from probe context vf_takeover
++       * is taken care of later in probe itself.
++       */
++      if (context == VF_REG_IN_NOTIFIER)
++              schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+       call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+@@ -2355,7 +2363,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev)
+       return NOTIFY_DONE;
+ }
+-static int netvsc_register_vf(struct net_device *vf_netdev)
++static int netvsc_register_vf(struct net_device *vf_netdev, int context)
+ {
+       struct net_device_context *net_device_ctx;
+       struct netvsc_device *netvsc_dev;
+@@ -2395,7 +2403,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
+       netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
+-      if (netvsc_vf_join(vf_netdev, ndev) != 0)
++      if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
+               return NOTIFY_DONE;
+       dev_hold(vf_netdev);
+@@ -2479,10 +2487,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
+       return NOTIFY_OK;
+ }
++static int check_dev_is_matching_vf(struct net_device *event_ndev)
++{
++      /* Skip NetVSC interfaces */
++      if (event_ndev->netdev_ops == &device_ops)
++              return -ENODEV;
++
++      /* Avoid non-Ethernet type devices */
++      if (event_ndev->type != ARPHRD_ETHER)
++              return -ENODEV;
++
++      /* Avoid Vlan dev with same MAC registering as VF */
++      if (is_vlan_dev(event_ndev))
++              return -ENODEV;
++
++      /* Avoid Bonding master dev with same MAC registering as VF */
++      if (netif_is_bond_master(event_ndev))
++              return -ENODEV;
++
++      return 0;
++}
++
+ static int netvsc_probe(struct hv_device *dev,
+                       const struct hv_vmbus_device_id *dev_id)
+ {
+-      struct net_device *net = NULL;
++      struct net_device *net = NULL, *vf_netdev;
+       struct net_device_context *net_device_ctx;
+       struct netvsc_device_info *device_info = NULL;
+       struct netvsc_device *nvdev;
+@@ -2590,6 +2619,30 @@ static int netvsc_probe(struct hv_device *dev,
+       }
+       list_add(&net_device_ctx->list, &netvsc_dev_list);
++
++      /* When the hv_netvsc driver is unloaded and reloaded, the
++       * NET_DEVICE_REGISTER for the vf device is replayed before probe
++       * is complete. This is because register_netdevice_notifier() gets
++       * registered before vmbus_driver_register() so that callback func
++       * is set before probe and we don't miss events like NETDEV_POST_INIT
++       * So, in this section we try to register the matching vf device that
++       * is present as a netdevice, knowing that its register call is not
++       * processed in the netvsc_netdev_notifier(as probing is progress and
++       * get_netvsc_byslot fails).
++       */
++      for_each_netdev(dev_net(net), vf_netdev) {
++              ret = check_dev_is_matching_vf(vf_netdev);
++              if (ret != 0)
++                      continue;
++
++              if (net != get_netvsc_byslot(vf_netdev))
++                      continue;
++
++              netvsc_prepare_bonding(vf_netdev);
++              netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
++              __netvsc_vf_setup(net, vf_netdev);
++              break;
++      }
+       rtnl_unlock();
+       netvsc_devinfo_put(device_info);
+@@ -2746,28 +2799,17 @@ static int netvsc_netdev_event(struct notifier_block *this,
+                              unsigned long event, void *ptr)
+ {
+       struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
++      int ret = 0;
+-      /* Skip our own events */
+-      if (event_dev->netdev_ops == &device_ops)
+-              return NOTIFY_DONE;
+-
+-      /* Avoid non-Ethernet type devices */
+-      if (event_dev->type != ARPHRD_ETHER)
+-              return NOTIFY_DONE;
+-
+-      /* Avoid Vlan dev with same MAC registering as VF */
+-      if (is_vlan_dev(event_dev))
+-              return NOTIFY_DONE;
+-
+-      /* Avoid Bonding master dev with same MAC registering as VF */
+-      if (netif_is_bond_master(event_dev))
++      ret = check_dev_is_matching_vf(event_dev);
++      if (ret != 0)
+               return NOTIFY_DONE;
+       switch (event) {
+       case NETDEV_POST_INIT:
+               return netvsc_prepare_bonding(event_dev);
+       case NETDEV_REGISTER:
+-              return netvsc_register_vf(event_dev);
++              return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
+       case NETDEV_UNREGISTER:
+               return netvsc_unregister_vf(event_dev);
+       case NETDEV_UP:
+-- 
+2.43.0
+
diff --git a/queue-5.10/hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch b/queue-5.10/hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch
new file mode 100644 (file)
index 0000000..3435e06
--- /dev/null
@@ -0,0 +1,38 @@
+From 0d6995f74ccf64cb2ddb909e952411877f54d143 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Oct 2021 13:03:28 +0900
+Subject: hv_netvsc: use netif_is_bond_master() instead of open code
+
+From: Juhee Kang <claudiajkang@gmail.com>
+
+[ Upstream commit c60882a4566a0a62dc3a40c85131103aad83dcb3 ]
+
+Use the netif_is_bond_master() helper instead of the open-coded check
+((event_dev->priv_flags & IFF_BONDING) && (event_dev->flags & IFF_MASTER)).
+This patch doesn't change any logic.
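
For reference, the helper in <linux/netdevice.h> reduces to the same two-flag
test, roughly:

	static inline bool netif_is_bond_master(const struct net_device *dev)
	{
		return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;
	}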
+
+Signed-off-by: Juhee Kang <claudiajkang@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 9ec1633b89b48..e8efcc6a0b05a 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2760,8 +2760,7 @@ static int netvsc_netdev_event(struct notifier_block *this,
+               return NOTIFY_DONE;
+       /* Avoid Bonding master dev with same MAC registering as VF */
+-      if ((event_dev->priv_flags & IFF_BONDING) &&
+-          (event_dev->flags & IFF_MASTER))
++      if (netif_is_bond_master(event_dev))
+               return NOTIFY_DONE;
+       switch (event) {
+-- 
+2.43.0
+
diff --git a/queue-5.10/hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch b/queue-5.10/hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch
new file mode 100644 (file)
index 0000000..1336e54
--- /dev/null
@@ -0,0 +1,166 @@
+From 49485861d465362468d21d6dde9d62f83ed55c86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Nov 2020 11:04:02 +0100
+Subject: hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus
+ hardening
+
+From: Andres Beltran <lkmlabelt@gmail.com>
+
+[ Upstream commit 4d18fcc95f50950a99bd940d4e61a983f91d267a ]
+
+Currently, pointers to guest memory are passed to Hyper-V as
+transaction IDs in netvsc. In the face of errors or malicious
+behavior in Hyper-V, netvsc should not expose or trust the transaction
+IDs returned by Hyper-V to be valid guest memory addresses. Instead,
+use small integers generated by vmbus_requestor as request
+(transaction) IDs.
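
On the completion path this means translating the host-supplied transaction ID
back into the guest cookie through the per-channel requestor table instead of
dereferencing it as a pointer; a condensed sketch of the hunk below:

	cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
	if (cmd_rqst == VMBUS_RQST_ERROR) {
		netdev_err(ndev, "Incorrect transaction id\n");
		return;
	}
	skb = (struct sk_buff *)(unsigned long)cmd_rqst;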
+
+Signed-off-by: Andres Beltran <lkmlabelt@gmail.com>
+Co-developed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Acked-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Wei Liu <wei.liu@kernel.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: netdev@vger.kernel.org
+Link: https://lore.kernel.org/r/20201109100402.8946-4-parri.andrea@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/hyperv_net.h   | 13 +++++++++++++
+ drivers/net/hyperv/netvsc.c       | 22 ++++++++++++++++------
+ drivers/net/hyperv/rndis_filter.c |  1 +
+ include/linux/hyperv.h            |  1 +
+ 4 files changed, 31 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 367878493e704..15652d7951f9e 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -847,6 +847,19 @@ struct nvsp_message {
+ #define NETVSC_XDP_HDRM 256
++#define NETVSC_MIN_OUT_MSG_SIZE (sizeof(struct vmpacket_descriptor) + \
++                               sizeof(struct nvsp_message))
++#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor)
++
++/* Estimated requestor size:
++ * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size
++ */
++static inline u32 netvsc_rqstor_size(unsigned long ringbytes)
++{
++      return ringbytes / NETVSC_MIN_OUT_MSG_SIZE +
++              ringbytes / NETVSC_MIN_IN_MSG_SIZE;
++}
++
+ #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
+               (offsetof(struct vmtransfer_page_packet_header, ranges) + \
+               (rng_cnt) * sizeof(struct vmtransfer_page_range))
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 3eae31c0f97a6..c9b73a0448813 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -50,7 +50,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+       vmbus_sendpacket(dev->channel, init_pkt,
+                              sizeof(struct nvsp_message),
+-                             (unsigned long)init_pkt,
++                             VMBUS_RQST_ID_NO_RESPONSE,
+                              VM_PKT_DATA_INBAND, 0);
+ }
+@@ -163,7 +163,7 @@ static void netvsc_revoke_recv_buf(struct hv_device *device,
+               ret = vmbus_sendpacket(device->channel,
+                                      revoke_packet,
+                                      sizeof(struct nvsp_message),
+-                                     (unsigned long)revoke_packet,
++                                     VMBUS_RQST_ID_NO_RESPONSE,
+                                      VM_PKT_DATA_INBAND, 0);
+               /* If the failure is because the channel is rescinded;
+                * ignore the failure since we cannot send on a rescinded
+@@ -213,7 +213,7 @@ static void netvsc_revoke_send_buf(struct hv_device *device,
+               ret = vmbus_sendpacket(device->channel,
+                                      revoke_packet,
+                                      sizeof(struct nvsp_message),
+-                                     (unsigned long)revoke_packet,
++                                     VMBUS_RQST_ID_NO_RESPONSE,
+                                      VM_PKT_DATA_INBAND, 0);
+               /* If the failure is because the channel is rescinded;
+@@ -557,7 +557,7 @@ static int negotiate_nvsp_ver(struct hv_device *device,
+       ret = vmbus_sendpacket(device->channel, init_packet,
+                               sizeof(struct nvsp_message),
+-                              (unsigned long)init_packet,
++                              VMBUS_RQST_ID_NO_RESPONSE,
+                               VM_PKT_DATA_INBAND, 0);
+       return ret;
+@@ -614,7 +614,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
+       /* Send the init request */
+       ret = vmbus_sendpacket(device->channel, init_packet,
+                               sizeof(struct nvsp_message),
+-                              (unsigned long)init_packet,
++                              VMBUS_RQST_ID_NO_RESPONSE,
+                               VM_PKT_DATA_INBAND, 0);
+       if (ret != 0)
+               goto cleanup;
+@@ -698,10 +698,19 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
+                                   const struct vmpacket_descriptor *desc,
+                                   int budget)
+ {
+-      struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
+       struct net_device_context *ndev_ctx = netdev_priv(ndev);
++      struct sk_buff *skb;
+       u16 q_idx = 0;
+       int queue_sends;
++      u64 cmd_rqst;
++
++      cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
++      if (cmd_rqst == VMBUS_RQST_ERROR) {
++              netdev_err(ndev, "Incorrect transaction id\n");
++              return;
++      }
++
++      skb = (struct sk_buff *)(unsigned long)cmd_rqst;
+       /* Notify the layer above us */
+       if (likely(skb)) {
+@@ -1530,6 +1539,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
+                      netvsc_poll, NAPI_POLL_WEIGHT);
+       /* Open the channel */
++      device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+       ret = vmbus_open(device->channel, netvsc_ring_bytes,
+                        netvsc_ring_bytes,  NULL, 0,
+                        netvsc_channel_cb, net_device->chan_table);
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 90bc0008fa2fd..13f62950eeb9f 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1170,6 +1170,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
+       /* Set the channel before opening.*/
+       nvchan->channel = new_sc;
++      new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+       ret = vmbus_open(new_sc, netvsc_ring_bytes,
+                        netvsc_ring_bytes, NULL, 0,
+                        netvsc_channel_cb, nvchan);
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index 4cb65a79d92f6..2aaf450c8d800 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -779,6 +779,7 @@ struct vmbus_requestor {
+ #define VMBUS_NO_RQSTOR U64_MAX
+ #define VMBUS_RQST_ERROR (U64_MAX - 1)
++#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2)
+ struct vmbus_device {
+       u16  dev_type;
+-- 
+2.43.0
+
diff --git a/queue-5.10/hv_netvsc-wait-for-completion-on-request-switch_data.patch b/queue-5.10/hv_netvsc-wait-for-completion-on-request-switch_data.patch
new file mode 100644 (file)
index 0000000..3f27c93
--- /dev/null
@@ -0,0 +1,108 @@
+From d069a005c45770eced39a4cc7f4048713e0ec0dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jan 2021 16:53:42 -0800
+Subject: hv_netvsc: Wait for completion on request SWITCH_DATA_PATH
+
+From: Long Li <longli@microsoft.com>
+
+[ Upstream commit 8b31f8c982b738e4130539e47f03967c599d8e22 ]
+
+The completion indicates whether NVSP_MSG4_TYPE_SWITCH_DATA_PATH has been
+processed by the VSP. Traffic is steered to the VF or to the synthetic NIC
+after we receive this completion.
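
Condensed, netvsc_switch_datapath() now asks for a completion and blocks on it
before recording the new datapath (sketch of the hunk below):

	vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message),
			 (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
			 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	wait_for_completion(&nv_dev->channel_init_wait);
	net_device_ctx->data_path_is_vf = vf;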
+
+Signed-off-by: Long Li <longli@microsoft.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc.c     | 37 ++++++++++++++++++++++++++++++---
+ drivers/net/hyperv/netvsc_drv.c |  1 -
+ 2 files changed, 34 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index c9b73a0448813..03333a4136bf4 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -37,6 +37,10 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+       struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
+       struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
++      /* Block sending traffic to VF if it's about to be gone */
++      if (!vf)
++              net_device_ctx->data_path_is_vf = vf;
++
+       memset(init_pkt, 0, sizeof(struct nvsp_message));
+       init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
+       if (vf)
+@@ -50,8 +54,11 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+       vmbus_sendpacket(dev->channel, init_pkt,
+                              sizeof(struct nvsp_message),
+-                             VMBUS_RQST_ID_NO_RESPONSE,
+-                             VM_PKT_DATA_INBAND, 0);
++                             (unsigned long)init_pkt,
++                             VM_PKT_DATA_INBAND,
++                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
++      wait_for_completion(&nv_dev->channel_init_wait);
++      net_device_ctx->data_path_is_vf = vf;
+ }
+ /* Worker to setup sub channels on initial setup
+@@ -757,8 +764,31 @@ static void netvsc_send_completion(struct net_device *ndev,
+                                  const struct vmpacket_descriptor *desc,
+                                  int budget)
+ {
+-      const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
++      const struct nvsp_message *nvsp_packet;
+       u32 msglen = hv_pkt_datalen(desc);
++      struct nvsp_message *pkt_rqst;
++      u64 cmd_rqst;
++
++      /* First check if this is a VMBUS completion without data payload */
++      if (!msglen) {
++              cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
++                                            (u64)desc->trans_id);
++              if (cmd_rqst == VMBUS_RQST_ERROR) {
++                      netdev_err(ndev, "Invalid transaction id\n");
++                      return;
++              }
++
++              pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
++              switch (pkt_rqst->hdr.msg_type) {
++              case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
++                      complete(&net_device->channel_init_wait);
++                      break;
++
++              default:
++                      netdev_err(ndev, "Unexpected VMBUS completion!!\n");
++              }
++              return;
++      }
+       /* Ensure packet is big enough to read header fields */
+       if (msglen < sizeof(struct nvsp_message_header)) {
+@@ -766,6 +796,7 @@ static void netvsc_send_completion(struct net_device *ndev,
+               return;
+       }
++      nvsp_packet = hv_pkt_data(desc);
+       switch (nvsp_packet->hdr.msg_type) {
+       case NVSP_MSG_TYPE_INIT_COMPLETE:
+               if (msglen < sizeof(struct nvsp_message_header) +
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 790bf750281ad..57a5ec098e7e0 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2429,7 +2429,6 @@ static int netvsc_vf_changed(struct net_device *vf_netdev)
+       if (net_device_ctx->data_path_is_vf == vf_is_up)
+               return NOTIFY_OK;
+-      net_device_ctx->data_path_is_vf = vf_is_up;
+       if (vf_is_up && !net_device_ctx->vf_alloc) {
+               netdev_info(ndev, "Waiting for the VF association from host\n");
+-- 
+2.43.0
+
diff --git a/queue-5.10/lsm-fix-default-return-value-of-the-socket_getpeerse.patch b/queue-5.10/lsm-fix-default-return-value-of-the-socket_getpeerse.patch
new file mode 100644 (file)
index 0000000..88375c8
--- /dev/null
@@ -0,0 +1,91 @@
+From 412604ade6d81740b674c81ad30bd5d459553e94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jan 2024 19:45:31 +0100
+Subject: lsm: fix default return value of the socket_getpeersec_*() hooks
+
+From: Ondrej Mosnacek <omosnace@redhat.com>
+
+[ Upstream commit 5a287d3d2b9de2b3e747132c615599907ba5c3c1 ]
+
+For these hooks the true "neutral" value is -ENOPROTOOPT, which is
+currently what is returned when no LSM provides this hook and what LSMs
+return when there is no security context set on the socket. Correct the
+value in <linux/lsm_hooks.h> and adjust the dispatch functions in
+security/security.c to avoid issues when the BPF LSM is enabled.
+
+Cc: stable@vger.kernel.org
+Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks")
+Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
+[PM: subject line tweak]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/lsm_hook_defs.h |  4 ++--
+ security/security.c           | 31 +++++++++++++++++++++++++++----
+ 2 files changed, 29 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
+index 9f550eab8ebdb..07abcd384975b 100644
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -293,9 +293,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname)
+ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
+ LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
+ LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
+-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
++LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock,
+        sockptr_t optval, sockptr_t optlen, unsigned int len)
+-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
++LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock,
+        struct sk_buff *skb, u32 *secid)
+ LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
+ LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
+diff --git a/security/security.c b/security/security.c
+index e9dcde3c4f14b..0bbcb100ba8e9 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -2227,14 +2227,37 @@ EXPORT_SYMBOL(security_sock_rcv_skb);
+ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
+                                     sockptr_t optlen, unsigned int len)
+ {
+-      return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
+-                           optval, optlen, len);
++      struct security_hook_list *hp;
++      int rc;
++
++      /*
++       * Only one module will provide a security context.
++       */
++      hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream,
++                           list) {
++              rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen,
++                                                     len);
++              if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream))
++                      return rc;
++      }
++      return LSM_RET_DEFAULT(socket_getpeersec_stream);
+ }
+ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
+ {
+-      return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock,
+-                           skb, secid);
++      struct security_hook_list *hp;
++      int rc;
++
++      /*
++       * Only one module will provide a security context.
++       */
++      hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram,
++                           list) {
++              rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid);
++              if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram))
++                      return rc;
++      }
++      return LSM_RET_DEFAULT(socket_getpeersec_dgram);
+ }
+ EXPORT_SYMBOL(security_socket_getpeersec_dgram);
+-- 
+2.43.0
+
diff --git a/queue-5.10/lsm-make-security_socket_getpeersec_stream-sockptr_t.patch b/queue-5.10/lsm-make-security_socket_getpeersec_stream-sockptr_t.patch
new file mode 100644 (file)
index 0000000..21edacc
--- /dev/null
@@ -0,0 +1,282 @@
+From d0ab4effe8f2356e5ad02e4c5d59527127721934 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Oct 2022 12:31:21 -0400
+Subject: lsm: make security_socket_getpeersec_stream() sockptr_t safe
+
+From: Paul Moore <paul@paul-moore.com>
+
+[ Upstream commit b10b9c342f7571f287fd422be5d5c0beb26ba974 ]
+
+Commit 4ff09db1b79b ("bpf: net: Change sk_getsockopt() to take the
+sockptr_t argument") made it possible to call sk_getsockopt()
+with both user and kernel address space buffers through the use of
+the sockptr_t type.  Unfortunately at the time of conversion the
+security_socket_getpeersec_stream() LSM hook was written to only
+accept userspace buffers, and in a desire to avoid having to change
+the LSM hook the commit author simply passed the sockptr_t's
+userspace buffer pointer.  Since the only sk_getsockopt() callers
+at the time of conversion which used kernel sockptr_t buffers did
+not allow SO_PEERSEC, and hence the
+security_socket_getpeersec_stream() hook, this was acceptable but
+also very fragile as future changes presented the possibility of
+silently passing kernel space pointers to the LSM hook.
+
+There are several ways to protect against this, including careful
+code review of future commits, but since relying on code review to
+catch bugs is a recipe for disaster and the upstream eBPF maintainer
+is "strongly against defensive programming", this patch updates the
+LSM hook, and all of the implementations to support sockptr_t and
+safely handle both user and kernel space buffers.
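
For context, SO_PEERSEC is the getsockopt() option that reaches this hook. A
minimal userspace caller, illustrative only; it prints a label only when an LSM
such as SELinux has one for the peer and may fail with ENOPROTOOPT otherwise:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_PEERSEC
#define SO_PEERSEC 31	/* value from <asm-generic/socket.h> */
#endif

int main(void)
{
	int sv[2];
	char label[256];
	socklen_t len = sizeof(label);

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return 1;
	if (getsockopt(sv[0], SOL_SOCKET, SO_PEERSEC, label, &len) == 0)
		printf("peer security label: %.*s\n", (int)len, label);
	else
		perror("getsockopt(SO_PEERSEC)");
	close(sv[0]);
	close(sv[1]);
	return 0;
}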
+
+Acked-by: Casey Schaufler <casey@schaufler-ca.com>
+Acked-by: John Johansen <john.johansen@canonical.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/lsm_hook_defs.h |  2 +-
+ include/linux/lsm_hooks.h     |  4 ++--
+ include/linux/security.h      | 11 +++++++----
+ net/core/sock.c               |  3 ++-
+ security/apparmor/lsm.c       | 29 +++++++++++++----------------
+ security/security.c           |  6 +++---
+ security/selinux/hooks.c      | 13 ++++++-------
+ security/smack/smack_lsm.c    | 19 ++++++++++---------
+ 8 files changed, 44 insertions(+), 43 deletions(-)
+
+diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
+index 92a76ce0c382d..9f550eab8ebdb 100644
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -294,7 +294,7 @@ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
+ LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
+ LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
+ LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
+-       char __user *optval, int __user *optlen, unsigned len)
++       sockptr_t optval, sockptr_t optlen, unsigned int len)
+ LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
+        struct sk_buff *skb, u32 *secid)
+ LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
+diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
+index 64cdf4d7bfb30..bbf9c8c7bd9c5 100644
+--- a/include/linux/lsm_hooks.h
++++ b/include/linux/lsm_hooks.h
+@@ -926,8 +926,8 @@
+  *    SO_GETPEERSEC.  For tcp sockets this can be meaningful if the
+  *    socket is associated with an ipsec SA.
+  *    @sock is the local socket.
+- *    @optval userspace memory where the security state is to be copied.
+- *    @optlen userspace int where the module should copy the actual length
++ *    @optval memory where the security state is to be copied.
++ *    @optlen memory where the module should copy the actual length
+  *    of the security state.
+  *    @len as input is the maximum length to copy to userspace provided
+  *    by the caller.
+diff --git a/include/linux/security.h b/include/linux/security.h
+index e388b1666bcfc..5b61aa19fac66 100644
+--- a/include/linux/security.h
++++ b/include/linux/security.h
+@@ -31,6 +31,7 @@
+ #include <linux/err.h>
+ #include <linux/string.h>
+ #include <linux/mm.h>
++#include <linux/sockptr.h>
+ struct linux_binprm;
+ struct cred;
+@@ -1366,8 +1367,8 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname);
+ int security_socket_setsockopt(struct socket *sock, int level, int optname);
+ int security_socket_shutdown(struct socket *sock, int how);
+ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
+-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+-                                    int __user *optlen, unsigned len);
++int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
++                                    sockptr_t optlen, unsigned int len);
+ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
+ int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
+ void security_sk_free(struct sock *sk);
+@@ -1501,8 +1502,10 @@ static inline int security_sock_rcv_skb(struct sock *sk,
+       return 0;
+ }
+-static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+-                                                  int __user *optlen, unsigned len)
++static inline int security_socket_getpeersec_stream(struct socket *sock,
++                                                  sockptr_t optval,
++                                                  sockptr_t optlen,
++                                                  unsigned int len)
+ {
+       return -ENOPROTOOPT;
+ }
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 42da46965b16f..016c0b9e01b70 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1503,7 +1503,8 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+       case SO_PEERSEC:
+-              return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
++              return security_socket_getpeersec_stream(sock,
++                                                       optval, optlen, len);
+       case SO_MARK:
+               v.val = sk->sk_mark;
+diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
+index 585edcc6814d2..052f1b920e43f 100644
+--- a/security/apparmor/lsm.c
++++ b/security/apparmor/lsm.c
+@@ -1070,11 +1070,10 @@ static struct aa_label *sk_peer_label(struct sock *sk)
+  * Note: for tcp only valid if using ipsec or cipso on lan
+  */
+ static int apparmor_socket_getpeersec_stream(struct socket *sock,
+-                                           char __user *optval,
+-                                           int __user *optlen,
++                                           sockptr_t optval, sockptr_t optlen,
+                                            unsigned int len)
+ {
+-      char *name;
++      char *name = NULL;
+       int slen, error = 0;
+       struct aa_label *label;
+       struct aa_label *peer;
+@@ -1091,23 +1090,21 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock,
+       /* don't include terminating \0 in slen, it breaks some apps */
+       if (slen < 0) {
+               error = -ENOMEM;
+-      } else {
+-              if (slen > len) {
+-                      error = -ERANGE;
+-              } else if (copy_to_user(optval, name, slen)) {
+-                      error = -EFAULT;
+-                      goto out;
+-              }
+-              if (put_user(slen, optlen))
+-                      error = -EFAULT;
+-out:
+-              kfree(name);
+-
++              goto done;
++      }
++      if (slen > len) {
++              error = -ERANGE;
++              goto done_len;
+       }
++      if (copy_to_sockptr(optval, name, slen))
++              error = -EFAULT;
++done_len:
++      if (copy_to_sockptr(optlen, &slen, sizeof(slen)))
++              error = -EFAULT;
+ done:
+       end_current_label_crit_section(label);
+-
++      kfree(name);
+       return error;
+ }
+diff --git a/security/security.c b/security/security.c
+index 269c3965393f4..e9dcde3c4f14b 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -2224,11 +2224,11 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL(security_sock_rcv_skb);
+-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+-                                    int __user *optlen, unsigned len)
++int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
++                                    sockptr_t optlen, unsigned int len)
+ {
+       return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
+-                              optval, optlen, len);
++                           optval, optlen, len);
+ }
+ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
+diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
+index 50d3ddfe15fd1..46c00a68bb4bd 100644
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -5110,11 +5110,12 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+       return err;
+ }
+-static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+-                                          int __user *optlen, unsigned len)
++static int selinux_socket_getpeersec_stream(struct socket *sock,
++                                          sockptr_t optval, sockptr_t optlen,
++                                          unsigned int len)
+ {
+       int err = 0;
+-      char *scontext;
++      char *scontext = NULL;
+       u32 scontext_len;
+       struct sk_security_struct *sksec = sock->sk->sk_security;
+       u32 peer_sid = SECSID_NULL;
+@@ -5130,17 +5131,15 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op
+                                     &scontext_len);
+       if (err)
+               return err;
+-
+       if (scontext_len > len) {
+               err = -ERANGE;
+               goto out_len;
+       }
+-      if (copy_to_user(optval, scontext, scontext_len))
++      if (copy_to_sockptr(optval, scontext, scontext_len))
+               err = -EFAULT;
+-
+ out_len:
+-      if (put_user(scontext_len, optlen))
++      if (copy_to_sockptr(optlen, &scontext_len, sizeof(scontext_len)))
+               err = -EFAULT;
+       kfree(scontext);
+       return err;
+diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
+index e1669759403a6..5388f143eecd8 100644
+--- a/security/smack/smack_lsm.c
++++ b/security/smack/smack_lsm.c
+@@ -4022,12 +4022,12 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+  * returns zero on success, an error code otherwise
+  */
+ static int smack_socket_getpeersec_stream(struct socket *sock,
+-                                        char __user *optval,
+-                                        int __user *optlen, unsigned len)
++                                        sockptr_t optval, sockptr_t optlen,
++                                        unsigned int len)
+ {
+       struct socket_smack *ssp;
+       char *rcp = "";
+-      int slen = 1;
++      u32 slen = 1;
+       int rc = 0;
+       ssp = sock->sk->sk_security;
+@@ -4035,15 +4035,16 @@ static int smack_socket_getpeersec_stream(struct socket *sock,
+               rcp = ssp->smk_packet->smk_known;
+               slen = strlen(rcp) + 1;
+       }
+-
+-      if (slen > len)
++      if (slen > len) {
+               rc = -ERANGE;
+-      else if (copy_to_user(optval, rcp, slen) != 0)
+-              rc = -EFAULT;
++              goto out_len;
++      }
+-      if (put_user(slen, optlen) != 0)
++      if (copy_to_sockptr(optval, rcp, slen))
++              rc = -EFAULT;
++out_len:
++      if (copy_to_sockptr(optlen, &slen, sizeof(slen)))
+               rc = -EFAULT;
+-
+       return rc;
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch b/queue-5.10/mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch
new file mode 100644 (file)
index 0000000..d746363
--- /dev/null
@@ -0,0 +1,191 @@
+From 60e180797025bd86a8ca15068002096dbc3f63d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 12:09:54 -0800
+Subject: mm/hugetlb: change hugetlb_reserve_pages() to type bool
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+[ Upstream commit 33b8f84a4ee78491a8f4f9e4c5520c9da4a10983 ]
+
+While reviewing a bug in hugetlb_reserve_pages, it was noticed that all
+callers ignore the return value.  Any failure is considered an ENOMEM
+error by the callers.
+
+Change the function to be of type bool.  The function will return true if
+the reservation was successful, false otherwise.  Callers currently assume
+a zero return code indicates success.  Change the callers to look for true
+to indicate success.  No functional change, only code cleanup.
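
Callers therefore flip their error check from "non-zero return" to "returned
false"; a condensed sketch of the hugetlbfs mmap hunk below:

	ret = -ENOMEM;
	if (!hugetlb_reserve_pages(inode,
				   vma->vm_pgoff >> huge_page_order(h),
				   len >> huge_page_shift(h), vma,
				   vma->vm_flags))
		goto out;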
+
+Link: https://lkml.kernel.org/r/20201221192542.15732-1-mike.kravetz@oracle.com
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Dan Carpenter <dan.carpenter@oracle.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: e656c7a9e596 ("mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/hugetlbfs/inode.c    |  4 ++--
+ include/linux/hugetlb.h |  2 +-
+ mm/hugetlb.c            | 37 ++++++++++++++-----------------------
+ 3 files changed, 17 insertions(+), 26 deletions(-)
+
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index a0edd4b8fa189..c3e9fa7ce75f7 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -176,7 +176,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+       file_accessed(file);
+       ret = -ENOMEM;
+-      if (hugetlb_reserve_pages(inode,
++      if (!hugetlb_reserve_pages(inode,
+                               vma->vm_pgoff >> huge_page_order(h),
+                               len >> huge_page_shift(h), vma,
+                               vma->vm_flags))
+@@ -1500,7 +1500,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
+       inode->i_size = size;
+       clear_nlink(inode);
+-      if (hugetlb_reserve_pages(inode, 0,
++      if (!hugetlb_reserve_pages(inode, 0,
+                       size >> huge_page_shift(hstate_inode(inode)), NULL,
+                       acctflag))
+               file = ERR_PTR(-ENOMEM);
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index 99b73fc4a8246..90c66b9458c31 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -140,7 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
+                               unsigned long dst_addr,
+                               unsigned long src_addr,
+                               struct page **pagep);
+-int hugetlb_reserve_pages(struct inode *inode, long from, long to,
++bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
+                                               struct vm_area_struct *vma,
+                                               vm_flags_t vm_flags);
+ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 81949f6d29af5..02b7c8f9b0e87 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5108,12 +5108,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+       return pages << h->order;
+ }
+-int hugetlb_reserve_pages(struct inode *inode,
++/* Return true if reservation was successful, false otherwise.  */
++bool hugetlb_reserve_pages(struct inode *inode,
+                                       long from, long to,
+                                       struct vm_area_struct *vma,
+                                       vm_flags_t vm_flags)
+ {
+-      long ret, chg, add = -1;
++      long chg, add = -1;
+       struct hstate *h = hstate_inode(inode);
+       struct hugepage_subpool *spool = subpool_inode(inode);
+       struct resv_map *resv_map;
+@@ -5123,7 +5124,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+       /* This should never happen */
+       if (from > to) {
+               VM_WARN(1, "%s called with a negative range\n", __func__);
+-              return -EINVAL;
++              return false;
+       }
+       /*
+@@ -5132,7 +5133,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+        * without using reserves
+        */
+       if (vm_flags & VM_NORESERVE)
+-              return 0;
++              return true;
+       /*
+        * Shared mappings base their reservation on the number of pages that
+@@ -5154,7 +5155,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+               /* Private mapping. */
+               resv_map = resv_map_alloc();
+               if (!resv_map)
+-                      return -ENOMEM;
++                      return false;
+               chg = to - from;
+@@ -5162,18 +5163,12 @@ int hugetlb_reserve_pages(struct inode *inode,
+               set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
+       }
+-      if (chg < 0) {
+-              ret = chg;
++      if (chg < 0)
+               goto out_err;
+-      }
+-
+-      ret = hugetlb_cgroup_charge_cgroup_rsvd(
+-              hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
+-      if (ret < 0) {
+-              ret = -ENOMEM;
++      if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
++                              chg * pages_per_huge_page(h), &h_cg) < 0)
+               goto out_err;
+-      }
+       if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+               /* For private mappings, the hugetlb_cgroup uncharge info hangs
+@@ -5188,19 +5183,15 @@ int hugetlb_reserve_pages(struct inode *inode,
+        * reservations already in place (gbl_reserve).
+        */
+       gbl_reserve = hugepage_subpool_get_pages(spool, chg);
+-      if (gbl_reserve < 0) {
+-              ret = -ENOSPC;
++      if (gbl_reserve < 0)
+               goto out_uncharge_cgroup;
+-      }
+       /*
+        * Check enough hugepages are available for the reservation.
+        * Hand the pages back to the subpool if there are not
+        */
+-      ret = hugetlb_acct_memory(h, gbl_reserve);
+-      if (ret < 0) {
++      if (hugetlb_acct_memory(h, gbl_reserve) < 0)
+               goto out_put_pages;
+-      }
+       /*
+        * Account for the reservations made. Shared mappings record regions
+@@ -5218,7 +5209,6 @@ int hugetlb_reserve_pages(struct inode *inode,
+               if (unlikely(add < 0)) {
+                       hugetlb_acct_memory(h, -gbl_reserve);
+-                      ret = add;
+                       goto out_put_pages;
+               } else if (unlikely(chg > add)) {
+                       /*
+@@ -5251,7 +5241,8 @@ int hugetlb_reserve_pages(struct inode *inode,
+                       hugetlb_cgroup_put_rsvd_cgroup(h_cg);
+               }
+       }
+-      return 0;
++      return true;
++
+ out_put_pages:
+       /* put back original number of pages, chg */
+       (void)hugepage_subpool_put_pages(spool, chg);
+@@ -5267,7 +5258,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+                       region_abort(resv_map, from, to, regions_needed);
+       if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+               kref_put(&resv_map->refs, resv_map_release);
+-      return ret;
++      return false;
+ }
+ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+-- 
+2.43.0
+
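The hunks above finish converting hugetlb_reserve_pages() from an errno-style int to a plain bool. A minimal caller sketch of the resulting convention (illustrative only, not taken from the queued patch; each caller now picks its own errno):

	/* before: propagate whatever errno the helper chose */
	ret = hugetlb_reserve_pages(inode, from, to, vma, vm_flags);
	if (ret)
		return ret;

	/* after: the helper only reports success or failure */
	if (!hugetlb_reserve_pages(inode, from, to, vma, vm_flags))
		return -ENOMEM;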
diff --git a/queue-5.10/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch b/queue-5.10/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch
new file mode 100644 (file)
index 0000000..e5c3bb2
--- /dev/null
@@ -0,0 +1,108 @@
+From 248455f8b451d2ac07c8a295e2686f1524b80383 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 12:04:42 -0800
+Subject: mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE
+
+From: Prakash Sangappa <prakash.sangappa@oracle.com>
+
+[ Upstream commit e656c7a9e59607d1672d85ffa9a89031876ffe67 ]
+
+For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in the
+shmget() call.  If the SHM_NORESERVE flag is specified then the hugetlb pages
+are not reserved.  However, when the shared memory is attached with the
+shmat() call, the hugetlb pages are incorrectly reserved for
+SHM_HUGETLB shared memory created with SHM_NORESERVE, which is a bug.
+
+-------------------------------
+Following test shows the issue.
+
+$cat shmhtb.c
+
+int main()
+{
+       int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE;
+       int shmid;
+
+       shmid = shmget(SKEY, SHMSZ, shmflags);
+       if (shmid < 0)
+       {
+               printf("shmat: shmget() failed, %d\n", errno);
+               return 1;
+       }
+       printf("After shmget()\n");
+       system("cat /proc/meminfo | grep -i hugepages_");
+
+       shmat(shmid, NULL, 0);
+       printf("\nAfter shmat()\n");
+       system("cat /proc/meminfo | grep -i hugepages_");
+
+       shmctl(shmid, IPC_RMID, NULL);
+       return 0;
+}
+
+ #sysctl -w vm.nr_hugepages=20
+ #./shmhtb
+
+After shmget()
+HugePages_Total:      20
+HugePages_Free:       20
+HugePages_Rsvd:        0
+HugePages_Surp:        0
+
+After shmat()
+HugePages_Total:      20
+HugePages_Free:       20
+HugePages_Rsvd:        5 <--
+HugePages_Surp:        0
+--------------------------------
+
+Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared
+memory in the shmat() call.
+
+Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com
+Signed-off-by: Prakash Sangappa <prakash.sangappa@oracle.com>
+Acked-by: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/hugetlbfs/inode.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index c3e9fa7ce75f7..bf3cda4989623 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -135,6 +135,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+       loff_t len, vma_len;
+       int ret;
+       struct hstate *h = hstate_file(file);
++      vm_flags_t vm_flags;
+       /*
+        * vma address alignment (but not the pgoff alignment) has
+@@ -176,10 +177,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+       file_accessed(file);
+       ret = -ENOMEM;
++
++      vm_flags = vma->vm_flags;
++      /*
++       * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
++       * reserving here. Note: only for SHM hugetlbfs file, the inode
++       * flag S_PRIVATE is set.
++       */
++      if (inode->i_flags & S_PRIVATE)
++              vm_flags |= VM_NORESERVE;
++
+       if (!hugetlb_reserve_pages(inode,
+                               vma->vm_pgoff >> huge_page_order(h),
+                               len >> huge_page_shift(h), vma,
+-                              vma->vm_flags))
++                              vm_flags))
+               goto out;
+       ret = 0;
+-- 
+2.43.0
+
diff --git a/queue-5.10/net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch b/queue-5.10/net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch
new file mode 100644 (file)
index 0000000..a092a02
--- /dev/null
@@ -0,0 +1,80 @@
+From 9b1c36493c827100b8aa5b3dd82cdd808f29d801 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 17:27:56 -0700
+Subject: net: Change sock_getsockopt() to take the sk ptr instead of the sock
+ ptr
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit ba74a7608dc12fbbd8ea36e660087f08a81ef26a ]
+
+A later patch refactors bpf_getsockopt(SOL_SOCKET) with the
+sock_getsockopt() to avoid code duplication and code
+drift between the two duplicates.
+
+The current sock_getsockopt() takes sock ptr as the argument.
+The very first thing of this function is to get back the sk ptr
+by 'sk = sock->sk'.
+
+bpf_getsockopt() could be called when the sk does not have
+the sock ptr created.  Meaning sk->sk_socket is NULL.  For example,
+when a passive tcp connection has just been established but has yet
+been accept()-ed.  Thus, it cannot use the sock_getsockopt(sk->sk_socket)
+or else it will pass a NULL ptr.
+
+This patch moves all sock_getsockopt implementation to the newly
+added sk_getsockopt().  The new sk_getsockopt() takes a sk ptr
+and immediately gets the sock ptr by 'sock = sk->sk_socket'
+
+The existing sock_getsockopt(sock) is changed to call
+sk_getsockopt(sock->sk).  All existing callers have both sock->sk
+and sk->sk_socket pointer.
+
+The later patch will make bpf_getsockopt(SOL_SOCKET) call
+sk_getsockopt(sk) directly.  The bpf_getsockopt(SOL_SOCKET) does
+not use the optnames that require sk->sk_socket, so it will
+be safe.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://lore.kernel.org/r/20220902002756.2887884-1-kafai@fb.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 769e969cd1dc5..95559d088a169 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1293,10 +1293,10 @@ static int groups_to_user(gid_t __user *dst, const struct group_info *src)
+       return 0;
+ }
+-int sock_getsockopt(struct socket *sock, int level, int optname,
+-                  char __user *optval, int __user *optlen)
++static int sk_getsockopt(struct sock *sk, int level, int optname,
++                       char __user *optval, int __user *optlen)
+ {
+-      struct sock *sk = sock->sk;
++      struct socket *sock = sk->sk_socket;
+       union {
+               int val;
+@@ -1633,6 +1633,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+       return 0;
+ }
++int sock_getsockopt(struct socket *sock, int level, int optname,
++                  char __user *optval, int __user *optlen)
++{
++      return sk_getsockopt(sock->sk, level, optname, optval, optlen);
++}
++
+ /*
+  * Initialize an sk_lock.
+  *
+-- 
+2.43.0
+
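As a minimal sketch of the call shape this refactor enables (illustrative only; example_getsockopt_from_sk() is a hypothetical caller, not code from the patch): a passive TCP connection that has not yet been accept()-ed has sk->sk_socket == NULL, so only the sk-based helper is safe to use there.

	/* hypothetical caller that only has a struct sock, as bpf_getsockopt() does */
	static int example_getsockopt_from_sk(struct sock *sk, int optname,
					      char __user *optval, int __user *optlen)
	{
		/* sock_getsockopt(sk->sk_socket, ...) would pass a NULL pointer here;
		 * sk_getsockopt() looks up sk->sk_socket itself and is safe for the
		 * SOL_SOCKET optnames that do not need the struct socket.
		 */
		return sk_getsockopt(sk, SOL_SOCKET, optname, optval, optlen);
	}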
diff --git a/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch b/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch
new file mode 100644 (file)
index 0000000..704cb37
--- /dev/null
@@ -0,0 +1,73 @@
+From 37e6ea8769930834d3f74552c946172c8ef147e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 12:34:38 -0400
+Subject: NFSD: Add documenting comment for nfsd4_release_lockowner()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 043862b09cc00273e35e6c3a6389957953a34207 ]
+
+And return explicit nfserr values that match what is documented in the
+new comment / API contract.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 1b40b2197ce66..b6480be7b5e6a 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7107,6 +7107,23 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+       return status;
+ }
++/**
++ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations
++ * @rqstp: RPC transaction
++ * @cstate: NFSv4 COMPOUND state
++ * @u: RELEASE_LOCKOWNER arguments
++ *
++ * The lockowner's so_count is bumped when a lock record is added
++ * or when copying a conflicting lock. The latter case is brief,
++ * but can lead to fleeting false positives when looking for
++ * locks-in-use.
++ *
++ * Return values:
++ *   %nfs_ok: lockowner released or not found
++ *   %nfserr_locks_held: lockowner still in use
++ *   %nfserr_stale_clientid: clientid no longer active
++ *   %nfserr_expired: clientid not recognized
++ */
+ __be32
+ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+                       struct nfsd4_compound_state *cstate,
+@@ -7133,7 +7150,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+       lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
+       if (!lo) {
+               spin_unlock(&clp->cl_lock);
+-              return status;
++              return nfs_ok;
+       }
+       if (atomic_read(&lo->lo_owner.so_count) != 2) {
+               spin_unlock(&clp->cl_lock);
+@@ -7149,11 +7166,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+               put_ol_stateid_locked(stp, &reaplist);
+       }
+       spin_unlock(&clp->cl_lock);
++
+       free_ol_stateid_reaplist(&reaplist);
+       remove_blocked_locks(lo);
+       nfs4_put_stateowner(&lo->lo_owner);
+-
+-      return status;
++      return nfs_ok;
+ }
+ static inline struct nfs4_client_reclaim *
+-- 
+2.43.0
+
diff --git a/queue-5.10/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch b/queue-5.10/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
new file mode 100644 (file)
index 0000000..2fbf929
--- /dev/null
@@ -0,0 +1,97 @@
+From 72721cc9efa6c9217b39095adbc30d0efd0b10db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Feb 2024 13:22:39 +1100
+Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ]
+
+A recent change to check_for_locks() changed it to take ->flc_lock while
+holding ->fi_lock.  This creates a lock inversion (reported by lockdep)
+because there is a case where ->fi_lock is taken while holding
+->flc_lock.
+
+->flc_lock is held across ->fl_lmops callbacks, and
+nfsd_break_deleg_cb() is one of those and does take ->fi_lock.  However
+it doesn't need to.
+
+Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each
+delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list
+and so needed the lock.  Since then it doesn't walk the list and doesn't
+need the lock.
+
+Two actions are performed under the lock.  One is to call
+nfsd_break_one_deleg() which calls nfsd4_run_cb().  These don't act on
+the nfs4_file at all, so they don't need the lock.
+
+The other is to set ->fi_had_conflict which is in the nfs4_file.
+This field is only ever set here (except when initialised to false)
+so there is no possible problem with multiple threads racing when
+setting it.
+
+The field is tested twice in nfs4_set_delegation().  The first test does
+not hold a lock and is documented as an opportunistic optimisation, so
+it doesn't impose any need to hold ->fi_lock while setting
+->fi_had_conflict.
+
+The second test in nfs4_set_delegation() *is* made under ->fi_lock, so
+removing the locking when ->fi_had_conflict is set could make a change.
+The change could only be interesting if ->fi_had_conflict tested as
+false even though nfsd_break_one_deleg() ran before ->fi_lock was
+unlocked.  i.e. while hash_delegation_locked() was running.
+As hash_delegation_locked() doesn't interact in any way with nfsd4_run_cb()
+there can be no importance to this interaction.
+
+So this patch removes the locking from nfsd_break_one_deleg() and moves
+the final test on ->fi_had_conflict out of the locked region to make it
+clear that locking isn't important to the test.  It is still tested
+*after* vfs_setlease() has succeeded.  This might be significant: as
+vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called
+under ->flc_lock, this "after" is a true ordering provided by a spinlock.
+
+Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 16b073c637986..7ff1f85f1dd9a 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4617,10 +4617,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
+        */
+       fl->fl_break_time = 0;
+-      spin_lock(&fp->fi_lock);
+       fp->fi_had_conflict = true;
+       nfsd_break_one_deleg(dp);
+-      spin_unlock(&fp->fi_lock);
+       return ret;
+ }
+@@ -5049,12 +5047,13 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
+       if (status)
+               goto out_clnt_odstate;
++      status = -EAGAIN;
++      if (fp->fi_had_conflict)
++              goto out_unlock;
++
+       spin_lock(&state_lock);
+       spin_lock(&fp->fi_lock);
+-      if (fp->fi_had_conflict)
+-              status = -EAGAIN;
+-      else
+-              status = hash_delegation_locked(dp, fp);
++      status = hash_delegation_locked(dp, fp);
+       spin_unlock(&fp->fi_lock);
+       spin_unlock(&state_lock);
+-- 
+2.43.0
+
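A lock-ordering sketch of the inversion described above (illustrative, not part of the patch):

	/*
	 * check_for_locks():        takes ->fi_lock, then ->flc_lock
	 * nfsd_break_deleg_cb():    runs with ->flc_lock already held by the
	 *                           lease code, and used to take ->fi_lock
	 *
	 * Taking the two locks in opposite orders is the AB-BA cycle lockdep
	 * reports; dropping ->fi_lock from the callback removes one direction.
	 */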
diff --git a/queue-5.10/nfsd-fix-release_lockowner.patch b/queue-5.10/nfsd-fix-release_lockowner.patch
new file mode 100644 (file)
index 0000000..2b255c7
--- /dev/null
@@ -0,0 +1,149 @@
+From 094bb06a555bffa2d5058ea6657fea919095e171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 14:58:16 +1100
+Subject: nfsd: fix RELEASE_LOCKOWNER
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ]
+
+The test on so_count in nfsd4_release_lockowner() is nonsense and
+harmful.  Revert to using check_for_locks(), changing that to not sleep.
+
+First: harmful.
+As is documented in the kdoc comment for nfsd4_release_lockowner(), the
+test on so_count can transiently return a false positive resulting in a
+return of NFS4ERR_LOCKS_HELD when in fact no locks are held.  This is
+clearly a protocol violation and with the Linux NFS client it can cause
+incorrect behaviour.
+
+If RELEASE_LOCKOWNER is sent while some other thread is still
+processing a LOCK request which failed because, at the time that request
+was received, the given owner held a conflicting lock, then the nfsd
+thread processing that LOCK request can hold a reference (conflock) to
+the lock owner that causes nfsd4_release_lockowner() to return an
+incorrect error.
+
+The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
+never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
+it knows that the error is impossible.  It assumes the lock owner was in
+fact released so it feels free to use the same lock owner identifier in
+some later locking request.
+
+When it does reuse a lock owner identifier for which a previous RELEASE
+failed, it will naturally use a lock_seqid of zero.  However the server,
+which didn't release the lock owner, will expect a larger lock_seqid and
+so will respond with NFS4ERR_BAD_SEQID.
+
+So clearly it is harmful to allow a false positive, which testing
+so_count allows.
+
+The test is nonsense because ... well... it doesn't mean anything.
+
+so_count is the sum of three different counts.
+1/ the set of states listed on so_stateids
+2/ the set of active vfs locks owned by any of those states
+3/ various transient counts such as for conflicting locks.
+
+When it is tested against '2' it is clear that one of these is the
+transient reference obtained by find_lockowner_str_locked().  It is not
+clear what the other one is expected to be.
+
+In practice, the count is often 2 because there is precisely one state
+on so_stateids.  If there were more, this would fail.
+
+In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
+In one case, CLOSE is called before RELEASE_LOCKOWNER.  That results in
+all the lock states being removed, and so the lockowner being discarded
+(it is removed when there are no more references which usually happens
+when the lock state is discarded).  When nfsd4_release_lockowner() finds
+that the lock owner doesn't exist, it returns success.
+
+The other case shows an so_count of '2' and precisely one state listed
+in so_stateid.  It appears that the Linux client uses a separate lock
+owner for each file resulting in one lock state per lock owner, so this
+test on '2' is safe.  For another client it might not be safe.
+
+So this patch changes check_for_locks() to use the (newish)
+find_any_file_locked() so that it doesn't take a reference on the
+nfs4_file and so never calls nfsd_file_put(), and so never sleeps.  With
+this check it is safe to restore the use of check_for_locks() rather
+than testing so_count against the mysterious '2'.
+
+Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index b6480be7b5e6a..16b073c637986 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7080,14 +7080,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ {
+       struct file_lock *fl;
+       int status = false;
+-      struct nfsd_file *nf = find_any_file(fp);
++      struct nfsd_file *nf;
+       struct inode *inode;
+       struct file_lock_context *flctx;
++      spin_lock(&fp->fi_lock);
++      nf = find_any_file_locked(fp);
+       if (!nf) {
+               /* Any valid lock stateid should have some sort of access */
+               WARN_ON_ONCE(1);
+-              return status;
++              goto out;
+       }
+       inode = locks_inode(nf->nf_file);
+@@ -7103,7 +7105,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+               }
+               spin_unlock(&flctx->flc_lock);
+       }
+-      nfsd_file_put(nf);
++out:
++      spin_unlock(&fp->fi_lock);
+       return status;
+ }
+@@ -7113,10 +7116,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+  * @cstate: NFSv4 COMPOUND state
+  * @u: RELEASE_LOCKOWNER arguments
+  *
+- * The lockowner's so_count is bumped when a lock record is added
+- * or when copying a conflicting lock. The latter case is brief,
+- * but can lead to fleeting false positives when looking for
+- * locks-in-use.
++ * Check if there are any locks still held and if not - free the lockowner
++ * and any lock state that is owned.
+  *
+  * Return values:
+  *   %nfs_ok: lockowner released or not found
+@@ -7152,10 +7153,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+               spin_unlock(&clp->cl_lock);
+               return nfs_ok;
+       }
+-      if (atomic_read(&lo->lo_owner.so_count) != 2) {
+-              spin_unlock(&clp->cl_lock);
+-              nfs4_put_stateowner(&lo->lo_owner);
+-              return nfserr_locks_held;
++
++      list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++              if (check_for_locks(stp->st_stid.sc_file, lo)) {
++                      spin_unlock(&clp->cl_lock);
++                      nfs4_put_stateowner(&lo->lo_owner);
++                      return nfserr_locks_held;
++              }
+       }
+       unhash_lockowner_locked(lo);
+       while (!list_empty(&lo->lo_owner.so_stateids)) {
+-- 
+2.43.0
+
diff --git a/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch b/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch
new file mode 100644 (file)
index 0000000..202c3d4
--- /dev/null
@@ -0,0 +1,86 @@
+From 73f6ed66fe4fc6f405e3d72225f9d5cc964c5265 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 12:07:18 -0400
+Subject: NFSD: Modernize nfsd4_release_lockowner()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit bd8fdb6e545f950f4654a9a10d7e819ad48146e5 ]
+
+Refactor: Use existing helpers that other lock operations use. This
+change removes several automatic variables, so re-organize the
+variable declarations for readability.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 36 +++++++++++-------------------------
+ 1 file changed, 11 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index d402ca0b535f0..1b40b2197ce66 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7113,16 +7113,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+                       union nfsd4_op_u *u)
+ {
+       struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
++      struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       clientid_t *clid = &rlockowner->rl_clientid;
+-      struct nfs4_stateowner *sop;
+-      struct nfs4_lockowner *lo = NULL;
+       struct nfs4_ol_stateid *stp;
+-      struct xdr_netobj *owner = &rlockowner->rl_owner;
+-      unsigned int hashval = ownerstr_hashval(owner);
+-      __be32 status;
+-      struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
++      struct nfs4_lockowner *lo;
+       struct nfs4_client *clp;
+-      LIST_HEAD (reaplist);
++      LIST_HEAD(reaplist);
++      __be32 status;
+       dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+               clid->cl_boot, clid->cl_id);
+@@ -7130,30 +7127,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+       status = lookup_clientid(clid, cstate, nn, false);
+       if (status)
+               return status;
+-
+       clp = cstate->clp;
+-      /* Find the matching lock stateowner */
+-      spin_lock(&clp->cl_lock);
+-      list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
+-                          so_strhash) {
+-              if (sop->so_is_open_owner || !same_owner_str(sop, owner))
+-                      continue;
+-
+-              if (atomic_read(&sop->so_count) != 1) {
+-                      spin_unlock(&clp->cl_lock);
+-                      return nfserr_locks_held;
+-              }
+-
+-              lo = lockowner(sop);
+-              nfs4_get_stateowner(sop);
+-              break;
+-      }
++      spin_lock(&clp->cl_lock);
++      lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
+       if (!lo) {
+               spin_unlock(&clp->cl_lock);
+               return status;
+       }
+-
++      if (atomic_read(&lo->lo_owner.so_count) != 2) {
++              spin_unlock(&clp->cl_lock);
++              nfs4_put_stateowner(&lo->lo_owner);
++              return nfserr_locks_held;
++      }
+       unhash_lockowner_locked(lo);
+       while (!list_empty(&lo->lo_owner.so_stateids)) {
+               stp = list_first_entry(&lo->lo_owner.so_stateids,
+-- 
+2.43.0
+
diff --git a/queue-5.10/regmap-add-bulk-read-write-callbacks-into-regmap_con.patch b/queue-5.10/regmap-add-bulk-read-write-callbacks-into-regmap_con.patch
new file mode 100644 (file)
index 0000000..f40a145
--- /dev/null
@@ -0,0 +1,300 @@
+From 1cac9c5509fd16616dd5ba9dbaa2ab787b4f5b89 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Apr 2022 04:51:44 +0200
+Subject: regmap: Add bulk read/write callbacks into regmap_config
+
+From: Marek Vasut <marex@denx.de>
+
+[ Upstream commit d77e745613680c54708470402e2b623dcd769681 ]
+
+Currently the regmap_config structure only allows the user to implement
+single element register read/write using .reg_read/.reg_write callbacks.
+The regmap_bus already implements bulk counterparts of both, and is being
+misused as a workaround for the missing bulk read/write callbacks in
+regmap_config by a couple of drivers. To stop this misuse, add the bulk
+read/write callbacks to regmap_config and call them from the regmap core
+code.
+
+Signed-off-by: Marek Vasut <marex@denx.de>
+Cc: Jagan Teki <jagan@amarulasolutions.com>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Maxime Ripard <maxime@cerno.tech>
+Cc: Robert Foss <robert.foss@linaro.org>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+To: dri-devel@lists.freedesktop.org
+Link: https://lore.kernel.org/r/20220430025145.640305-1-marex@denx.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/regmap/internal.h |  4 ++
+ drivers/base/regmap/regmap.c   | 76 ++++++++++++++++++----------------
+ include/linux/regmap.h         | 12 ++++++
+ 3 files changed, 56 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
+index 0097696c31de2..2720d8d7bbfc9 100644
+--- a/drivers/base/regmap/internal.h
++++ b/drivers/base/regmap/internal.h
+@@ -104,6 +104,10 @@ struct regmap {
+       int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+       int (*reg_update_bits)(void *context, unsigned int reg,
+                              unsigned int mask, unsigned int val);
++      /* Bulk read/write */
++      int (*read)(void *context, const void *reg_buf, size_t reg_size,
++                  void *val_buf, size_t val_size);
++      int (*write)(void *context, const void *data, size_t count);
+       bool defer_caching;
+diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
+index 8f39aacdad0dc..2dfd6aa600450 100644
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -800,12 +800,15 @@ struct regmap *__regmap_init(struct device *dev,
+               map->reg_stride_order = ilog2(map->reg_stride);
+       else
+               map->reg_stride_order = -1;
+-      map->use_single_read = config->use_single_read || !bus || !bus->read;
+-      map->use_single_write = config->use_single_write || !bus || !bus->write;
+-      map->can_multi_write = config->can_multi_write && bus && bus->write;
++      map->use_single_read = config->use_single_read || !(config->read || (bus && bus->read));
++      map->use_single_write = config->use_single_write || !(config->write || (bus && bus->write));
++      map->can_multi_write = config->can_multi_write && (config->write || (bus && bus->write));
+       if (bus) {
+               map->max_raw_read = bus->max_raw_read;
+               map->max_raw_write = bus->max_raw_write;
++      } else if (config->max_raw_read && config->max_raw_write) {
++              map->max_raw_read = config->max_raw_read;
++              map->max_raw_write = config->max_raw_write;
+       }
+       map->dev = dev;
+       map->bus = bus;
+@@ -839,7 +842,16 @@ struct regmap *__regmap_init(struct device *dev,
+               map->read_flag_mask = bus->read_flag_mask;
+       }
+-      if (!bus) {
++      if (config && config->read && config->write) {
++              map->reg_read  = _regmap_bus_read;
++
++              /* Bulk read/write */
++              map->read = config->read;
++              map->write = config->write;
++
++              reg_endian = REGMAP_ENDIAN_NATIVE;
++              val_endian = REGMAP_ENDIAN_NATIVE;
++      } else if (!bus) {
+               map->reg_read  = config->reg_read;
+               map->reg_write = config->reg_write;
+               map->reg_update_bits = config->reg_update_bits;
+@@ -856,10 +868,13 @@ struct regmap *__regmap_init(struct device *dev,
+       } else {
+               map->reg_read  = _regmap_bus_read;
+               map->reg_update_bits = bus->reg_update_bits;
+-      }
++              /* Bulk read/write */
++              map->read = bus->read;
++              map->write = bus->write;
+-      reg_endian = regmap_get_reg_endian(bus, config);
+-      val_endian = regmap_get_val_endian(dev, bus, config);
++              reg_endian = regmap_get_reg_endian(bus, config);
++              val_endian = regmap_get_val_endian(dev, bus, config);
++      }
+       switch (config->reg_bits + map->reg_shift) {
+       case 2:
+@@ -1628,8 +1643,6 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+       size_t len;
+       int i;
+-      WARN_ON(!map->bus);
+-
+       /* Check for unwritable or noinc registers in range
+        * before we start
+        */
+@@ -1711,7 +1724,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+               val = work_val;
+       }
+-      if (map->async && map->bus->async_write) {
++      if (map->async && map->bus && map->bus->async_write) {
+               struct regmap_async *async;
+               trace_regmap_async_write_start(map, reg, val_len);
+@@ -1779,10 +1792,10 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+        * write.
+        */
+       if (val == work_val)
+-              ret = map->bus->write(map->bus_context, map->work_buf,
+-                                    map->format.reg_bytes +
+-                                    map->format.pad_bytes +
+-                                    val_len);
++              ret = map->write(map->bus_context, map->work_buf,
++                               map->format.reg_bytes +
++                               map->format.pad_bytes +
++                               val_len);
+       else if (map->bus->gather_write)
+               ret = map->bus->gather_write(map->bus_context, map->work_buf,
+                                            map->format.reg_bytes +
+@@ -1801,7 +1814,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+               memcpy(buf, map->work_buf, map->format.reg_bytes);
+               memcpy(buf + map->format.reg_bytes + map->format.pad_bytes,
+                      val, val_len);
+-              ret = map->bus->write(map->bus_context, buf, len);
++              ret = map->write(map->bus_context, buf, len);
+               kfree(buf);
+       } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) {
+@@ -1858,7 +1871,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
+       struct regmap_range_node *range;
+       struct regmap *map = context;
+-      WARN_ON(!map->bus || !map->format.format_write);
++      WARN_ON(!map->format.format_write);
+       range = _regmap_range_lookup(map, reg);
+       if (range) {
+@@ -1871,8 +1884,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
+       trace_regmap_hw_write_start(map, reg, 1);
+-      ret = map->bus->write(map->bus_context, map->work_buf,
+-                            map->format.buf_size);
++      ret = map->write(map->bus_context, map->work_buf, map->format.buf_size);
+       trace_regmap_hw_write_done(map, reg, 1);
+@@ -1892,7 +1904,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
+ {
+       struct regmap *map = context;
+-      WARN_ON(!map->bus || !map->format.format_val);
++      WARN_ON(!map->format.format_val);
+       map->format.format_val(map->work_buf + map->format.reg_bytes
+                              + map->format.pad_bytes, val, 0);
+@@ -1906,7 +1918,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
+ static inline void *_regmap_map_get_context(struct regmap *map)
+ {
+-      return (map->bus) ? map : map->bus_context;
++      return (map->bus || (!map->bus && map->read)) ? map : map->bus_context;
+ }
+ int _regmap_write(struct regmap *map, unsigned int reg,
+@@ -2313,7 +2325,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
+       u8 = buf;
+       *u8 |= map->write_flag_mask;
+-      ret = map->bus->write(map->bus_context, buf, len);
++      ret = map->write(map->bus_context, buf, len);
+       kfree(buf);
+@@ -2619,9 +2631,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+       struct regmap_range_node *range;
+       int ret;
+-      WARN_ON(!map->bus);
+-
+-      if (!map->bus || !map->bus->read)
++      if (!map->read)
+               return -EINVAL;
+       range = _regmap_range_lookup(map, reg);
+@@ -2637,9 +2647,9 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+                                     map->read_flag_mask);
+       trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
+-      ret = map->bus->read(map->bus_context, map->work_buf,
+-                           map->format.reg_bytes + map->format.pad_bytes,
+-                           val, val_len);
++      ret = map->read(map->bus_context, map->work_buf,
++                      map->format.reg_bytes + map->format.pad_bytes,
++                      val, val_len);
+       trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
+@@ -2750,8 +2760,6 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+       unsigned int v;
+       int ret, i;
+-      if (!map->bus)
+-              return -EINVAL;
+       if (val_len % map->format.val_bytes)
+               return -EINVAL;
+       if (!IS_ALIGNED(reg, map->reg_stride))
+@@ -2766,7 +2774,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+               size_t chunk_count, chunk_bytes;
+               size_t chunk_regs = val_count;
+-              if (!map->bus->read) {
++              if (!map->read) {
+                       ret = -ENOTSUPP;
+                       goto out;
+               }
+@@ -2826,7 +2834,7 @@ EXPORT_SYMBOL_GPL(regmap_raw_read);
+  * @val: Pointer to data buffer
+  * @val_len: Length of output buffer in bytes.
+  *
+- * The regmap API usually assumes that bulk bus read operations will read a
++ * The regmap API usually assumes that bulk read operations will read a
+  * range of registers. Some devices have certain registers for which a read
+  * operation read will read from an internal FIFO.
+  *
+@@ -2844,10 +2852,6 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg,
+       size_t read_len;
+       int ret;
+-      if (!map->bus)
+-              return -EINVAL;
+-      if (!map->bus->read)
+-              return -ENOTSUPP;
+       if (val_len % map->format.val_bytes)
+               return -EINVAL;
+       if (!IS_ALIGNED(reg, map->reg_stride))
+@@ -2961,7 +2965,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
+       if (val_count == 0)
+               return -EINVAL;
+-      if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
++      if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
+               ret = regmap_raw_read(map, reg, val, val_bytes * val_count);
+               if (ret != 0)
+                       return ret;
+diff --git a/include/linux/regmap.h b/include/linux/regmap.h
+index d6f0d876fa424..83a7485de78fb 100644
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -294,6 +294,12 @@ typedef void (*regmap_unlock)(void *);
+  *                 if the function require special handling with lock and reg
+  *                 handling and the operation cannot be represented as a simple
+  *                 update_bits operation on a bus such as SPI, I2C, etc.
++ * @read: Optional callback that if filled will be used to perform all the
++ *        bulk reads from the registers. Data is returned in the buffer used
++ *        to transmit data.
++ * @write: Same as above for writing.
++ * @max_raw_read: Max raw read size that can be used on the device.
++ * @max_raw_write: Max raw write size that can be used on the device.
+  * @fast_io:    Register IO is fast. Use a spinlock instead of a mutex
+  *              to perform locking. This field is ignored if custom lock/unlock
+  *              functions are used (see fields lock/unlock of struct regmap_config).
+@@ -373,6 +379,12 @@ struct regmap_config {
+       int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+       int (*reg_update_bits)(void *context, unsigned int reg,
+                              unsigned int mask, unsigned int val);
++      /* Bulk read/write */
++      int (*read)(void *context, const void *reg_buf, size_t reg_size,
++                  void *val_buf, size_t val_size);
++      int (*write)(void *context, const void *data, size_t count);
++      size_t max_raw_read;
++      size_t max_raw_write;
+       bool fast_io;
+-- 
+2.43.0
+
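A minimal sketch of a driver filling in the new regmap_config bulk callbacks (illustrative only; the example_* names and the size limits are hypothetical, not from the patch):

	/* hypothetical bulk-read callback: send reg_buf, then read val_size bytes */
	static int example_read(void *context, const void *reg_buf, size_t reg_size,
				void *val_buf, size_t val_size)
	{
		return 0;	/* device-specific transfer goes here */
	}

	/* hypothetical bulk-write callback: write count bytes (register + payload) */
	static int example_write(void *context, const void *data, size_t count)
	{
		return 0;	/* device-specific transfer goes here */
	}

	static const struct regmap_config example_regcfg = {
		.reg_bits      = 8,
		.val_bits      = 8,
		.read          = example_read,	/* new bulk callbacks */
		.write         = example_write,
		.max_raw_read  = 32,		/* hypothetical device limits */
		.max_raw_write = 32,
	};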
diff --git a/queue-5.10/regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch b/queue-5.10/regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch
new file mode 100644 (file)
index 0000000..2d8fba1
--- /dev/null
@@ -0,0 +1,65 @@
+From 6e5147c99310b8ead55ed6c777a40e69c6c04a3d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Nov 2021 16:00:40 +0100
+Subject: regmap: allow to define reg_update_bits for no bus configuration
+
+From: Ansuel Smith <ansuelsmth@gmail.com>
+
+[ Upstream commit 02d6fdecb9c38de19065f6bed8d5214556fd061d ]
+
+Some devices require special handling for reg_update_bits and can't use
+the normal regmap read/write logic. An example is when locking is
+handled by the device and rmw operations require atomic operations.
+Allow declaring a dedicated function in regmap_config for
+reg_update_bits in a no-bus configuration.
+
+Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
+Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/regmap/regmap.c | 1 +
+ include/linux/regmap.h       | 7 +++++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
+index 7bc603145bd98..8f39aacdad0dc 100644
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -842,6 +842,7 @@ struct regmap *__regmap_init(struct device *dev,
+       if (!bus) {
+               map->reg_read  = config->reg_read;
+               map->reg_write = config->reg_write;
++              map->reg_update_bits = config->reg_update_bits;
+               map->defer_caching = false;
+               goto skip_format_initialization;
+diff --git a/include/linux/regmap.h b/include/linux/regmap.h
+index e7834d98207f7..d6f0d876fa424 100644
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -289,6 +289,11 @@ typedef void (*regmap_unlock)(void *);
+  *              read operation on a bus such as SPI, I2C, etc. Most of the
+  *              devices do not need this.
+  * @reg_write:          Same as above for writing.
++ * @reg_update_bits: Optional callback that if filled will be used to perform
++ *                 all the update_bits(rmw) operation. Should only be provided
++ *                 if the function require special handling with lock and reg
++ *                 handling and the operation cannot be represented as a simple
++ *                 update_bits operation on a bus such as SPI, I2C, etc.
+  * @fast_io:    Register IO is fast. Use a spinlock instead of a mutex
+  *              to perform locking. This field is ignored if custom lock/unlock
+  *              functions are used (see fields lock/unlock of struct regmap_config).
+@@ -366,6 +371,8 @@ struct regmap_config {
+       int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
+       int (*reg_write)(void *context, unsigned int reg, unsigned int val);
++      int (*reg_update_bits)(void *context, unsigned int reg,
++                             unsigned int mask, unsigned int val);
+       bool fast_io;
+-- 
+2.43.0
+
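A minimal sketch of a no-bus regmap_config that supplies the new hook (illustrative only; the example_* callbacks are hypothetical placeholders):

	static int example_reg_read(void *context, unsigned int reg, unsigned int *val)
	{
		*val = 0;	/* device-specific single-register read */
		return 0;
	}

	static int example_reg_write(void *context, unsigned int reg, unsigned int val)
	{
		return 0;	/* device-specific single-register write */
	}

	/* the device (or its firmware) performs the read-modify-write atomically */
	static int example_reg_update_bits(void *context, unsigned int reg,
					   unsigned int mask, unsigned int val)
	{
		return 0;
	}

	static const struct regmap_config example_cfg = {
		.reg_bits        = 16,
		.val_bits        = 16,
		.reg_read        = example_reg_read,
		.reg_write       = example_reg_write,
		.reg_update_bits = example_reg_update_bits,	/* new optional hook */
	};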
diff --git a/queue-5.10/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch b/queue-5.10/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch
new file mode 100644 (file)
index 0000000..7150793
--- /dev/null
@@ -0,0 +1,61 @@
+From 10bf73f868331f0eee3a96f64b8c5a59c14151c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Jan 2024 06:14:29 -0700
+Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems
+
+From: Nico Pache <npache@redhat.com>
+
+[ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ]
+
+On systems with 64k page size and 512M huge page sizes, the allocation and
+test succeeds but errors out at the munmap.  As the comment states, munmap
+will fail if it's not HUGEPAGE aligned.  This is due to the length of
+the mapping being 1/2 the size of the hugepage causing the munmap to not
+be hugepage aligned.  Fix this by making the mapping length the full
+hugepage if the hugepage is larger than the length of the mapping.
+
+Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com
+Signed-off-by: Nico Pache <npache@redhat.com>
+Cc: Donet Tom <donettom@linux.vnet.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
+index 312889edb84ab..c65c55b7a789f 100644
+--- a/tools/testing/selftests/vm/map_hugetlb.c
++++ b/tools/testing/selftests/vm/map_hugetlb.c
+@@ -15,6 +15,7 @@
+ #include <unistd.h>
+ #include <sys/mman.h>
+ #include <fcntl.h>
++#include "vm_util.h"
+ #define LENGTH (256UL*1024*1024)
+ #define PROTECTION (PROT_READ | PROT_WRITE)
+@@ -70,10 +71,16 @@ int main(int argc, char **argv)
+ {
+       void *addr;
+       int ret;
++      size_t hugepage_size;
+       size_t length = LENGTH;
+       int flags = FLAGS;
+       int shift = 0;
++      hugepage_size = default_huge_page_size();
++      /* munmap will fail if the length is not page aligned */
++      if (hugepage_size > length)
++              length = hugepage_size;
++
+       if (argc > 1)
+               length = atol(argv[1]) << 20;
+       if (argc > 2) {
+-- 
+2.43.0
+
diff --git a/queue-5.10/selftests-mm-switch-to-bash-from-sh.patch b/queue-5.10/selftests-mm-switch-to-bash-from-sh.patch
new file mode 100644 (file)
index 0000000..e5ddb93
--- /dev/null
@@ -0,0 +1,58 @@
+From 94d5ee7a0a350e94c7ba7c41e284fd21934fa921 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 14:04:54 +0500
+Subject: selftests/mm: switch to bash from sh
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ]
+
+Running charge_reserved_hugetlb.sh generates errors if sh is set to
+dash:
+
+./charge_reserved_hugetlb.sh: 9: [[: not found
+./charge_reserved_hugetlb.sh: 19: [[: not found
+./charge_reserved_hugetlb.sh: 27: [[: not found
+./charge_reserved_hugetlb.sh: 37: [[: not found
+./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected
+
+Switch to using /bin/bash instead of /bin/sh.  Make the switch for
+write_hugetlb_memory.sh as well which is called from
+charge_reserved_hugetlb.sh.
+
+Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +-
+ tools/testing/selftests/vm/write_hugetlb_memory.sh    | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+index 7536ff2f890a1..d0107f8ae6213 100644
+--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+ set -e
+diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+index 70a02301f4c27..3d2d2eb9d6fff 100644
+--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+ set -e
+-- 
+2.43.0
+
diff --git a/queue-5.10/serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch b/queue-5.10/serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch
new file mode 100644 (file)
index 0000000..94519ae
--- /dev/null
@@ -0,0 +1,75 @@
+From 0545f60f222499c7ab4f10ef0b2cd8a6c17cf290 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 16:30:00 -0500
+Subject: serial: max310x: fail probe if clock crystal is unstable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+
+[ Upstream commit 8afa6c6decea37e7cb473d2c60473f37f46cea35 ]
+
+A stable clock is really required in order to use this UART, so log an
+error message and bail out if the chip reports that the clock is not
+stable.
+
+Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness")
+Cc: stable@vger.kernel.org
+Suggested-by: Jan Kundrát <jan.kundrat@cesnet.cz>
+Link: https://www.spinics.net/lists/linux-serial/msg35773.html
+Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 0e0f778d75cd4..bbf45c0626681 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
+       return 1;
+ }
+-static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
++static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+                              unsigned long freq, bool xtal)
+ {
+       unsigned int div, clksrc, pllcfg = 0;
+@@ -626,7 +626,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+               } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES));
+               if (!stable)
+-                      dev_warn(dev, "clock is not stable yet\n");
++                      return dev_err_probe(dev, -EAGAIN,
++                                           "clock is not stable\n");
+       }
+       return bestfreq;
+@@ -1266,7 +1267,7 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ {
+       int i, ret, fmin, fmax, freq;
+       struct max310x_port *s;
+-      u32 uartclk = 0;
++      s32 uartclk = 0;
+       bool xtal;
+       if (IS_ERR(regmap))
+@@ -1350,6 +1351,11 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+       }
+       uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
++      if (uartclk < 0) {
++              ret = uartclk;
++              goto out_uart;
++      }
++
+       dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk);
+       for (i = 0; i < devtype->nr; i++) {
+-- 
+2.43.0
+
diff --git a/queue-5.10/serial-max310x-fix-io-data-corruption-in-batched-ope.patch b/queue-5.10/serial-max310x-fix-io-data-corruption-in-batched-ope.patch
new file mode 100644 (file)
index 0000000..58b0e46
--- /dev/null
@@ -0,0 +1,99 @@
+From 32f4aa03c524cbbfdbcdc089e09b2ad40769388d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Apr 2023 22:14:23 +0200
+Subject: serial: max310x: fix IO data corruption in batched operations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jan Kundrát <jan.kundrat@cesnet.cz>
+
+[ Upstream commit 3f42b142ea1171967e40e10e4b0241c0d6d28d41 ]
+
+After upgrading from 5.16 to 6.1, our board with a MAX14830 started
+producing lots of garbage data over UART. Bisection pointed out commit
+285e76fc049c as the culprit. That patch tried to replace hand-written
+code which I added in 2b4bac48c1084 ("serial: max310x: Use batched reads
+when reasonably safe") with the generic regmap infrastructure for
+batched operations.
+
+Unfortunately, the `regmap_raw_read` and `regmap_raw_write` which were
+used are actually functions which perform IO over *multiple* registers.
+That's not what is needed for accessing these Tx/Rx FIFOs; the
+appropriate functions are the `_noinc_` versions, not the `_raw_` ones.
+
+Fix this regression by using `regmap_noinc_read()` and
+`regmap_noinc_write()` along with the necessary `regmap_config` setup;
+with this patch in place, our board communicates happily again. Since
+our board uses SPI for talking to this chip, the I2C part is completely
+untested.
+
+Fixes: 285e76fc049c ("serial: max310x: use regmap methods for SPI batch operations")
+Cc: stable@vger.kernel.org
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Jan Kundrát <jan.kundrat@cesnet.cz>
+Link: https://lore.kernel.org/r/79db8e82aadb0e174bc82b9996423c3503c8fb37.1680732084.git.jan.kundrat@cesnet.cz
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index ed1aaa19854fd..2f88eae8a55a1 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -533,6 +533,11 @@ static bool max310x_reg_precious(struct device *dev, unsigned int reg)
+       return false;
+ }
++static bool max310x_reg_noinc(struct device *dev, unsigned int reg)
++{
++      return reg == MAX310X_RHR_REG;
++}
++
+ static int max310x_set_baud(struct uart_port *port, int baud)
+ {
+       unsigned int mode = 0, div = 0, frac = 0, c = 0, F = 0;
+@@ -667,14 +672,14 @@ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int
+ {
+       struct max310x_one *one = to_max310x_port(port);
+-      regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len);
++      regmap_noinc_write(one->regmap, MAX310X_THR_REG, txbuf, len);
+ }
+ static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len)
+ {
+       struct max310x_one *one = to_max310x_port(port);
+-      regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
++      regmap_noinc_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
+ }
+ static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen)
+@@ -1508,6 +1513,10 @@ static struct regmap_config regcfg = {
+       .writeable_reg = max310x_reg_writeable,
+       .volatile_reg = max310x_reg_volatile,
+       .precious_reg = max310x_reg_precious,
++      .writeable_noinc_reg = max310x_reg_noinc,
++      .readable_noinc_reg = max310x_reg_noinc,
++      .max_raw_read = MAX310X_FIFO_SIZE,
++      .max_raw_write = MAX310X_FIFO_SIZE,
+ };
+ #ifdef CONFIG_SPI_MASTER
+@@ -1593,6 +1602,10 @@ static struct regmap_config regcfg_i2c = {
+       .volatile_reg = max310x_reg_volatile,
+       .precious_reg = max310x_reg_precious,
+       .max_register = MAX310X_I2C_REVID_EXTREG,
++      .writeable_noinc_reg = max310x_reg_noinc,
++      .readable_noinc_reg = max310x_reg_noinc,
++      .max_raw_read = MAX310X_FIFO_SIZE,
++      .max_raw_write = MAX310X_FIFO_SIZE,
+ };
+ static const struct max310x_if_cfg max310x_i2c_if_cfg = {
+-- 
+2.43.0
+
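A minimal sketch of the distinction the fix relies on (illustrative; the buffer length uses MAX310X_FIFO_SIZE only for the example):

	u8 buf[MAX310X_FIFO_SIZE];

	/* Multi-register access: walks MAX310X_RHR_REG, +1, +2, ...
	 * Wrong for a FIFO that sits behind a single register address.
	 */
	regmap_raw_read(one->regmap, MAX310X_RHR_REG, buf, sizeof(buf));

	/* Non-incrementing access: reads MAX310X_RHR_REG repeatedly.
	 * Correct for the Rx FIFO, provided readable_noinc_reg() reports
	 * that register as non-incrementing.
	 */
	regmap_noinc_read(one->regmap, MAX310X_RHR_REG, buf, sizeof(buf));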
diff --git a/queue-5.10/serial-max310x-implement-i2c-support.patch b/queue-5.10/serial-max310x-implement-i2c-support.patch
new file mode 100644 (file)
index 0000000..a528e8d
--- /dev/null
@@ -0,0 +1,270 @@
+From fdcfee2740c6daddaa534df38a219b30e9f9f638 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:59 +0300
+Subject: serial: max310x: implement I2C support
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit 2e1f2d9a9bdbe12ee475c82a45ac46a278e8049a ]
+
+I2C implementation on this chip has a few key differences
+compared to SPI, as described in previous patches.
+ * extended register space access needs no extra logic
+ * slave address is used to select which UART to communicate
+   with
+
+To accommodate these differences, add an I2C interface config,
+set the RevID register address and implement an empty method
+for setting the GlobalCommand register, since no special handling
+is needed for the extended register space.
+
+To handle the port-specific slave address, create an I2C dummy
+device for each port, except the base one (UART0), which is
+expected to be the one specified in firmware, and create a
+regmap for each I2C device.
+Add minimum and maximum slave addresses to each devtype for
+sanity checking.
+
+Also, use a separate regmap config with no write_flag_mask,
+since I2C has a R/W bit in its slave address, and set the
+max register to the address of the RevID register, since the
+extended register space needs no extra logic.
+
+Finally, add the I2C driver.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-5-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/Kconfig   |   1 +
+ drivers/tty/serial/max310x.c | 135 ++++++++++++++++++++++++++++++++++-
+ 2 files changed, 135 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
+index 28f22e58639c6..bd30ae9751bf5 100644
+--- a/drivers/tty/serial/Kconfig
++++ b/drivers/tty/serial/Kconfig
+@@ -343,6 +343,7 @@ config SERIAL_MAX310X
+       depends on SPI_MASTER
+       select SERIAL_CORE
+       select REGMAP_SPI if SPI_MASTER
++      select REGMAP_I2C if I2C
+       help
+         This selects support for an advanced UART from Maxim (Dallas).
+         Supported ICs are MAX3107, MAX3108, MAX3109, MAX14830.
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index b90281ac54c85..ed1aaa19854fd 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -14,6 +14,7 @@
+ #include <linux/delay.h>
+ #include <linux/device.h>
+ #include <linux/gpio/driver.h>
++#include <linux/i2c.h>
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
+ #include <linux/property.h>
+@@ -73,6 +74,7 @@
+ /* Extended registers */
+ #define MAX310X_SPI_REVID_EXTREG      MAX310X_REG_05 /* Revision ID */
++#define MAX310X_I2C_REVID_EXTREG      (0x25) /* Revision ID */
+ /* IRQ register bits */
+ #define MAX310X_IRQ_LSR_BIT           (1 << 0) /* LSR interrupt */
+@@ -260,6 +262,10 @@ struct max310x_if_cfg {
+ };
+ struct max310x_devtype {
++      struct {
++              unsigned short min;
++              unsigned short max;
++      } slave_addr;
+       char    name[9];
+       int     nr;
+       u8      mode1;
+@@ -431,6 +437,10 @@ static const struct max310x_devtype max3107_devtype = {
+       .mode1  = MAX310X_MODE1_AUTOSLEEP_BIT | MAX310X_MODE1_IRQSEL_BIT,
+       .detect = max3107_detect,
+       .power  = max310x_power,
++      .slave_addr     = {
++              .min = 0x2c,
++              .max = 0x2f,
++      },
+ };
+ static const struct max310x_devtype max3108_devtype = {
+@@ -439,6 +449,10 @@ static const struct max310x_devtype max3108_devtype = {
+       .mode1  = MAX310X_MODE1_AUTOSLEEP_BIT,
+       .detect = max3108_detect,
+       .power  = max310x_power,
++      .slave_addr     = {
++              .min = 0x60,
++              .max = 0x6f,
++      },
+ };
+ static const struct max310x_devtype max3109_devtype = {
+@@ -447,6 +461,10 @@ static const struct max310x_devtype max3109_devtype = {
+       .mode1  = MAX310X_MODE1_AUTOSLEEP_BIT,
+       .detect = max3109_detect,
+       .power  = max310x_power,
++      .slave_addr     = {
++              .min = 0x60,
++              .max = 0x6f,
++      },
+ };
+ static const struct max310x_devtype max14830_devtype = {
+@@ -455,6 +473,10 @@ static const struct max310x_devtype max14830_devtype = {
+       .mode1  = MAX310X_MODE1_IRQSEL_BIT,
+       .detect = max14830_detect,
+       .power  = max14830_power,
++      .slave_addr     = {
++              .min = 0x60,
++              .max = 0x6f,
++      },
+ };
+ static bool max310x_reg_writeable(struct device *dev, unsigned int reg)
+@@ -1557,6 +1579,97 @@ static struct spi_driver max310x_spi_driver = {
+ };
+ #endif
++#ifdef CONFIG_I2C
++static int max310x_i2c_extended_reg_enable(struct device *dev, bool enable)
++{
++      return 0;
++}
++
++static struct regmap_config regcfg_i2c = {
++      .reg_bits = 8,
++      .val_bits = 8,
++      .cache_type = REGCACHE_RBTREE,
++      .writeable_reg = max310x_reg_writeable,
++      .volatile_reg = max310x_reg_volatile,
++      .precious_reg = max310x_reg_precious,
++      .max_register = MAX310X_I2C_REVID_EXTREG,
++};
++
++static const struct max310x_if_cfg max310x_i2c_if_cfg = {
++      .extended_reg_enable = max310x_i2c_extended_reg_enable,
++      .rev_id_reg = MAX310X_I2C_REVID_EXTREG,
++};
++
++static unsigned short max310x_i2c_slave_addr(unsigned short addr,
++                                           unsigned int nr)
++{
++      /*
++       * For MAX14830 and MAX3109, the slave address depends on what the
++       * A0 and A1 pins are tied to.
++       * See Table I2C Address Map of the datasheet.
++       * Based on that table, the following formulas were determined.
++       * UART1 - UART0 = 0x10
++       * UART2 - UART1 = 0x20 + 0x10
++       * UART3 - UART2 = 0x10
++       */
++
++      addr -= nr * 0x10;
++
++      if (nr >= 2)
++              addr -= 0x20;
++
++      return addr;
++}
++
++static int max310x_i2c_probe(struct i2c_client *client)
++{
++      const struct max310x_devtype *devtype =
++                      device_get_match_data(&client->dev);
++      struct i2c_client *port_client;
++      struct regmap *regmaps[4];
++      unsigned int i;
++      u8 port_addr;
++
++      if (client->addr < devtype->slave_addr.min ||
++              client->addr > devtype->slave_addr.max)
++              return dev_err_probe(&client->dev, -EINVAL,
++                                   "Slave addr 0x%x outside of range [0x%x, 0x%x]\n",
++                                   client->addr, devtype->slave_addr.min,
++                                   devtype->slave_addr.max);
++
++      regmaps[0] = devm_regmap_init_i2c(client, &regcfg_i2c);
++
++      for (i = 1; i < devtype->nr; i++) {
++              port_addr = max310x_i2c_slave_addr(client->addr, i);
++              port_client = devm_i2c_new_dummy_device(&client->dev,
++                                                      client->adapter,
++                                                      port_addr);
++
++              regmaps[i] = devm_regmap_init_i2c(port_client, &regcfg_i2c);
++      }
++
++      return max310x_probe(&client->dev, devtype, &max310x_i2c_if_cfg,
++                           regmaps, client->irq);
++}
++
++static int max310x_i2c_remove(struct i2c_client *client)
++{
++      max310x_remove(&client->dev);
++
++      return 0;
++}
++
++static struct i2c_driver max310x_i2c_driver = {
++      .driver = {
++              .name           = MAX310X_NAME,
++              .of_match_table = max310x_dt_ids,
++              .pm             = &max310x_pm_ops,
++      },
++      .probe_new      = max310x_i2c_probe,
++      .remove         = max310x_i2c_remove,
++};
++#endif
++
+ static int __init max310x_uart_init(void)
+ {
+       int ret;
+@@ -1570,15 +1683,35 @@ static int __init max310x_uart_init(void)
+ #ifdef CONFIG_SPI_MASTER
+       ret = spi_register_driver(&max310x_spi_driver);
+       if (ret)
+-              uart_unregister_driver(&max310x_uart);
++              goto err_spi_register;
++#endif
++
++#ifdef CONFIG_I2C
++      ret = i2c_add_driver(&max310x_i2c_driver);
++      if (ret)
++              goto err_i2c_register;
+ #endif
++      return 0;
++
++#ifdef CONFIG_I2C
++err_i2c_register:
++      spi_unregister_driver(&max310x_spi_driver);
++#endif
++
++err_spi_register:
++      uart_unregister_driver(&max310x_uart);
++
+       return ret;
+ }
+ module_init(max310x_uart_init);
+ static void __exit max310x_uart_exit(void)
+ {
++#ifdef CONFIG_I2C
++      i2c_del_driver(&max310x_i2c_driver);
++#endif
++
+ #ifdef CONFIG_SPI_MASTER
+       spi_unregister_driver(&max310x_spi_driver);
+ #endif
+-- 
+2.43.0
+
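A quick way to sanity-check the slave-address formula from the patch above is to run it stand-alone. The base address 0x6C below is only an example inside the MAX14830 range (0x60-0x6F); the helper body is copied from the patch.

/* Stand-alone check of max310x_i2c_slave_addr() from the patch above. */
#include <stdio.h>

static unsigned short max310x_i2c_slave_addr(unsigned short addr, unsigned int nr)
{
	addr -= nr * 0x10;

	if (nr >= 2)
		addr -= 0x20;

	return addr;
}

int main(void)
{
	unsigned int nr;

	for (nr = 0; nr < 4; nr++)
		printf("UART%u -> 0x%02x\n", nr, max310x_i2c_slave_addr(0x6c, nr));

	return 0;	/* prints 0x6c, 0x5c, 0x2c, 0x1c */
}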
diff --git a/queue-5.10/serial-max310x-make-accessing-revision-id-interface-.patch b/queue-5.10/serial-max310x-make-accessing-revision-id-interface-.patch
new file mode 100644 (file)
index 0000000..bb04134
--- /dev/null
@@ -0,0 +1,157 @@
+From 6bb1445dd5bb1d4549f294e07cad6f3dd9b5d476 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:58 +0300
+Subject: serial: max310x: make accessing revision id interface-agnostic
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit b3883ab5e95713e479f774ea68be275413e8e5b2 ]
+
+SPI can only use 5 address bits, since one bit is reserved for
+specifying R/W and 2 bits are used to specify the UART port.
+To access registers that have addresses past 0x1F, an extended
+register space can be enabled by writing to the GlobalCommand
+register (address 0x1F).
+
+I2C uses 8 address bits. The R/W bit is placed in the slave
+address, and so is the UART port. Because of this, registers
+that have addresses higher than 0x1F can be accessed normally.
+
+To access the RevID register, on SPI, 0xCE must be written to
+the 0x1F address to enable the extended register space, after
+which the RevID register is accessible at address 0x5. 0xCD
+must be written to the 0x1F address to disable the extended
+register space.
+
+On I2C, the RevID register is accessible at address 0x25.
+
+Create an interface config struct, and add a method for
+toggling the extended register space and a member for the RevId
+register address. Implement these for SPI.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-4-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++++++---------
+ 1 file changed, 30 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index a09ec46e0310d..b90281ac54c85 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -72,7 +72,7 @@
+ #define MAX310X_GLOBALCMD_REG         MAX310X_REG_1F /* Global Command (WO) */
+ /* Extended registers */
+-#define MAX310X_REVID_EXTREG          MAX310X_REG_05 /* Revision ID */
++#define MAX310X_SPI_REVID_EXTREG      MAX310X_REG_05 /* Revision ID */
+ /* IRQ register bits */
+ #define MAX310X_IRQ_LSR_BIT           (1 << 0) /* LSR interrupt */
+@@ -253,6 +253,12 @@
+ #define MAX14830_BRGCFG_CLKDIS_BIT    (1 << 6) /* Clock Disable */
+ #define MAX14830_REV_ID                       (0xb0)
++struct max310x_if_cfg {
++      int (*extended_reg_enable)(struct device *dev, bool enable);
++
++      unsigned int rev_id_reg;
++};
++
+ struct max310x_devtype {
+       char    name[9];
+       int     nr;
+@@ -275,6 +281,7 @@ struct max310x_one {
+ struct max310x_port {
+       const struct max310x_devtype *devtype;
++      const struct max310x_if_cfg *if_cfg;
+       struct regmap           *regmap;
+       struct clk              *clk;
+ #ifdef CONFIG_GPIOLIB
+@@ -364,13 +371,12 @@ static int max3109_detect(struct device *dev)
+       unsigned int val = 0;
+       int ret;
+-      ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG,
+-                         MAX310X_EXTREG_ENBL);
++      ret = s->if_cfg->extended_reg_enable(dev, true);
+       if (ret)
+               return ret;
+-      regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val);
+-      regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL);
++      regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val);
++      s->if_cfg->extended_reg_enable(dev, false);
+       if (((val & MAX310x_REV_MASK) != MAX3109_REV_ID)) {
+               dev_err(dev,
+                       "%s ID 0x%02x does not match\n", s->devtype->name, val);
+@@ -395,13 +401,12 @@ static int max14830_detect(struct device *dev)
+       unsigned int val = 0;
+       int ret;
+-      ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG,
+-                         MAX310X_EXTREG_ENBL);
++      ret = s->if_cfg->extended_reg_enable(dev, true);
+       if (ret)
+               return ret;
+       
+-      regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val);
+-      regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL);
++      regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val);
++      s->if_cfg->extended_reg_enable(dev, false);
+       if (((val & MAX310x_REV_MASK) != MAX14830_REV_ID)) {
+               dev_err(dev,
+                       "%s ID 0x%02x does not match\n", s->devtype->name, val);
+@@ -1250,6 +1255,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ #endif
+ static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype,
++                       const struct max310x_if_cfg *if_cfg,
+                        struct regmap *regmaps[], int irq)
+ {
+       int i, ret, fmin, fmax, freq;
+@@ -1313,6 +1319,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+       s->regmap = regmaps[0];
+       s->devtype = devtype;
++      s->if_cfg = if_cfg;
+       dev_set_drvdata(dev, s);
+       /* Check device to ensure we are talking to what we expect */
+@@ -1482,6 +1489,19 @@ static struct regmap_config regcfg = {
+ };
+ #ifdef CONFIG_SPI_MASTER
++static int max310x_spi_extended_reg_enable(struct device *dev, bool enable)
++{
++      struct max310x_port *s = dev_get_drvdata(dev);
++
++      return regmap_write(s->regmap, MAX310X_GLOBALCMD_REG,
++                          enable ? MAX310X_EXTREG_ENBL : MAX310X_EXTREG_DSBL);
++}
++
++static const struct max310x_if_cfg __maybe_unused max310x_spi_if_cfg = {
++      .extended_reg_enable = max310x_spi_extended_reg_enable,
++      .rev_id_reg = MAX310X_SPI_REVID_EXTREG,
++};
++
+ static int max310x_spi_probe(struct spi_device *spi)
+ {
+       const struct max310x_devtype *devtype;
+@@ -1508,7 +1528,7 @@ static int max310x_spi_probe(struct spi_device *spi)
+               regmaps[i] = devm_regmap_init_spi(spi, &regcfg);
+       }
+-      return max310x_probe(&spi->dev, devtype, regmaps, spi->irq);
++      return max310x_probe(&spi->dev, devtype, &max310x_spi_if_cfg, regmaps, spi->irq);
+ }
+ static int max310x_spi_remove(struct spi_device *spi)
+-- 
+2.43.0
+
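Putting the numbers from the commit message together, the SPI-side RevID read boils down to the sequence below. This is an illustrative sketch, not code from the patch (max310x_read_revid_spi is a made-up name); it reuses the driver's existing MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_ENBL (0xCE) and MAX310X_EXTREG_DSBL (0xCD) definitions, which is what the max3109/max14830 detect paths do through the new extended_reg_enable() callback. On I2C the callback can be a no-op because the RevID register at 0x25 is directly addressable.

/* Sketch of the SPI RevID access described in the commit message above. */
static int max310x_read_revid_spi(struct max310x_port *s, unsigned int *revid)
{
	int ret;

	/* Open the extended register window (0xCE -> GlobalCommand). */
	ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_ENBL);
	if (ret)
		return ret;

	/* RevID sits at extended address 0x05 on SPI. */
	ret = regmap_read(s->regmap, MAX310X_SPI_REVID_EXTREG, revid);

	/* Close the window again (0xCD), regardless of the read result. */
	regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL);

	return ret;
}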
diff --git a/queue-5.10/serial-max310x-make-use-of-device-properties.patch b/queue-5.10/serial-max310x-make-use-of-device-properties.patch
new file mode 100644 (file)
index 0000000..a76dfda
--- /dev/null
@@ -0,0 +1,97 @@
+From 866a5aceec6bce05749d0a998ceb472da9dd98b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Oct 2020 11:46:34 +0300
+Subject: serial: max310x: Make use of device properties
+
+From: Andy Shevchenko <andy.shevchenko@gmail.com>
+
+[ Upstream commit c808fab604ca62cff19ee6b261211483830807aa ]
+
+The device property API allows gathering device resources from different sources,
+such as ACPI. Convert the driver to make use of the device property API.
+
+Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20201007084635.594991-1-andy.shevchenko@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 27 +++++++++------------------
+ 1 file changed, 9 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index bbf45c0626681..8d42c537ee5ea 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -15,8 +15,8 @@
+ #include <linux/device.h>
+ #include <linux/gpio/driver.h>
+ #include <linux/module.h>
+-#include <linux/of.h>
+-#include <linux/of_device.h>
++#include <linux/mod_devicetable.h>
++#include <linux/property.h>
+ #include <linux/regmap.h>
+ #include <linux/serial_core.h>
+ #include <linux/serial.h>
+@@ -271,7 +271,7 @@ struct max310x_one {
+       container_of(_port, struct max310x_one, port)
+ struct max310x_port {
+-      struct max310x_devtype  *devtype;
++      const struct max310x_devtype *devtype;
+       struct regmap           *regmap;
+       struct clk              *clk;
+ #ifdef CONFIG_GPIOLIB
+@@ -1262,7 +1262,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ }
+ #endif
+-static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
++static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype,
+                        struct regmap *regmap, int irq)
+ {
+       int i, ret, fmin, fmax, freq;
+@@ -1488,7 +1488,7 @@ static struct regmap_config regcfg = {
+ #ifdef CONFIG_SPI_MASTER
+ static int max310x_spi_probe(struct spi_device *spi)
+ {
+-      struct max310x_devtype *devtype;
++      const struct max310x_devtype *devtype;
+       struct regmap *regmap;
+       int ret;
+@@ -1500,18 +1500,9 @@ static int max310x_spi_probe(struct spi_device *spi)
+       if (ret)
+               return ret;
+-      if (spi->dev.of_node) {
+-              const struct of_device_id *of_id =
+-                      of_match_device(max310x_dt_ids, &spi->dev);
+-              if (!of_id)
+-                      return -ENODEV;
+-
+-              devtype = (struct max310x_devtype *)of_id->data;
+-      } else {
+-              const struct spi_device_id *id_entry = spi_get_device_id(spi);
+-
+-              devtype = (struct max310x_devtype *)id_entry->driver_data;
+-      }
++      devtype = device_get_match_data(&spi->dev);
++      if (!devtype)
++              devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data;
+       regcfg.max_register = devtype->nr * 0x20 - 1;
+       regmap = devm_regmap_init_spi(spi, &regcfg);
+@@ -1536,7 +1527,7 @@ MODULE_DEVICE_TABLE(spi, max310x_id_table);
+ static struct spi_driver max310x_spi_driver = {
+       .driver = {
+               .name           = MAX310X_NAME,
+-              .of_match_table = of_match_ptr(max310x_dt_ids),
++              .of_match_table = max310x_dt_ids,
+               .pm             = &max310x_pm_ops,
+       },
+       .probe          = max310x_spi_probe,
+-- 
+2.43.0
+
diff --git a/queue-5.10/serial-max310x-prevent-infinite-while-loop-in-port-s.patch b/queue-5.10/serial-max310x-prevent-infinite-while-loop-in-port-s.patch
new file mode 100644 (file)
index 0000000..552a534
--- /dev/null
@@ -0,0 +1,76 @@
+From 86c63668f8208c860ff435e1ce718253043893a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 16:30:01 -0500
+Subject: serial: max310x: prevent infinite while() loop in port startup
+
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+
+[ Upstream commit b35f8dbbce818b02c730dc85133dc7754266e084 ]
+
+If there is a problem after resetting a port, the do/while() loop that
+checks the default value of DIVLSB register may run forever and spam the
+I2C bus.
+
+Add a delay before each read of DIVLSB, and a maximum number of tries to
+prevent that situation from happening.
+
+Also fail probe if port reset is unsuccessful.
+
+Fixes: 10d8b34a4217 ("serial: max310x: Driver rework")
+Cc: stable@vger.kernel.org
+Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 80298a5714bcb..978d9d93127e5 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -235,6 +235,10 @@
+ #define MAX310x_REV_MASK              (0xf8)
+ #define MAX310X_WRITE_BIT             0x80
++/* Port startup definitions */
++#define MAX310X_PORT_STARTUP_WAIT_RETRIES     20 /* Number of retries */
++#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS    10 /* Delay between retries */
++
+ /* Crystal-related definitions */
+ #define MAX310X_XTAL_WAIT_RETRIES     20 /* Number of retries */
+ #define MAX310X_XTAL_WAIT_DELAY_MS    10 /* Delay between retries */
+@@ -1316,6 +1320,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               goto out_clk;
+       for (i = 0; i < devtype->nr; i++) {
++              bool started = false;
++              unsigned int try = 0, val = 0;
++
+               /* Reset port */
+               regmap_write(regmaps[i], MAX310X_MODE2_REG,
+                            MAX310X_MODE2_RST_BIT);
+@@ -1324,8 +1331,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               /* Wait for port startup */
+               do {
+-                      regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
+-              } while (ret != 0x01);
++                      msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS);
++                      regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val);
++
++                      if (val == 0x01)
++                              started = true;
++              } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES));
++
++              if (!started) {
++                      ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n");
++                      goto out_uart;
++              }
+               regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
+       }
+-- 
+2.43.0
+
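The bounded wait added above could also be expressed with the generic regmap_read_poll_timeout() helper, which exists in 5.10. The backport keeps the explicit do/while() to stay close to the upstream change; the fragment below is only a sketch of the equivalent pattern inside max310x_probe() (note the helper reads before sleeping, whereas the patch sleeps first).

	unsigned int val;
	int ret;

	/* Poll BRGDIVLSB for its reset value, bounded to the same budget as the patch. */
	ret = regmap_read_poll_timeout(regmaps[i], MAX310X_BRGDIVLSB_REG, val,
				       val == 0x01,
				       MAX310X_PORT_STARTUP_WAIT_DELAY_MS * 1000,
				       MAX310X_PORT_STARTUP_WAIT_RETRIES *
				       MAX310X_PORT_STARTUP_WAIT_DELAY_MS * 1000);
	if (ret) {
		/* Same handling as the patch: report and give up on this device. */
		ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n");
		goto out_uart;
	}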
diff --git a/queue-5.10/serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch b/queue-5.10/serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch
new file mode 100644 (file)
index 0000000..39f1fae
--- /dev/null
@@ -0,0 +1,113 @@
+From fbb1b8f42c124d4d27ab9da383decb1ce9c6fe16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 May 2021 20:29:30 +0300
+Subject: serial: max310x: Try to get crystal clock rate from property
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit d4d6f03c4fb3a91dadfe147b47edd40e4d7e4d36 ]
+
+In some configurations, mainly ACPI-based, the clock frequency of the device
+is supplied by the well-established 'clock-frequency' property. Hence, try
+to get it from that property last, if no other clock providers are available.
+
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20210517172930.83353-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++-------------
+ 1 file changed, 26 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 8bf3c5ab59431..0e0f778d75cd4 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
+       return 1;
+ }
+-static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
++static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+                              unsigned long freq, bool xtal)
+ {
+       unsigned int div, clksrc, pllcfg = 0;
+@@ -629,7 +629,7 @@ static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+                       dev_warn(dev, "clock is not stable yet\n");
+       }
+-      return (int)bestfreq;
++      return bestfreq;
+ }
+ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len)
+@@ -1264,9 +1264,10 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+                        struct regmap *regmap, int irq)
+ {
+-      int i, ret, fmin, fmax, freq, uartclk;
++      int i, ret, fmin, fmax, freq;
+       struct max310x_port *s;
+-      bool xtal = false;
++      u32 uartclk = 0;
++      bool xtal;
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+@@ -1278,24 +1279,20 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+               return -ENOMEM;
+       }
++      /* Always ask for fixed clock rate from a property. */
++      device_property_read_u32(dev, "clock-frequency", &uartclk);
++
+       s->clk = devm_clk_get_optional(dev, "osc");
+       if (IS_ERR(s->clk))
+               return PTR_ERR(s->clk);
+       if (s->clk) {
+-              fmin = 500000;
+-              fmax = 35000000;
++              xtal = false;
+       } else {
+               s->clk = devm_clk_get_optional(dev, "xtal");
+               if (IS_ERR(s->clk))
+                       return PTR_ERR(s->clk);
+-              if (s->clk) {
+-                      fmin = 1000000;
+-                      fmax = 4000000;
+-                      xtal = true;
+-              } else {
+-                      dev_err(dev, "Cannot get clock\n");
+-                      return -EINVAL;
+-              }
++
++              xtal = true;
+       }
+       ret = clk_prepare_enable(s->clk);
+@@ -1303,6 +1300,21 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+               return ret;
+       freq = clk_get_rate(s->clk);
++      if (freq == 0)
++              freq = uartclk;
++      if (freq == 0) {
++              dev_err(dev, "Cannot get clock rate\n");
++              return -EINVAL;
++      }
++
++      if (xtal) {
++              fmin = 1000000;
++              fmax = 4000000;
++      } else {
++              fmin = 500000;
++              fmax = 35000000;
++      }
++
+       /* Check frequency limits */
+       if (freq < fmin || freq > fmax) {
+               ret = -ERANGE;
+-- 
+2.43.0
+
diff --git a/queue-5.10/serial-max310x-unprepare-and-disable-clock-in-error-.patch b/queue-5.10/serial-max310x-unprepare-and-disable-clock-in-error-.patch
new file mode 100644 (file)
index 0000000..38a2feb
--- /dev/null
@@ -0,0 +1,40 @@
+From 96e94efbea517370c304ba5f5f0689deaefd232a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Jun 2021 18:37:33 +0300
+Subject: serial: max310x: Unprepare and disable clock in error path
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 ]
+
+In one error case the clock may be left prepared and enabled.
+Unprepare and disable clock in that case to balance state of
+the hardware.
+
+Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property")
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 978d9d93127e5..a09ec46e0310d 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               freq = uartclk;
+       if (freq == 0) {
+               dev_err(dev, "Cannot get clock rate\n");
+-              return -EINVAL;
++              ret = -EINVAL;
++              goto out_clk;
+       }
+       if (xtal) {
+-- 
+2.43.0
+
diff --git a/queue-5.10/serial-max310x-use-a-separate-regmap-for-each-port.patch b/queue-5.10/serial-max310x-use-a-separate-regmap-for-each-port.patch
new file mode 100644 (file)
index 0000000..6359b4b
--- /dev/null
@@ -0,0 +1,243 @@
+From bd91908dc08161b09b7f50372e564bfa3381c1b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:57 +0300
+Subject: serial: max310x: use a separate regmap for each port
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit 6ef281daf020592c219fa91780abc381c6c20db5 ]
+
+The driver currently does manual register manipulation in
+multiple places to talk to a specific UART port.
+
+In order to talk to a specific UART port over SPI, the bits U1
+and U0 of the register address can be set, as explained in the
+Command byte configuration section of the datasheet.
+
+Make this more elegant by creating regmaps for each UART port
+and setting the read_flag_mask and write_flag_mask
+accordingly.
+
+All communications regarding global registers are done on UART
+port 0, so replace the global regmap entirely with the port 0
+regmap.
+
+Also, remove the 0x1f masks from reg_writeable(), reg_volatile()
+and reg_precious() methods, since setting the U1 and U0 bits of
+the register address happens inside the regmap core now.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-3-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 68 +++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index c0fa4ad104774..80298a5714bcb 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -262,6 +262,7 @@ struct max310x_one {
+       struct work_struct      tx_work;
+       struct work_struct      md_work;
+       struct work_struct      rs_work;
++      struct regmap           *regmap;
+       u8 rx_buf[MAX310X_FIFO_SIZE];
+ };
+@@ -291,26 +292,26 @@ static DECLARE_BITMAP(max310x_lines, MAX310X_UART_NRMAX);
+ static u8 max310x_port_read(struct uart_port *port, u8 reg)
+ {
+-      struct max310x_port *s = dev_get_drvdata(port->dev);
++      struct max310x_one *one = to_max310x_port(port);
+       unsigned int val = 0;
+-      regmap_read(s->regmap, port->iobase + reg, &val);
++      regmap_read(one->regmap, reg, &val);
+       return val;
+ }
+ static void max310x_port_write(struct uart_port *port, u8 reg, u8 val)
+ {
+-      struct max310x_port *s = dev_get_drvdata(port->dev);
++      struct max310x_one *one = to_max310x_port(port);
+-      regmap_write(s->regmap, port->iobase + reg, val);
++      regmap_write(one->regmap, reg, val);
+ }
+ static void max310x_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val)
+ {
+-      struct max310x_port *s = dev_get_drvdata(port->dev);
++      struct max310x_one *one = to_max310x_port(port);
+-      regmap_update_bits(s->regmap, port->iobase + reg, mask, val);
++      regmap_update_bits(one->regmap, reg, mask, val);
+ }
+ static int max3107_detect(struct device *dev)
+@@ -449,7 +450,7 @@ static const struct max310x_devtype max14830_devtype = {
+ static bool max310x_reg_writeable(struct device *dev, unsigned int reg)
+ {
+-      switch (reg & 0x1f) {
++      switch (reg) {
+       case MAX310X_IRQSTS_REG:
+       case MAX310X_LSR_IRQSTS_REG:
+       case MAX310X_SPCHR_IRQSTS_REG:
+@@ -466,7 +467,7 @@ static bool max310x_reg_writeable(struct device *dev, unsigned int reg)
+ static bool max310x_reg_volatile(struct device *dev, unsigned int reg)
+ {
+-      switch (reg & 0x1f) {
++      switch (reg) {
+       case MAX310X_RHR_REG:
+       case MAX310X_IRQSTS_REG:
+       case MAX310X_LSR_IRQSTS_REG:
+@@ -488,7 +489,7 @@ static bool max310x_reg_volatile(struct device *dev, unsigned int reg)
+ static bool max310x_reg_precious(struct device *dev, unsigned int reg)
+ {
+-      switch (reg & 0x1f) {
++      switch (reg) {
+       case MAX310X_RHR_REG:
+       case MAX310X_IRQSTS_REG:
+       case MAX310X_SPCHR_IRQSTS_REG:
+@@ -633,18 +634,16 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len)
+ {
+-      struct max310x_port *s = dev_get_drvdata(port->dev);
+-      u8 reg = port->iobase + MAX310X_THR_REG;
++      struct max310x_one *one = to_max310x_port(port);
+-      regmap_raw_write(s->regmap, reg, txbuf, len);
++      regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len);
+ }
+ static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len)
+ {
+-      struct max310x_port *s = dev_get_drvdata(port->dev);
+-      u8 reg = port->iobase + MAX310X_RHR_REG;
++      struct max310x_one *one = to_max310x_port(port);
+-      regmap_raw_read(s->regmap, reg, rxbuf, len);
++      regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
+ }
+ static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen)
+@@ -1247,15 +1246,16 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ #endif
+ static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype,
+-                       struct regmap *regmap, int irq)
++                       struct regmap *regmaps[], int irq)
+ {
+       int i, ret, fmin, fmax, freq;
+       struct max310x_port *s;
+       s32 uartclk = 0;
+       bool xtal;
+-      if (IS_ERR(regmap))
+-              return PTR_ERR(regmap);
++      for (i = 0; i < devtype->nr; i++)
++              if (IS_ERR(regmaps[i]))
++                      return PTR_ERR(regmaps[i]);
+       /* Alloc port structure */
+       s = devm_kzalloc(dev, struct_size(s, p, devtype->nr), GFP_KERNEL);
+@@ -1306,7 +1306,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               goto out_clk;
+       }
+-      s->regmap = regmap;
++      s->regmap = regmaps[0];
+       s->devtype = devtype;
+       dev_set_drvdata(dev, s);
+@@ -1316,22 +1316,18 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               goto out_clk;
+       for (i = 0; i < devtype->nr; i++) {
+-              unsigned int offs = i << 5;
+-
+               /* Reset port */
+-              regmap_write(s->regmap, MAX310X_MODE2_REG + offs,
++              regmap_write(regmaps[i], MAX310X_MODE2_REG,
+                            MAX310X_MODE2_RST_BIT);
+               /* Clear port reset */
+-              regmap_write(s->regmap, MAX310X_MODE2_REG + offs, 0);
++              regmap_write(regmaps[i], MAX310X_MODE2_REG, 0);
+               /* Wait for port startup */
+               do {
+-                      regmap_read(s->regmap,
+-                                  MAX310X_BRGDIVLSB_REG + offs, &ret);
++                      regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
+               } while (ret != 0x01);
+-              regmap_write(s->regmap, MAX310X_MODE1_REG + offs,
+-                           devtype->mode1);
++              regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
+       }
+       uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
+@@ -1359,11 +1355,13 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               s->p[i].port.fifosize   = MAX310X_FIFO_SIZE;
+               s->p[i].port.flags      = UPF_FIXED_TYPE | UPF_LOW_LATENCY;
+               s->p[i].port.iotype     = UPIO_PORT;
+-              s->p[i].port.iobase     = i * 0x20;
++              s->p[i].port.iobase     = i;
+               s->p[i].port.membase    = (void __iomem *)~0;
+               s->p[i].port.uartclk    = uartclk;
+               s->p[i].port.rs485_config = max310x_rs485_config;
+               s->p[i].port.ops        = &max310x_ops;
++              s->p[i].regmap          = regmaps[i];
++
+               /* Disable all interrupts */
+               max310x_port_write(&s->p[i].port, MAX310X_IRQEN_REG, 0);
+               /* Clear IRQ status register */
+@@ -1460,6 +1458,7 @@ static struct regmap_config regcfg = {
+       .val_bits = 8,
+       .write_flag_mask = MAX310X_WRITE_BIT,
+       .cache_type = REGCACHE_RBTREE,
++      .max_register = MAX310X_REG_1F,
+       .writeable_reg = max310x_reg_writeable,
+       .volatile_reg = max310x_reg_volatile,
+       .precious_reg = max310x_reg_precious,
+@@ -1469,7 +1468,8 @@ static struct regmap_config regcfg = {
+ static int max310x_spi_probe(struct spi_device *spi)
+ {
+       const struct max310x_devtype *devtype;
+-      struct regmap *regmap;
++      struct regmap *regmaps[4];
++      unsigned int i;
+       int ret;
+       /* Setup SPI bus */
+@@ -1484,10 +1484,14 @@ static int max310x_spi_probe(struct spi_device *spi)
+       if (!devtype)
+               devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data;
+-      regcfg.max_register = devtype->nr * 0x20 - 1;
+-      regmap = devm_regmap_init_spi(spi, &regcfg);
++      for (i = 0; i < devtype->nr; i++) {
++              u8 port_mask = i * 0x20;
++              regcfg.read_flag_mask = port_mask;
++              regcfg.write_flag_mask = port_mask | MAX310X_WRITE_BIT;
++              regmaps[i] = devm_regmap_init_spi(spi, &regcfg);
++      }
+-      return max310x_probe(&spi->dev, devtype, regmap, spi->irq);
++      return max310x_probe(&spi->dev, devtype, regmaps, spi->irq);
+ }
+ static int max310x_spi_remove(struct spi_device *spi)
+-- 
+2.43.0
+
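To make the flag-mask trick above concrete: on the wire, the SPI command byte carries the write flag in bit 7, the UART selector (U1/U0) in bits 6-5 and the register address in bits 4-0, so programming read_flag_mask/write_flag_mask per port lets regmap address each UART transparently. Below is a stand-alone illustration; spi_cmd is a made-up helper, and register 0x09 on port 2 is an arbitrary example.

/* Stand-alone illustration of the command byte the per-port regmaps produce. */
#include <stdio.h>

#define MAX310X_WRITE_BIT 0x80

static unsigned char spi_cmd(unsigned int port, unsigned int reg, int write)
{
	unsigned char flag_mask = port * 0x20;	/* what the patch programs into regmap */

	if (write)
		flag_mask |= MAX310X_WRITE_BIT;

	return flag_mask | (reg & 0x1f);
}

int main(void)
{
	/* Writing register 0x09 on UART2 -> 0xc9, reading it -> 0x49. */
	printf("write: 0x%02x, read: 0x%02x\n",
	       spi_cmd(2, 0x09, 1), spi_cmd(2, 0x09, 0));

	return 0;
}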
diff --git a/queue-5.10/serial-max310x-use-devm_clk_get_optional-to-get-the-.patch b/queue-5.10/serial-max310x-use-devm_clk_get_optional-to-get-the-.patch
new file mode 100644 (file)
index 0000000..b0cb6e8
--- /dev/null
@@ -0,0 +1,77 @@
+From a4b4ca6718f9264e4ceb95efdbc44618ef8102ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Oct 2020 11:46:35 +0300
+Subject: serial: max310x: Use devm_clk_get_optional() to get the input clock
+
+From: Andy Shevchenko <andy.shevchenko@gmail.com>
+
+[ Upstream commit 974e454d6f96da0c0ab1b4115b92587dd9406f6a ]
+
+Simplify the code which fetches the input clock by using
+devm_clk_get_optional(). If no input clock is present
+devm_clk_get_optional() will return NULL instead of an error
+which matches the behavior of the old code.
+
+Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20201007084635.594991-2-andy.shevchenko@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 14537878f9855..8bf3c5ab59431 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -1265,7 +1265,6 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+                        struct regmap *regmap, int irq)
+ {
+       int i, ret, fmin, fmax, freq, uartclk;
+-      struct clk *clk_osc, *clk_xtal;
+       struct max310x_port *s;
+       bool xtal = false;
+@@ -1279,23 +1278,24 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+               return -ENOMEM;
+       }
+-      clk_osc = devm_clk_get(dev, "osc");
+-      clk_xtal = devm_clk_get(dev, "xtal");
+-      if (!IS_ERR(clk_osc)) {
+-              s->clk = clk_osc;
++      s->clk = devm_clk_get_optional(dev, "osc");
++      if (IS_ERR(s->clk))
++              return PTR_ERR(s->clk);
++      if (s->clk) {
+               fmin = 500000;
+               fmax = 35000000;
+-      } else if (!IS_ERR(clk_xtal)) {
+-              s->clk = clk_xtal;
+-              fmin = 1000000;
+-              fmax = 4000000;
+-              xtal = true;
+-      } else if (PTR_ERR(clk_osc) == -EPROBE_DEFER ||
+-                 PTR_ERR(clk_xtal) == -EPROBE_DEFER) {
+-              return -EPROBE_DEFER;
+       } else {
+-              dev_err(dev, "Cannot get clock\n");
+-              return -EINVAL;
++              s->clk = devm_clk_get_optional(dev, "xtal");
++              if (IS_ERR(s->clk))
++                      return PTR_ERR(s->clk);
++              if (s->clk) {
++                      fmin = 1000000;
++                      fmax = 4000000;
++                      xtal = true;
++              } else {
++                      dev_err(dev, "Cannot get clock\n");
++                      return -EINVAL;
++              }
+       }
+       ret = clk_prepare_enable(s->clk);
+-- 
+2.43.0
+
diff --git a/queue-5.10/serial-max310x-use-regmap-methods-for-spi-batch-oper.patch b/queue-5.10/serial-max310x-use-regmap-methods-for-spi-batch-oper.patch
new file mode 100644 (file)
index 0000000..c10a321
--- /dev/null
@@ -0,0 +1,93 @@
+From 9ef224786adcc93be42e08d3aaf13a93699d18b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:56 +0300
+Subject: serial: max310x: use regmap methods for SPI batch operations
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit 285e76fc049c4d32c772eea9460a7ef28a193802 ]
+
+The SPI batch read/write operations can be implemented as simple
+regmap raw reads and writes; regmap will also attempt a gather
+write, just as the open-coded SPI transfers do here.
+
+Use the regmap raw read and write methods.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-2-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 36 ++++++++----------------------------
+ 1 file changed, 8 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 8d42c537ee5ea..c0fa4ad104774 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -263,8 +263,6 @@ struct max310x_one {
+       struct work_struct      md_work;
+       struct work_struct      rs_work;
+-      u8 wr_header;
+-      u8 rd_header;
+       u8 rx_buf[MAX310X_FIFO_SIZE];
+ };
+ #define to_max310x_port(_port) \
+@@ -635,32 +633,18 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len)
+ {
+-      struct max310x_one *one = to_max310x_port(port);
+-      struct spi_transfer xfer[] = {
+-              {
+-                      .tx_buf = &one->wr_header,
+-                      .len = sizeof(one->wr_header),
+-              }, {
+-                      .tx_buf = txbuf,
+-                      .len = len,
+-              }
+-      };
+-      spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer));
++      struct max310x_port *s = dev_get_drvdata(port->dev);
++      u8 reg = port->iobase + MAX310X_THR_REG;
++
++      regmap_raw_write(s->regmap, reg, txbuf, len);
+ }
+ static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len)
+ {
+-      struct max310x_one *one = to_max310x_port(port);
+-      struct spi_transfer xfer[] = {
+-              {
+-                      .tx_buf = &one->rd_header,
+-                      .len = sizeof(one->rd_header),
+-              }, {
+-                      .rx_buf = rxbuf,
+-                      .len = len,
+-              }
+-      };
+-      spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer));
++      struct max310x_port *s = dev_get_drvdata(port->dev);
++      u8 reg = port->iobase + MAX310X_RHR_REG;
++
++      regmap_raw_read(s->regmap, reg, rxbuf, len);
+ }
+ static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen)
+@@ -1390,10 +1374,6 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+               INIT_WORK(&s->p[i].md_work, max310x_md_proc);
+               /* Initialize queue for changing RS485 mode */
+               INIT_WORK(&s->p[i].rs_work, max310x_rs_proc);
+-              /* Initialize SPI-transfer buffers */
+-              s->p[i].wr_header = (s->p[i].port.iobase + MAX310X_THR_REG) |
+-                                  MAX310X_WRITE_BIT;
+-              s->p[i].rd_header = (s->p[i].port.iobase + MAX310X_RHR_REG);
+               /* Register port */
+               ret = uart_add_one_port(&max310x_uart, &s->p[i].port);
+-- 
+2.43.0
+
diff --git a/queue-5.10/series b/queue-5.10/series
index 2e25614686ec147f483d66d510c0dab866e9ade5..c7142614720089cc565ede5d31aa9c64f1b05f3f 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -28,3 +28,51 @@ netrom-fix-a-data-race-around-sysctl_netrom_transpor.patch
 netrom-fix-a-data-race-around-sysctl_netrom_routing_.patch
 netrom-fix-a-data-race-around-sysctl_netrom_link_fai.patch
 netrom-fix-data-races-around-sysctl_net_busy_read.patch
+nfsd-modernize-nfsd4_release_lockowner.patch
+nfsd-add-documenting-comment-for-nfsd4_release_locko.patch
+nfsd-fix-release_lockowner.patch
+selftests-mm-switch-to-bash-from-sh.patch
+selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch
+um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch
+um-fix-adding-no-pie-for-clang.patch
+xhci-remove-extra-loop-in-interrupt-context.patch
+xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch
+xhci-process-isoc-td-properly-when-there-was-a-trans.patch
+xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch
+serial-max310x-use-devm_clk_get_optional-to-get-the-.patch
+serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch
+serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch
+serial-max310x-make-use-of-device-properties.patch
+serial-max310x-use-regmap-methods-for-spi-batch-oper.patch
+serial-max310x-use-a-separate-regmap-for-each-port.patch
+serial-max310x-prevent-infinite-while-loop-in-port-s.patch
+net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch
+bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch
+lsm-make-security_socket_getpeersec_stream-sockptr_t.patch
+lsm-fix-default-return-value-of-the-socket_getpeerse.patch
+ext4-make-ext4_es_insert_extent-return-void.patch
+ext4-refactor-ext4_da_map_blocks.patch
+ext4-convert-to-exclusive-lock-while-inserting-delal.patch
+drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch
+hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch
+hv_netvsc-wait-for-completion-on-request-switch_data.patch
+hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch
+hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch
+hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch
+hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch
+mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch
+mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch
+getrusage-add-the-signal_struct-sig-local-variable.patch
+getrusage-move-thread_group_cputime_adjusted-outside.patch
+getrusage-use-__for_each_thread.patch
+getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch
+exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch
+exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch
+serial-max310x-unprepare-and-disable-clock-in-error-.patch
+drivers-hv-vmbus-drop-error-message-when-no-request-.patch
+nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
+regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch
+regmap-add-bulk-read-write-callbacks-into-regmap_con.patch
+serial-max310x-make-accessing-revision-id-interface-.patch
+serial-max310x-implement-i2c-support.patch
+serial-max310x-fix-io-data-corruption-in-batched-ope.patch
diff --git a/queue-5.10/um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch b/queue-5.10/um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch
new file mode 100644 (file)
index 0000000..105f6be
--- /dev/null
@@ -0,0 +1,82 @@
+From a62300c8e6ec3597b4c1f4360679e84b12a3a1e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Mar 2021 14:02:37 +0100
+Subject: um: allow not setting extra rpaths in the linux binary
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 386093c68ba3e8bcfe7f46deba901e0e80713c29 ]
+
+There doesn't seem to be any reason for the rpath being set in
+the binaries, at least on the systems that I tested. On the other hand,
+setting rpath is actually harming binaries in some cases, e.g.
+if using nix-based compilation environments where /lib & /lib64
+are not part of the actual environment.
+
+Add a new Kconfig option (under EXPERT, for less user confusion)
+that allows disabling the rpath additions.
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/um/Kconfig      | 13 +++++++++++++
+ arch/um/Makefile     |  3 ++-
+ arch/x86/Makefile.um |  2 +-
+ 3 files changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/arch/um/Kconfig b/arch/um/Kconfig
+index eb1c6880bde49..20264b47dcffc 100644
+--- a/arch/um/Kconfig
++++ b/arch/um/Kconfig
+@@ -92,6 +92,19 @@ config LD_SCRIPT_DYN
+       depends on !LD_SCRIPT_STATIC
+       select MODULE_REL_CRCS if MODVERSIONS
++config LD_SCRIPT_DYN_RPATH
++      bool "set rpath in the binary" if EXPERT
++      default y
++      depends on LD_SCRIPT_DYN
++      help
++        Add /lib (and /lib64 for 64-bit) to the linux binary's rpath
++        explicitly.
++
++        You may need to turn this off if compiling for nix systems
++        that have their libraries in random /nix directories and
++        might otherwise unexpectedly use libraries from /lib or /lib64
++        instead of the desired ones.
++
+ config HOSTFS
+       tristate "Host filesystem"
+       help
+diff --git a/arch/um/Makefile b/arch/um/Makefile
+index 56e5320da7624..4211e23a2f68f 100644
+--- a/arch/um/Makefile
++++ b/arch/um/Makefile
+@@ -118,7 +118,8 @@ archprepare:
+       $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
+ LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
+-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
++LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
++LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
+ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
+       -fno-stack-protector $(call cc-option, -fno-stack-protector-all)
+diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
+index 1db7913795f51..b3c1ae084180d 100644
+--- a/arch/x86/Makefile.um
++++ b/arch/x86/Makefile.um
+@@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64
+ # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
+-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
++LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64
+ LINK-y += -m64
+ endif
+-- 
+2.43.0
+
diff --git a/queue-5.10/um-fix-adding-no-pie-for-clang.patch b/queue-5.10/um-fix-adding-no-pie-for-clang.patch
new file mode 100644 (file)
index 0000000..5e2a36c
--- /dev/null
@@ -0,0 +1,68 @@
+From e9d85964ffc6c7b86d09408ccde4e85c228101ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 15:59:54 -0700
+Subject: um: Fix adding '-no-pie' for clang
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+[ Upstream commit 846cfbeed09b45d985079a9173cf390cc053715b ]
+
+The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link
+vmlinux with -no-pie") added the compiler linker flag '-no-pie' via
+cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer.
+
+While this works for GCC, this does not work for clang because cc-option
+uses '-c', which stops the pipeline right before linking, so '-no-pie'
+is unconsumed and clang warns, causing cc-option to fail just as it
+would if the option was entirely unsupported:
+
+  $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null
+  clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument]
+
+A recent version of clang exposes this because it generates a relocation
+under '-mcmodel=large' that is not supported in PIE mode:
+
+  /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE
+  /usr/sbin/ld: failed to set dynamic section sizes: bad value
+  clang: error: linker command failed with exit code 1 (use -v to see invocation)
+
+Remove the cc-option check altogether. It is wasteful to invoke the
+compiler to check for '-no-pie' because only one supported compiler
+version does not support it, GCC 5.x (as it is supported with the
+minimum version of clang and GCC 6.1.0+). Use a combination of the
+gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add
+'-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all
+compilers that support this. Furthermore, using gcc-min-version can help
+turn this back into
+
+  LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie
+
+when the minimum version of GCC is bumped past 6.1.0.
+
+Cc: stable@vger.kernel.org
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1982
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/um/Makefile | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/um/Makefile b/arch/um/Makefile
+index 4211e23a2f68f..81d35b1f315ae 100644
+--- a/arch/um/Makefile
++++ b/arch/um/Makefile
+@@ -118,7 +118,9 @@ archprepare:
+       $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
+ LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
+-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
++ifdef CONFIG_LD_SCRIPT_DYN
++LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie
++endif
+ LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
+ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
+-- 
+2.43.0
+
diff --git a/queue-5.10/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch b/queue-5.10/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch
new file mode 100644 (file)
index 0000000..2ee7737
--- /dev/null
@@ -0,0 +1,57 @@
+From 1750a9b03f733605bfe32a0394c292f09ec37f8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:37 +0200
+Subject: xhci: handle isoc Babble and Buffer Overrun events properly
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+[ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ]
+
+xHCI 4.9 explicitly forbids assuming that the xHC has released its
+ownership of a multi-TRB TD when it reports an error on one of the
+early TRBs. Yet the driver makes such assumption and releases the TD,
+allowing the remaining TRBs to be freed or overwritten by new TDs.
+
+The xHC should also report completion of the final TRB due to its IOC
+flag being set by us, regardless of prior errors. This event cannot
+be recognized if the TD has already been freed earlier, resulting in
+"Transfer event TRB DMA ptr not part of current TD" error message.
+
+Fix this by reusing the logic for processing isoc Transaction Errors.
+This also handles hosts which fail to report the final completion.
+
+Fix transfer length reporting on Babble errors. They may be caused by
+device malfunction; there is no guarantee that the buffer has been filled.
+
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 883cf477a70b9..4fa387e447f08 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2262,9 +2262,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+       case COMP_BANDWIDTH_OVERRUN_ERROR:
+               frame->status = -ECOMM;
+               break;
+-      case COMP_ISOCH_BUFFER_OVERRUN:
+       case COMP_BABBLE_DETECTED_ERROR:
++              sum_trbs_for_length = true;
++              fallthrough;
++      case COMP_ISOCH_BUFFER_OVERRUN:
+               frame->status = -EOVERFLOW;
++              if (ep_trb != td->last_trb)
++                      td->error_mid_td = true;
+               break;
+       case COMP_INCOMPATIBLE_DEVICE_ERROR:
+       case COMP_STALL_ERROR:
+-- 
+2.43.0
+
diff --git a/queue-5.10/xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch b/queue-5.10/xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch
new file mode 100644 (file)
index 0000000..b13aea6
--- /dev/null
@@ -0,0 +1,143 @@
+From 9c80999c4b17a687981d277bb41ed65de6e95686 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Apr 2021 10:02:08 +0300
+Subject: xhci: prevent double-fetch of transfer and transfer event TRBs
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit e9fcb07704fcef6fa6d0333fd2b3a62442eaf45b ]
+
+The same values are parsed several times from transfer and event
+TRBs by different functions in the same call path, all while processing
+one transfer event.
+
+As the TRBs are in DMA memory and can be accessed by the xHC host, we want
+to avoid this to prevent double-fetch issues.
+
+To resolve this, pass the already parsed values to the different functions
+in the path of parsing a transfer event.
+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20210406070208.3406266-5-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 42 ++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index b814dc07116da..62d92da7016e7 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2059,16 +2059,13 @@ int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code)
+       return 0;
+ }
+-static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
+-      struct xhci_transfer_event *event, struct xhci_virt_ep *ep)
++static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++                   struct xhci_ring *ep_ring, struct xhci_td *td,
++                   u32 trb_comp_code)
+ {
+       struct xhci_ep_ctx *ep_ctx;
+-      struct xhci_ring *ep_ring;
+-      u32 trb_comp_code;
+-      ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
+       ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
+-      trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
+       if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID ||
+                       trb_comp_code == COMP_STOPPED ||
+@@ -2126,9 +2123,9 @@ static int sum_trb_lengths(struct xhci_hcd *xhci, struct xhci_ring *ring,
+ /*
+  * Process control tds, update urb status and actual_length.
+  */
+-static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
+-      union xhci_trb *ep_trb, struct xhci_transfer_event *event,
+-      struct xhci_virt_ep *ep)
++static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++              struct xhci_ring *ep_ring,  struct xhci_td *td,
++                         union xhci_trb *ep_trb, struct xhci_transfer_event *event)
+ {
+       struct xhci_ep_ctx *ep_ctx;
+       u32 trb_comp_code;
+@@ -2216,15 +2213,15 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
+               td->urb->actual_length = requested;
+ finish_td:
+-      return finish_td(xhci, td, event, ep);
++      return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+ /*
+  * Process isochronous tds, update urb packet status and actual_length.
+  */
+-static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+-      union xhci_trb *ep_trb, struct xhci_transfer_event *event,
+-      struct xhci_virt_ep *ep)
++static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++              struct xhci_ring *ep_ring, struct xhci_td *td,
++              union xhci_trb *ep_trb, struct xhci_transfer_event *event)
+ {
+       struct urb_priv *urb_priv;
+       int idx;
+@@ -2301,7 +2298,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+       td->urb->actual_length += frame->actual_length;
+-      return finish_td(xhci, td, event, ep);
++      return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+@@ -2333,17 +2330,15 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+ /*
+  * Process bulk and interrupt tds, update urb status and actual_length.
+  */
+-static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
+-      union xhci_trb *ep_trb, struct xhci_transfer_event *event,
+-      struct xhci_virt_ep *ep)
++static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++              struct xhci_ring *ep_ring, struct xhci_td *td,
++              union xhci_trb *ep_trb, struct xhci_transfer_event *event)
+ {
+       struct xhci_slot_ctx *slot_ctx;
+-      struct xhci_ring *ep_ring;
+       u32 trb_comp_code;
+       u32 remaining, requested, ep_trb_len;
+       slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx);
+-      ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
+       trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
+       remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
+       ep_trb_len = TRB_LEN(le32_to_cpu(ep_trb->generic.field[2]));
+@@ -2403,7 +2398,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
+                         remaining);
+               td->urb->actual_length = 0;
+       }
+-      return finish_td(xhci, td, event, ep);
++
++      return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+ /*
+@@ -2754,11 +2750,11 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+               /* update the urb's actual_length and give back to the core */
+               if (usb_endpoint_xfer_control(&td->urb->ep->desc))
+-                      process_ctrl_td(xhci, td, ep_trb, event, ep);
++                      process_ctrl_td(xhci, ep, ep_ring, td, ep_trb, event);
+               else if (usb_endpoint_xfer_isoc(&td->urb->ep->desc))
+-                      process_isoc_td(xhci, td, ep_trb, event, ep);
++                      process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event);
+               else
+-                      process_bulk_intr_td(xhci, td, ep_trb, event, ep);
++                      process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event);
+ cleanup:
+               handling_skipped_tds = ep->skip &&
+                       trb_comp_code != COMP_MISSED_SERVICE_ERROR &&
+-- 
+2.43.0
+
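
The refactor above follows the usual single-fetch pattern for device-writable memory: the event TRB's fields are read once into locals, and only the parsed values are handed to finish_td() and the per-type handlers, so a later write by the controller cannot change data the code has already validated. A minimal userspace sketch of the idea, with toy_event, toy_handle_tx_event and toy_finish_td as simplified stand-ins for the real xhci types, might look like this:

/* Toy illustration of the single-fetch pattern; not the xhci driver API. */
#include <stdio.h>

struct toy_event {
	volatile unsigned int transfer_len;	/* device-writable DMA field */
};

#define TOY_COMP_CODE(x)	(((x) >> 24) & 0xff)
#define TOY_TRB_LEN(x)		((x) & 0xffffff)

/* The helper only ever sees values that were parsed once by the caller. */
static void toy_finish_td(unsigned int comp_code, unsigned int remaining)
{
	printf("comp=%u remaining=%u\n", comp_code, remaining);
}

static void toy_handle_tx_event(struct toy_event *event)
{
	unsigned int field = event->transfer_len;	/* single fetch from DMA memory */
	unsigned int comp_code = TOY_COMP_CODE(field);
	unsigned int remaining = TOY_TRB_LEN(field);

	/* Re-reading event->transfer_len here or in toy_finish_td() would
	 * reopen the double-fetch window the patch closes. */
	toy_finish_td(comp_code, remaining);
}

int main(void)
{
	struct toy_event ev = { .transfer_len = (13u << 24) | 100 };

	toy_handle_tx_event(&ev);
	return 0;
}
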
diff --git a/queue-5.10/xhci-process-isoc-td-properly-when-there-was-a-trans.patch b/queue-5.10/xhci-process-isoc-td-properly-when-there-was-a-trans.patch
new file mode 100644 (file)
index 0000000..cefcfc8
--- /dev/null
@@ -0,0 +1,186 @@
+From 6bece1cf082431fcec40f34122386e466f10b62f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:36 +0200
+Subject: xhci: process isoc TD properly when there was a transaction error mid
+ TD.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ]
+
+The last TRB of an isoc TD might not trigger an event if there was
+an error event for a TRB mid TD. This is seen on a NEC Corporation
+uPD720200 USB 3.0 Host.
+
+After an error in the middle of a multi-TRB TD, the xHC should, according
+to xhci 4.9.1, generate events for passed TRBs with the IOC flag set if it
+proceeds to the next TD. This event is either a copy of the original error,
+or a "success" transfer event.
+
+If that event is missing, the driver and xHC host get out of sync, as
+the driver is still expecting a transfer event for that first TD, while
+the xHC host is already sending events for the next TD in the list.
+This leads to
+"Transfer event TRB DMA ptr not part of current TD" messages.
+
+As a solution, we tag the isoc TDs that get error events mid TD.
+If an event doesn't match the first TD, then check whether the tag is
+set and whether the event points to the next TD.
+In that case, give back the first TD and process the next TD normally.
+
+Make sure TD status and transferred length stay valid in both cases,
+with and without a final TD completion event.
+
+Reported-by: Michał Pecio <michal.pecio@gmail.com>
+Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/
+Tested-by: Michał Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++-------
+ drivers/usb/host/xhci.h      |  1 +
+ 2 files changed, 61 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 62d92da7016e7..883cf477a70b9 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2244,6 +2244,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+       /* handle completion code */
+       switch (trb_comp_code) {
+       case COMP_SUCCESS:
++              /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
++              if (td->error_mid_td)
++                      break;
+               if (remaining) {
+                       frame->status = short_framestatus;
+                       if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
+@@ -2269,8 +2272,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+               break;
+       case COMP_USB_TRANSACTION_ERROR:
+               frame->status = -EPROTO;
++              sum_trbs_for_length = true;
+               if (ep_trb != td->last_trb)
+-                      return 0;
++                      td->error_mid_td = true;
+               break;
+       case COMP_STOPPED:
+               sum_trbs_for_length = true;
+@@ -2290,6 +2294,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+               break;
+       }
++      if (td->urb_length_set)
++              goto finish_td;
++
+       if (sum_trbs_for_length)
+               frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
+                       ep_trb_len - remaining;
+@@ -2298,6 +2305,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+       td->urb->actual_length += frame->actual_length;
++finish_td:
++      /* Don't give back TD yet if we encountered an error mid TD */
++      if (td->error_mid_td && ep_trb != td->last_trb) {
++              xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
++              td->urb_length_set = true;
++              return 0;
++      }
++
+       return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+@@ -2684,17 +2699,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+               }
+               if (!ep_seg) {
+-                      if (!ep->skip ||
+-                          !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+-                              /* Some host controllers give a spurious
+-                               * successful event after a short transfer.
+-                               * Ignore it.
+-                               */
+-                              if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+-                                              ep_ring->last_td_was_short) {
+-                                      ep_ring->last_td_was_short = false;
+-                                      goto cleanup;
++
++                      if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
++                              skip_isoc_td(xhci, td, ep, status);
++                              goto cleanup;
++                      }
++
++                      /*
++                       * Some hosts give a spurious success event after a short
++                       * transfer. Ignore it.
++                       */
++                      if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
++                          ep_ring->last_td_was_short) {
++                              ep_ring->last_td_was_short = false;
++                              goto cleanup;
++                      }
++
++                      /*
++                       * xhci 4.10.2 states isoc endpoints should continue
++                       * processing the next TD if there was an error mid TD.
++                       * So hosts like NEC don't generate an event for the last
++                       * isoc TRB even if the IOC flag is set.
++                       * xhci 4.9.1 states that if there are errors in multi-TRB
++                       * TDs xHC should generate an error for that TRB, and if xHC
++                       * proceeds to the next TD it should generate an event for
++                       * any TRB with IOC flag on the way. Other hosts follow this.
++                       * So this event might be for the next TD.
++                       */
++                      if (td->error_mid_td &&
++                          !list_is_last(&td->td_list, &ep_ring->td_list)) {
++                              struct xhci_td *td_next = list_next_entry(td, td_list);
++
++                              ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
++                                                 td_next->last_trb, ep_trb_dma, false);
++                              if (ep_seg) {
++                                      /* give back previous TD, start handling new */
++                                      xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
++                                      ep_ring->dequeue = td->last_trb;
++                                      ep_ring->deq_seg = td->last_trb_seg;
++                                      inc_deq(xhci, ep_ring);
++                                      xhci_td_cleanup(xhci, td, ep_ring, td->status);
++                                      td = td_next;
+                               }
++                      }
++
++                      if (!ep_seg) {
+                               /* HC is busted, give up! */
+                               xhci_err(xhci,
+                                       "ERROR Transfer event TRB DMA ptr not "
+@@ -2706,9 +2755,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+                                         ep_trb_dma, true);
+                               return -ESHUTDOWN;
+                       }
+-
+-                      skip_isoc_td(xhci, td, ep, status);
+-                      goto cleanup;
+               }
+               if (trb_comp_code == COMP_SHORT_PACKET)
+                       ep_ring->last_td_was_short = true;
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 85ab213c7940a..5a8443f6ed703 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1554,6 +1554,7 @@ struct xhci_td {
+       struct xhci_segment     *bounce_seg;
+       /* actual_length of the URB has already been set */
+       bool                    urb_length_set;
++      bool                    error_mid_td;
+       unsigned int            num_trbs;
+ };
+-- 
+2.43.0
+
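
The key addition above is the lookahead in handle_tx_event(): when a transfer event does not fall inside the current TD, and that TD was tagged error_mid_td, the driver probes the next queued TD before concluding that driver and controller are out of sync. Below is a simplified model of just that decision; the td structure, toy_trb_in_td() and toy_match_event() are illustrative stand-ins, not the real xhci list handling.

/* Toy model of the mid-TD-error lookahead; not the xhci driver code. */
#include <stdbool.h>
#include <stdio.h>

struct toy_td {
	int first_trb, last_trb;	/* TRB index range covered by this TD */
	bool error_mid_td;
	struct toy_td *next;
};

/* Toy version of trb_in_td(): does the event TRB belong to this TD? */
static bool toy_trb_in_td(const struct toy_td *td, int ev_trb)
{
	return ev_trb >= td->first_trb && ev_trb <= td->last_trb;
}

static const char *toy_match_event(struct toy_td **current, int ev_trb)
{
	struct toy_td *td = *current;

	if (toy_trb_in_td(td, ev_trb))
		return "event for current TD";

	/* Final event for the errored TD never came; try the next TD. */
	if (td->error_mid_td && td->next && toy_trb_in_td(td->next, ev_trb)) {
		*current = td->next;	/* give back old TD, handle the new one */
		return "missing completion after mid-TD error, advanced to next TD";
	}

	return "event TRB not part of current TD";	/* genuinely out of sync */
}

int main(void)
{
	struct toy_td td2 = { .first_trb = 4, .last_trb = 7 };
	struct toy_td td1 = { .first_trb = 0, .last_trb = 3,
			      .error_mid_td = true, .next = &td2 };
	struct toy_td *cur = &td1;

	puts(toy_match_event(&cur, 5));	/* event already belongs to td2 */
	return 0;
}
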
diff --git a/queue-5.10/xhci-remove-extra-loop-in-interrupt-context.patch b/queue-5.10/xhci-remove-extra-loop-in-interrupt-context.patch
new file mode 100644 (file)
index 0000000..89b8d45
--- /dev/null
@@ -0,0 +1,143 @@
+From dcd48a2dfd74f9eb405b0181d978261958264b64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Jan 2021 15:00:28 +0200
+Subject: xhci: remove extra loop in interrupt context
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit 55f6153d8cc8eff0852d108f80087fdf41dc2169 ]
+
+When finishing a TD we walk the endpoint dequeue trb pointer
+until it matches the last TRB of the TD.
+
+TDs can contain over 100 TRBs, meaning we call a function 100 times,
+do a few comparisons and increase a couple of values for each of these calls,
+all in interrupt context.
+
+This can all be avoided by adding a pointer to the last TRB segment and
+the number of TRBs in the TD. So instead of walking through each TRB, just
+set the new dequeue segment, pointer, and number of free TRBs directly.
+
+Getting rid of the while loop also reduces the risk of getting stuck in an
+infinite loop in the interrupt handler. The loop relied on valid matching
+dequeue and last_trb values to break.
+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20210129130044.206855-12-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 21 ++++++++++++++-------
+ drivers/usb/host/xhci.h      |  2 ++
+ 2 files changed, 16 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index eb70f07e3623a..b814dc07116da 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2099,8 +2099,9 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
+                                            EP_HARD_RESET);
+       } else {
+               /* Update ring dequeue pointer */
+-              while (ep_ring->dequeue != td->last_trb)
+-                      inc_deq(xhci, ep_ring);
++              ep_ring->dequeue = td->last_trb;
++              ep_ring->deq_seg = td->last_trb_seg;
++              ep_ring->num_trbs_free += td->num_trbs - 1;
+               inc_deq(xhci, ep_ring);
+       }
+@@ -2321,8 +2322,9 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+       frame->actual_length = 0;
+       /* Update ring dequeue pointer */
+-      while (ep->ring->dequeue != td->last_trb)
+-              inc_deq(xhci, ep->ring);
++      ep->ring->dequeue = td->last_trb;
++      ep->ring->deq_seg = td->last_trb_seg;
++      ep->ring->num_trbs_free += td->num_trbs - 1;
+       inc_deq(xhci, ep->ring);
+       return xhci_td_cleanup(xhci, td, ep->ring, status);
+@@ -3487,7 +3489,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+                       field |= TRB_IOC;
+                       more_trbs_coming = false;
+                       td->last_trb = ring->enqueue;
+-
++                      td->last_trb_seg = ring->enq_seg;
+                       if (xhci_urb_suitable_for_idt(urb)) {
+                               memcpy(&send_addr, urb->transfer_buffer,
+                                      trb_buff_len);
+@@ -3513,7 +3515,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+                               upper_32_bits(send_addr),
+                               length_field,
+                               field);
+-
++              td->num_trbs++;
+               addr += trb_buff_len;
+               sent_len = trb_buff_len;
+@@ -3537,8 +3539,10 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+                                      ep_index, urb->stream_id,
+                                      1, urb, 1, mem_flags);
+               urb_priv->td[1].last_trb = ring->enqueue;
++              urb_priv->td[1].last_trb_seg = ring->enq_seg;
+               field = TRB_TYPE(TRB_NORMAL) | ring->cycle_state | TRB_IOC;
+               queue_trb(xhci, ring, 0, 0, 0, TRB_INTR_TARGET(0), field);
++              urb_priv->td[1].num_trbs++;
+       }
+       check_trb_math(urb, enqd_len);
+@@ -3589,6 +3593,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+       urb_priv = urb->hcpriv;
+       td = &urb_priv->td[0];
++      td->num_trbs = num_trbs;
+       /*
+        * Don't give the first TRB to the hardware (by toggling the cycle bit)
+@@ -3661,6 +3666,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+       /* Save the DMA address of the last TRB in the TD */
+       td->last_trb = ep_ring->enqueue;
++      td->last_trb_seg = ep_ring->enq_seg;
+       /* Queue status TRB - see Table 7 and sections 4.11.2.2 and 6.4.1.2.3 */
+       /* If the device sent data, the status stage is an OUT transfer */
+@@ -3905,7 +3911,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+                       goto cleanup;
+               }
+               td = &urb_priv->td[i];
+-
++              td->num_trbs = trbs_per_td;
+               /* use SIA as default, if frame id is used overwrite it */
+               sia_frame_id = TRB_SIA;
+               if (!(urb->transfer_flags & URB_ISO_ASAP) &&
+@@ -3948,6 +3954,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+                       } else {
+                               more_trbs_coming = false;
+                               td->last_trb = ep_ring->enqueue;
++                              td->last_trb_seg = ep_ring->enq_seg;
+                               field |= TRB_IOC;
+                               if (trb_block_event_intr(xhci, num_tds, i))
+                                       field |= TRB_BEI;
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index bb3c362a194b2..85ab213c7940a 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1550,9 +1550,11 @@ struct xhci_td {
+       struct xhci_segment     *start_seg;
+       union xhci_trb          *first_trb;
+       union xhci_trb          *last_trb;
++      struct xhci_segment     *last_trb_seg;
+       struct xhci_segment     *bounce_seg;
+       /* actual_length of the URB has already been set */
+       bool                    urb_length_set;
++      unsigned int            num_trbs;
+ };
+ /* xHCI command default timeout value */
+-- 
+2.43.0
+
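
The patch above trades a per-TRB walk for a constant-time update by caching the TD's last segment and TRB count when the TD is queued. The before/after sketch below uses trivial stand-in structures (a ring reduced to a counter) to show that the direct update leaves the dequeue pointer and free-TRB count in the same state as the old loop; it is only illustrative, not the driver code.

/* Toy before/after comparison of the dequeue update; not the xhci code. */
#include <stdio.h>

struct toy_ring { int dequeue; int num_trbs_free; };
struct toy_td   { int last_trb; int num_trbs; };

/* Old approach: O(num_trbs) walk, executed in interrupt context. */
static void finish_td_walk(struct toy_ring *ring, const struct toy_td *td)
{
	while (ring->dequeue != td->last_trb) {
		ring->dequeue++;
		ring->num_trbs_free++;
	}
	ring->dequeue++;		/* step past the last TRB */
	ring->num_trbs_free++;
}

/* New approach: constant-time update from the cached TD bookkeeping. */
static void finish_td_direct(struct toy_ring *ring, const struct toy_td *td)
{
	ring->dequeue = td->last_trb;
	ring->num_trbs_free += td->num_trbs - 1;
	ring->dequeue++;		/* equivalent of the final inc_deq() */
	ring->num_trbs_free++;
}

int main(void)
{
	struct toy_td td = { .last_trb = 99, .num_trbs = 100 };
	struct toy_ring a = {0}, b = {0};

	finish_td_walk(&a, &td);
	finish_td_direct(&b, &td);
	printf("walk: deq=%d free=%d  direct: deq=%d free=%d\n",
	       a.dequeue, a.num_trbs_free, b.dequeue, b.num_trbs_free);
	return 0;
}
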