--- /dev/null
+From a18447e359ab34c6f720c82fa9bb7785051c94fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 17:28:02 -0700
+Subject: bpf: net: Change sk_getsockopt() to take the sockptr_t argument
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit 4ff09db1b79b98b4a2a7511571c640b76cab3beb ]
+
+This patch changes sk_getsockopt() to take the sockptr_t argument
+such that it can be used by bpf_getsockopt(SOL_SOCKET) in a
+latter patch.
+
+security_socket_getpeersec_stream() is not changed. It stays
+with the __user ptr (optval.user and optlen.user) to avoid changes
+to other security hooks. bpf_getsockopt(SOL_SOCKET) also does not
+support SO_PEERSEC.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://lore.kernel.org/r/20220902002802.2888419-1-kafai@fb.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/filter.h | 3 +--
+ include/linux/sockptr.h | 5 +++++
+ net/core/filter.c | 5 ++---
+ net/core/sock.c | 43 +++++++++++++++++++++++------------------
+ 4 files changed, 32 insertions(+), 24 deletions(-)
+
+diff --git a/include/linux/filter.h b/include/linux/filter.h
+index bc6ce4b202a80..cd56e53bd42e2 100644
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -892,8 +892,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
+ void sk_reuseport_prog_free(struct bpf_prog *prog);
+ int sk_detach_filter(struct sock *sk);
+-int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
+- unsigned int len);
++int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
+
+ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
+ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
+diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
+index ea193414298b7..38862819e77a1 100644
+--- a/include/linux/sockptr.h
++++ b/include/linux/sockptr.h
+@@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
+ return 0;
+ }
+
++static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
++{
++ return copy_to_sockptr_offset(dst, 0, src, size);
++}
++
+ static inline void *memdup_sockptr(sockptr_t src, size_t len)
+ {
+ void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 6cfc8fb0562a2..49e4d1535cc82 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -9903,8 +9903,7 @@ int sk_detach_filter(struct sock *sk)
+ }
+ EXPORT_SYMBOL_GPL(sk_detach_filter);
+
+-int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
+- unsigned int len)
++int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
+ {
+ struct sock_fprog_kern *fprog;
+ struct sk_filter *filter;
+@@ -9935,7 +9934,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
+ goto out;
+
+ ret = -EFAULT;
+- if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
++ if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
+ goto out;
+
+ /* Instead of bytes, the API requests to return the number
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 95559d088a169..42da46965b16f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -644,8 +644,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
+ return ret;
+ }
+
+-static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+- int __user *optlen, int len)
++static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
++ sockptr_t optlen, int len)
+ {
+ int ret = -ENOPROTOOPT;
+ #ifdef CONFIG_NETDEVICES
+@@ -668,12 +668,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
+ len = strlen(devname) + 1;
+
+ ret = -EFAULT;
+- if (copy_to_user(optval, devname, len))
++ if (copy_to_sockptr(optval, devname, len))
+ goto out;
+
+ zero:
+ ret = -EFAULT;
+- if (put_user(len, optlen))
++ if (copy_to_sockptr(optlen, &len, sizeof(int)))
+ goto out;
+
+ ret = 0;
+@@ -1281,20 +1281,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
+ }
+ }
+
+-static int groups_to_user(gid_t __user *dst, const struct group_info *src)
++static int groups_to_user(sockptr_t dst, const struct group_info *src)
+ {
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+
+- for (i = 0; i < src->ngroups; i++)
+- if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
++ for (i = 0; i < src->ngroups; i++) {
++ gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
++
++ if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
+ return -EFAULT;
++ }
+
+ return 0;
+ }
+
+ static int sk_getsockopt(struct sock *sk, int level, int optname,
+- char __user *optval, int __user *optlen)
++ sockptr_t optval, sockptr_t optlen)
+ {
+ struct socket *sock = sk->sk_socket;
+
+@@ -1312,7 +1315,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ int lv = sizeof(int);
+ int len;
+
+- if (get_user(len, optlen))
++ if (copy_from_sockptr(&len, optlen, sizeof(int)))
+ return -EFAULT;
+ if (len < 0)
+ return -EINVAL;
+@@ -1445,7 +1448,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
+- if (copy_to_user(optval, &peercred, len))
++ if (copy_to_sockptr(optval, &peercred, len))
+ return -EFAULT;
+ goto lenout;
+ }
+@@ -1463,11 +1466,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ if (len < n * sizeof(gid_t)) {
+ len = n * sizeof(gid_t);
+ put_cred(cred);
+- return put_user(len, optlen) ? -EFAULT : -ERANGE;
++ return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
+ }
+ len = n * sizeof(gid_t);
+
+- ret = groups_to_user((gid_t __user *)optval, cred->group_info);
++ ret = groups_to_user(optval, cred->group_info);
+ put_cred(cred);
+ if (ret)
+ return ret;
+@@ -1483,7 +1486,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ return -ENOTCONN;
+ if (lv < len)
+ return -EINVAL;
+- if (copy_to_user(optval, address, len))
++ if (copy_to_sockptr(optval, address, len))
+ return -EFAULT;
+ goto lenout;
+ }
+@@ -1500,7 +1503,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ break;
+
+ case SO_PEERSEC:
+- return security_socket_getpeersec_stream(sock, optval, optlen, len);
++ return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
+
+ case SO_MARK:
+ v.val = sk->sk_mark;
+@@ -1528,7 +1531,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ return sock_getbindtodevice(sk, optval, optlen, len);
+
+ case SO_GET_FILTER:
+- len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
++ len = sk_get_filter(sk, optval, len);
+ if (len < 0)
+ return len;
+
+@@ -1575,7 +1578,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ sk_get_meminfo(sk, meminfo);
+
+ len = min_t(unsigned int, len, sizeof(meminfo));
+- if (copy_to_user(optval, &meminfo, len))
++ if (copy_to_sockptr(optval, &meminfo, len))
+ return -EFAULT;
+
+ goto lenout;
+@@ -1625,10 +1628,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+
+ if (len > lv)
+ len = lv;
+- if (copy_to_user(optval, &v, len))
++ if (copy_to_sockptr(optval, &v, len))
+ return -EFAULT;
+ lenout:
+- if (put_user(len, optlen))
++ if (copy_to_sockptr(optlen, &len, sizeof(int)))
+ return -EFAULT;
+ return 0;
+ }
+@@ -1636,7 +1639,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ int sock_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+ {
+- return sk_getsockopt(sock->sk, level, optname, optval, optlen);
++ return sk_getsockopt(sock->sk, level, optname,
++ USER_SOCKPTR(optval),
++ USER_SOCKPTR(optlen));
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From f01b7ce60d0b2730d8de1f8f49a16ed9fe64c76e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Nov 2020 11:04:00 +0100
+Subject: Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus
+ hardening
+
+From: Andres Beltran <lkmlabelt@gmail.com>
+
+[ Upstream commit e8b7db38449ac5b950a3f00519171c4be3e226ff ]
+
+Currently, VMbus drivers use pointers into guest memory as request IDs
+for interactions with Hyper-V. To be more robust in the face of errors
+or malicious behavior from a compromised Hyper-V, avoid exposing
+guest memory addresses to Hyper-V. Also avoid Hyper-V giving back a
+bad request ID that is then treated as the address of a guest data
+structure with no validation. Instead, encapsulate these memory
+addresses and provide small integers as request IDs.
+
+Signed-off-by: Andres Beltran <lkmlabelt@gmail.com>
+Co-developed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Reviewed-by: Wei Liu <wei.liu@kernel.org>
+Link: https://lore.kernel.org/r/20201109100402.8946-2-parri.andrea@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hv/channel.c | 174 ++++++++++++++++++++++++++++++++++++--
+ drivers/hv/hyperv_vmbus.h | 3 +-
+ drivers/hv/ring_buffer.c | 29 ++++++-
+ include/linux/hyperv.h | 22 +++++
+ 4 files changed, 219 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
+index f064fa6ef181a..a59ab2f3d68e1 100644
+--- a/drivers/hv/channel.c
++++ b/drivers/hv/channel.c
+@@ -503,6 +503,70 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
+ }
+ EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
+
++/**
++ * request_arr_init - Allocates memory for the requestor array. Each slot
++ * keeps track of the next available slot in the array. Initially, each
++ * slot points to the next one (as in a Linked List). The last slot
++ * does not point to anything, so its value is U64_MAX by default.
++ * @size The size of the array
++ */
++static u64 *request_arr_init(u32 size)
++{
++ int i;
++ u64 *req_arr;
++
++ req_arr = kcalloc(size, sizeof(u64), GFP_KERNEL);
++ if (!req_arr)
++ return NULL;
++
++ for (i = 0; i < size - 1; i++)
++ req_arr[i] = i + 1;
++
++ /* Last slot (no more available slots) */
++ req_arr[i] = U64_MAX;
++
++ return req_arr;
++}
++
++/*
++ * vmbus_alloc_requestor - Initializes @rqstor's fields.
++ * Index 0 is the first free slot
++ * @size: Size of the requestor array
++ */
++static int vmbus_alloc_requestor(struct vmbus_requestor *rqstor, u32 size)
++{
++ u64 *rqst_arr;
++ unsigned long *bitmap;
++
++ rqst_arr = request_arr_init(size);
++ if (!rqst_arr)
++ return -ENOMEM;
++
++ bitmap = bitmap_zalloc(size, GFP_KERNEL);
++ if (!bitmap) {
++ kfree(rqst_arr);
++ return -ENOMEM;
++ }
++
++ rqstor->req_arr = rqst_arr;
++ rqstor->req_bitmap = bitmap;
++ rqstor->size = size;
++ rqstor->next_request_id = 0;
++ spin_lock_init(&rqstor->req_lock);
++
++ return 0;
++}
++
++/*
++ * vmbus_free_requestor - Frees memory allocated for @rqstor
++ * @rqstor: Pointer to the requestor struct
++ */
++static void vmbus_free_requestor(struct vmbus_requestor *rqstor)
++{
++ kfree(rqstor->req_arr);
++ bitmap_free(rqstor->req_bitmap);
++}
++
+ static int __vmbus_open(struct vmbus_channel *newchannel,
+ void *userdata, u32 userdatalen,
+ void (*onchannelcallback)(void *context), void *context)
+@@ -523,6 +587,12 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
+ if (newchannel->state != CHANNEL_OPEN_STATE)
+ return -EINVAL;
+
++ /* Create and init requestor */
++ if (newchannel->rqstor_size) {
++ if (vmbus_alloc_requestor(&newchannel->requestor, newchannel->rqstor_size))
++ return -ENOMEM;
++ }
++
+ newchannel->state = CHANNEL_OPENING_STATE;
+ newchannel->onchannel_callback = onchannelcallback;
+ newchannel->channel_callback_context = context;
+@@ -626,6 +696,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
+ error_clean_ring:
+ hv_ringbuffer_cleanup(&newchannel->outbound);
+ hv_ringbuffer_cleanup(&newchannel->inbound);
++ vmbus_free_requestor(&newchannel->requestor);
+ newchannel->state = CHANNEL_OPEN_STATE;
+ return err;
+ }
+@@ -808,6 +879,9 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
+ channel->ringbuffer_gpadlhandle = 0;
+ }
+
++ if (!ret)
++ vmbus_free_requestor(&channel->requestor);
++
+ return ret;
+ }
+
+@@ -888,7 +962,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+ /* in 8-bytes granularity */
+ desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
+ desc.len8 = (u16)(packetlen_aligned >> 3);
+- desc.trans_id = requestid;
++ desc.trans_id = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
+
+ bufferlist[0].iov_base = &desc;
+ bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
+@@ -897,7 +971,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+ bufferlist[2].iov_base = &aligned_data;
+ bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+
+- return hv_ringbuffer_write(channel, bufferlist, num_vecs);
++ return hv_ringbuffer_write(channel, bufferlist, num_vecs, requestid);
+ }
+ EXPORT_SYMBOL(vmbus_sendpacket);
+
+@@ -939,7 +1013,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+ desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+ desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
+ desc.length8 = (u16)(packetlen_aligned >> 3);
+- desc.transactionid = requestid;
++ desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
+ desc.reserved = 0;
+ desc.rangecount = pagecount;
+
+@@ -956,7 +1030,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+ bufferlist[2].iov_base = &aligned_data;
+ bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+
+- return hv_ringbuffer_write(channel, bufferlist, 3);
++ return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
+ }
+ EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
+
+@@ -983,7 +1057,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
+ desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+ desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
+ desc->length8 = (u16)(packetlen_aligned >> 3);
+- desc->transactionid = requestid;
++ desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */
+ desc->reserved = 0;
+ desc->rangecount = 1;
+
+@@ -994,7 +1068,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
+ bufferlist[2].iov_base = &aligned_data;
+ bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+
+- return hv_ringbuffer_write(channel, bufferlist, 3);
++ return hv_ringbuffer_write(channel, bufferlist, 3, requestid);
+ }
+ EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
+
+@@ -1042,3 +1116,91 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
+ buffer_actual_len, requestid, true);
+ }
+ EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
++
++/*
++ * vmbus_next_request_id - Returns a new request id. It is also
++ * the index at which the guest memory address is stored.
++ * Uses a spin lock to avoid race conditions.
++ * @rqstor: Pointer to the requestor struct
++ * @rqst_addr: Guest memory address to be stored in the array
++ */
++u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr)
++{
++ unsigned long flags;
++ u64 current_id;
++ const struct vmbus_channel *channel =
++ container_of(rqstor, const struct vmbus_channel, requestor);
++
++ /* Check rqstor has been initialized */
++ if (!channel->rqstor_size)
++ return VMBUS_NO_RQSTOR;
++
++ spin_lock_irqsave(&rqstor->req_lock, flags);
++ current_id = rqstor->next_request_id;
++
++ /* Requestor array is full */
++ if (current_id >= rqstor->size) {
++ spin_unlock_irqrestore(&rqstor->req_lock, flags);
++ return VMBUS_RQST_ERROR;
++ }
++
++ rqstor->next_request_id = rqstor->req_arr[current_id];
++ rqstor->req_arr[current_id] = rqst_addr;
++
++ /* The already held spin lock provides atomicity */
++ bitmap_set(rqstor->req_bitmap, current_id, 1);
++
++ spin_unlock_irqrestore(&rqstor->req_lock, flags);
++
++ /*
++ * Cannot return an ID of 0, which is reserved for an unsolicited
++ * message from Hyper-V.
++ */
++ return current_id + 1;
++}
++EXPORT_SYMBOL_GPL(vmbus_next_request_id);
++
++/*
++ * vmbus_request_addr - Returns the memory address stored at @trans_id
++ * in @rqstor. Uses a spin lock to avoid race conditions.
++ * @rqstor: Pointer to the requestor struct
++ * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's
++ * next request id.
++ */
++u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id)
++{
++ unsigned long flags;
++ u64 req_addr;
++ const struct vmbus_channel *channel =
++ container_of(rqstor, const struct vmbus_channel, requestor);
++
++ /* Check rqstor has been initialized */
++ if (!channel->rqstor_size)
++ return VMBUS_NO_RQSTOR;
++
++ /* Hyper-V can send an unsolicited message with ID of 0 */
++ if (!trans_id)
++ return trans_id;
++
++ spin_lock_irqsave(&rqstor->req_lock, flags);
++
++ /* Data corresponding to trans_id is stored at trans_id - 1 */
++ trans_id--;
++
++ /* Invalid trans_id */
++ if (trans_id >= rqstor->size || !test_bit(trans_id, rqstor->req_bitmap)) {
++ spin_unlock_irqrestore(&rqstor->req_lock, flags);
++ return VMBUS_RQST_ERROR;
++ }
++
++ req_addr = rqstor->req_arr[trans_id];
++ rqstor->req_arr[trans_id] = rqstor->next_request_id;
++ rqstor->next_request_id = trans_id;
++
++ /* The already held spin lock provides atomicity */
++ bitmap_clear(rqstor->req_bitmap, trans_id, 1);
++
++ spin_unlock_irqrestore(&rqstor->req_lock, flags);
++ return req_addr;
++}
++EXPORT_SYMBOL_GPL(vmbus_request_addr);
+diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
+index 7845fa5de79e9..601660bca5d47 100644
+--- a/drivers/hv/hyperv_vmbus.h
++++ b/drivers/hv/hyperv_vmbus.h
+@@ -180,7 +180,8 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
+
+ int hv_ringbuffer_write(struct vmbus_channel *channel,
+- const struct kvec *kv_list, u32 kv_count);
++ const struct kvec *kv_list, u32 kv_count,
++ u64 requestid);
+
+ int hv_ringbuffer_read(struct vmbus_channel *channel,
+ void *buffer, u32 buflen, u32 *buffer_actual_len,
+diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
+index 7ed6fad3fa8ff..a0ba6ac487368 100644
+--- a/drivers/hv/ring_buffer.c
++++ b/drivers/hv/ring_buffer.c
+@@ -261,7 +261,8 @@ EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy);
+
+ /* Write to the ring buffer. */
+ int hv_ringbuffer_write(struct vmbus_channel *channel,
+- const struct kvec *kv_list, u32 kv_count)
++ const struct kvec *kv_list, u32 kv_count,
++ u64 requestid)
+ {
+ int i;
+ u32 bytes_avail_towrite;
+@@ -271,6 +272,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+ u64 prev_indices;
+ unsigned long flags;
+ struct hv_ring_buffer_info *outring_info = &channel->outbound;
++ struct vmpacket_descriptor *desc = kv_list[0].iov_base;
++ u64 rqst_id = VMBUS_NO_RQSTOR;
+
+ if (channel->rescind)
+ return -ENODEV;
+@@ -313,6 +316,23 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+ kv_list[i].iov_len);
+ }
+
++ /*
++ * Allocate the request ID after the data has been copied into the
++ * ring buffer. Once this request ID is allocated, the completion
++ * path could find the data and free it.
++ */
++
++ if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
++ rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
++ if (rqst_id == VMBUS_RQST_ERROR) {
++ spin_unlock_irqrestore(&outring_info->ring_lock, flags);
++ pr_err("No request id available\n");
++ return -EAGAIN;
++ }
++ }
++ desc = hv_get_ring_buffer(outring_info) + old_write;
++ desc->trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? requestid : rqst_id;
++
+ /* Set previous packet start */
+ prev_indices = hv_get_ring_bufferindices(outring_info);
+
+@@ -332,8 +352,13 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+
+ hv_signal_on_write(old_write, channel);
+
+- if (channel->rescind)
++ if (channel->rescind) {
++ if (rqst_id != VMBUS_NO_RQSTOR) {
++ /* Reclaim request ID to avoid leak of IDs */
++ vmbus_request_addr(&channel->requestor, rqst_id);
++ }
+ return -ENODEV;
++ }
+
+ return 0;
+ }
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index eada4d8d65879..4cb65a79d92f6 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -764,6 +764,22 @@ enum vmbus_device_type {
+ HV_UNKNOWN,
+ };
+
++/*
++ * Provides request ids for VMBus. Encapsulates guest memory
++ * addresses and stores the next available slot in req_arr
++ * to generate new ids in constant time.
++ */
++struct vmbus_requestor {
++ u64 *req_arr;
++ unsigned long *req_bitmap; /* is a given slot available? */
++ u32 size;
++ u64 next_request_id;
++ spinlock_t req_lock; /* provides atomicity */
++};
++
++#define VMBUS_NO_RQSTOR U64_MAX
++#define VMBUS_RQST_ERROR (U64_MAX - 1)
++
+ struct vmbus_device {
+ u16 dev_type;
+ guid_t guid;
+@@ -988,8 +1004,14 @@ struct vmbus_channel {
+ u32 fuzz_testing_interrupt_delay;
+ u32 fuzz_testing_message_delay;
+
++ /* request/transaction ids for VMBus */
++ struct vmbus_requestor requestor;
++ u32 rqstor_size;
+ };
+
++u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
++u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id);
++
+ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
+ {
+ return !!(c->offermsg.offer.chn_flags &
+--
+2.43.0
+
--- /dev/null
+From fa8553d3a820c2eb5fe34180444ba299b6087594 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Mar 2021 20:13:48 +0100
+Subject: Drivers: hv: vmbus: Drop error message when 'No request id available'
+
+From: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+
+[ Upstream commit 0c85c54bf7faeb80c6b76901ed77d93acef0207d ]
+
+Running out of request IDs on a channel essentially produces the same
+effect as running out of space in the ring buffer, in that -EAGAIN is
+returned. The error message in hv_ringbuffer_write() should either be
+dropped (since we don't output a message when the ring buffer is full)
+or be made conditional/debug-only.
+
+Suggested-by: Michael Kelley <mikelley@microsoft.com>
+Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Fixes: e8b7db38449ac ("Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening")
+Link: https://lore.kernel.org/r/20210301191348.196485-1-parri.andrea@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hv/ring_buffer.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
+index a0ba6ac487368..a49cc69c56af0 100644
+--- a/drivers/hv/ring_buffer.c
++++ b/drivers/hv/ring_buffer.c
+@@ -326,7 +326,6 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
+ rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
+ if (rqst_id == VMBUS_RQST_ERROR) {
+ spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+- pr_err("No request id available\n");
+ return -EAGAIN;
+ }
+ }
+--
+2.43.0
+
--- /dev/null
+From 9cf15c9f4363b09b25014512da81a180f3e5e6c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Aug 2022 10:43:42 +0200
+Subject: exit: Fix typo in comment: s/sub-theads/sub-threads
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit dcca34754a3f5290406403b8066e3b15dda9f4bf ]
+
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Stable-dep-of: c1be35a16b2f ("exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/exit.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index bacdaf980933b..c41bdc0a7f06b 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1105,7 +1105,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ * p->signal fields because the whole thread group is dead
+ * and nobody can change them.
+ *
+- * psig->stats_lock also protects us from our sub-theads
++ * psig->stats_lock also protects us from our sub-threads
+ * which can reap other children at the same time. Until
+ * we change k_getrusage()-like users to rely on this lock
+ * we have to take ->siglock as well.
+--
+2.43.0
+
--- /dev/null
+From 2b40d3238f9ea8a9cf9a61ce0cbec94ef5d2ced8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 16:34:00 +0100
+Subject: exit: wait_task_zombie: kill the no longer necessary
+ spin_lock_irq(siglock)
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 ]
+
+After the recent changes nobody uses siglock to read the values protected
+by stats_lock, we can kill spin_lock_irq(&current->sighand->siglock) and
+update the comment.
+
+With this patch only __exit_signal() and thread_group_start_cputime() take
+stats_lock under siglock.
+
+Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/exit.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index c41bdc0a7f06b..8f25abdd5fa7d 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1106,17 +1106,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ * and nobody can change them.
+ *
+ * psig->stats_lock also protects us from our sub-threads
+- * which can reap other children at the same time. Until
+- * we change k_getrusage()-like users to rely on this lock
+- * we have to take ->siglock as well.
++ * which can reap other children at the same time.
+ *
+ * We use thread_group_cputime_adjusted() to get times for
+ * the thread group, which consolidates times for all threads
+ * in the group including the group leader.
+ */
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+- spin_lock_irq(&current->sighand->siglock);
+- write_seqlock(&psig->stats_lock);
++ write_seqlock_irq(&psig->stats_lock);
+ psig->cutime += tgutime + sig->cutime;
+ psig->cstime += tgstime + sig->cstime;
+ psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
+@@ -1139,8 +1136,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ psig->cmaxrss = maxrss;
+ task_io_accounting_add(&psig->ioac, &p->ioac);
+ task_io_accounting_add(&psig->ioac, &sig->ioac);
+- write_sequnlock(&psig->stats_lock);
+- spin_unlock_irq(&current->sighand->siglock);
++ write_sequnlock_irq(&psig->stats_lock);
+ }
+
+ if (wo->wo_rusage)
+--
+2.43.0
+
--- /dev/null
+From b26ac4d68bbe68214213c82814d4c7acd12c6a64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 09:58:01 +0800
+Subject: ext4: convert to exclusive lock while inserting delalloc extents
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit acf795dc161f3cf481db20f05db4250714e375e5 ]
+
+ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem
+when inserting delalloc extents, it could be raced by another querying
+path of ext4_map_blocks() without i_rwsem, .e.g buffered read path.
+Suppose we buffered read a file containing just a hole, and without any
+cached extents tree, then it is raced by another delayed buffered write
+to the same area or the near area belongs to the same hole, and the new
+delalloc extent could be overwritten to a hole extent.
+
+ pread() pwrite()
+ filemap_read_folio()
+ ext4_mpage_readpages()
+ ext4_map_blocks()
+ down_read(i_data_sem)
+ ext4_ext_determine_hole()
+ //find hole
+ ext4_ext_put_gap_in_cache()
+ ext4_es_find_extent_range()
+ //no delalloc extent
+ ext4_da_map_blocks()
+ down_read(i_data_sem)
+ ext4_insert_delayed_block()
+ //insert delalloc extent
+ ext4_es_insert_extent()
+ //overwrite delalloc extent to hole
+
+This race could lead to inconsistent delalloc extents tree and
+incorrect reserved space counter. Fix this by converting to hold
+i_data_sem in exclusive mode when adding a new delalloc extent in
+ext4_da_map_blocks().
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Suggested-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 25 +++++++++++--------------
+ 1 file changed, 11 insertions(+), 14 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 64162470a7e6c..8b48ed351c4b9 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1728,10 +1728,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+
+ /* Lookup extent status tree firstly */
+ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+- if (ext4_es_is_hole(&es)) {
+- down_read(&EXT4_I(inode)->i_data_sem);
++ if (ext4_es_is_hole(&es))
+ goto add_delayed;
+- }
+
+ /*
+ * Delayed extent could be allocated by fallocate.
+@@ -1773,8 +1771,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+ else
+ retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+- if (retval < 0)
+- goto out_unlock;
++ if (retval < 0) {
++ up_read(&EXT4_I(inode)->i_data_sem);
++ return retval;
++ }
+ if (retval > 0) {
+ unsigned int status;
+
+@@ -1790,24 +1790,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
+- goto out_unlock;
++ up_read(&EXT4_I(inode)->i_data_sem);
++ return retval;
+ }
++ up_read(&EXT4_I(inode)->i_data_sem);
+
+ add_delayed:
+- /*
+- * XXX: __block_prepare_write() unmaps passed block,
+- * is it OK?
+- */
++ down_write(&EXT4_I(inode)->i_data_sem);
+ retval = ext4_insert_delayed_block(inode, map->m_lblk);
++ up_write(&EXT4_I(inode)->i_data_sem);
+ if (retval)
+- goto out_unlock;
++ return retval;
+
+ map_bh(bh, inode->i_sb, invalid_block);
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
+-
+-out_unlock:
+- up_read((&EXT4_I(inode)->i_data_sem));
+ return retval;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 5ad8f4e0d1ca2d96d546f15fefdd9659c3535249 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Apr 2023 11:38:45 +0800
+Subject: ext4: make ext4_es_insert_extent() return void
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 6c120399cde6b1b5cf65ce403765c579fb3d3e50 ]
+
+Now ext4_es_insert_extent() never return error, so make it return void.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230424033846.4732-12-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/extents.c | 5 +++--
+ fs/ext4/extents_status.c | 14 ++++++--------
+ fs/ext4/extents_status.h | 6 +++---
+ fs/ext4/inode.c | 21 ++++++---------------
+ 4 files changed, 18 insertions(+), 28 deletions(-)
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 68aa8760cb465..9e12592727914 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -3107,8 +3107,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
+ if (ee_len == 0)
+ return 0;
+
+- return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+- EXTENT_STATUS_WRITTEN);
++ ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
++ EXTENT_STATUS_WRITTEN);
++ return 0;
+ }
+
+ /* FIXME!! we need to try to merge to left or right after zero-out */
+diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
+index cccbdfd49a86b..f37e62546745b 100644
+--- a/fs/ext4/extents_status.c
++++ b/fs/ext4/extents_status.c
+@@ -846,12 +846,10 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
+ /*
+ * ext4_es_insert_extent() adds information to an inode's extent
+ * status tree.
+- *
+- * Return 0 on success, error code on failure.
+ */
+-int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+- ext4_lblk_t len, ext4_fsblk_t pblk,
+- unsigned int status)
++void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
++ ext4_lblk_t len, ext4_fsblk_t pblk,
++ unsigned int status)
+ {
+ struct extent_status newes;
+ ext4_lblk_t end = lblk + len - 1;
+@@ -863,13 +861,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+ bool revise_pending = false;
+
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+- return 0;
++ return;
+
+ es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
+ lblk, len, pblk, status, inode->i_ino);
+
+ if (!len)
+- return 0;
++ return;
+
+ BUG_ON(end < lblk);
+
+@@ -938,7 +936,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+ goto retry;
+
+ ext4_es_print_tree(inode);
+- return 0;
++ return;
+ }
+
+ /*
+diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
+index 4ec30a7982605..481ec4381bee6 100644
+--- a/fs/ext4/extents_status.h
++++ b/fs/ext4/extents_status.h
+@@ -127,9 +127,9 @@ extern int __init ext4_init_es(void);
+ extern void ext4_exit_es(void);
+ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
+
+-extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+- ext4_lblk_t len, ext4_fsblk_t pblk,
+- unsigned int status);
++extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
++ ext4_lblk_t len, ext4_fsblk_t pblk,
++ unsigned int status);
+ extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, ext4_fsblk_t pblk,
+ unsigned int status);
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 365c4d3a434ab..ab2a7f9902887 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -589,10 +589,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
+ status |= EXTENT_STATUS_DELAYED;
+- ret = ext4_es_insert_extent(inode, map->m_lblk,
+- map->m_len, map->m_pblk, status);
+- if (ret < 0)
+- retval = ret;
++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++ map->m_pblk, status);
+ }
+ up_read((&EXT4_I(inode)->i_data_sem));
+
+@@ -701,12 +699,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
+ status |= EXTENT_STATUS_DELAYED;
+- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+- map->m_pblk, status);
+- if (ret < 0) {
+- retval = ret;
+- goto out_sem;
+- }
++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++ map->m_pblk, status);
+ }
+
+ out_sem:
+@@ -1800,7 +1794,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
+ } else if (retval > 0) {
+- int ret;
+ unsigned int status;
+
+ if (unlikely(retval != map->m_len)) {
+@@ -1813,10 +1806,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+
+ status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+- map->m_pblk, status);
+- if (ret != 0)
+- retval = ret;
++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++ map->m_pblk, status);
+ }
+
+ out_unlock:
+--
+2.43.0
+
--- /dev/null
+From b636ced6288f229dff9b524a82aa30d7cfa5e8f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 09:58:00 +0800
+Subject: ext4: refactor ext4_da_map_blocks()
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 ]
+
+Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary
+parameters and branches, no logic changes.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 39 +++++++++++++++++----------------------
+ 1 file changed, 17 insertions(+), 22 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index ab2a7f9902887..64162470a7e6c 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1729,7 +1729,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ /* Lookup extent status tree firstly */
+ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+ if (ext4_es_is_hole(&es)) {
+- retval = 0;
+ down_read(&EXT4_I(inode)->i_data_sem);
+ goto add_delayed;
+ }
+@@ -1774,26 +1773,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+ else
+ retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+-
+-add_delayed:
+- if (retval == 0) {
+- int ret;
+-
+- /*
+- * XXX: __block_prepare_write() unmaps passed block,
+- * is it OK?
+- */
+-
+- ret = ext4_insert_delayed_block(inode, map->m_lblk);
+- if (ret != 0) {
+- retval = ret;
+- goto out_unlock;
+- }
+-
+- map_bh(bh, inode->i_sb, invalid_block);
+- set_buffer_new(bh);
+- set_buffer_delay(bh);
+- } else if (retval > 0) {
++ if (retval < 0)
++ goto out_unlock;
++ if (retval > 0) {
+ unsigned int status;
+
+ if (unlikely(retval != map->m_len)) {
+@@ -1808,11 +1790,24 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
++ goto out_unlock;
+ }
+
++add_delayed:
++ /*
++ * XXX: __block_prepare_write() unmaps passed block,
++ * is it OK?
++ */
++ retval = ext4_insert_delayed_block(inode, map->m_lblk);
++ if (retval)
++ goto out_unlock;
++
++ map_bh(bh, inode->i_sb, invalid_block);
++ set_buffer_new(bh);
++ set_buffer_delay(bh);
++
+ out_unlock:
+ up_read((&EXT4_I(inode)->i_data_sem));
+-
+ return retval;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 2278229f5e1d78b6ef0a2938f41ed309435ff40d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:25:54 +0200
+Subject: getrusage: add the "signal_struct *sig" local variable
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ]
+
+No functional changes, cleanup/preparation.
+
+Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 37 +++++++++++++++++++------------------
+ 1 file changed, 19 insertions(+), 18 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index bff14910b9262..8a53d858d7375 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1737,6 +1737,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ unsigned long flags;
+ u64 tgutime, tgstime, utime, stime;
+ unsigned long maxrss = 0;
++ struct signal_struct *sig = p->signal;
+
+ memset((char *)r, 0, sizeof (*r));
+ utime = stime = 0;
+@@ -1744,7 +1745,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ if (who == RUSAGE_THREAD) {
+ task_cputime_adjusted(current, &utime, &stime);
+ accumulate_thread_rusage(p, r);
+- maxrss = p->signal->maxrss;
++ maxrss = sig->maxrss;
+ goto out;
+ }
+
+@@ -1754,15 +1755,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ switch (who) {
+ case RUSAGE_BOTH:
+ case RUSAGE_CHILDREN:
+- utime = p->signal->cutime;
+- stime = p->signal->cstime;
+- r->ru_nvcsw = p->signal->cnvcsw;
+- r->ru_nivcsw = p->signal->cnivcsw;
+- r->ru_minflt = p->signal->cmin_flt;
+- r->ru_majflt = p->signal->cmaj_flt;
+- r->ru_inblock = p->signal->cinblock;
+- r->ru_oublock = p->signal->coublock;
+- maxrss = p->signal->cmaxrss;
++ utime = sig->cutime;
++ stime = sig->cstime;
++ r->ru_nvcsw = sig->cnvcsw;
++ r->ru_nivcsw = sig->cnivcsw;
++ r->ru_minflt = sig->cmin_flt;
++ r->ru_majflt = sig->cmaj_flt;
++ r->ru_inblock = sig->cinblock;
++ r->ru_oublock = sig->coublock;
++ maxrss = sig->cmaxrss;
+
+ if (who == RUSAGE_CHILDREN)
+ break;
+@@ -1772,14 +1773,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+ utime += tgutime;
+ stime += tgstime;
+- r->ru_nvcsw += p->signal->nvcsw;
+- r->ru_nivcsw += p->signal->nivcsw;
+- r->ru_minflt += p->signal->min_flt;
+- r->ru_majflt += p->signal->maj_flt;
+- r->ru_inblock += p->signal->inblock;
+- r->ru_oublock += p->signal->oublock;
+- if (maxrss < p->signal->maxrss)
+- maxrss = p->signal->maxrss;
++ r->ru_nvcsw += sig->nvcsw;
++ r->ru_nivcsw += sig->nivcsw;
++ r->ru_minflt += sig->min_flt;
++ r->ru_majflt += sig->maj_flt;
++ r->ru_inblock += sig->inblock;
++ r->ru_oublock += sig->oublock;
++ if (maxrss < sig->maxrss)
++ maxrss = sig->maxrss;
+ t = p;
+ do {
+ accumulate_thread_rusage(t, r);
+--
+2.43.0
+
--- /dev/null
+From fdc881c5376b0354fa1c63267602f1fe09e16092 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:50 +0100
+Subject: getrusage: move thread_group_cputime_adjusted() outside of
+ lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ]
+
+Patch series "getrusage: use sig->stats_lock", v2.
+
+This patch (of 2):
+
+thread_group_cputime() does its own locking, we can safely shift
+thread_group_cputime_adjusted() which does another for_each_thread loop
+outside of ->siglock protected section.
+
+This is also preparation for the next patch which changes getrusage() to
+use stats_lock instead of siglock, thread_group_cputime() takes the same
+lock. With the current implementation recursive read_seqbegin_or_lock()
+is fine, thread_group_cputime() can't enter the slow mode if the caller
+holds stats_lock, yet this looks more safe and better performance-wise.
+
+Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com
+Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 8a53d858d7375..26c8783bd0757 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1736,17 +1736,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ struct task_struct *t;
+ unsigned long flags;
+ u64 tgutime, tgstime, utime, stime;
+- unsigned long maxrss = 0;
++ unsigned long maxrss;
++ struct mm_struct *mm;
+ struct signal_struct *sig = p->signal;
+
+- memset((char *)r, 0, sizeof (*r));
++ memset(r, 0, sizeof(*r));
+ utime = stime = 0;
++ maxrss = 0;
+
+ if (who == RUSAGE_THREAD) {
+ task_cputime_adjusted(current, &utime, &stime);
+ accumulate_thread_rusage(p, r);
+ maxrss = sig->maxrss;
+- goto out;
++ goto out_thread;
+ }
+
+ if (!lock_task_sighand(p, &flags))
+@@ -1770,9 +1772,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ fallthrough;
+
+ case RUSAGE_SELF:
+- thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+- utime += tgutime;
+- stime += tgstime;
+ r->ru_nvcsw += sig->nvcsw;
+ r->ru_nivcsw += sig->nivcsw;
+ r->ru_minflt += sig->min_flt;
+@@ -1792,19 +1791,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ }
+ unlock_task_sighand(p, &flags);
+
+-out:
+- r->ru_utime = ns_to_kernel_old_timeval(utime);
+- r->ru_stime = ns_to_kernel_old_timeval(stime);
++ if (who == RUSAGE_CHILDREN)
++ goto out_children;
+
+- if (who != RUSAGE_CHILDREN) {
+- struct mm_struct *mm = get_task_mm(p);
++ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
++ utime += tgutime;
++ stime += tgstime;
+
+- if (mm) {
+- setmax_mm_hiwater_rss(&maxrss, mm);
+- mmput(mm);
+- }
++out_thread:
++ mm = get_task_mm(p);
++ if (mm) {
++ setmax_mm_hiwater_rss(&maxrss, mm);
++ mmput(mm);
+ }
++
++out_children:
+ r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
++ r->ru_utime = ns_to_kernel_old_timeval(utime);
++ r->ru_stime = ns_to_kernel_old_timeval(stime);
+ }
+
+ SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
+--
+2.43.0
+
--- /dev/null
+From d4552b3a4300a1e6456d4a94e2501a8ab77c6b12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Sep 2023 19:26:29 +0200
+Subject: getrusage: use __for_each_thread()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ]
+
+do/while_each_thread should be avoided when possible.
+
+Plus this change allows to avoid lock_task_sighand(), we can use rcu
+and/or sig->stats_lock instead.
+
+Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 26c8783bd0757..f1ae8fa627145 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1780,10 +1780,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ r->ru_oublock += sig->oublock;
+ if (maxrss < sig->maxrss)
+ maxrss = sig->maxrss;
+- t = p;
+- do {
++ __for_each_thread(sig, t)
+ accumulate_thread_rusage(t, r);
+- } while_each_thread(p, t);
+ break;
+
+ default:
+--
+2.43.0
+
--- /dev/null
+From 130977df9b7119b933e5ca62435814492925b7dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 16:50:53 +0100
+Subject: getrusage: use sig->stats_lock rather than lock_task_sighand()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ]
+
+lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
+getrusage() at the same time and the process has NR_THREADS, spin_lock_irq
+will spin with irqs disabled O(NR_CPUS * NR_THREADS) time.
+
+Change getrusage() to use sig->stats_lock, it was specifically designed
+for this type of use. This way it runs lockless in the likely case.
+
+TODO:
+ - Change do_task_stat() to use sig->stats_lock too, then we can
+ remove spin_lock_irq(siglock) in wait_task_zombie().
+
+ - Turn sig->stats_lock into seqcount_rwlock_t, this way the
+ readers in the slow mode won't exclude each other. See
+ https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/
+
+ - stats_lock has to disable irqs because ->siglock can be taken
+ in irq context, it would be very nice to change __exit_signal()
+ to avoid the siglock->stats_lock dependency.
+
+Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Dylan Hatch <dylanbhatch@google.com>
+Tested-by: Dylan Hatch <dylanbhatch@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sys.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sys.c b/kernel/sys.c
+index f1ae8fa627145..efc213ae4c5ad 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1739,7 +1739,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ unsigned long maxrss;
+ struct mm_struct *mm;
+ struct signal_struct *sig = p->signal;
++ unsigned int seq = 0;
+
++retry:
+ memset(r, 0, sizeof(*r));
+ utime = stime = 0;
+ maxrss = 0;
+@@ -1751,8 +1753,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ goto out_thread;
+ }
+
+- if (!lock_task_sighand(p, &flags))
+- return;
++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+ switch (who) {
+ case RUSAGE_BOTH:
+@@ -1780,14 +1781,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
+ r->ru_oublock += sig->oublock;
+ if (maxrss < sig->maxrss)
+ maxrss = sig->maxrss;
++
++ rcu_read_lock();
+ __for_each_thread(sig, t)
+ accumulate_thread_rusage(t, r);
++ rcu_read_unlock();
++
+ break;
+
+ default:
+ BUG();
+ }
+- unlock_task_sighand(p, &flags);
++
++ if (need_seqretry(&sig->stats_lock, seq)) {
++ seq = 1;
++ goto retry;
++ }
++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+ if (who == RUSAGE_CHILDREN)
+ goto out_children;
+--
+2.43.0
+
--- /dev/null
+From d716ca09e4a57e4809e656010300dcdb7f40d4cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Apr 2021 18:12:35 -0700
+Subject: hv_netvsc: Make netvsc/VF binding check both MAC and serial number
+
+From: Dexuan Cui <decui@microsoft.com>
+
+[ Upstream commit 64ff412ad41fe3a5bf759ff4844dc1382176485c ]
+
+Currently the netvsc/VF binding logic only checks the PCI serial number.
+
+The Microsoft Azure Network Adapter (MANA) supports multiple net_device
+interfaces (each such interface is called a "vPort", and has its unique
+MAC address) which are backed by the same VF PCI device, so the binding
+logic should check both the MAC address and the PCI serial number.
+
+The change should not break any other existing VF drivers, because
+Hyper-V NIC SR-IOV implementation requires the netvsc network
+interface and the VF network interface have the same MAC address.
+
+Co-developed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Co-developed-by: Shachar Raindel <shacharr@microsoft.com>
+Signed-off-by: Shachar Raindel <shacharr@microsoft.com>
+Acked-by: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 057b1a9dde153..9ec1633b89b48 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2310,8 +2310,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
+ if (!ndev_ctx->vf_alloc)
+ continue;
+
+- if (ndev_ctx->vf_serial == serial)
+- return hv_get_drvdata(ndev_ctx->device_ctx);
++ if (ndev_ctx->vf_serial != serial)
++ continue;
++
++ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
++ if (ndev->addr_len != vf_netdev->addr_len ||
++ memcmp(ndev->perm_addr, vf_netdev->perm_addr,
++ ndev->addr_len) != 0)
++ continue;
++
++ return ndev;
++
+ }
+
+ /* Fallback path to check synthetic vf with help of mac addr.
+--
+2.43.0
+
--- /dev/null
+From 08cd6ba213b3786688214a60972f77235cc64410 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jan 2021 16:53:43 -0800
+Subject: hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove
+
+From: Long Li <longli@microsoft.com>
+
+[ Upstream commit 34b06a2eee44d469f2e2c013a83e6dac3aff6411 ]
+
+On VF hot remove, NETDEV_GOING_DOWN is sent to notify the VF is about to
+go down. At this time, the VF is still sending/receiving traffic and we
+request the VSP to switch datapath.
+
+On completion, the datapath is switched to synthetic and we can proceed
+with VF hot remove.
+
+Signed-off-by: Long Li <longli@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 57a5ec098e7e0..057b1a9dde153 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2411,12 +2411,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
+ * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network
+ * interface, there is only the CHANGE event and no UP or DOWN event.
+ */
+-static int netvsc_vf_changed(struct net_device *vf_netdev)
++static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
+ {
+ struct net_device_context *net_device_ctx;
+ struct netvsc_device *netvsc_dev;
+ struct net_device *ndev;
+- bool vf_is_up = netif_running(vf_netdev);
++ bool vf_is_up = false;
++
++ if (event != NETDEV_GOING_DOWN)
++ vf_is_up = netif_running(vf_netdev);
+
+ ndev = get_netvsc_byref(vf_netdev);
+ if (!ndev)
+@@ -2762,7 +2765,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+- return netvsc_vf_changed(event_dev);
++ case NETDEV_GOING_DOWN:
++ return netvsc_vf_changed(event_dev, event);
+ default:
+ return NOTIFY_DONE;
+ }
+--
+2.43.0
+
--- /dev/null
+From 7bd309d035dcf27d9ac09dda05158de11370e3c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Feb 2024 20:40:38 -0800
+Subject: hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed
+
+From: Shradha Gupta <shradhagupta@linux.microsoft.com>
+
+[ Upstream commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 ]
+
+If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER
+handler cannot perform VF register successfully as the register call
+is received before netvsc_probe is finished. This is because we
+register register_netdevice_notifier() very early( even before
+vmbus_driver_register()).
+To fix this, we try to register each such matching VF( if it is visible
+as a netdevice) at the end of netvsc_probe.
+
+Cc: stable@vger.kernel.org
+Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register")
+Suggested-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++--------
+ 1 file changed, 62 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index e8efcc6a0b05a..0fc0f9cb3f34b 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -44,6 +44,10 @@
+ #define LINKCHANGE_INT (2 * HZ)
+ #define VF_TAKEOVER_INT (HZ / 10)
+
++/* Macros to define the context of vf registration */
++#define VF_REG_IN_PROBE 1
++#define VF_REG_IN_NOTIFIER 2
++
+ static unsigned int ring_size __ro_after_init = 128;
+ module_param(ring_size, uint, 0444);
+ MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+@@ -2194,7 +2198,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
+ }
+
+ static int netvsc_vf_join(struct net_device *vf_netdev,
+- struct net_device *ndev)
++ struct net_device *ndev, int context)
+ {
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ int ret;
+@@ -2217,7 +2221,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
+ goto upper_link_failed;
+ }
+
+- schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
++ /* If this registration is called from probe context vf_takeover
++ * is taken care of later in probe itself.
++ */
++ if (context == VF_REG_IN_NOTIFIER)
++ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+
+ call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+
+@@ -2355,7 +2363,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev)
+ return NOTIFY_DONE;
+ }
+
+-static int netvsc_register_vf(struct net_device *vf_netdev)
++static int netvsc_register_vf(struct net_device *vf_netdev, int context)
+ {
+ struct net_device_context *net_device_ctx;
+ struct netvsc_device *netvsc_dev;
+@@ -2395,7 +2403,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
+
+ netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
+
+- if (netvsc_vf_join(vf_netdev, ndev) != 0)
++ if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
+ return NOTIFY_DONE;
+
+ dev_hold(vf_netdev);
+@@ -2479,10 +2487,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
+ return NOTIFY_OK;
+ }
+
++static int check_dev_is_matching_vf(struct net_device *event_ndev)
++{
++ /* Skip NetVSC interfaces */
++ if (event_ndev->netdev_ops == &device_ops)
++ return -ENODEV;
++
++ /* Avoid non-Ethernet type devices */
++ if (event_ndev->type != ARPHRD_ETHER)
++ return -ENODEV;
++
++ /* Avoid Vlan dev with same MAC registering as VF */
++ if (is_vlan_dev(event_ndev))
++ return -ENODEV;
++
++ /* Avoid Bonding master dev with same MAC registering as VF */
++ if (netif_is_bond_master(event_ndev))
++ return -ENODEV;
++
++ return 0;
++}
++
+ static int netvsc_probe(struct hv_device *dev,
+ const struct hv_vmbus_device_id *dev_id)
+ {
+- struct net_device *net = NULL;
++ struct net_device *net = NULL, *vf_netdev;
+ struct net_device_context *net_device_ctx;
+ struct netvsc_device_info *device_info = NULL;
+ struct netvsc_device *nvdev;
+@@ -2590,6 +2619,30 @@ static int netvsc_probe(struct hv_device *dev,
+ }
+
+ list_add(&net_device_ctx->list, &netvsc_dev_list);
++
++ /* When the hv_netvsc driver is unloaded and reloaded, the
++ * NET_DEVICE_REGISTER for the vf device is replayed before probe
++ * is complete. This is because register_netdevice_notifier() gets
++ * registered before vmbus_driver_register() so that callback func
++ * is set before probe and we don't miss events like NETDEV_POST_INIT
++ * So, in this section we try to register the matching vf device that
++ * is present as a netdevice, knowing that its register call is not
++ * processed in the netvsc_netdev_notifier(as probing is progress and
++ * get_netvsc_byslot fails).
++ */
++ for_each_netdev(dev_net(net), vf_netdev) {
++ ret = check_dev_is_matching_vf(vf_netdev);
++ if (ret != 0)
++ continue;
++
++ if (net != get_netvsc_byslot(vf_netdev))
++ continue;
++
++ netvsc_prepare_bonding(vf_netdev);
++ netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
++ __netvsc_vf_setup(net, vf_netdev);
++ break;
++ }
+ rtnl_unlock();
+
+ netvsc_devinfo_put(device_info);
+@@ -2746,28 +2799,17 @@ static int netvsc_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+ {
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
++ int ret = 0;
+
+- /* Skip our own events */
+- if (event_dev->netdev_ops == &device_ops)
+- return NOTIFY_DONE;
+-
+- /* Avoid non-Ethernet type devices */
+- if (event_dev->type != ARPHRD_ETHER)
+- return NOTIFY_DONE;
+-
+- /* Avoid Vlan dev with same MAC registering as VF */
+- if (is_vlan_dev(event_dev))
+- return NOTIFY_DONE;
+-
+- /* Avoid Bonding master dev with same MAC registering as VF */
+- if (netif_is_bond_master(event_dev))
++ ret = check_dev_is_matching_vf(event_dev);
++ if (ret != 0)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_POST_INIT:
+ return netvsc_prepare_bonding(event_dev);
+ case NETDEV_REGISTER:
+- return netvsc_register_vf(event_dev);
++ return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
+ case NETDEV_UNREGISTER:
+ return netvsc_unregister_vf(event_dev);
+ case NETDEV_UP:
+--
+2.43.0
+
--- /dev/null
+From 0d6995f74ccf64cb2ddb909e952411877f54d143 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Oct 2021 13:03:28 +0900
+Subject: hv_netvsc: use netif_is_bond_master() instead of open code
+
+From: Juhee Kang <claudiajkang@gmail.com>
+
+[ Upstream commit c60882a4566a0a62dc3a40c85131103aad83dcb3 ]
+
+Use netif_is_bond_master() function instead of open code, which is
+((event_dev->priv_flags & IFF_BONDING) && (event_dev->flags & IFF_MASTER)).
+This patch doesn't change logic.
+
+Signed-off-by: Juhee Kang <claudiajkang@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 9ec1633b89b48..e8efcc6a0b05a 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2760,8 +2760,7 @@ static int netvsc_netdev_event(struct notifier_block *this,
+ return NOTIFY_DONE;
+
+ /* Avoid Bonding master dev with same MAC registering as VF */
+- if ((event_dev->priv_flags & IFF_BONDING) &&
+- (event_dev->flags & IFF_MASTER))
++ if (netif_is_bond_master(event_dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+--
+2.43.0
+
--- /dev/null
+From 49485861d465362468d21d6dde9d62f83ed55c86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Nov 2020 11:04:02 +0100
+Subject: hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus
+ hardening
+
+From: Andres Beltran <lkmlabelt@gmail.com>
+
+[ Upstream commit 4d18fcc95f50950a99bd940d4e61a983f91d267a ]
+
+Currently, pointers to guest memory are passed to Hyper-V as
+transaction IDs in netvsc. In the face of errors or malicious
+behavior in Hyper-V, netvsc should not expose or trust the transaction
+IDs returned by Hyper-V to be valid guest memory addresses. Instead,
+use small integers generated by vmbus_requestor as requests
+(transaction) IDs.
+
+Signed-off-by: Andres Beltran <lkmlabelt@gmail.com>
+Co-developed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Acked-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Wei Liu <wei.liu@kernel.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: netdev@vger.kernel.org
+Link: https://lore.kernel.org/r/20201109100402.8946-4-parri.andrea@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/hyperv_net.h | 13 +++++++++++++
+ drivers/net/hyperv/netvsc.c | 22 ++++++++++++++++------
+ drivers/net/hyperv/rndis_filter.c | 1 +
+ include/linux/hyperv.h | 1 +
+ 4 files changed, 31 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 367878493e704..15652d7951f9e 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -847,6 +847,19 @@ struct nvsp_message {
+
+ #define NETVSC_XDP_HDRM 256
+
++#define NETVSC_MIN_OUT_MSG_SIZE (sizeof(struct vmpacket_descriptor) + \
++ sizeof(struct nvsp_message))
++#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor)
++
++/* Estimated requestor size:
++ * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size
++ */
++static inline u32 netvsc_rqstor_size(unsigned long ringbytes)
++{
++ return ringbytes / NETVSC_MIN_OUT_MSG_SIZE +
++ ringbytes / NETVSC_MIN_IN_MSG_SIZE;
++}
++
+ #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
+ (offsetof(struct vmtransfer_page_packet_header, ranges) + \
+ (rng_cnt) * sizeof(struct vmtransfer_page_range))
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 3eae31c0f97a6..c9b73a0448813 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -50,7 +50,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+
+ vmbus_sendpacket(dev->channel, init_pkt,
+ sizeof(struct nvsp_message),
+- (unsigned long)init_pkt,
++ VMBUS_RQST_ID_NO_RESPONSE,
+ VM_PKT_DATA_INBAND, 0);
+ }
+
+@@ -163,7 +163,7 @@ static void netvsc_revoke_recv_buf(struct hv_device *device,
+ ret = vmbus_sendpacket(device->channel,
+ revoke_packet,
+ sizeof(struct nvsp_message),
+- (unsigned long)revoke_packet,
++ VMBUS_RQST_ID_NO_RESPONSE,
+ VM_PKT_DATA_INBAND, 0);
+ /* If the failure is because the channel is rescinded;
+ * ignore the failure since we cannot send on a rescinded
+@@ -213,7 +213,7 @@ static void netvsc_revoke_send_buf(struct hv_device *device,
+ ret = vmbus_sendpacket(device->channel,
+ revoke_packet,
+ sizeof(struct nvsp_message),
+- (unsigned long)revoke_packet,
++ VMBUS_RQST_ID_NO_RESPONSE,
+ VM_PKT_DATA_INBAND, 0);
+
+ /* If the failure is because the channel is rescinded;
+@@ -557,7 +557,7 @@ static int negotiate_nvsp_ver(struct hv_device *device,
+
+ ret = vmbus_sendpacket(device->channel, init_packet,
+ sizeof(struct nvsp_message),
+- (unsigned long)init_packet,
++ VMBUS_RQST_ID_NO_RESPONSE,
+ VM_PKT_DATA_INBAND, 0);
+
+ return ret;
+@@ -614,7 +614,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
+ /* Send the init request */
+ ret = vmbus_sendpacket(device->channel, init_packet,
+ sizeof(struct nvsp_message),
+- (unsigned long)init_packet,
++ VMBUS_RQST_ID_NO_RESPONSE,
+ VM_PKT_DATA_INBAND, 0);
+ if (ret != 0)
+ goto cleanup;
+@@ -698,10 +698,19 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
+ const struct vmpacket_descriptor *desc,
+ int budget)
+ {
+- struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
++ struct sk_buff *skb;
+ u16 q_idx = 0;
+ int queue_sends;
++ u64 cmd_rqst;
++
++ cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
++ if (cmd_rqst == VMBUS_RQST_ERROR) {
++ netdev_err(ndev, "Incorrect transaction id\n");
++ return;
++ }
++
++ skb = (struct sk_buff *)(unsigned long)cmd_rqst;
+
+ /* Notify the layer above us */
+ if (likely(skb)) {
+@@ -1530,6 +1539,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
+ netvsc_poll, NAPI_POLL_WEIGHT);
+
+ /* Open the channel */
++ device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+ ret = vmbus_open(device->channel, netvsc_ring_bytes,
+ netvsc_ring_bytes, NULL, 0,
+ netvsc_channel_cb, net_device->chan_table);
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 90bc0008fa2fd..13f62950eeb9f 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1170,6 +1170,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
+ /* Set the channel before opening.*/
+ nvchan->channel = new_sc;
+
++ new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+ ret = vmbus_open(new_sc, netvsc_ring_bytes,
+ netvsc_ring_bytes, NULL, 0,
+ netvsc_channel_cb, nvchan);
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index 4cb65a79d92f6..2aaf450c8d800 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -779,6 +779,7 @@ struct vmbus_requestor {
+
+ #define VMBUS_NO_RQSTOR U64_MAX
+ #define VMBUS_RQST_ERROR (U64_MAX - 1)
++#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2)
+
+ struct vmbus_device {
+ u16 dev_type;
+--
+2.43.0
+
--- /dev/null
+From d069a005c45770eced39a4cc7f4048713e0ec0dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jan 2021 16:53:42 -0800
+Subject: hv_netvsc: Wait for completion on request SWITCH_DATA_PATH
+
+From: Long Li <longli@microsoft.com>
+
+[ Upstream commit 8b31f8c982b738e4130539e47f03967c599d8e22 ]
+
+The completion indicates if NVSP_MSG4_TYPE_SWITCH_DATA_PATH has been
+processed by the VSP. The traffic is steered to VF or synthetic after we
+receive this completion.
+
+Signed-off-by: Long Li <longli@microsoft.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++++++++++++++---
+ drivers/net/hyperv/netvsc_drv.c | 1 -
+ 2 files changed, 34 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index c9b73a0448813..03333a4136bf4 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -37,6 +37,10 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+ struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
+ struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
+
++ /* Block sending traffic to VF if it's about to be gone */
++ if (!vf)
++ net_device_ctx->data_path_is_vf = vf;
++
+ memset(init_pkt, 0, sizeof(struct nvsp_message));
+ init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
+ if (vf)
+@@ -50,8 +54,11 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+
+ vmbus_sendpacket(dev->channel, init_pkt,
+ sizeof(struct nvsp_message),
+- VMBUS_RQST_ID_NO_RESPONSE,
+- VM_PKT_DATA_INBAND, 0);
++ (unsigned long)init_pkt,
++ VM_PKT_DATA_INBAND,
++ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
++ wait_for_completion(&nv_dev->channel_init_wait);
++ net_device_ctx->data_path_is_vf = vf;
+ }
+
+ /* Worker to setup sub channels on initial setup
+@@ -757,8 +764,31 @@ static void netvsc_send_completion(struct net_device *ndev,
+ const struct vmpacket_descriptor *desc,
+ int budget)
+ {
+- const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
++ const struct nvsp_message *nvsp_packet;
+ u32 msglen = hv_pkt_datalen(desc);
++ struct nvsp_message *pkt_rqst;
++ u64 cmd_rqst;
++
++ /* First check if this is a VMBUS completion without data payload */
++ if (!msglen) {
++ cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
++ (u64)desc->trans_id);
++ if (cmd_rqst == VMBUS_RQST_ERROR) {
++ netdev_err(ndev, "Invalid transaction id\n");
++ return;
++ }
++
++ pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
++ switch (pkt_rqst->hdr.msg_type) {
++ case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
++ complete(&net_device->channel_init_wait);
++ break;
++
++ default:
++ netdev_err(ndev, "Unexpected VMBUS completion!!\n");
++ }
++ return;
++ }
+
+ /* Ensure packet is big enough to read header fields */
+ if (msglen < sizeof(struct nvsp_message_header)) {
+@@ -766,6 +796,7 @@ static void netvsc_send_completion(struct net_device *ndev,
+ return;
+ }
+
++ nvsp_packet = hv_pkt_data(desc);
+ switch (nvsp_packet->hdr.msg_type) {
+ case NVSP_MSG_TYPE_INIT_COMPLETE:
+ if (msglen < sizeof(struct nvsp_message_header) +
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 790bf750281ad..57a5ec098e7e0 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -2429,7 +2429,6 @@ static int netvsc_vf_changed(struct net_device *vf_netdev)
+
+ if (net_device_ctx->data_path_is_vf == vf_is_up)
+ return NOTIFY_OK;
+- net_device_ctx->data_path_is_vf = vf_is_up;
+
+ if (vf_is_up && !net_device_ctx->vf_alloc) {
+ netdev_info(ndev, "Waiting for the VF association from host\n");
+--
+2.43.0
+
--- /dev/null
+From 412604ade6d81740b674c81ad30bd5d459553e94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jan 2024 19:45:31 +0100
+Subject: lsm: fix default return value of the socket_getpeersec_*() hooks
+
+From: Ondrej Mosnacek <omosnace@redhat.com>
+
+[ Upstream commit 5a287d3d2b9de2b3e747132c615599907ba5c3c1 ]
+
+For these hooks the true "neutral" value is -EOPNOTSUPP, which is
+currently what is returned when no LSM provides this hook and what LSMs
+return when there is no security context set on the socket. Correct the
+value in <linux/lsm_hooks.h> and adjust the dispatch functions in
+security/security.c to avoid issues when the BPF LSM is enabled.
+
+Cc: stable@vger.kernel.org
+Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks")
+Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
+[PM: subject line tweak]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/lsm_hook_defs.h | 4 ++--
+ security/security.c | 31 +++++++++++++++++++++++++++----
+ 2 files changed, 29 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
+index 9f550eab8ebdb..07abcd384975b 100644
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -293,9 +293,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname)
+ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
+ LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
+ LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
+-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
++LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock,
+ sockptr_t optval, sockptr_t optlen, unsigned int len)
+-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
++LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock,
+ struct sk_buff *skb, u32 *secid)
+ LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
+ LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
+diff --git a/security/security.c b/security/security.c
+index e9dcde3c4f14b..0bbcb100ba8e9 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -2227,14 +2227,37 @@ EXPORT_SYMBOL(security_sock_rcv_skb);
+ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
+ sockptr_t optlen, unsigned int len)
+ {
+- return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
+- optval, optlen, len);
++ struct security_hook_list *hp;
++ int rc;
++
++ /*
++ * Only one module will provide a security context.
++ */
++ hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream,
++ list) {
++ rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen,
++ len);
++ if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream))
++ return rc;
++ }
++ return LSM_RET_DEFAULT(socket_getpeersec_stream);
+ }
+
+ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
+ {
+- return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock,
+- skb, secid);
++ struct security_hook_list *hp;
++ int rc;
++
++ /*
++ * Only one module will provide a security context.
++ */
++ hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram,
++ list) {
++ rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid);
++ if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram))
++ return rc;
++ }
++ return LSM_RET_DEFAULT(socket_getpeersec_dgram);
+ }
+ EXPORT_SYMBOL(security_socket_getpeersec_dgram);
+
+--
+2.43.0
+
--- /dev/null
+From d0ab4effe8f2356e5ad02e4c5d59527127721934 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Oct 2022 12:31:21 -0400
+Subject: lsm: make security_socket_getpeersec_stream() sockptr_t safe
+
+From: Paul Moore <paul@paul-moore.com>
+
+[ Upstream commit b10b9c342f7571f287fd422be5d5c0beb26ba974 ]
+
+Commit 4ff09db1b79b ("bpf: net: Change sk_getsockopt() to take the
+sockptr_t argument") made it possible to call sk_getsockopt()
+with both user and kernel address space buffers through the use of
+the sockptr_t type. Unfortunately at the time of conversion the
+security_socket_getpeersec_stream() LSM hook was written to only
+accept userspace buffers, and in a desire to avoid having to change
+the LSM hook the commit author simply passed the sockptr_t's
+userspace buffer pointer. Since the only sk_getsockopt() callers
+at the time of conversion which used kernel sockptr_t buffers did
+not allow SO_PEERSEC, and hence the
+security_socket_getpeersec_stream() hook, this was acceptable but
+also very fragile as future changes presented the possibility of
+silently passing kernel space pointers to the LSM hook.
+
+There are several ways to protect against this, including careful
+code review of future commits, but since relying on code review to
+catch bugs is a recipe for disaster and the upstream eBPF maintainer
+is "strongly against defensive programming", this patch updates the
+LSM hook, and all of the implementations to support sockptr_t and
+safely handle both user and kernel space buffers.
+
+Acked-by: Casey Schaufler <casey@schaufler-ca.com>
+Acked-by: John Johansen <john.johansen@canonical.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/lsm_hook_defs.h | 2 +-
+ include/linux/lsm_hooks.h | 4 ++--
+ include/linux/security.h | 11 +++++++----
+ net/core/sock.c | 3 ++-
+ security/apparmor/lsm.c | 29 +++++++++++++----------------
+ security/security.c | 6 +++---
+ security/selinux/hooks.c | 13 ++++++-------
+ security/smack/smack_lsm.c | 19 ++++++++++---------
+ 8 files changed, 44 insertions(+), 43 deletions(-)
+
+diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
+index 92a76ce0c382d..9f550eab8ebdb 100644
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -294,7 +294,7 @@ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
+ LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
+ LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
+ LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
+- char __user *optval, int __user *optlen, unsigned len)
++ sockptr_t optval, sockptr_t optlen, unsigned int len)
+ LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
+ struct sk_buff *skb, u32 *secid)
+ LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
+diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
+index 64cdf4d7bfb30..bbf9c8c7bd9c5 100644
+--- a/include/linux/lsm_hooks.h
++++ b/include/linux/lsm_hooks.h
+@@ -926,8 +926,8 @@
+ * SO_GETPEERSEC. For tcp sockets this can be meaningful if the
+ * socket is associated with an ipsec SA.
+ * @sock is the local socket.
+- * @optval userspace memory where the security state is to be copied.
+- * @optlen userspace int where the module should copy the actual length
++ * @optval memory where the security state is to be copied.
++ * @optlen memory where the module should copy the actual length
+ * of the security state.
+ * @len as input is the maximum length to copy to userspace provided
+ * by the caller.
+diff --git a/include/linux/security.h b/include/linux/security.h
+index e388b1666bcfc..5b61aa19fac66 100644
+--- a/include/linux/security.h
++++ b/include/linux/security.h
+@@ -31,6 +31,7 @@
+ #include <linux/err.h>
+ #include <linux/string.h>
+ #include <linux/mm.h>
++#include <linux/sockptr.h>
+
+ struct linux_binprm;
+ struct cred;
+@@ -1366,8 +1367,8 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname);
+ int security_socket_setsockopt(struct socket *sock, int level, int optname);
+ int security_socket_shutdown(struct socket *sock, int how);
+ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
+-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+- int __user *optlen, unsigned len);
++int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
++ sockptr_t optlen, unsigned int len);
+ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
+ int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
+ void security_sk_free(struct sock *sk);
+@@ -1501,8 +1502,10 @@ static inline int security_sock_rcv_skb(struct sock *sk,
+ return 0;
+ }
+
+-static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+- int __user *optlen, unsigned len)
++static inline int security_socket_getpeersec_stream(struct socket *sock,
++ sockptr_t optval,
++ sockptr_t optlen,
++ unsigned int len)
+ {
+ return -ENOPROTOOPT;
+ }
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 42da46965b16f..016c0b9e01b70 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1503,7 +1503,8 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
+ break;
+
+ case SO_PEERSEC:
+- return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
++ return security_socket_getpeersec_stream(sock,
++ optval, optlen, len);
+
+ case SO_MARK:
+ v.val = sk->sk_mark;
+diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
+index 585edcc6814d2..052f1b920e43f 100644
+--- a/security/apparmor/lsm.c
++++ b/security/apparmor/lsm.c
+@@ -1070,11 +1070,10 @@ static struct aa_label *sk_peer_label(struct sock *sk)
+ * Note: for tcp only valid if using ipsec or cipso on lan
+ */
+ static int apparmor_socket_getpeersec_stream(struct socket *sock,
+- char __user *optval,
+- int __user *optlen,
++ sockptr_t optval, sockptr_t optlen,
+ unsigned int len)
+ {
+- char *name;
++ char *name = NULL;
+ int slen, error = 0;
+ struct aa_label *label;
+ struct aa_label *peer;
+@@ -1091,23 +1090,21 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock,
+ /* don't include terminating \0 in slen, it breaks some apps */
+ if (slen < 0) {
+ error = -ENOMEM;
+- } else {
+- if (slen > len) {
+- error = -ERANGE;
+- } else if (copy_to_user(optval, name, slen)) {
+- error = -EFAULT;
+- goto out;
+- }
+- if (put_user(slen, optlen))
+- error = -EFAULT;
+-out:
+- kfree(name);
+-
++ goto done;
++ }
++ if (slen > len) {
++ error = -ERANGE;
++ goto done_len;
+ }
+
++ if (copy_to_sockptr(optval, name, slen))
++ error = -EFAULT;
++done_len:
++ if (copy_to_sockptr(optlen, &slen, sizeof(slen)))
++ error = -EFAULT;
+ done:
+ end_current_label_crit_section(label);
+-
++ kfree(name);
+ return error;
+ }
+
+diff --git a/security/security.c b/security/security.c
+index 269c3965393f4..e9dcde3c4f14b 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -2224,11 +2224,11 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL(security_sock_rcv_skb);
+
+-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+- int __user *optlen, unsigned len)
++int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
++ sockptr_t optlen, unsigned int len)
+ {
+ return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
+- optval, optlen, len);
++ optval, optlen, len);
+ }
+
+ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
+diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
+index 50d3ddfe15fd1..46c00a68bb4bd 100644
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -5110,11 +5110,12 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ return err;
+ }
+
+-static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval,
+- int __user *optlen, unsigned len)
++static int selinux_socket_getpeersec_stream(struct socket *sock,
++ sockptr_t optval, sockptr_t optlen,
++ unsigned int len)
+ {
+ int err = 0;
+- char *scontext;
++ char *scontext = NULL;
+ u32 scontext_len;
+ struct sk_security_struct *sksec = sock->sk->sk_security;
+ u32 peer_sid = SECSID_NULL;
+@@ -5130,17 +5131,15 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op
+ &scontext_len);
+ if (err)
+ return err;
+-
+ if (scontext_len > len) {
+ err = -ERANGE;
+ goto out_len;
+ }
+
+- if (copy_to_user(optval, scontext, scontext_len))
++ if (copy_to_sockptr(optval, scontext, scontext_len))
+ err = -EFAULT;
+-
+ out_len:
+- if (put_user(scontext_len, optlen))
++ if (copy_to_sockptr(optlen, &scontext_len, sizeof(scontext_len)))
+ err = -EFAULT;
+ kfree(scontext);
+ return err;
+diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
+index e1669759403a6..5388f143eecd8 100644
+--- a/security/smack/smack_lsm.c
++++ b/security/smack/smack_lsm.c
+@@ -4022,12 +4022,12 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ * returns zero on success, an error code otherwise
+ */
+ static int smack_socket_getpeersec_stream(struct socket *sock,
+- char __user *optval,
+- int __user *optlen, unsigned len)
++ sockptr_t optval, sockptr_t optlen,
++ unsigned int len)
+ {
+ struct socket_smack *ssp;
+ char *rcp = "";
+- int slen = 1;
++ u32 slen = 1;
+ int rc = 0;
+
+ ssp = sock->sk->sk_security;
+@@ -4035,15 +4035,16 @@ static int smack_socket_getpeersec_stream(struct socket *sock,
+ rcp = ssp->smk_packet->smk_known;
+ slen = strlen(rcp) + 1;
+ }
+-
+- if (slen > len)
++ if (slen > len) {
+ rc = -ERANGE;
+- else if (copy_to_user(optval, rcp, slen) != 0)
+- rc = -EFAULT;
++ goto out_len;
++ }
+
+- if (put_user(slen, optlen) != 0)
++ if (copy_to_sockptr(optval, rcp, slen))
++ rc = -EFAULT;
++out_len:
++ if (copy_to_sockptr(optlen, &slen, sizeof(slen)))
+ rc = -EFAULT;
+-
+ return rc;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 60e180797025bd86a8ca15068002096dbc3f63d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 12:09:54 -0800
+Subject: mm/hugetlb: change hugetlb_reserve_pages() to type bool
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+[ Upstream commit 33b8f84a4ee78491a8f4f9e4c5520c9da4a10983 ]
+
+While reviewing a bug in hugetlb_reserve_pages, it was noticed that all
+callers ignore the return value. Any failure is considered an ENOMEM
+error by the callers.
+
+Change the function to be of type bool. The function will return true if
+the reservation was successful, false otherwise. Callers currently assume
+a zero return code indicates success. Change the callers to look for true
+to indicate success. No functional change, only code cleanup.
+
+Link: https://lkml.kernel.org/r/20201221192542.15732-1-mike.kravetz@oracle.com
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Dan Carpenter <dan.carpenter@oracle.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: e656c7a9e596 ("mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/hugetlbfs/inode.c | 4 ++--
+ include/linux/hugetlb.h | 2 +-
+ mm/hugetlb.c | 37 ++++++++++++++-----------------------
+ 3 files changed, 17 insertions(+), 26 deletions(-)
+
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index a0edd4b8fa189..c3e9fa7ce75f7 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -176,7 +176,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+ file_accessed(file);
+
+ ret = -ENOMEM;
+- if (hugetlb_reserve_pages(inode,
++ if (!hugetlb_reserve_pages(inode,
+ vma->vm_pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), vma,
+ vma->vm_flags))
+@@ -1500,7 +1500,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
+ inode->i_size = size;
+ clear_nlink(inode);
+
+- if (hugetlb_reserve_pages(inode, 0,
++ if (!hugetlb_reserve_pages(inode, 0,
+ size >> huge_page_shift(hstate_inode(inode)), NULL,
+ acctflag))
+ file = ERR_PTR(-ENOMEM);
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index 99b73fc4a8246..90c66b9458c31 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -140,7 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep);
+-int hugetlb_reserve_pages(struct inode *inode, long from, long to,
++bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
+ struct vm_area_struct *vma,
+ vm_flags_t vm_flags);
+ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 81949f6d29af5..02b7c8f9b0e87 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5108,12 +5108,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+ return pages << h->order;
+ }
+
+-int hugetlb_reserve_pages(struct inode *inode,
++/* Return true if reservation was successful, false otherwise. */
++bool hugetlb_reserve_pages(struct inode *inode,
+ long from, long to,
+ struct vm_area_struct *vma,
+ vm_flags_t vm_flags)
+ {
+- long ret, chg, add = -1;
++ long chg, add = -1;
+ struct hstate *h = hstate_inode(inode);
+ struct hugepage_subpool *spool = subpool_inode(inode);
+ struct resv_map *resv_map;
+@@ -5123,7 +5124,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+ /* This should never happen */
+ if (from > to) {
+ VM_WARN(1, "%s called with a negative range\n", __func__);
+- return -EINVAL;
++ return false;
+ }
+
+ /*
+@@ -5132,7 +5133,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+ * without using reserves
+ */
+ if (vm_flags & VM_NORESERVE)
+- return 0;
++ return true;
+
+ /*
+ * Shared mappings base their reservation on the number of pages that
+@@ -5154,7 +5155,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+ /* Private mapping. */
+ resv_map = resv_map_alloc();
+ if (!resv_map)
+- return -ENOMEM;
++ return false;
+
+ chg = to - from;
+
+@@ -5162,18 +5163,12 @@ int hugetlb_reserve_pages(struct inode *inode,
+ set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
+ }
+
+- if (chg < 0) {
+- ret = chg;
++ if (chg < 0)
+ goto out_err;
+- }
+-
+- ret = hugetlb_cgroup_charge_cgroup_rsvd(
+- hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
+
+- if (ret < 0) {
+- ret = -ENOMEM;
++ if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
++ chg * pages_per_huge_page(h), &h_cg) < 0)
+ goto out_err;
+- }
+
+ if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+ /* For private mappings, the hugetlb_cgroup uncharge info hangs
+@@ -5188,19 +5183,15 @@ int hugetlb_reserve_pages(struct inode *inode,
+ * reservations already in place (gbl_reserve).
+ */
+ gbl_reserve = hugepage_subpool_get_pages(spool, chg);
+- if (gbl_reserve < 0) {
+- ret = -ENOSPC;
++ if (gbl_reserve < 0)
+ goto out_uncharge_cgroup;
+- }
+
+ /*
+ * Check enough hugepages are available for the reservation.
+ * Hand the pages back to the subpool if there are not
+ */
+- ret = hugetlb_acct_memory(h, gbl_reserve);
+- if (ret < 0) {
++ if (hugetlb_acct_memory(h, gbl_reserve) < 0)
+ goto out_put_pages;
+- }
+
+ /*
+ * Account for the reservations made. Shared mappings record regions
+@@ -5218,7 +5209,6 @@ int hugetlb_reserve_pages(struct inode *inode,
+
+ if (unlikely(add < 0)) {
+ hugetlb_acct_memory(h, -gbl_reserve);
+- ret = add;
+ goto out_put_pages;
+ } else if (unlikely(chg > add)) {
+ /*
+@@ -5251,7 +5241,8 @@ int hugetlb_reserve_pages(struct inode *inode,
+ hugetlb_cgroup_put_rsvd_cgroup(h_cg);
+ }
+ }
+- return 0;
++ return true;
++
+ out_put_pages:
+ /* put back original number of pages, chg */
+ (void)hugepage_subpool_put_pages(spool, chg);
+@@ -5267,7 +5258,7 @@ int hugetlb_reserve_pages(struct inode *inode,
+ region_abort(resv_map, from, to, regions_needed);
+ if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+ kref_put(&resv_map->refs, resv_map_release);
+- return ret;
++ return false;
+ }
+
+ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+--
+2.43.0
+
--- /dev/null
+From 248455f8b451d2ac07c8a295e2686f1524b80383 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 12:04:42 -0800
+Subject: mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE
+
+From: Prakash Sangappa <prakash.sangappa@oracle.com>
+
+[ Upstream commit e656c7a9e59607d1672d85ffa9a89031876ffe67 ]
+
+For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in
+shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages
+are not reserved. However when the shared memory is attached with the
+shmat() call the hugetlb pages are getting reserved incorrectly for
+SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug.
+
+-------------------------------
+Following test shows the issue.
+
+$cat shmhtb.c
+
+int main()
+{
+ int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE;
+ int shmid;
+
+ shmid = shmget(SKEY, SHMSZ, shmflags);
+ if (shmid < 0)
+ {
+ printf("shmat: shmget() failed, %d\n", errno);
+ return 1;
+ }
+ printf("After shmget()\n");
+ system("cat /proc/meminfo | grep -i hugepages_");
+
+ shmat(shmid, NULL, 0);
+ printf("\nAfter shmat()\n");
+ system("cat /proc/meminfo | grep -i hugepages_");
+
+ shmctl(shmid, IPC_RMID, NULL);
+ return 0;
+}
+
+ #sysctl -w vm.nr_hugepages=20
+ #./shmhtb
+
+After shmget()
+HugePages_Total: 20
+HugePages_Free: 20
+HugePages_Rsvd: 0
+HugePages_Surp: 0
+
+After shmat()
+HugePages_Total: 20
+HugePages_Free: 20
+HugePages_Rsvd: 5 <--
+HugePages_Surp: 0
+--------------------------------
+
+Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared
+memory in the shmat() call.
+
+Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com
+Signed-off-by: Prakash Sangappa <prakash.sangappa@oracle.com>
+Acked-by: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/hugetlbfs/inode.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index c3e9fa7ce75f7..bf3cda4989623 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -135,6 +135,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+ loff_t len, vma_len;
+ int ret;
+ struct hstate *h = hstate_file(file);
++ vm_flags_t vm_flags;
+
+ /*
+ * vma address alignment (but not the pgoff alignment) has
+@@ -176,10 +177,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+ file_accessed(file);
+
+ ret = -ENOMEM;
++
++ vm_flags = vma->vm_flags;
++ /*
++ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
++ * reserving here. Note: only for SHM hugetlbfs file, the inode
++ * flag S_PRIVATE is set.
++ */
++ if (inode->i_flags & S_PRIVATE)
++ vm_flags |= VM_NORESERVE;
++
+ if (!hugetlb_reserve_pages(inode,
+ vma->vm_pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), vma,
+- vma->vm_flags))
++ vm_flags))
+ goto out;
+
+ ret = 0;
+--
+2.43.0
+
--- /dev/null
+From 9b1c36493c827100b8aa5b3dd82cdd808f29d801 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 17:27:56 -0700
+Subject: net: Change sock_getsockopt() to take the sk ptr instead of the sock
+ ptr
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit ba74a7608dc12fbbd8ea36e660087f08a81ef26a ]
+
+A latter patch refactors bpf_getsockopt(SOL_SOCKET) with the
+sock_getsockopt() to avoid code duplication and code
+drift between the two duplicates.
+
+The current sock_getsockopt() takes sock ptr as the argument.
+The very first thing of this function is to get back the sk ptr
+by 'sk = sock->sk'.
+
+bpf_getsockopt() could be called when the sk does not have
+the sock ptr created. Meaning sk->sk_socket is NULL. For example,
+when a passive tcp connection has just been established but has yet
+been accept()-ed. Thus, it cannot use the sock_getsockopt(sk->sk_socket)
+or else it will pass a NULL ptr.
+
+This patch moves all sock_getsockopt implementation to the newly
+added sk_getsockopt(). The new sk_getsockopt() takes a sk ptr
+and immediately gets the sock ptr by 'sock = sk->sk_socket'
+
+The existing sock_getsockopt(sock) is changed to call
+sk_getsockopt(sock->sk). All existing callers have both sock->sk
+and sk->sk_socket pointer.
+
+The latter patch will make bpf_getsockopt(SOL_SOCKET) call
+sk_getsockopt(sk) directly. The bpf_getsockopt(SOL_SOCKET) does
+not use the optnames that require sk->sk_socket, so it will
+be safe.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://lore.kernel.org/r/20220902002756.2887884-1-kafai@fb.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 769e969cd1dc5..95559d088a169 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1293,10 +1293,10 @@ static int groups_to_user(gid_t __user *dst, const struct group_info *src)
+ return 0;
+ }
+
+-int sock_getsockopt(struct socket *sock, int level, int optname,
+- char __user *optval, int __user *optlen)
++static int sk_getsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int __user *optlen)
+ {
+- struct sock *sk = sock->sk;
++ struct socket *sock = sk->sk_socket;
+
+ union {
+ int val;
+@@ -1633,6 +1633,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ return 0;
+ }
+
++int sock_getsockopt(struct socket *sock, int level, int optname,
++ char __user *optval, int __user *optlen)
++{
++ return sk_getsockopt(sock->sk, level, optname, optval, optlen);
++}
++
+ /*
+ * Initialize an sk_lock.
+ *
+--
+2.43.0
+
--- /dev/null
+From 37e6ea8769930834d3f74552c946172c8ef147e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 12:34:38 -0400
+Subject: NFSD: Add documenting comment for nfsd4_release_lockowner()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 043862b09cc00273e35e6c3a6389957953a34207 ]
+
+And return explicit nfserr values that match what is documented in the
+new comment / API contract.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 1b40b2197ce66..b6480be7b5e6a 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7107,6 +7107,23 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ return status;
+ }
+
++/**
++ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations
++ * @rqstp: RPC transaction
++ * @cstate: NFSv4 COMPOUND state
++ * @u: RELEASE_LOCKOWNER arguments
++ *
++ * The lockowner's so_count is bumped when a lock record is added
++ * or when copying a conflicting lock. The latter case is brief,
++ * but can lead to fleeting false positives when looking for
++ * locks-in-use.
++ *
++ * Return values:
++ * %nfs_ok: lockowner released or not found
++ * %nfserr_locks_held: lockowner still in use
++ * %nfserr_stale_clientid: clientid no longer active
++ * %nfserr_expired: clientid not recognized
++ */
+ __be32
+ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+@@ -7133,7 +7150,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
+ if (!lo) {
+ spin_unlock(&clp->cl_lock);
+- return status;
++ return nfs_ok;
+ }
+ if (atomic_read(&lo->lo_owner.so_count) != 2) {
+ spin_unlock(&clp->cl_lock);
+@@ -7149,11 +7166,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ put_ol_stateid_locked(stp, &reaplist);
+ }
+ spin_unlock(&clp->cl_lock);
++
+ free_ol_stateid_reaplist(&reaplist);
+ remove_blocked_locks(lo);
+ nfs4_put_stateowner(&lo->lo_owner);
+-
+- return status;
++ return nfs_ok;
+ }
+
+ static inline struct nfs4_client_reclaim *
+--
+2.43.0
+
--- /dev/null
+From 72721cc9efa6c9217b39095adbc30d0efd0b10db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Feb 2024 13:22:39 +1100
+Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ]
+
+A recent change to check_for_locks() changed it to take ->flc_lock while
+holding ->fi_lock. This creates a lock inversion (reported by lockdep)
+because there is a case where ->fi_lock is taken while holding
+->flc_lock.
+
+->flc_lock is held across ->fl_lmops callbacks, and
+nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However
+it doesn't need to.
+
+Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each
+delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list
+and so needed the lock. Since then it doesn't walk the list and doesn't
+need the lock.
+
+Two actions are performed under the lock. One is to call
+nfsd_break_one_deleg which calls nfsd4_run_cb(). These don't act on
+the nfs4_file at all, so don't need the lock.
+
+The other is to set ->fi_had_conflict which is in the nfs4_file.
+This field is only ever set here (except when initialised to false)
+so there is no possible problem with multiple threads racing when
+setting it.
+
+The field is tested twice in nfs4_set_delegation(). The first test does
+not hold a lock and is documented as an opportunistic optimisation, so
+it doesn't impose any need to hold ->fi_lock while setting
+->fi_had_conflict.
+
+The second test in nfs4_set_delegation() *is* made under ->fi_lock, so
+removing the locking when ->fi_had_conflict is set could make a change.
+The change could only be interesting if ->fi_had_conflict tested as
+false even though nfsd_break_one_deleg() ran before ->fi_lock was
+unlocked. i.e. while hash_delegation_locked() was running.
+As hash_delegation_locked() doesn't interact in any way with nfsd4_run_cb()
+there can be no importance to this interaction.
+
+So this patch removes the locking from nfsd_break_one_deleg() and moves
+the final test on ->fi_had_conflict out of the locked region to make it
+clear that locking isn't important to the test. It is still tested
+*after* vfs_setlease() has succeeded. This might be significant and as
+vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called
+under ->flc_lock this "after" is a true ordering provided by a spinlock.
+
+Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 16b073c637986..7ff1f85f1dd9a 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4617,10 +4617,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
+ */
+ fl->fl_break_time = 0;
+
+- spin_lock(&fp->fi_lock);
+ fp->fi_had_conflict = true;
+ nfsd_break_one_deleg(dp);
+- spin_unlock(&fp->fi_lock);
+ return ret;
+ }
+
+@@ -5049,12 +5047,13 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
+ if (status)
+ goto out_clnt_odstate;
+
++ status = -EAGAIN;
++ if (fp->fi_had_conflict)
++ goto out_unlock;
++
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+- if (fp->fi_had_conflict)
+- status = -EAGAIN;
+- else
+- status = hash_delegation_locked(dp, fp);
++ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+--
+2.43.0
+
--- /dev/null
+From 094bb06a555bffa2d5058ea6657fea919095e171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 14:58:16 +1100
+Subject: nfsd: fix RELEASE_LOCKOWNER
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ]
+
+The test on so_count in nfsd4_release_lockowner() is nonsense and
+harmful. Revert to using check_for_locks(), changing that to not sleep.
+
+First: harmful.
+As is documented in the kdoc comment for nfsd4_release_lockowner(), the
+test on so_count can transiently return a false positive resulting in a
+return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
+clearly a protocol violation and with the Linux NFS client it can cause
+incorrect behaviour.
+
+If RELEASE_LOCKOWNER is sent while some other thread is still
+processing a LOCK request which failed because, at the time that request
+was received, the given owner held a conflicting lock, then the nfsd
+thread processing that LOCK request can hold a reference (conflock) to
+the lock owner that causes nfsd4_release_lockowner() to return an
+incorrect error.
+
+The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
+never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
+it knows that the error is impossible. It assumes the lock owner was in
+fact released so it feels free to use the same lock owner identifier in
+some later locking request.
+
+When it does reuse a lock owner identifier for which a previous RELEASE
+failed, it will naturally use a lock_seqid of zero. However the server,
+which didn't release the lock owner, will expect a larger lock_seqid and
+so will respond with NFS4ERR_BAD_SEQID.
+
+So clearly it is harmful to allow a false positive, which testing
+so_count allows.
+
+The test is nonsense because ... well... it doesn't mean anything.
+
+so_count is the sum of three different counts.
+1/ the set of states listed on so_stateids
+2/ the set of active vfs locks owned by any of those states
+3/ various transient counts such as for conflicting locks.
+
+When it is tested against '2' it is clear that one of these is the
+transient reference obtained by find_lockowner_str_locked(). It is not
+clear what the other one is expected to be.
+
+In practice, the count is often 2 because there is precisely one state
+on so_stateids. If there were more, this would fail.
+
+In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
+In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
+all the lock states being removed, and so the lockowner being discarded
+(it is removed when there are no more references which usually happens
+when the lock state is discarded). When nfsd4_release_lockowner() finds
+that the lock owner doesn't exist, it returns success.
+
+The other case shows an so_count of '2' and precisely one state listed
+in so_stateid. It appears that the Linux client uses a separate lock
+owner for each file resulting in one lock state per lock owner, so this
+test on '2' is safe. For another client it might not be safe.
+
+So this patch changes check_for_locks() to use the (newish)
+find_any_file_locked() so that it doesn't take a reference on the
+nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
+this check it is safe to restore the use of check_for_locks() rather
+than testing so_count against the mysterious '2'.
+
+Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index b6480be7b5e6a..16b073c637986 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7080,14 +7080,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ {
+ struct file_lock *fl;
+ int status = false;
+- struct nfsd_file *nf = find_any_file(fp);
++ struct nfsd_file *nf;
+ struct inode *inode;
+ struct file_lock_context *flctx;
+
++ spin_lock(&fp->fi_lock);
++ nf = find_any_file_locked(fp);
+ if (!nf) {
+ /* Any valid lock stateid should have some sort of access */
+ WARN_ON_ONCE(1);
+- return status;
++ goto out;
+ }
+
+ inode = locks_inode(nf->nf_file);
+@@ -7103,7 +7105,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ }
+ spin_unlock(&flctx->flc_lock);
+ }
+- nfsd_file_put(nf);
++out:
++ spin_unlock(&fp->fi_lock);
+ return status;
+ }
+
+@@ -7113,10 +7116,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ * @cstate: NFSv4 COMPOUND state
+ * @u: RELEASE_LOCKOWNER arguments
+ *
+- * The lockowner's so_count is bumped when a lock record is added
+- * or when copying a conflicting lock. The latter case is brief,
+- * but can lead to fleeting false positives when looking for
+- * locks-in-use.
++ * Check if theree are any locks still held and if not - free the lockowner
++ * and any lock state that is owned.
+ *
+ * Return values:
+ * %nfs_ok: lockowner released or not found
+@@ -7152,10 +7153,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ spin_unlock(&clp->cl_lock);
+ return nfs_ok;
+ }
+- if (atomic_read(&lo->lo_owner.so_count) != 2) {
+- spin_unlock(&clp->cl_lock);
+- nfs4_put_stateowner(&lo->lo_owner);
+- return nfserr_locks_held;
++
++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++ if (check_for_locks(stp->st_stid.sc_file, lo)) {
++ spin_unlock(&clp->cl_lock);
++ nfs4_put_stateowner(&lo->lo_owner);
++ return nfserr_locks_held;
++ }
+ }
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+--
+2.43.0
+
--- /dev/null
+From 73f6ed66fe4fc6f405e3d72225f9d5cc964c5265 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 12:07:18 -0400
+Subject: NFSD: Modernize nfsd4_release_lockowner()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit bd8fdb6e545f950f4654a9a10d7e819ad48146e5 ]
+
+Refactor: Use existing helpers that other lock operations use. This
+change removes several automatic variables, so re-organize the
+variable declarations for readability.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4state.c | 36 +++++++++++-------------------------
+ 1 file changed, 11 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index d402ca0b535f0..1b40b2197ce66 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7113,16 +7113,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ union nfsd4_op_u *u)
+ {
+ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ clientid_t *clid = &rlockowner->rl_clientid;
+- struct nfs4_stateowner *sop;
+- struct nfs4_lockowner *lo = NULL;
+ struct nfs4_ol_stateid *stp;
+- struct xdr_netobj *owner = &rlockowner->rl_owner;
+- unsigned int hashval = ownerstr_hashval(owner);
+- __be32 status;
+- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
++ struct nfs4_lockowner *lo;
+ struct nfs4_client *clp;
+- LIST_HEAD (reaplist);
++ LIST_HEAD(reaplist);
++ __be32 status;
+
+ dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+ clid->cl_boot, clid->cl_id);
+@@ -7130,30 +7127,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ status = lookup_clientid(clid, cstate, nn, false);
+ if (status)
+ return status;
+-
+ clp = cstate->clp;
+- /* Find the matching lock stateowner */
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
+- so_strhash) {
+
+- if (sop->so_is_open_owner || !same_owner_str(sop, owner))
+- continue;
+-
+- if (atomic_read(&sop->so_count) != 1) {
+- spin_unlock(&clp->cl_lock);
+- return nfserr_locks_held;
+- }
+-
+- lo = lockowner(sop);
+- nfs4_get_stateowner(sop);
+- break;
+- }
++ spin_lock(&clp->cl_lock);
++ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
+ if (!lo) {
+ spin_unlock(&clp->cl_lock);
+ return status;
+ }
+-
++ if (atomic_read(&lo->lo_owner.so_count) != 2) {
++ spin_unlock(&clp->cl_lock);
++ nfs4_put_stateowner(&lo->lo_owner);
++ return nfserr_locks_held;
++ }
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+ stp = list_first_entry(&lo->lo_owner.so_stateids,
+--
+2.43.0
+
--- /dev/null
+From 1cac9c5509fd16616dd5ba9dbaa2ab787b4f5b89 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Apr 2022 04:51:44 +0200
+Subject: regmap: Add bulk read/write callbacks into regmap_config
+
+From: Marek Vasut <marex@denx.de>
+
+[ Upstream commit d77e745613680c54708470402e2b623dcd769681 ]
+
+Currently the regmap_config structure only allows the user to implement
+single element register read/write using .reg_read/.reg_write callbacks.
+The regmap_bus already implements bulk counterparts of both, and is being
+misused as a workaround for the missing bulk read/write callbacks in
+regmap_config by a couple of drivers. To stop this misuse, add the bulk
+read/write callbacks to regmap_config and call them from the regmap core
+code.
+
+Signed-off-by: Marek Vasut <marex@denx.de>
+Cc: Jagan Teki <jagan@amarulasolutions.com>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Maxime Ripard <maxime@cerno.tech>
+Cc: Robert Foss <robert.foss@linaro.org>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+To: dri-devel@lists.freedesktop.org
+Link: https://lore.kernel.org/r/20220430025145.640305-1-marex@denx.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/regmap/internal.h | 4 ++
+ drivers/base/regmap/regmap.c | 76 ++++++++++++++++++----------------
+ include/linux/regmap.h | 12 ++++++
+ 3 files changed, 56 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
+index 0097696c31de2..2720d8d7bbfc9 100644
+--- a/drivers/base/regmap/internal.h
++++ b/drivers/base/regmap/internal.h
+@@ -104,6 +104,10 @@ struct regmap {
+ int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+ int (*reg_update_bits)(void *context, unsigned int reg,
+ unsigned int mask, unsigned int val);
++ /* Bulk read/write */
++ int (*read)(void *context, const void *reg_buf, size_t reg_size,
++ void *val_buf, size_t val_size);
++ int (*write)(void *context, const void *data, size_t count);
+
+ bool defer_caching;
+
+diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
+index 8f39aacdad0dc..2dfd6aa600450 100644
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -800,12 +800,15 @@ struct regmap *__regmap_init(struct device *dev,
+ map->reg_stride_order = ilog2(map->reg_stride);
+ else
+ map->reg_stride_order = -1;
+- map->use_single_read = config->use_single_read || !bus || !bus->read;
+- map->use_single_write = config->use_single_write || !bus || !bus->write;
+- map->can_multi_write = config->can_multi_write && bus && bus->write;
++ map->use_single_read = config->use_single_read || !(config->read || (bus && bus->read));
++ map->use_single_write = config->use_single_write || !(config->write || (bus && bus->write));
++ map->can_multi_write = config->can_multi_write && (config->write || (bus && bus->write));
+ if (bus) {
+ map->max_raw_read = bus->max_raw_read;
+ map->max_raw_write = bus->max_raw_write;
++ } else if (config->max_raw_read && config->max_raw_write) {
++ map->max_raw_read = config->max_raw_read;
++ map->max_raw_write = config->max_raw_write;
+ }
+ map->dev = dev;
+ map->bus = bus;
+@@ -839,7 +842,16 @@ struct regmap *__regmap_init(struct device *dev,
+ map->read_flag_mask = bus->read_flag_mask;
+ }
+
+- if (!bus) {
++ if (config && config->read && config->write) {
++ map->reg_read = _regmap_bus_read;
++
++ /* Bulk read/write */
++ map->read = config->read;
++ map->write = config->write;
++
++ reg_endian = REGMAP_ENDIAN_NATIVE;
++ val_endian = REGMAP_ENDIAN_NATIVE;
++ } else if (!bus) {
+ map->reg_read = config->reg_read;
+ map->reg_write = config->reg_write;
+ map->reg_update_bits = config->reg_update_bits;
+@@ -856,10 +868,13 @@ struct regmap *__regmap_init(struct device *dev,
+ } else {
+ map->reg_read = _regmap_bus_read;
+ map->reg_update_bits = bus->reg_update_bits;
+- }
++ /* Bulk read/write */
++ map->read = bus->read;
++ map->write = bus->write;
+
+- reg_endian = regmap_get_reg_endian(bus, config);
+- val_endian = regmap_get_val_endian(dev, bus, config);
++ reg_endian = regmap_get_reg_endian(bus, config);
++ val_endian = regmap_get_val_endian(dev, bus, config);
++ }
+
+ switch (config->reg_bits + map->reg_shift) {
+ case 2:
+@@ -1628,8 +1643,6 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+ size_t len;
+ int i;
+
+- WARN_ON(!map->bus);
+-
+ /* Check for unwritable or noinc registers in range
+ * before we start
+ */
+@@ -1711,7 +1724,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+ val = work_val;
+ }
+
+- if (map->async && map->bus->async_write) {
++ if (map->async && map->bus && map->bus->async_write) {
+ struct regmap_async *async;
+
+ trace_regmap_async_write_start(map, reg, val_len);
+@@ -1779,10 +1792,10 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+ * write.
+ */
+ if (val == work_val)
+- ret = map->bus->write(map->bus_context, map->work_buf,
+- map->format.reg_bytes +
+- map->format.pad_bytes +
+- val_len);
++ ret = map->write(map->bus_context, map->work_buf,
++ map->format.reg_bytes +
++ map->format.pad_bytes +
++ val_len);
+ else if (map->bus->gather_write)
+ ret = map->bus->gather_write(map->bus_context, map->work_buf,
+ map->format.reg_bytes +
+@@ -1801,7 +1814,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
+ memcpy(buf, map->work_buf, map->format.reg_bytes);
+ memcpy(buf + map->format.reg_bytes + map->format.pad_bytes,
+ val, val_len);
+- ret = map->bus->write(map->bus_context, buf, len);
++ ret = map->write(map->bus_context, buf, len);
+
+ kfree(buf);
+ } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) {
+@@ -1858,7 +1871,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
+ struct regmap_range_node *range;
+ struct regmap *map = context;
+
+- WARN_ON(!map->bus || !map->format.format_write);
++ WARN_ON(!map->format.format_write);
+
+ range = _regmap_range_lookup(map, reg);
+ if (range) {
+@@ -1871,8 +1884,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
+
+ trace_regmap_hw_write_start(map, reg, 1);
+
+- ret = map->bus->write(map->bus_context, map->work_buf,
+- map->format.buf_size);
++ ret = map->write(map->bus_context, map->work_buf, map->format.buf_size);
+
+ trace_regmap_hw_write_done(map, reg, 1);
+
+@@ -1892,7 +1904,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
+ {
+ struct regmap *map = context;
+
+- WARN_ON(!map->bus || !map->format.format_val);
++ WARN_ON(!map->format.format_val);
+
+ map->format.format_val(map->work_buf + map->format.reg_bytes
+ + map->format.pad_bytes, val, 0);
+@@ -1906,7 +1918,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
+
+ static inline void *_regmap_map_get_context(struct regmap *map)
+ {
+- return (map->bus) ? map : map->bus_context;
++ return (map->bus || (!map->bus && map->read)) ? map : map->bus_context;
+ }
+
+ int _regmap_write(struct regmap *map, unsigned int reg,
+@@ -2313,7 +2325,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
+ u8 = buf;
+ *u8 |= map->write_flag_mask;
+
+- ret = map->bus->write(map->bus_context, buf, len);
++ ret = map->write(map->bus_context, buf, len);
+
+ kfree(buf);
+
+@@ -2619,9 +2631,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+ struct regmap_range_node *range;
+ int ret;
+
+- WARN_ON(!map->bus);
+-
+- if (!map->bus || !map->bus->read)
++ if (!map->read)
+ return -EINVAL;
+
+ range = _regmap_range_lookup(map, reg);
+@@ -2637,9 +2647,9 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+ map->read_flag_mask);
+ trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
+
+- ret = map->bus->read(map->bus_context, map->work_buf,
+- map->format.reg_bytes + map->format.pad_bytes,
+- val, val_len);
++ ret = map->read(map->bus_context, map->work_buf,
++ map->format.reg_bytes + map->format.pad_bytes,
++ val, val_len);
+
+ trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
+
+@@ -2750,8 +2760,6 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+ unsigned int v;
+ int ret, i;
+
+- if (!map->bus)
+- return -EINVAL;
+ if (val_len % map->format.val_bytes)
+ return -EINVAL;
+ if (!IS_ALIGNED(reg, map->reg_stride))
+@@ -2766,7 +2774,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
+ size_t chunk_count, chunk_bytes;
+ size_t chunk_regs = val_count;
+
+- if (!map->bus->read) {
++ if (!map->read) {
+ ret = -ENOTSUPP;
+ goto out;
+ }
+@@ -2826,7 +2834,7 @@ EXPORT_SYMBOL_GPL(regmap_raw_read);
+ * @val: Pointer to data buffer
+ * @val_len: Length of output buffer in bytes.
+ *
+- * The regmap API usually assumes that bulk bus read operations will read a
++ * The regmap API usually assumes that bulk read operations will read a
+ * range of registers. Some devices have certain registers for which a read
+ * operation read will read from an internal FIFO.
+ *
+@@ -2844,10 +2852,6 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg,
+ size_t read_len;
+ int ret;
+
+- if (!map->bus)
+- return -EINVAL;
+- if (!map->bus->read)
+- return -ENOTSUPP;
+ if (val_len % map->format.val_bytes)
+ return -EINVAL;
+ if (!IS_ALIGNED(reg, map->reg_stride))
+@@ -2961,7 +2965,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
+ if (val_count == 0)
+ return -EINVAL;
+
+- if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
++ if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
+ ret = regmap_raw_read(map, reg, val, val_bytes * val_count);
+ if (ret != 0)
+ return ret;
+diff --git a/include/linux/regmap.h b/include/linux/regmap.h
+index d6f0d876fa424..83a7485de78fb 100644
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -294,6 +294,12 @@ typedef void (*regmap_unlock)(void *);
+ * if the function require special handling with lock and reg
+ * handling and the operation cannot be represented as a simple
+ * update_bits operation on a bus such as SPI, I2C, etc.
++ * @read: Optional callback that if filled will be used to perform all the
++ * bulk reads from the registers. Data is returned in the buffer used
++ * to transmit data.
++ * @write: Same as above for writing.
++ * @max_raw_read: Max raw read size that can be used on the device.
++ * @max_raw_write: Max raw write size that can be used on the device.
+ * @fast_io: Register IO is fast. Use a spinlock instead of a mutex
+ * to perform locking. This field is ignored if custom lock/unlock
+ * functions are used (see fields lock/unlock of struct regmap_config).
+@@ -373,6 +379,12 @@ struct regmap_config {
+ int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+ int (*reg_update_bits)(void *context, unsigned int reg,
+ unsigned int mask, unsigned int val);
++ /* Bulk read/write */
++ int (*read)(void *context, const void *reg_buf, size_t reg_size,
++ void *val_buf, size_t val_size);
++ int (*write)(void *context, const void *data, size_t count);
++ size_t max_raw_read;
++ size_t max_raw_write;
+
+ bool fast_io;
+
+--
+2.43.0
+
--- /dev/null
+From 6e5147c99310b8ead55ed6c777a40e69c6c04a3d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Nov 2021 16:00:40 +0100
+Subject: regmap: allow to define reg_update_bits for no bus configuration
+
+From: Ansuel Smith <ansuelsmth@gmail.com>
+
+[ Upstream commit 02d6fdecb9c38de19065f6bed8d5214556fd061d ]
+
+Some device requires a special handling for reg_update_bits and can't use
+the normal regmap read write logic. An example is when locking is
+handled by the device and rmw operations requires to do atomic operations.
+Allow to declare a dedicated function in regmap_config for
+reg_update_bits in no bus configuration.
+
+Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
+Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/regmap/regmap.c | 1 +
+ include/linux/regmap.h | 7 +++++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
+index 7bc603145bd98..8f39aacdad0dc 100644
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -842,6 +842,7 @@ struct regmap *__regmap_init(struct device *dev,
+ if (!bus) {
+ map->reg_read = config->reg_read;
+ map->reg_write = config->reg_write;
++ map->reg_update_bits = config->reg_update_bits;
+
+ map->defer_caching = false;
+ goto skip_format_initialization;
+diff --git a/include/linux/regmap.h b/include/linux/regmap.h
+index e7834d98207f7..d6f0d876fa424 100644
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -289,6 +289,11 @@ typedef void (*regmap_unlock)(void *);
+ * read operation on a bus such as SPI, I2C, etc. Most of the
+ * devices do not need this.
+ * @reg_write: Same as above for writing.
++ * @reg_update_bits: Optional callback that if filled will be used to perform
++ * all the update_bits(rmw) operation. Should only be provided
++ * if the function require special handling with lock and reg
++ * handling and the operation cannot be represented as a simple
++ * update_bits operation on a bus such as SPI, I2C, etc.
+ * @fast_io: Register IO is fast. Use a spinlock instead of a mutex
+ * to perform locking. This field is ignored if custom lock/unlock
+ * functions are used (see fields lock/unlock of struct regmap_config).
+@@ -366,6 +371,8 @@ struct regmap_config {
+
+ int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
+ int (*reg_write)(void *context, unsigned int reg, unsigned int val);
++ int (*reg_update_bits)(void *context, unsigned int reg,
++ unsigned int mask, unsigned int val);
+
+ bool fast_io;
+
+--
+2.43.0
+
--- /dev/null
+From 10bf73f868331f0eee3a96f64b8c5a59c14151c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Jan 2024 06:14:29 -0700
+Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems
+
+From: Nico Pache <npache@redhat.com>
+
+[ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ]
+
+On systems with 64k page size and 512M huge page sizes, the allocation and
+test succeeds but errors out at the munmap. As the comment states, munmap
+will failure if its not HUGEPAGE aligned. This is due to the length of
+the mapping being 1/2 the size of the hugepage causing the munmap to not
+be hugepage aligned. Fix this by making the mapping length the full
+hugepage if the hugepage is larger than the length of the mapping.
+
+Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com
+Signed-off-by: Nico Pache <npache@redhat.com>
+Cc: Donet Tom <donettom@linux.vnet.ibm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Christophe Leroy <christophe.leroy@c-s.fr>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
+index 312889edb84ab..c65c55b7a789f 100644
+--- a/tools/testing/selftests/vm/map_hugetlb.c
++++ b/tools/testing/selftests/vm/map_hugetlb.c
+@@ -15,6 +15,7 @@
+ #include <unistd.h>
+ #include <sys/mman.h>
+ #include <fcntl.h>
++#include "vm_util.h"
+
+ #define LENGTH (256UL*1024*1024)
+ #define PROTECTION (PROT_READ | PROT_WRITE)
+@@ -70,10 +71,16 @@ int main(int argc, char **argv)
+ {
+ void *addr;
+ int ret;
++ size_t hugepage_size;
+ size_t length = LENGTH;
+ int flags = FLAGS;
+ int shift = 0;
+
++ hugepage_size = default_huge_page_size();
++	/* munmap will fail if the length is not page aligned */
++ if (hugepage_size > length)
++ length = hugepage_size;
++
+ if (argc > 1)
+ length = atol(argv[1]) << 20;
+ if (argc > 2) {
+--
+2.43.0
+
--- /dev/null
+From 94d5ee7a0a350e94c7ba7c41e284fd21934fa921 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 14:04:54 +0500
+Subject: selftests/mm: switch to bash from sh
+
+From: Muhammad Usama Anjum <usama.anjum@collabora.com>
+
+[ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ]
+
+Running charge_reserved_hugetlb.sh generates errors if sh is set to
+dash:
+
+./charge_reserved_hugetlb.sh: 9: [[: not found
+./charge_reserved_hugetlb.sh: 19: [[: not found
+./charge_reserved_hugetlb.sh: 27: [[: not found
+./charge_reserved_hugetlb.sh: 37: [[: not found
+./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected
+
+Switch to using /bin/bash instead of /bin/sh. Make the switch for
+write_hugetlb_memory.sh as well which is called from
+charge_reserved_hugetlb.sh.
+
+Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com
+Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: David Laight <David.Laight@ACULAB.COM>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +-
+ tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+index 7536ff2f890a1..d0107f8ae6213 100644
+--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
+ set -e
+diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+index 70a02301f4c27..3d2d2eb9d6fff 100644
+--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+@@ -1,4 +1,4 @@
+-#!/bin/sh
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
+ set -e
+--
+2.43.0
+
--- /dev/null
+From 0545f60f222499c7ab4f10ef0b2cd8a6c17cf290 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 16:30:00 -0500
+Subject: serial: max310x: fail probe if clock crystal is unstable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+
+[ Upstream commit 8afa6c6decea37e7cb473d2c60473f37f46cea35 ]
+
+A stable clock is really required in order to use this UART, so log an
+error message and bail out if the chip reports that the clock is not
+stable.
+
+Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness")
+Cc: stable@vger.kernel.org
+Suggested-by: Jan Kundrát <jan.kundrat@cesnet.cz>
+Link: https://www.spinics.net/lists/linux-serial/msg35773.html
+Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 0e0f778d75cd4..bbf45c0626681 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
+ return 1;
+ }
+
+-static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
++static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+ unsigned long freq, bool xtal)
+ {
+ unsigned int div, clksrc, pllcfg = 0;
+@@ -626,7 +626,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+ } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES));
+
+ if (!stable)
+- dev_warn(dev, "clock is not stable yet\n");
++ return dev_err_probe(dev, -EAGAIN,
++ "clock is not stable\n");
+ }
+
+ return bestfreq;
+@@ -1266,7 +1267,7 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ {
+ int i, ret, fmin, fmax, freq;
+ struct max310x_port *s;
+- u32 uartclk = 0;
++ s32 uartclk = 0;
+ bool xtal;
+
+ if (IS_ERR(regmap))
+@@ -1350,6 +1351,11 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ }
+
+ uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
++ if (uartclk < 0) {
++ ret = uartclk;
++ goto out_uart;
++ }
++
+ dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk);
+
+ for (i = 0; i < devtype->nr; i++) {
+--
+2.43.0
+
--- /dev/null
+From 32f4aa03c524cbbfdbcdc089e09b2ad40769388d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Apr 2023 22:14:23 +0200
+Subject: serial: max310x: fix IO data corruption in batched operations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jan Kundrát <jan.kundrat@cesnet.cz>
+
+[ Upstream commit 3f42b142ea1171967e40e10e4b0241c0d6d28d41 ]
+
+After upgrading from 5.16 to 6.1, our board with a MAX14830 started
+producing lots of garbage data over UART. Bisection pointed out commit
+285e76fc049c as the culprit. That patch tried to replace hand-written
+code which I added in 2b4bac48c1084 ("serial: max310x: Use batched reads
+when reasonably safe") with the generic regmap infrastructure for
+batched operations.
+
+Unfortunately, the `regmap_raw_read` and `regmap_raw_write` which were
+used are actually functions which perform IO over *multiple* registers.
+That's not what is needed for accessing these Tx/Rx FIFOs; the
+appropriate functions are the `_noinc_` versions, not the `_raw_` ones.
+
+Fix this regression by using `regmap_noinc_read()` and
+`regmap_noinc_write()` along with the necessary `regmap_config` setup;
+with this patch in place, our board communicates happily again. Since
+our board uses SPI for talking to this chip, the I2C part is completely
+untested.
+
+Fixes: 285e76fc049c ("serial: max310x: use regmap methods for SPI batch operations")
+Cc: stable@vger.kernel.org
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Jan Kundrát <jan.kundrat@cesnet.cz>
+Link: https://lore.kernel.org/r/79db8e82aadb0e174bc82b9996423c3503c8fb37.1680732084.git.jan.kundrat@cesnet.cz
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index ed1aaa19854fd..2f88eae8a55a1 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -533,6 +533,11 @@ static bool max310x_reg_precious(struct device *dev, unsigned int reg)
+ return false;
+ }
+
++static bool max310x_reg_noinc(struct device *dev, unsigned int reg)
++{
++ return reg == MAX310X_RHR_REG;
++}
++
+ static int max310x_set_baud(struct uart_port *port, int baud)
+ {
+ unsigned int mode = 0, div = 0, frac = 0, c = 0, F = 0;
+@@ -667,14 +672,14 @@ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int
+ {
+ struct max310x_one *one = to_max310x_port(port);
+
+- regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len);
++ regmap_noinc_write(one->regmap, MAX310X_THR_REG, txbuf, len);
+ }
+
+ static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len)
+ {
+ struct max310x_one *one = to_max310x_port(port);
+
+- regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
++ regmap_noinc_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
+ }
+
+ static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen)
+@@ -1508,6 +1513,10 @@ static struct regmap_config regcfg = {
+ .writeable_reg = max310x_reg_writeable,
+ .volatile_reg = max310x_reg_volatile,
+ .precious_reg = max310x_reg_precious,
++ .writeable_noinc_reg = max310x_reg_noinc,
++ .readable_noinc_reg = max310x_reg_noinc,
++ .max_raw_read = MAX310X_FIFO_SIZE,
++ .max_raw_write = MAX310X_FIFO_SIZE,
+ };
+
+ #ifdef CONFIG_SPI_MASTER
+@@ -1593,6 +1602,10 @@ static struct regmap_config regcfg_i2c = {
+ .volatile_reg = max310x_reg_volatile,
+ .precious_reg = max310x_reg_precious,
+ .max_register = MAX310X_I2C_REVID_EXTREG,
++ .writeable_noinc_reg = max310x_reg_noinc,
++ .readable_noinc_reg = max310x_reg_noinc,
++ .max_raw_read = MAX310X_FIFO_SIZE,
++ .max_raw_write = MAX310X_FIFO_SIZE,
+ };
+
+ static const struct max310x_if_cfg max310x_i2c_if_cfg = {
+--
+2.43.0
+
--- /dev/null
+From fdcfee2740c6daddaa534df38a219b30e9f9f638 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:59 +0300
+Subject: serial: max310x: implement I2C support
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit 2e1f2d9a9bdbe12ee475c82a45ac46a278e8049a ]
+
+I2C implementation on this chip has a few key differences
+compared to SPI, as described in previous patches.
+ * extended register space access needs no extra logic
+ * slave address is used to select which UART to communicate
+ with
+
+To accommodate these differences, add an I2C interface config,
+set the RevID register address and implement an empty method
+for setting the GlobalCommand register, since no special handling
+is needed for the extended register space.
+
+To handle the port-specific slave address, create an I2C dummy
+device for each port, except the base one (UART0), which is
+expected to be the one specified in firmware, and create a
+regmap for each I2C device.
+Add minimum and maximum slave addresses to each devtype for
+sanity checking.
+
+Also, use a separate regmap config with no write_flag_mask,
+since I2C has a R/W bit in its slave address, and set the
+max register to the address of the RevID register, since the
+extended register space needs no extra logic.
+
+Finally, add the I2C driver.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-5-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/Kconfig | 1 +
+ drivers/tty/serial/max310x.c | 135 ++++++++++++++++++++++++++++++++++-
+ 2 files changed, 135 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
+index 28f22e58639c6..bd30ae9751bf5 100644
+--- a/drivers/tty/serial/Kconfig
++++ b/drivers/tty/serial/Kconfig
+@@ -343,6 +343,7 @@ config SERIAL_MAX310X
+ depends on SPI_MASTER
+ select SERIAL_CORE
+ select REGMAP_SPI if SPI_MASTER
++ select REGMAP_I2C if I2C
+ help
+ This selects support for an advanced UART from Maxim (Dallas).
+ Supported ICs are MAX3107, MAX3108, MAX3109, MAX14830.
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index b90281ac54c85..ed1aaa19854fd 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -14,6 +14,7 @@
+ #include <linux/delay.h>
+ #include <linux/device.h>
+ #include <linux/gpio/driver.h>
++#include <linux/i2c.h>
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
+ #include <linux/property.h>
+@@ -73,6 +74,7 @@
+
+ /* Extended registers */
+ #define MAX310X_SPI_REVID_EXTREG MAX310X_REG_05 /* Revision ID */
++#define MAX310X_I2C_REVID_EXTREG (0x25) /* Revision ID */
+
+ /* IRQ register bits */
+ #define MAX310X_IRQ_LSR_BIT (1 << 0) /* LSR interrupt */
+@@ -260,6 +262,10 @@ struct max310x_if_cfg {
+ };
+
+ struct max310x_devtype {
++ struct {
++ unsigned short min;
++ unsigned short max;
++ } slave_addr;
+ char name[9];
+ int nr;
+ u8 mode1;
+@@ -431,6 +437,10 @@ static const struct max310x_devtype max3107_devtype = {
+ .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT | MAX310X_MODE1_IRQSEL_BIT,
+ .detect = max3107_detect,
+ .power = max310x_power,
++ .slave_addr = {
++ .min = 0x2c,
++ .max = 0x2f,
++ },
+ };
+
+ static const struct max310x_devtype max3108_devtype = {
+@@ -439,6 +449,10 @@ static const struct max310x_devtype max3108_devtype = {
+ .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT,
+ .detect = max3108_detect,
+ .power = max310x_power,
++ .slave_addr = {
++ .min = 0x60,
++ .max = 0x6f,
++ },
+ };
+
+ static const struct max310x_devtype max3109_devtype = {
+@@ -447,6 +461,10 @@ static const struct max310x_devtype max3109_devtype = {
+ .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT,
+ .detect = max3109_detect,
+ .power = max310x_power,
++ .slave_addr = {
++ .min = 0x60,
++ .max = 0x6f,
++ },
+ };
+
+ static const struct max310x_devtype max14830_devtype = {
+@@ -455,6 +473,10 @@ static const struct max310x_devtype max14830_devtype = {
+ .mode1 = MAX310X_MODE1_IRQSEL_BIT,
+ .detect = max14830_detect,
+ .power = max14830_power,
++ .slave_addr = {
++ .min = 0x60,
++ .max = 0x6f,
++ },
+ };
+
+ static bool max310x_reg_writeable(struct device *dev, unsigned int reg)
+@@ -1557,6 +1579,97 @@ static struct spi_driver max310x_spi_driver = {
+ };
+ #endif
+
++#ifdef CONFIG_I2C
++static int max310x_i2c_extended_reg_enable(struct device *dev, bool enable)
++{
++ return 0;
++}
++
++static struct regmap_config regcfg_i2c = {
++ .reg_bits = 8,
++ .val_bits = 8,
++ .cache_type = REGCACHE_RBTREE,
++ .writeable_reg = max310x_reg_writeable,
++ .volatile_reg = max310x_reg_volatile,
++ .precious_reg = max310x_reg_precious,
++ .max_register = MAX310X_I2C_REVID_EXTREG,
++};
++
++static const struct max310x_if_cfg max310x_i2c_if_cfg = {
++ .extended_reg_enable = max310x_i2c_extended_reg_enable,
++ .rev_id_reg = MAX310X_I2C_REVID_EXTREG,
++};
++
++static unsigned short max310x_i2c_slave_addr(unsigned short addr,
++ unsigned int nr)
++{
++ /*
++ * For MAX14830 and MAX3109, the slave address depends on what the
++ * A0 and A1 pins are tied to.
++ * See Table I2C Address Map of the datasheet.
++ * Based on that table, the following formulas were determined.
++ * UART1 - UART0 = 0x10
++ * UART2 - UART1 = 0x20 + 0x10
++ * UART3 - UART2 = 0x10
++ */
++
++ addr -= nr * 0x10;
++
++ if (nr >= 2)
++ addr -= 0x20;
++
++ return addr;
++}
++
++static int max310x_i2c_probe(struct i2c_client *client)
++{
++ const struct max310x_devtype *devtype =
++ device_get_match_data(&client->dev);
++ struct i2c_client *port_client;
++ struct regmap *regmaps[4];
++ unsigned int i;
++ u8 port_addr;
++
++ if (client->addr < devtype->slave_addr.min ||
++ client->addr > devtype->slave_addr.max)
++ return dev_err_probe(&client->dev, -EINVAL,
++ "Slave addr 0x%x outside of range [0x%x, 0x%x]\n",
++ client->addr, devtype->slave_addr.min,
++ devtype->slave_addr.max);
++
++ regmaps[0] = devm_regmap_init_i2c(client, ®cfg_i2c);
++
++ for (i = 1; i < devtype->nr; i++) {
++ port_addr = max310x_i2c_slave_addr(client->addr, i);
++ port_client = devm_i2c_new_dummy_device(&client->dev,
++ client->adapter,
++ port_addr);
++
++ regmaps[i] = devm_regmap_init_i2c(port_client, ®cfg_i2c);
++ }
++
++ return max310x_probe(&client->dev, devtype, &max310x_i2c_if_cfg,
++ regmaps, client->irq);
++}
++
++static int max310x_i2c_remove(struct i2c_client *client)
++{
++ max310x_remove(&client->dev);
++
++ return 0;
++}
++
++static struct i2c_driver max310x_i2c_driver = {
++ .driver = {
++ .name = MAX310X_NAME,
++ .of_match_table = max310x_dt_ids,
++ .pm = &max310x_pm_ops,
++ },
++ .probe_new = max310x_i2c_probe,
++ .remove = max310x_i2c_remove,
++};
++#endif
++
+ static int __init max310x_uart_init(void)
+ {
+ int ret;
+@@ -1570,15 +1683,35 @@ static int __init max310x_uart_init(void)
+ #ifdef CONFIG_SPI_MASTER
+ ret = spi_register_driver(&max310x_spi_driver);
+ if (ret)
+- uart_unregister_driver(&max310x_uart);
++ goto err_spi_register;
++#endif
++
++#ifdef CONFIG_I2C
++ ret = i2c_add_driver(&max310x_i2c_driver);
++ if (ret)
++ goto err_i2c_register;
+ #endif
+
++ return 0;
++
++#ifdef CONFIG_I2C
++err_i2c_register:
++ spi_unregister_driver(&max310x_spi_driver);
++#endif
++
++err_spi_register:
++ uart_unregister_driver(&max310x_uart);
++
+ return ret;
+ }
+ module_init(max310x_uart_init);
+
+ static void __exit max310x_uart_exit(void)
+ {
++#ifdef CONFIG_I2C
++ i2c_del_driver(&max310x_i2c_driver);
++#endif
++
+ #ifdef CONFIG_SPI_MASTER
+ spi_unregister_driver(&max310x_spi_driver);
+ #endif
+--
+2.43.0
+
--- /dev/null
+From 6bb1445dd5bb1d4549f294e07cad6f3dd9b5d476 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:58 +0300
+Subject: serial: max310x: make accessing revision id interface-agnostic
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit b3883ab5e95713e479f774ea68be275413e8e5b2 ]
+
+SPI can only use 5 address bits, since one bit is reserved for
+specifying R/W and 2 bits are used to specify the UART port.
+To access registers that have addresses past 0x1F, an extended
+register space can be enabled by writing to the GlobalCommand
+register (address 0x1F).
+
+I2C uses 8 address bits. The R/W bit is placed in the slave
+address, and so is the UART port. Because of this, registers
+that have addresses higher than 0x1F can be accessed normally.
+
+To access the RevID register, on SPI, 0xCE must be written to
+the 0x1F address to enable the extended register space, after
+which the RevID register is accessible at address 0x5. 0xCD
+must be written to the 0x1F address to disable the extended
+register space.
+
+On I2C, the RevID register is accessible at address 0x25.
+
+Create an interface config struct, and add a method for
+toggling the extended register space and a member for the RevId
+register address. Implement these for SPI.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-4-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++++++---------
+ 1 file changed, 30 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index a09ec46e0310d..b90281ac54c85 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -72,7 +72,7 @@
+ #define MAX310X_GLOBALCMD_REG MAX310X_REG_1F /* Global Command (WO) */
+
+ /* Extended registers */
+-#define MAX310X_REVID_EXTREG MAX310X_REG_05 /* Revision ID */
++#define MAX310X_SPI_REVID_EXTREG MAX310X_REG_05 /* Revision ID */
+
+ /* IRQ register bits */
+ #define MAX310X_IRQ_LSR_BIT (1 << 0) /* LSR interrupt */
+@@ -253,6 +253,12 @@
+ #define MAX14830_BRGCFG_CLKDIS_BIT (1 << 6) /* Clock Disable */
+ #define MAX14830_REV_ID (0xb0)
+
++struct max310x_if_cfg {
++ int (*extended_reg_enable)(struct device *dev, bool enable);
++
++ unsigned int rev_id_reg;
++};
++
+ struct max310x_devtype {
+ char name[9];
+ int nr;
+@@ -275,6 +281,7 @@ struct max310x_one {
+
+ struct max310x_port {
+ const struct max310x_devtype *devtype;
++ const struct max310x_if_cfg *if_cfg;
+ struct regmap *regmap;
+ struct clk *clk;
+ #ifdef CONFIG_GPIOLIB
+@@ -364,13 +371,12 @@ static int max3109_detect(struct device *dev)
+ unsigned int val = 0;
+ int ret;
+
+- ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG,
+- MAX310X_EXTREG_ENBL);
++ ret = s->if_cfg->extended_reg_enable(dev, true);
+ if (ret)
+ return ret;
+
+- regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val);
+- regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL);
++ regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val);
++ s->if_cfg->extended_reg_enable(dev, false);
+ if (((val & MAX310x_REV_MASK) != MAX3109_REV_ID)) {
+ dev_err(dev,
+ "%s ID 0x%02x does not match\n", s->devtype->name, val);
+@@ -395,13 +401,12 @@ static int max14830_detect(struct device *dev)
+ unsigned int val = 0;
+ int ret;
+
+- ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG,
+- MAX310X_EXTREG_ENBL);
++ ret = s->if_cfg->extended_reg_enable(dev, true);
+ if (ret)
+ return ret;
+
+- regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val);
+- regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL);
++ regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val);
++ s->if_cfg->extended_reg_enable(dev, false);
+ if (((val & MAX310x_REV_MASK) != MAX14830_REV_ID)) {
+ dev_err(dev,
+ "%s ID 0x%02x does not match\n", s->devtype->name, val);
+@@ -1250,6 +1255,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ #endif
+
+ static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype,
++ const struct max310x_if_cfg *if_cfg,
+ struct regmap *regmaps[], int irq)
+ {
+ int i, ret, fmin, fmax, freq;
+@@ -1313,6 +1319,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+
+ s->regmap = regmaps[0];
+ s->devtype = devtype;
++ s->if_cfg = if_cfg;
+ dev_set_drvdata(dev, s);
+
+ /* Check device to ensure we are talking to what we expect */
+@@ -1482,6 +1489,19 @@ static struct regmap_config regcfg = {
+ };
+
+ #ifdef CONFIG_SPI_MASTER
++static int max310x_spi_extended_reg_enable(struct device *dev, bool enable)
++{
++ struct max310x_port *s = dev_get_drvdata(dev);
++
++ return regmap_write(s->regmap, MAX310X_GLOBALCMD_REG,
++ enable ? MAX310X_EXTREG_ENBL : MAX310X_EXTREG_DSBL);
++}
++
++static const struct max310x_if_cfg __maybe_unused max310x_spi_if_cfg = {
++ .extended_reg_enable = max310x_spi_extended_reg_enable,
++ .rev_id_reg = MAX310X_SPI_REVID_EXTREG,
++};
++
+ static int max310x_spi_probe(struct spi_device *spi)
+ {
+ const struct max310x_devtype *devtype;
+@@ -1508,7 +1528,7 @@ static int max310x_spi_probe(struct spi_device *spi)
+ regmaps[i] = devm_regmap_init_spi(spi, ®cfg);
+ }
+
+- return max310x_probe(&spi->dev, devtype, regmaps, spi->irq);
++ return max310x_probe(&spi->dev, devtype, &max310x_spi_if_cfg, regmaps, spi->irq);
+ }
+
+ static int max310x_spi_remove(struct spi_device *spi)
+--
+2.43.0
+
--- /dev/null
+From 866a5aceec6bce05749d0a998ceb472da9dd98b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Oct 2020 11:46:34 +0300
+Subject: serial: max310x: Make use of device properties
+
+From: Andy Shevchenko <andy.shevchenko@gmail.com>
+
+[ Upstream commit c808fab604ca62cff19ee6b261211483830807aa ]
+
+Device property API allows to gather device resources from different sources,
+such as ACPI. Convert the drivers to unleash the power of device property API.
+
+Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20201007084635.594991-1-andy.shevchenko@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 27 +++++++++------------------
+ 1 file changed, 9 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index bbf45c0626681..8d42c537ee5ea 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -15,8 +15,8 @@
+ #include <linux/device.h>
+ #include <linux/gpio/driver.h>
+ #include <linux/module.h>
+-#include <linux/of.h>
+-#include <linux/of_device.h>
++#include <linux/mod_devicetable.h>
++#include <linux/property.h>
+ #include <linux/regmap.h>
+ #include <linux/serial_core.h>
+ #include <linux/serial.h>
+@@ -271,7 +271,7 @@ struct max310x_one {
+ container_of(_port, struct max310x_one, port)
+
+ struct max310x_port {
+- struct max310x_devtype *devtype;
++ const struct max310x_devtype *devtype;
+ struct regmap *regmap;
+ struct clk *clk;
+ #ifdef CONFIG_GPIOLIB
+@@ -1262,7 +1262,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ }
+ #endif
+
+-static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
++static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype,
+ struct regmap *regmap, int irq)
+ {
+ int i, ret, fmin, fmax, freq;
+@@ -1488,7 +1488,7 @@ static struct regmap_config regcfg = {
+ #ifdef CONFIG_SPI_MASTER
+ static int max310x_spi_probe(struct spi_device *spi)
+ {
+- struct max310x_devtype *devtype;
++ const struct max310x_devtype *devtype;
+ struct regmap *regmap;
+ int ret;
+
+@@ -1500,18 +1500,9 @@ static int max310x_spi_probe(struct spi_device *spi)
+ if (ret)
+ return ret;
+
+- if (spi->dev.of_node) {
+- const struct of_device_id *of_id =
+- of_match_device(max310x_dt_ids, &spi->dev);
+- if (!of_id)
+- return -ENODEV;
+-
+- devtype = (struct max310x_devtype *)of_id->data;
+- } else {
+- const struct spi_device_id *id_entry = spi_get_device_id(spi);
+-
+- devtype = (struct max310x_devtype *)id_entry->driver_data;
+- }
++ devtype = device_get_match_data(&spi->dev);
++ if (!devtype)
++ devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data;
+
+ regcfg.max_register = devtype->nr * 0x20 - 1;
+ regmap = devm_regmap_init_spi(spi, ®cfg);
+@@ -1536,7 +1527,7 @@ MODULE_DEVICE_TABLE(spi, max310x_id_table);
+ static struct spi_driver max310x_spi_driver = {
+ .driver = {
+ .name = MAX310X_NAME,
+- .of_match_table = of_match_ptr(max310x_dt_ids),
++ .of_match_table = max310x_dt_ids,
+ .pm = &max310x_pm_ops,
+ },
+ .probe = max310x_spi_probe,
+--
+2.43.0
+
--- /dev/null
+From 86c63668f8208c860ff435e1ce718253043893a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Jan 2024 16:30:01 -0500
+Subject: serial: max310x: prevent infinite while() loop in port startup
+
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+
+[ Upstream commit b35f8dbbce818b02c730dc85133dc7754266e084 ]
+
+If there is a problem after resetting a port, the do/while() loop that
+checks the default value of DIVLSB register may run forever and spam the
+I2C bus.
+
+Add a delay before each read of DIVLSB, and a maximum number of tries to
+prevent that situation from happening.
+
+Also fail probe if port reset is unsuccessful.
+
+Fixes: 10d8b34a4217 ("serial: max310x: Driver rework")
+Cc: stable@vger.kernel.org
+Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 80298a5714bcb..978d9d93127e5 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -235,6 +235,10 @@
+ #define MAX310x_REV_MASK (0xf8)
+ #define MAX310X_WRITE_BIT 0x80
+
++/* Port startup definitions */
++#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */
++#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */
++
+ /* Crystal-related definitions */
+ #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */
+ #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */
+@@ -1316,6 +1320,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+ goto out_clk;
+
+ for (i = 0; i < devtype->nr; i++) {
++ bool started = false;
++ unsigned int try = 0, val = 0;
++
+ /* Reset port */
+ regmap_write(regmaps[i], MAX310X_MODE2_REG,
+ MAX310X_MODE2_RST_BIT);
+@@ -1324,8 +1331,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+
+ /* Wait for port startup */
+ do {
+- regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
+- } while (ret != 0x01);
++ msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS);
++ regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val);
++
++ if (val == 0x01)
++ started = true;
++ } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES));
++
++ if (!started) {
++ ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n");
++ goto out_uart;
++ }
+
+ regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
+ }
+--
+2.43.0
+
--- /dev/null
+From fbb1b8f42c124d4d27ab9da383decb1ce9c6fe16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 May 2021 20:29:30 +0300
+Subject: serial: max310x: Try to get crystal clock rate from property
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit d4d6f03c4fb3a91dadfe147b47edd40e4d7e4d36 ]
+
+In some configurations, mainly ACPI-based, the clock frequency of the device
+is supplied by very well established 'clock-frequency' property. Hence, try
+to get it from the property at last if no other providers are available.
+
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20210517172930.83353-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++-------------
+ 1 file changed, 26 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 8bf3c5ab59431..0e0f778d75cd4 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
+ return 1;
+ }
+
+-static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
++static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+ unsigned long freq, bool xtal)
+ {
+ unsigned int div, clksrc, pllcfg = 0;
+@@ -629,7 +629,7 @@ static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+ dev_warn(dev, "clock is not stable yet\n");
+ }
+
+- return (int)bestfreq;
++ return bestfreq;
+ }
+
+ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len)
+@@ -1264,9 +1264,10 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ struct regmap *regmap, int irq)
+ {
+- int i, ret, fmin, fmax, freq, uartclk;
++ int i, ret, fmin, fmax, freq;
+ struct max310x_port *s;
+- bool xtal = false;
++ u32 uartclk = 0;
++ bool xtal;
+
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+@@ -1278,24 +1279,20 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ return -ENOMEM;
+ }
+
++ /* Always ask for fixed clock rate from a property. */
++ device_property_read_u32(dev, "clock-frequency", &uartclk);
++
+ s->clk = devm_clk_get_optional(dev, "osc");
+ if (IS_ERR(s->clk))
+ return PTR_ERR(s->clk);
+ if (s->clk) {
+- fmin = 500000;
+- fmax = 35000000;
++ xtal = false;
+ } else {
+ s->clk = devm_clk_get_optional(dev, "xtal");
+ if (IS_ERR(s->clk))
+ return PTR_ERR(s->clk);
+- if (s->clk) {
+- fmin = 1000000;
+- fmax = 4000000;
+- xtal = true;
+- } else {
+- dev_err(dev, "Cannot get clock\n");
+- return -EINVAL;
+- }
++
++ xtal = true;
+ }
+
+ ret = clk_prepare_enable(s->clk);
+@@ -1303,6 +1300,21 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ return ret;
+
+ freq = clk_get_rate(s->clk);
++ if (freq == 0)
++ freq = uartclk;
++ if (freq == 0) {
++ dev_err(dev, "Cannot get clock rate\n");
++ return -EINVAL;
++ }
++
++ if (xtal) {
++ fmin = 1000000;
++ fmax = 4000000;
++ } else {
++ fmin = 500000;
++ fmax = 35000000;
++ }
++
+ /* Check frequency limits */
+ if (freq < fmin || freq > fmax) {
+ ret = -ERANGE;
+--
+2.43.0
+
--- /dev/null
+From 96e94efbea517370c304ba5f5f0689deaefd232a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Jun 2021 18:37:33 +0300
+Subject: serial: max310x: Unprepare and disable clock in error path
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 ]
+
+In one error case the clock may be left prepared and enabled.
+Unprepare and disable clock in that case to balance state of
+the hardware.
+
+Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property")
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 978d9d93127e5..a09ec46e0310d 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+ freq = uartclk;
+ if (freq == 0) {
+ dev_err(dev, "Cannot get clock rate\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto out_clk;
+ }
+
+ if (xtal) {
+--
+2.43.0
+
--- /dev/null
+From bd91908dc08161b09b7f50372e564bfa3381c1b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:57 +0300
+Subject: serial: max310x: use a separate regmap for each port
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit 6ef281daf020592c219fa91780abc381c6c20db5 ]
+
+The driver currently does manual register manipulation in
+multiple places to talk to a specific UART port.
+
+In order to talk to a specific UART port over SPI, the bits U1
+and U0 of the register address can be set, as explained in the
+Command byte configuration section of the datasheet.
+
+Make this more elegant by creating regmaps for each UART port
+and setting the read_flag_mask and write_flag_mask
+accordingly.
+
+All communications regarding global registers are done on UART
+port 0, so replace the global regmap entirely with the port 0
+regmap.
+
+Also, remove the 0x1f masks from reg_writeable(), reg_volatile()
+and reg_precious() methods, since setting the U1 and U0 bits of
+the register address happens inside the regmap core now.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-3-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 68 +++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index c0fa4ad104774..80298a5714bcb 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -262,6 +262,7 @@ struct max310x_one {
+ struct work_struct tx_work;
+ struct work_struct md_work;
+ struct work_struct rs_work;
++ struct regmap *regmap;
+
+ u8 rx_buf[MAX310X_FIFO_SIZE];
+ };
+@@ -291,26 +292,26 @@ static DECLARE_BITMAP(max310x_lines, MAX310X_UART_NRMAX);
+
+ static u8 max310x_port_read(struct uart_port *port, u8 reg)
+ {
+- struct max310x_port *s = dev_get_drvdata(port->dev);
++ struct max310x_one *one = to_max310x_port(port);
+ unsigned int val = 0;
+
+- regmap_read(s->regmap, port->iobase + reg, &val);
++ regmap_read(one->regmap, reg, &val);
+
+ return val;
+ }
+
+ static void max310x_port_write(struct uart_port *port, u8 reg, u8 val)
+ {
+- struct max310x_port *s = dev_get_drvdata(port->dev);
++ struct max310x_one *one = to_max310x_port(port);
+
+- regmap_write(s->regmap, port->iobase + reg, val);
++ regmap_write(one->regmap, reg, val);
+ }
+
+ static void max310x_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val)
+ {
+- struct max310x_port *s = dev_get_drvdata(port->dev);
++ struct max310x_one *one = to_max310x_port(port);
+
+- regmap_update_bits(s->regmap, port->iobase + reg, mask, val);
++ regmap_update_bits(one->regmap, reg, mask, val);
+ }
+
+ static int max3107_detect(struct device *dev)
+@@ -449,7 +450,7 @@ static const struct max310x_devtype max14830_devtype = {
+
+ static bool max310x_reg_writeable(struct device *dev, unsigned int reg)
+ {
+- switch (reg & 0x1f) {
++ switch (reg) {
+ case MAX310X_IRQSTS_REG:
+ case MAX310X_LSR_IRQSTS_REG:
+ case MAX310X_SPCHR_IRQSTS_REG:
+@@ -466,7 +467,7 @@ static bool max310x_reg_writeable(struct device *dev, unsigned int reg)
+
+ static bool max310x_reg_volatile(struct device *dev, unsigned int reg)
+ {
+- switch (reg & 0x1f) {
++ switch (reg) {
+ case MAX310X_RHR_REG:
+ case MAX310X_IRQSTS_REG:
+ case MAX310X_LSR_IRQSTS_REG:
+@@ -488,7 +489,7 @@ static bool max310x_reg_volatile(struct device *dev, unsigned int reg)
+
+ static bool max310x_reg_precious(struct device *dev, unsigned int reg)
+ {
+- switch (reg & 0x1f) {
++ switch (reg) {
+ case MAX310X_RHR_REG:
+ case MAX310X_IRQSTS_REG:
+ case MAX310X_SPCHR_IRQSTS_REG:
+@@ -633,18 +634,16 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+
+ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len)
+ {
+- struct max310x_port *s = dev_get_drvdata(port->dev);
+- u8 reg = port->iobase + MAX310X_THR_REG;
++ struct max310x_one *one = to_max310x_port(port);
+
+- regmap_raw_write(s->regmap, reg, txbuf, len);
++ regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len);
+ }
+
+ static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len)
+ {
+- struct max310x_port *s = dev_get_drvdata(port->dev);
+- u8 reg = port->iobase + MAX310X_RHR_REG;
++ struct max310x_one *one = to_max310x_port(port);
+
+- regmap_raw_read(s->regmap, reg, rxbuf, len);
++ regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
+ }
+
+ static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen)
+@@ -1247,15 +1246,16 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+ #endif
+
+ static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype,
+- struct regmap *regmap, int irq)
++ struct regmap *regmaps[], int irq)
+ {
+ int i, ret, fmin, fmax, freq;
+ struct max310x_port *s;
+ s32 uartclk = 0;
+ bool xtal;
+
+- if (IS_ERR(regmap))
+- return PTR_ERR(regmap);
++ for (i = 0; i < devtype->nr; i++)
++ if (IS_ERR(regmaps[i]))
++ return PTR_ERR(regmaps[i]);
+
+ /* Alloc port structure */
+ s = devm_kzalloc(dev, struct_size(s, p, devtype->nr), GFP_KERNEL);
+@@ -1306,7 +1306,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+ goto out_clk;
+ }
+
+- s->regmap = regmap;
++ s->regmap = regmaps[0];
+ s->devtype = devtype;
+ dev_set_drvdata(dev, s);
+
+@@ -1316,22 +1316,18 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+ goto out_clk;
+
+ for (i = 0; i < devtype->nr; i++) {
+- unsigned int offs = i << 5;
+-
+ /* Reset port */
+- regmap_write(s->regmap, MAX310X_MODE2_REG + offs,
++ regmap_write(regmaps[i], MAX310X_MODE2_REG,
+ MAX310X_MODE2_RST_BIT);
+ /* Clear port reset */
+- regmap_write(s->regmap, MAX310X_MODE2_REG + offs, 0);
++ regmap_write(regmaps[i], MAX310X_MODE2_REG, 0);
+
+ /* Wait for port startup */
+ do {
+- regmap_read(s->regmap,
+- MAX310X_BRGDIVLSB_REG + offs, &ret);
++ regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
+ } while (ret != 0x01);
+
+- regmap_write(s->regmap, MAX310X_MODE1_REG + offs,
+- devtype->mode1);
++ regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
+ }
+
+ uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
+@@ -1359,11 +1355,13 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+ s->p[i].port.fifosize = MAX310X_FIFO_SIZE;
+ s->p[i].port.flags = UPF_FIXED_TYPE | UPF_LOW_LATENCY;
+ s->p[i].port.iotype = UPIO_PORT;
+- s->p[i].port.iobase = i * 0x20;
++ s->p[i].port.iobase = i;
+ s->p[i].port.membase = (void __iomem *)~0;
+ s->p[i].port.uartclk = uartclk;
+ s->p[i].port.rs485_config = max310x_rs485_config;
+ s->p[i].port.ops = &max310x_ops;
++ s->p[i].regmap = regmaps[i];
++
+ /* Disable all interrupts */
+ max310x_port_write(&s->p[i].port, MAX310X_IRQEN_REG, 0);
+ /* Clear IRQ status register */
+@@ -1460,6 +1458,7 @@ static struct regmap_config regcfg = {
+ .val_bits = 8,
+ .write_flag_mask = MAX310X_WRITE_BIT,
+ .cache_type = REGCACHE_RBTREE,
++ .max_register = MAX310X_REG_1F,
+ .writeable_reg = max310x_reg_writeable,
+ .volatile_reg = max310x_reg_volatile,
+ .precious_reg = max310x_reg_precious,
+@@ -1469,7 +1468,8 @@ static struct regmap_config regcfg = {
+ static int max310x_spi_probe(struct spi_device *spi)
+ {
+ const struct max310x_devtype *devtype;
+- struct regmap *regmap;
++ struct regmap *regmaps[4];
++ unsigned int i;
+ int ret;
+
+ /* Setup SPI bus */
+@@ -1484,10 +1484,14 @@ static int max310x_spi_probe(struct spi_device *spi)
+ if (!devtype)
+ devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data;
+
+- regcfg.max_register = devtype->nr * 0x20 - 1;
+- regmap = devm_regmap_init_spi(spi, ®cfg);
++ for (i = 0; i < devtype->nr; i++) {
++ u8 port_mask = i * 0x20;
++ regcfg.read_flag_mask = port_mask;
++ regcfg.write_flag_mask = port_mask | MAX310X_WRITE_BIT;
++ regmaps[i] = devm_regmap_init_spi(spi, ®cfg);
++ }
+
+- return max310x_probe(&spi->dev, devtype, regmap, spi->irq);
++ return max310x_probe(&spi->dev, devtype, regmaps, spi->irq);
+ }
+
+ static int max310x_spi_remove(struct spi_device *spi)
+--
+2.43.0
+
--- /dev/null
+From a4b4ca6718f9264e4ceb95efdbc44618ef8102ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Oct 2020 11:46:35 +0300
+Subject: serial: max310x: Use devm_clk_get_optional() to get the input clock
+
+From: Andy Shevchenko <andy.shevchenko@gmail.com>
+
+[ Upstream commit 974e454d6f96da0c0ab1b4115b92587dd9406f6a ]
+
+Simplify the code which fetches the input clock by using
+devm_clk_get_optional(). If no input clock is present
+devm_clk_get_optional() will return NULL instead of an error
+which matches the behavior of the old code.
+
+Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20201007084635.594991-2-andy.shevchenko@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 14537878f9855..8bf3c5ab59431 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -1265,7 +1265,6 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ struct regmap *regmap, int irq)
+ {
+ int i, ret, fmin, fmax, freq, uartclk;
+- struct clk *clk_osc, *clk_xtal;
+ struct max310x_port *s;
+ bool xtal = false;
+
+@@ -1279,23 +1278,24 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype,
+ return -ENOMEM;
+ }
+
+- clk_osc = devm_clk_get(dev, "osc");
+- clk_xtal = devm_clk_get(dev, "xtal");
+- if (!IS_ERR(clk_osc)) {
+- s->clk = clk_osc;
++ s->clk = devm_clk_get_optional(dev, "osc");
++ if (IS_ERR(s->clk))
++ return PTR_ERR(s->clk);
++ if (s->clk) {
+ fmin = 500000;
+ fmax = 35000000;
+- } else if (!IS_ERR(clk_xtal)) {
+- s->clk = clk_xtal;
+- fmin = 1000000;
+- fmax = 4000000;
+- xtal = true;
+- } else if (PTR_ERR(clk_osc) == -EPROBE_DEFER ||
+- PTR_ERR(clk_xtal) == -EPROBE_DEFER) {
+- return -EPROBE_DEFER;
+ } else {
+- dev_err(dev, "Cannot get clock\n");
+- return -EINVAL;
++ s->clk = devm_clk_get_optional(dev, "xtal");
++ if (IS_ERR(s->clk))
++ return PTR_ERR(s->clk);
++ if (s->clk) {
++ fmin = 1000000;
++ fmax = 4000000;
++ xtal = true;
++ } else {
++ dev_err(dev, "Cannot get clock\n");
++ return -EINVAL;
++ }
+ }
+
+ ret = clk_prepare_enable(s->clk);
+--
+2.43.0
+
--- /dev/null
+From 9ef224786adcc93be42e08d3aaf13a93699d18b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 Jun 2022 17:46:56 +0300
+Subject: serial: max310x: use regmap methods for SPI batch operations
+
+From: Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+[ Upstream commit 285e76fc049c4d32c772eea9460a7ef28a193802 ]
+
+The SPI batch read/write operations can be implemented as simple
+regmap raw read and write, which will also try to do a gather
+write just as it is done here.
+
+Use the regmap raw read and write methods.
+
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
+Link: https://lore.kernel.org/r/20220605144659.4169853-2-demonsingur@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/max310x.c | 36 ++++++++----------------------------
+ 1 file changed, 8 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
+index 8d42c537ee5ea..c0fa4ad104774 100644
+--- a/drivers/tty/serial/max310x.c
++++ b/drivers/tty/serial/max310x.c
+@@ -263,8 +263,6 @@ struct max310x_one {
+ struct work_struct md_work;
+ struct work_struct rs_work;
+
+- u8 wr_header;
+- u8 rd_header;
+ u8 rx_buf[MAX310X_FIFO_SIZE];
+ };
+ #define to_max310x_port(_port) \
+@@ -635,32 +633,18 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+
+ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len)
+ {
+- struct max310x_one *one = to_max310x_port(port);
+- struct spi_transfer xfer[] = {
+- {
+- .tx_buf = &one->wr_header,
+- .len = sizeof(one->wr_header),
+- }, {
+- .tx_buf = txbuf,
+- .len = len,
+- }
+- };
+- spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer));
++ struct max310x_port *s = dev_get_drvdata(port->dev);
++ u8 reg = port->iobase + MAX310X_THR_REG;
++
++ regmap_raw_write(s->regmap, reg, txbuf, len);
+ }
+
+ static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len)
+ {
+- struct max310x_one *one = to_max310x_port(port);
+- struct spi_transfer xfer[] = {
+- {
+- .tx_buf = &one->rd_header,
+- .len = sizeof(one->rd_header),
+- }, {
+- .rx_buf = rxbuf,
+- .len = len,
+- }
+- };
+- spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer));
++ struct max310x_port *s = dev_get_drvdata(port->dev);
++ u8 reg = port->iobase + MAX310X_RHR_REG;
++
++ regmap_raw_read(s->regmap, reg, rxbuf, len);
+ }
+
+ static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen)
+@@ -1390,10 +1374,6 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
+ INIT_WORK(&s->p[i].md_work, max310x_md_proc);
+ /* Initialize queue for changing RS485 mode */
+ INIT_WORK(&s->p[i].rs_work, max310x_rs_proc);
+- /* Initialize SPI-transfer buffers */
+- s->p[i].wr_header = (s->p[i].port.iobase + MAX310X_THR_REG) |
+- MAX310X_WRITE_BIT;
+- s->p[i].rd_header = (s->p[i].port.iobase + MAX310X_RHR_REG);
+
+ /* Register port */
+ ret = uart_add_one_port(&max310x_uart, &s->p[i].port);
+--
+2.43.0
+
netrom-fix-a-data-race-around-sysctl_netrom_routing_.patch
netrom-fix-a-data-race-around-sysctl_netrom_link_fai.patch
netrom-fix-data-races-around-sysctl_net_busy_read.patch
+nfsd-modernize-nfsd4_release_lockowner.patch
+nfsd-add-documenting-comment-for-nfsd4_release_locko.patch
+nfsd-fix-release_lockowner.patch
+selftests-mm-switch-to-bash-from-sh.patch
+selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch
+um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch
+um-fix-adding-no-pie-for-clang.patch
+xhci-remove-extra-loop-in-interrupt-context.patch
+xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch
+xhci-process-isoc-td-properly-when-there-was-a-trans.patch
+xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch
+serial-max310x-use-devm_clk_get_optional-to-get-the-.patch
+serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch
+serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch
+serial-max310x-make-use-of-device-properties.patch
+serial-max310x-use-regmap-methods-for-spi-batch-oper.patch
+serial-max310x-use-a-separate-regmap-for-each-port.patch
+serial-max310x-prevent-infinite-while-loop-in-port-s.patch
+net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch
+bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch
+lsm-make-security_socket_getpeersec_stream-sockptr_t.patch
+lsm-fix-default-return-value-of-the-socket_getpeerse.patch
+ext4-make-ext4_es_insert_extent-return-void.patch
+ext4-refactor-ext4_da_map_blocks.patch
+ext4-convert-to-exclusive-lock-while-inserting-delal.patch
+drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch
+hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch
+hv_netvsc-wait-for-completion-on-request-switch_data.patch
+hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch
+hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch
+hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch
+hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch
+mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch
+mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch
+getrusage-add-the-signal_struct-sig-local-variable.patch
+getrusage-move-thread_group_cputime_adjusted-outside.patch
+getrusage-use-__for_each_thread.patch
+getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch
+exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch
+exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch
+serial-max310x-unprepare-and-disable-clock-in-error-.patch
+drivers-hv-vmbus-drop-error-message-when-no-request-.patch
+nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
+regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch
+regmap-add-bulk-read-write-callbacks-into-regmap_con.patch
+serial-max310x-make-accessing-revision-id-interface-.patch
+serial-max310x-implement-i2c-support.patch
+serial-max310x-fix-io-data-corruption-in-batched-ope.patch
--- /dev/null
+From a62300c8e6ec3597b4c1f4360679e84b12a3a1e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Mar 2021 14:02:37 +0100
+Subject: um: allow not setting extra rpaths in the linux binary
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 386093c68ba3e8bcfe7f46deba901e0e80713c29 ]
+
+There doesn't seem to be any reason for the rpath being set in
+the binaries, at on systems that I tested on. On the other hand,
+setting rpath is actually harming binaries in some cases, e.g.
+if using nix-based compilation environments where /lib & /lib64
+are not part of the actual environment.
+
+Add a new Kconfig option (under EXPERT, for less user confusion)
+that allows disabling the rpath additions.
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/um/Kconfig | 13 +++++++++++++
+ arch/um/Makefile | 3 ++-
+ arch/x86/Makefile.um | 2 +-
+ 3 files changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/arch/um/Kconfig b/arch/um/Kconfig
+index eb1c6880bde49..20264b47dcffc 100644
+--- a/arch/um/Kconfig
++++ b/arch/um/Kconfig
+@@ -92,6 +92,19 @@ config LD_SCRIPT_DYN
+ depends on !LD_SCRIPT_STATIC
+ select MODULE_REL_CRCS if MODVERSIONS
+
++config LD_SCRIPT_DYN_RPATH
++ bool "set rpath in the binary" if EXPERT
++ default y
++ depends on LD_SCRIPT_DYN
++ help
++ Add /lib (and /lib64 for 64-bit) to the linux binary's rpath
++ explicitly.
++
++ You may need to turn this off if compiling for nix systems
++ that have their libraries in random /nix directories and
++ might otherwise unexpected use libraries from /lib or /lib64
++ instead of the desired ones.
++
+ config HOSTFS
+ tristate "Host filesystem"
+ help
+diff --git a/arch/um/Makefile b/arch/um/Makefile
+index 56e5320da7624..4211e23a2f68f 100644
+--- a/arch/um/Makefile
++++ b/arch/um/Makefile
+@@ -118,7 +118,8 @@ archprepare:
+ $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
+
+ LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
+-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
++LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
++LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
+
+ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
+ -fno-stack-protector $(call cc-option, -fno-stack-protector-all)
+diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
+index 1db7913795f51..b3c1ae084180d 100644
+--- a/arch/x86/Makefile.um
++++ b/arch/x86/Makefile.um
+@@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64
+
+ # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
+
+-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
++LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64
+ LINK-y += -m64
+
+ endif
+--
+2.43.0
+
--- /dev/null
+From e9d85964ffc6c7b86d09408ccde4e85c228101ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 15:59:54 -0700
+Subject: um: Fix adding '-no-pie' for clang
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+[ Upstream commit 846cfbeed09b45d985079a9173cf390cc053715b ]
+
+The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link
+vmlinux with -no-pie") added the compiler linker flag '-no-pie' via
+cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer.
+
+While this works for GCC, this does not work for clang because cc-option
+uses '-c', which stops the pipeline right before linking, so '-no-pie'
+is unconsumed and clang warns, causing cc-option to fail just as it
+would if the option was entirely unsupported:
+
+ $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null
+ clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument]
+
+A recent version of clang exposes this because it generates a relocation
+under '-mcmodel=large' that is not supported in PIE mode:
+
+ /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE
+ /usr/sbin/ld: failed to set dynamic section sizes: bad value
+ clang: error: linker command failed with exit code 1 (use -v to see invocation)
+
+Remove the cc-option check altogether. It is wasteful to invoke the
+compiler to check for '-no-pie' because only one supported compiler
+version does not support it, GCC 5.x (as it is supported with the
+minimum version of clang and GCC 6.1.0+). Use a combination of the
+gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add
+'-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all
+compilers that support this. Furthermore, using gcc-min-version can help
+turn this back into
+
+ LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie
+
+when the minimum version of GCC is bumped past 6.1.0.
+
+Cc: stable@vger.kernel.org
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1982
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/um/Makefile | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/um/Makefile b/arch/um/Makefile
+index 4211e23a2f68f..81d35b1f315ae 100644
+--- a/arch/um/Makefile
++++ b/arch/um/Makefile
+@@ -118,7 +118,9 @@ archprepare:
+ $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
+
+ LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
+-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
++ifdef CONFIG_LD_SCRIPT_DYN
++LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie
++endif
+ LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
+
+ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
+--
+2.43.0
+
--- /dev/null
+From 1750a9b03f733605bfe32a0394c292f09ec37f8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:37 +0200
+Subject: xhci: handle isoc Babble and Buffer Overrun events properly
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+[ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ]
+
+xHCI 4.9 explicitly forbids assuming that the xHC has released its
+ownership of a multi-TRB TD when it reports an error on one of the
+early TRBs. Yet the driver makes such assumption and releases the TD,
+allowing the remaining TRBs to be freed or overwritten by new TDs.
+
+The xHC should also report completion of the final TRB due to its IOC
+flag being set by us, regardless of prior errors. This event cannot
+be recognized if the TD has already been freed earlier, resulting in
+"Transfer event TRB DMA ptr not part of current TD" error message.
+
+Fix this by reusing the logic for processing isoc Transaction Errors.
+This also handles hosts which fail to report the final completion.
+
+Fix transfer length reporting on Babble errors. They may be caused by
+device malfunction, no guarantee that the buffer has been filled.
+
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 883cf477a70b9..4fa387e447f08 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2262,9 +2262,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ case COMP_BANDWIDTH_OVERRUN_ERROR:
+ frame->status = -ECOMM;
+ break;
+- case COMP_ISOCH_BUFFER_OVERRUN:
+ case COMP_BABBLE_DETECTED_ERROR:
++ sum_trbs_for_length = true;
++ fallthrough;
++ case COMP_ISOCH_BUFFER_OVERRUN:
+ frame->status = -EOVERFLOW;
++ if (ep_trb != td->last_trb)
++ td->error_mid_td = true;
+ break;
+ case COMP_INCOMPATIBLE_DEVICE_ERROR:
+ case COMP_STALL_ERROR:
+--
+2.43.0
+
--- /dev/null
+From 9c80999c4b17a687981d277bb41ed65de6e95686 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Apr 2021 10:02:08 +0300
+Subject: xhci: prevent double-fetch of transfer and transfer event TRBs
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit e9fcb07704fcef6fa6d0333fd2b3a62442eaf45b ]
+
+The same values are parsed several times from transfer and event
+TRBs by different functions in the same call path, all while processing
+one transfer event.
+
+As the TRBs are in DMA memory and can be accessed by the xHC host we want
+to avoid this to prevent double-fetch issues.
+
+To resolve this pass the already parsed values to the different functions
+in the path of parsing a transfer event
+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20210406070208.3406266-5-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 42 ++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index b814dc07116da..62d92da7016e7 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2059,16 +2059,13 @@ int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code)
+ return 0;
+ }
+
+-static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
+- struct xhci_transfer_event *event, struct xhci_virt_ep *ep)
++static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++ struct xhci_ring *ep_ring, struct xhci_td *td,
++ u32 trb_comp_code)
+ {
+ struct xhci_ep_ctx *ep_ctx;
+- struct xhci_ring *ep_ring;
+- u32 trb_comp_code;
+
+- ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
+ ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
+- trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
+
+ if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID ||
+ trb_comp_code == COMP_STOPPED ||
+@@ -2126,9 +2123,9 @@ static int sum_trb_lengths(struct xhci_hcd *xhci, struct xhci_ring *ring,
+ /*
+ * Process control tds, update urb status and actual_length.
+ */
+-static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
+- union xhci_trb *ep_trb, struct xhci_transfer_event *event,
+- struct xhci_virt_ep *ep)
++static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++ struct xhci_ring *ep_ring, struct xhci_td *td,
++ union xhci_trb *ep_trb, struct xhci_transfer_event *event)
+ {
+ struct xhci_ep_ctx *ep_ctx;
+ u32 trb_comp_code;
+@@ -2216,15 +2213,15 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
+ td->urb->actual_length = requested;
+
+ finish_td:
+- return finish_td(xhci, td, event, ep);
++ return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+
+ /*
+ * Process isochronous tds, update urb packet status and actual_length.
+ */
+-static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+- union xhci_trb *ep_trb, struct xhci_transfer_event *event,
+- struct xhci_virt_ep *ep)
++static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++ struct xhci_ring *ep_ring, struct xhci_td *td,
++ union xhci_trb *ep_trb, struct xhci_transfer_event *event)
+ {
+ struct urb_priv *urb_priv;
+ int idx;
+@@ -2301,7 +2298,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+
+ td->urb->actual_length += frame->actual_length;
+
+- return finish_td(xhci, td, event, ep);
++ return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+
+ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+@@ -2333,17 +2330,15 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+ /*
+ * Process bulk and interrupt tds, update urb status and actual_length.
+ */
+-static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
+- union xhci_trb *ep_trb, struct xhci_transfer_event *event,
+- struct xhci_virt_ep *ep)
++static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
++ struct xhci_ring *ep_ring, struct xhci_td *td,
++ union xhci_trb *ep_trb, struct xhci_transfer_event *event)
+ {
+ struct xhci_slot_ctx *slot_ctx;
+- struct xhci_ring *ep_ring;
+ u32 trb_comp_code;
+ u32 remaining, requested, ep_trb_len;
+
+ slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx);
+- ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
+ trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
+ remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
+ ep_trb_len = TRB_LEN(le32_to_cpu(ep_trb->generic.field[2]));
+@@ -2403,7 +2398,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
+ remaining);
+ td->urb->actual_length = 0;
+ }
+- return finish_td(xhci, td, event, ep);
++
++ return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+
+ /*
+@@ -2754,11 +2750,11 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+
+ /* update the urb's actual_length and give back to the core */
+ if (usb_endpoint_xfer_control(&td->urb->ep->desc))
+- process_ctrl_td(xhci, td, ep_trb, event, ep);
++ process_ctrl_td(xhci, ep, ep_ring, td, ep_trb, event);
+ else if (usb_endpoint_xfer_isoc(&td->urb->ep->desc))
+- process_isoc_td(xhci, td, ep_trb, event, ep);
++ process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event);
+ else
+- process_bulk_intr_td(xhci, td, ep_trb, event, ep);
++ process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event);
+ cleanup:
+ handling_skipped_tds = ep->skip &&
+ trb_comp_code != COMP_MISSED_SERVICE_ERROR &&
+--
+2.43.0
+
--- /dev/null
+From 6bece1cf082431fcec40f34122386e466f10b62f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 17:27:36 +0200
+Subject: xhci: process isoc TD properly when there was a transaction error mid
+ TD.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ]
+
+The last TRB of a isoc TD might not trigger an event if there was
+an error event for a TRB mid TD. This is seen on a NEC Corporation
+uPD720200 USB 3.0 Host
+
+After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1
+generate events for passed TRBs with IOC flag set if it proceeds to the
+next TD. This event is either a copy of the original error, or a
+"success" transfer event.
+
+If that event is missing then the driver and xHC host get out of sync as
+the driver is still expecting a transfer event for that first TD, while
+xHC host is already sending events for the next TD in the list.
+This leads to
+"Transfer event TRB DMA ptr not part of current TD" messages.
+
+As a solution we tag the isoc TDs that get error events mid TD.
+If an event doesn't match the first TD, then check if the tag is
+set, and event points to the next TD.
+In that case give back the first TD and process the next TD normally.
+
+Make sure TD status and transferred length stay valid in both cases
+with and without final TD completion event.
+
+Reported-by: Michał Pecio <michal.pecio@gmail.com>
+Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/
+Tested-by: Michał Pecio <michal.pecio@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++-------
+ drivers/usb/host/xhci.h | 1 +
+ 2 files changed, 61 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 62d92da7016e7..883cf477a70b9 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2244,6 +2244,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ /* handle completion code */
+ switch (trb_comp_code) {
+ case COMP_SUCCESS:
++ /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
++ if (td->error_mid_td)
++ break;
+ if (remaining) {
+ frame->status = short_framestatus;
+ if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
+@@ -2269,8 +2272,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ break;
+ case COMP_USB_TRANSACTION_ERROR:
+ frame->status = -EPROTO;
++ sum_trbs_for_length = true;
+ if (ep_trb != td->last_trb)
+- return 0;
++ td->error_mid_td = true;
+ break;
+ case COMP_STOPPED:
+ sum_trbs_for_length = true;
+@@ -2290,6 +2294,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ break;
+ }
+
++ if (td->urb_length_set)
++ goto finish_td;
++
+ if (sum_trbs_for_length)
+ frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
+ ep_trb_len - remaining;
+@@ -2298,6 +2305,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+
+ td->urb->actual_length += frame->actual_length;
+
++finish_td:
++ /* Don't give back TD yet if we encountered an error mid TD */
++ if (td->error_mid_td && ep_trb != td->last_trb) {
++ xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
++ td->urb_length_set = true;
++ return 0;
++ }
++
+ return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
+ }
+
+@@ -2684,17 +2699,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+ }
+
+ if (!ep_seg) {
+- if (!ep->skip ||
+- !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+- /* Some host controllers give a spurious
+- * successful event after a short transfer.
+- * Ignore it.
+- */
+- if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+- ep_ring->last_td_was_short) {
+- ep_ring->last_td_was_short = false;
+- goto cleanup;
++
++ if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
++ skip_isoc_td(xhci, td, ep, status);
++ goto cleanup;
++ }
++
++ /*
++ * Some hosts give a spurious success event after a short
++ * transfer. Ignore it.
++ */
++ if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
++ ep_ring->last_td_was_short) {
++ ep_ring->last_td_was_short = false;
++ goto cleanup;
++ }
++
++ /*
++ * xhci 4.10.2 states isoc endpoints should continue
++ * processing the next TD if there was an error mid TD.
++ * So host like NEC don't generate an event for the last
++ * isoc TRB even if the IOC flag is set.
++ * xhci 4.9.1 states that if there are errors in mult-TRB
++ * TDs xHC should generate an error for that TRB, and if xHC
++ * proceeds to the next TD it should genete an event for
++ * any TRB with IOC flag on the way. Other host follow this.
++ * So this event might be for the next TD.
++ */
++ if (td->error_mid_td &&
++ !list_is_last(&td->td_list, &ep_ring->td_list)) {
++ struct xhci_td *td_next = list_next_entry(td, td_list);
++
++ ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
++ td_next->last_trb, ep_trb_dma, false);
++ if (ep_seg) {
++ /* give back previous TD, start handling new */
++ xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
++ ep_ring->dequeue = td->last_trb;
++ ep_ring->deq_seg = td->last_trb_seg;
++ inc_deq(xhci, ep_ring);
++ xhci_td_cleanup(xhci, td, ep_ring, td->status);
++ td = td_next;
+ }
++ }
++
++ if (!ep_seg) {
+ /* HC is busted, give up! */
+ xhci_err(xhci,
+ "ERROR Transfer event TRB DMA ptr not "
+@@ -2706,9 +2755,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+ ep_trb_dma, true);
+ return -ESHUTDOWN;
+ }
+-
+- skip_isoc_td(xhci, td, ep, status);
+- goto cleanup;
+ }
+ if (trb_comp_code == COMP_SHORT_PACKET)
+ ep_ring->last_td_was_short = true;
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 85ab213c7940a..5a8443f6ed703 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1554,6 +1554,7 @@ struct xhci_td {
+ struct xhci_segment *bounce_seg;
+ /* actual_length of the URB has already been set */
+ bool urb_length_set;
++ bool error_mid_td;
+ unsigned int num_trbs;
+ };
+
+--
+2.43.0
+
--- /dev/null
+From dcd48a2dfd74f9eb405b0181d978261958264b64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Jan 2021 15:00:28 +0200
+Subject: xhci: remove extra loop in interrupt context
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit 55f6153d8cc8eff0852d108f80087fdf41dc2169 ]
+
+When finishing a TD we walk the endpoint dequeue trb pointer
+until it matches the last TRB of the TD.
+
+TDs can contain over 100 TRBs, meaning we call a function 100 times,
+do a few comparisons and increase a couple values for each of these calls,
+all in interrupt context.
+
+This can all be avoided by adding a pointer to the last TRB segment, and
+a number of TRBs in the TD. So instead of walking through each TRB just
+set the new dequeue segment, pointer, and number of free TRBs directly.
+
+Getting rid of the while loop also reduces the risk of getting stuck in an
+infinite loop in the interrupt handler. The loop relied on valid matching
+dequeue and last_trb values to break.
+
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20210129130044.206855-12-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 21 ++++++++++++++-------
+ drivers/usb/host/xhci.h | 2 ++
+ 2 files changed, 16 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index eb70f07e3623a..b814dc07116da 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2099,8 +2099,9 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
+ EP_HARD_RESET);
+ } else {
+ /* Update ring dequeue pointer */
+- while (ep_ring->dequeue != td->last_trb)
+- inc_deq(xhci, ep_ring);
++ ep_ring->dequeue = td->last_trb;
++ ep_ring->deq_seg = td->last_trb_seg;
++ ep_ring->num_trbs_free += td->num_trbs - 1;
+ inc_deq(xhci, ep_ring);
+ }
+
+@@ -2321,8 +2322,9 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+ frame->actual_length = 0;
+
+ /* Update ring dequeue pointer */
+- while (ep->ring->dequeue != td->last_trb)
+- inc_deq(xhci, ep->ring);
++ ep->ring->dequeue = td->last_trb;
++ ep->ring->deq_seg = td->last_trb_seg;
++ ep->ring->num_trbs_free += td->num_trbs - 1;
+ inc_deq(xhci, ep->ring);
+
+ return xhci_td_cleanup(xhci, td, ep->ring, status);
+@@ -3487,7 +3489,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+ field |= TRB_IOC;
+ more_trbs_coming = false;
+ td->last_trb = ring->enqueue;
+-
++ td->last_trb_seg = ring->enq_seg;
+ if (xhci_urb_suitable_for_idt(urb)) {
+ memcpy(&send_addr, urb->transfer_buffer,
+ trb_buff_len);
+@@ -3513,7 +3515,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+ upper_32_bits(send_addr),
+ length_field,
+ field);
+-
++ td->num_trbs++;
+ addr += trb_buff_len;
+ sent_len = trb_buff_len;
+
+@@ -3537,8 +3539,10 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+ ep_index, urb->stream_id,
+ 1, urb, 1, mem_flags);
+ urb_priv->td[1].last_trb = ring->enqueue;
++ urb_priv->td[1].last_trb_seg = ring->enq_seg;
+ field = TRB_TYPE(TRB_NORMAL) | ring->cycle_state | TRB_IOC;
+ queue_trb(xhci, ring, 0, 0, 0, TRB_INTR_TARGET(0), field);
++ urb_priv->td[1].num_trbs++;
+ }
+
+ check_trb_math(urb, enqd_len);
+@@ -3589,6 +3593,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+
+ urb_priv = urb->hcpriv;
+ td = &urb_priv->td[0];
++ td->num_trbs = num_trbs;
+
+ /*
+ * Don't give the first TRB to the hardware (by toggling the cycle bit)
+@@ -3661,6 +3666,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+
+ /* Save the DMA address of the last TRB in the TD */
+ td->last_trb = ep_ring->enqueue;
++ td->last_trb_seg = ep_ring->enq_seg;
+
+ /* Queue status TRB - see Table 7 and sections 4.11.2.2 and 6.4.1.2.3 */
+ /* If the device sent data, the status stage is an OUT transfer */
+@@ -3905,7 +3911,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+ goto cleanup;
+ }
+ td = &urb_priv->td[i];
+-
++ td->num_trbs = trbs_per_td;
+ /* use SIA as default, if frame id is used overwrite it */
+ sia_frame_id = TRB_SIA;
+ if (!(urb->transfer_flags & URB_ISO_ASAP) &&
+@@ -3948,6 +3954,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+ } else {
+ more_trbs_coming = false;
+ td->last_trb = ep_ring->enqueue;
++ td->last_trb_seg = ep_ring->enq_seg;
+ field |= TRB_IOC;
+ if (trb_block_event_intr(xhci, num_tds, i))
+ field |= TRB_BEI;
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index bb3c362a194b2..85ab213c7940a 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1550,9 +1550,11 @@ struct xhci_td {
+ struct xhci_segment *start_seg;
+ union xhci_trb *first_trb;
+ union xhci_trb *last_trb;
++ struct xhci_segment *last_trb_seg;
+ struct xhci_segment *bounce_seg;
+ /* actual_length of the URB has already been set */
+ bool urb_length_set;
++ unsigned int num_trbs;
+ };
+
+ /* xHCI command default timeout value */
+--
+2.43.0
+