From: Sasha Levin Date: Sun, 10 Mar 2024 02:31:48 +0000 (-0500) Subject: Fixes for 5.10 X-Git-Tag: v6.8.1~30 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3d41a0174a99693281f714212bf4a74582d209a8;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch b/queue-5.10/bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch new file mode 100644 index 00000000000..9d7e0663649 --- /dev/null +++ b/queue-5.10/bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch @@ -0,0 +1,236 @@ +From a18447e359ab34c6f720c82fa9bb7785051c94fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Sep 2022 17:28:02 -0700 +Subject: bpf: net: Change sk_getsockopt() to take the sockptr_t argument + +From: Martin KaFai Lau + +[ Upstream commit 4ff09db1b79b98b4a2a7511571c640b76cab3beb ] + +This patch changes sk_getsockopt() to take the sockptr_t argument +such that it can be used by bpf_getsockopt(SOL_SOCKET) in a +latter patch. + +security_socket_getpeersec_stream() is not changed. It stays +with the __user ptr (optval.user and optlen.user) to avoid changes +to other security hooks. bpf_getsockopt(SOL_SOCKET) also does not +support SO_PEERSEC. 
+ +Signed-off-by: Martin KaFai Lau +Link: https://lore.kernel.org/r/20220902002802.2888419-1-kafai@fb.com +Signed-off-by: Alexei Starovoitov +Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks") +Signed-off-by: Sasha Levin +--- + include/linux/filter.h | 3 +-- + include/linux/sockptr.h | 5 +++++ + net/core/filter.c | 5 ++--- + net/core/sock.c | 43 +++++++++++++++++++++++------------------ + 4 files changed, 32 insertions(+), 24 deletions(-) + +diff --git a/include/linux/filter.h b/include/linux/filter.h +index bc6ce4b202a80..cd56e53bd42e2 100644 +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -892,8 +892,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); + int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); + void sk_reuseport_prog_free(struct bpf_prog *prog); + int sk_detach_filter(struct sock *sk); +-int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, +- unsigned int len); ++int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len); + + bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); + void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); +diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h +index ea193414298b7..38862819e77a1 100644 +--- a/include/linux/sockptr.h ++++ b/include/linux/sockptr.h +@@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, + return 0; + } + ++static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) ++{ ++ return copy_to_sockptr_offset(dst, 0, src, size); ++} ++ + static inline void *memdup_sockptr(sockptr_t src, size_t len) + { + void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); +diff --git a/net/core/filter.c b/net/core/filter.c +index 6cfc8fb0562a2..49e4d1535cc82 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -9903,8 +9903,7 @@ int sk_detach_filter(struct sock *sk) + } + 
EXPORT_SYMBOL_GPL(sk_detach_filter); + +-int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, +- unsigned int len) ++int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len) + { + struct sock_fprog_kern *fprog; + struct sk_filter *filter; +@@ -9935,7 +9934,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, + goto out; + + ret = -EFAULT; +- if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog))) ++ if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog))) + goto out; + + /* Instead of bytes, the API requests to return the number +diff --git a/net/core/sock.c b/net/core/sock.c +index 95559d088a169..42da46965b16f 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -644,8 +644,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) + return ret; + } + +-static int sock_getbindtodevice(struct sock *sk, char __user *optval, +- int __user *optlen, int len) ++static int sock_getbindtodevice(struct sock *sk, sockptr_t optval, ++ sockptr_t optlen, int len) + { + int ret = -ENOPROTOOPT; + #ifdef CONFIG_NETDEVICES +@@ -668,12 +668,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, + len = strlen(devname) + 1; + + ret = -EFAULT; +- if (copy_to_user(optval, devname, len)) ++ if (copy_to_sockptr(optval, devname, len)) + goto out; + + zero: + ret = -EFAULT; +- if (put_user(len, optlen)) ++ if (copy_to_sockptr(optlen, &len, sizeof(int))) + goto out; + + ret = 0; +@@ -1281,20 +1281,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred, + } + } + +-static int groups_to_user(gid_t __user *dst, const struct group_info *src) ++static int groups_to_user(sockptr_t dst, const struct group_info *src) + { + struct user_namespace *user_ns = current_user_ns(); + int i; + +- for (i = 0; i < src->ngroups; i++) +- if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) ++ for (i = 0; i < src->ngroups; i++) { ++ gid_t gid = 
from_kgid_munged(user_ns, src->gid[i]); ++ ++ if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid))) + return -EFAULT; ++ } + + return 0; + } + + static int sk_getsockopt(struct sock *sk, int level, int optname, +- char __user *optval, int __user *optlen) ++ sockptr_t optval, sockptr_t optlen) + { + struct socket *sock = sk->sk_socket; + +@@ -1312,7 +1315,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + int lv = sizeof(int); + int len; + +- if (get_user(len, optlen)) ++ if (copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + if (len < 0) + return -EINVAL; +@@ -1445,7 +1448,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + spin_unlock(&sk->sk_peer_lock); + +- if (copy_to_user(optval, &peercred, len)) ++ if (copy_to_sockptr(optval, &peercred, len)) + return -EFAULT; + goto lenout; + } +@@ -1463,11 +1466,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + if (len < n * sizeof(gid_t)) { + len = n * sizeof(gid_t); + put_cred(cred); +- return put_user(len, optlen) ? -EFAULT : -ERANGE; ++ return copy_to_sockptr(optlen, &len, sizeof(int)) ? 
-EFAULT : -ERANGE; + } + len = n * sizeof(gid_t); + +- ret = groups_to_user((gid_t __user *)optval, cred->group_info); ++ ret = groups_to_user(optval, cred->group_info); + put_cred(cred); + if (ret) + return ret; +@@ -1483,7 +1486,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + return -ENOTCONN; + if (lv < len) + return -EINVAL; +- if (copy_to_user(optval, address, len)) ++ if (copy_to_sockptr(optval, address, len)) + return -EFAULT; + goto lenout; + } +@@ -1500,7 +1503,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + break; + + case SO_PEERSEC: +- return security_socket_getpeersec_stream(sock, optval, optlen, len); ++ return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len); + + case SO_MARK: + v.val = sk->sk_mark; +@@ -1528,7 +1531,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + return sock_getbindtodevice(sk, optval, optlen, len); + + case SO_GET_FILTER: +- len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); ++ len = sk_get_filter(sk, optval, len); + if (len < 0) + return len; + +@@ -1575,7 +1578,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + sk_get_meminfo(sk, meminfo); + + len = min_t(unsigned int, len, sizeof(meminfo)); +- if (copy_to_user(optval, &meminfo, len)) ++ if (copy_to_sockptr(optval, &meminfo, len)) + return -EFAULT; + + goto lenout; +@@ -1625,10 +1628,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + + if (len > lv) + len = lv; +- if (copy_to_user(optval, &v, len)) ++ if (copy_to_sockptr(optval, &v, len)) + return -EFAULT; + lenout: +- if (put_user(len, optlen)) ++ if (copy_to_sockptr(optlen, &len, sizeof(int))) + return -EFAULT; + return 0; + } +@@ -1636,7 +1639,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + int sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) + { +- return sk_getsockopt(sock->sk, level, 
optname, optval, optlen); ++ return sk_getsockopt(sock->sk, level, optname, ++ USER_SOCKPTR(optval), ++ USER_SOCKPTR(optlen)); + } + + /* +-- +2.43.0 + diff --git a/queue-5.10/drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch b/queue-5.10/drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch new file mode 100644 index 00000000000..21b400087b0 --- /dev/null +++ b/queue-5.10/drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch @@ -0,0 +1,407 @@ +From f01b7ce60d0b2730d8de1f8f49a16ed9fe64c76e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Nov 2020 11:04:00 +0100 +Subject: Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus + hardening + +From: Andres Beltran + +[ Upstream commit e8b7db38449ac5b950a3f00519171c4be3e226ff ] + +Currently, VMbus drivers use pointers into guest memory as request IDs +for interactions with Hyper-V. To be more robust in the face of errors +or malicious behavior from a compromised Hyper-V, avoid exposing +guest memory addresses to Hyper-V. Also avoid Hyper-V giving back a +bad request ID that is then treated as the address of a guest data +structure with no validation. Instead, encapsulate these memory +addresses and provide small integers as request IDs. 
+ +Signed-off-by: Andres Beltran +Co-developed-by: Andrea Parri (Microsoft) +Signed-off-by: Andrea Parri (Microsoft) +Reviewed-by: Michael Kelley +Reviewed-by: Wei Liu +Link: https://lore.kernel.org/r/20201109100402.8946-2-parri.andrea@gmail.com +Signed-off-by: Wei Liu +Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") +Signed-off-by: Sasha Levin +--- + drivers/hv/channel.c | 174 ++++++++++++++++++++++++++++++++++++-- + drivers/hv/hyperv_vmbus.h | 3 +- + drivers/hv/ring_buffer.c | 29 ++++++- + include/linux/hyperv.h | 22 +++++ + 4 files changed, 219 insertions(+), 9 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index f064fa6ef181a..a59ab2f3d68e1 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -503,6 +503,70 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, + } + EXPORT_SYMBOL_GPL(vmbus_establish_gpadl); + ++/** ++ * request_arr_init - Allocates memory for the requestor array. Each slot ++ * keeps track of the next available slot in the array. Initially, each ++ * slot points to the next one (as in a Linked List). The last slot ++ * does not point to anything, so its value is U64_MAX by default. ++ * @size The size of the array ++ */ ++static u64 *request_arr_init(u32 size) ++{ ++ int i; ++ u64 *req_arr; ++ ++ req_arr = kcalloc(size, sizeof(u64), GFP_KERNEL); ++ if (!req_arr) ++ return NULL; ++ ++ for (i = 0; i < size - 1; i++) ++ req_arr[i] = i + 1; ++ ++ /* Last slot (no more available slots) */ ++ req_arr[i] = U64_MAX; ++ ++ return req_arr; ++} ++ ++/* ++ * vmbus_alloc_requestor - Initializes @rqstor's fields. 
++ * Index 0 is the first free slot ++ * @size: Size of the requestor array ++ */ ++static int vmbus_alloc_requestor(struct vmbus_requestor *rqstor, u32 size) ++{ ++ u64 *rqst_arr; ++ unsigned long *bitmap; ++ ++ rqst_arr = request_arr_init(size); ++ if (!rqst_arr) ++ return -ENOMEM; ++ ++ bitmap = bitmap_zalloc(size, GFP_KERNEL); ++ if (!bitmap) { ++ kfree(rqst_arr); ++ return -ENOMEM; ++ } ++ ++ rqstor->req_arr = rqst_arr; ++ rqstor->req_bitmap = bitmap; ++ rqstor->size = size; ++ rqstor->next_request_id = 0; ++ spin_lock_init(&rqstor->req_lock); ++ ++ return 0; ++} ++ ++/* ++ * vmbus_free_requestor - Frees memory allocated for @rqstor ++ * @rqstor: Pointer to the requestor struct ++ */ ++static void vmbus_free_requestor(struct vmbus_requestor *rqstor) ++{ ++ kfree(rqstor->req_arr); ++ bitmap_free(rqstor->req_bitmap); ++} ++ + static int __vmbus_open(struct vmbus_channel *newchannel, + void *userdata, u32 userdatalen, + void (*onchannelcallback)(void *context), void *context) +@@ -523,6 +587,12 @@ static int __vmbus_open(struct vmbus_channel *newchannel, + if (newchannel->state != CHANNEL_OPEN_STATE) + return -EINVAL; + ++ /* Create and init requestor */ ++ if (newchannel->rqstor_size) { ++ if (vmbus_alloc_requestor(&newchannel->requestor, newchannel->rqstor_size)) ++ return -ENOMEM; ++ } ++ + newchannel->state = CHANNEL_OPENING_STATE; + newchannel->onchannel_callback = onchannelcallback; + newchannel->channel_callback_context = context; +@@ -626,6 +696,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel, + error_clean_ring: + hv_ringbuffer_cleanup(&newchannel->outbound); + hv_ringbuffer_cleanup(&newchannel->inbound); ++ vmbus_free_requestor(&newchannel->requestor); + newchannel->state = CHANNEL_OPEN_STATE; + return err; + } +@@ -808,6 +879,9 @@ static int vmbus_close_internal(struct vmbus_channel *channel) + channel->ringbuffer_gpadlhandle = 0; + } + ++ if (!ret) ++ vmbus_free_requestor(&channel->requestor); ++ + return ret; + } + +@@ -888,7 +962,7 @@ 
int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, + /* in 8-bytes granularity */ + desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3; + desc.len8 = (u16)(packetlen_aligned >> 3); +- desc.trans_id = requestid; ++ desc.trans_id = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ + + bufferlist[0].iov_base = &desc; + bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor); +@@ -897,7 +971,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + +- return hv_ringbuffer_write(channel, bufferlist, num_vecs); ++ return hv_ringbuffer_write(channel, bufferlist, num_vecs, requestid); + } + EXPORT_SYMBOL(vmbus_sendpacket); + +@@ -939,7 +1013,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, + desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ + desc.length8 = (u16)(packetlen_aligned >> 3); +- desc.transactionid = requestid; ++ desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ + desc.reserved = 0; + desc.rangecount = pagecount; + +@@ -956,7 +1030,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + +- return hv_ringbuffer_write(channel, bufferlist, 3); ++ return hv_ringbuffer_write(channel, bufferlist, 3, requestid); + } + EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); + +@@ -983,7 +1057,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, + desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */ + desc->length8 = (u16)(packetlen_aligned >> 3); +- desc->transactionid = requestid; ++ desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ + desc->reserved = 0; + 
desc->rangecount = 1; + +@@ -994,7 +1068,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + +- return hv_ringbuffer_write(channel, bufferlist, 3); ++ return hv_ringbuffer_write(channel, bufferlist, 3, requestid); + } + EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); + +@@ -1042,3 +1116,91 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, + buffer_actual_len, requestid, true); + } + EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); ++ ++/* ++ * vmbus_next_request_id - Returns a new request id. It is also ++ * the index at which the guest memory address is stored. ++ * Uses a spin lock to avoid race conditions. ++ * @rqstor: Pointer to the requestor struct ++ * @rqst_add: Guest memory address to be stored in the array ++ */ ++u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr) ++{ ++ unsigned long flags; ++ u64 current_id; ++ const struct vmbus_channel *channel = ++ container_of(rqstor, const struct vmbus_channel, requestor); ++ ++ /* Check rqstor has been initialized */ ++ if (!channel->rqstor_size) ++ return VMBUS_NO_RQSTOR; ++ ++ spin_lock_irqsave(&rqstor->req_lock, flags); ++ current_id = rqstor->next_request_id; ++ ++ /* Requestor array is full */ ++ if (current_id >= rqstor->size) { ++ spin_unlock_irqrestore(&rqstor->req_lock, flags); ++ return VMBUS_RQST_ERROR; ++ } ++ ++ rqstor->next_request_id = rqstor->req_arr[current_id]; ++ rqstor->req_arr[current_id] = rqst_addr; ++ ++ /* The already held spin lock provides atomicity */ ++ bitmap_set(rqstor->req_bitmap, current_id, 1); ++ ++ spin_unlock_irqrestore(&rqstor->req_lock, flags); ++ ++ /* ++ * Cannot return an ID of 0, which is reserved for an unsolicited ++ * message from Hyper-V. ++ */ ++ return current_id + 1; ++} ++EXPORT_SYMBOL_GPL(vmbus_next_request_id); ++ ++/* ++ * vmbus_request_addr - Returns the memory address stored at @trans_id ++ * in @rqstor. 
Uses a spin lock to avoid race conditions. ++ * @rqstor: Pointer to the requestor struct ++ * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's ++ * next request id. ++ */ ++u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id) ++{ ++ unsigned long flags; ++ u64 req_addr; ++ const struct vmbus_channel *channel = ++ container_of(rqstor, const struct vmbus_channel, requestor); ++ ++ /* Check rqstor has been initialized */ ++ if (!channel->rqstor_size) ++ return VMBUS_NO_RQSTOR; ++ ++ /* Hyper-V can send an unsolicited message with ID of 0 */ ++ if (!trans_id) ++ return trans_id; ++ ++ spin_lock_irqsave(&rqstor->req_lock, flags); ++ ++ /* Data corresponding to trans_id is stored at trans_id - 1 */ ++ trans_id--; ++ ++ /* Invalid trans_id */ ++ if (trans_id >= rqstor->size || !test_bit(trans_id, rqstor->req_bitmap)) { ++ spin_unlock_irqrestore(&rqstor->req_lock, flags); ++ return VMBUS_RQST_ERROR; ++ } ++ ++ req_addr = rqstor->req_arr[trans_id]; ++ rqstor->req_arr[trans_id] = rqstor->next_request_id; ++ rqstor->next_request_id = trans_id; ++ ++ /* The already held spin lock provides atomicity */ ++ bitmap_clear(rqstor->req_bitmap, trans_id, 1); ++ ++ spin_unlock_irqrestore(&rqstor->req_lock, flags); ++ return req_addr; ++} ++EXPORT_SYMBOL_GPL(vmbus_request_addr); +diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h +index 7845fa5de79e9..601660bca5d47 100644 +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -180,7 +180,8 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, + void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); + + int hv_ringbuffer_write(struct vmbus_channel *channel, +- const struct kvec *kv_list, u32 kv_count); ++ const struct kvec *kv_list, u32 kv_count, ++ u64 requestid); + + int hv_ringbuffer_read(struct vmbus_channel *channel, + void *buffer, u32 buflen, u32 *buffer_actual_len, +diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c +index 
7ed6fad3fa8ff..a0ba6ac487368 100644 +--- a/drivers/hv/ring_buffer.c ++++ b/drivers/hv/ring_buffer.c +@@ -261,7 +261,8 @@ EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy); + + /* Write to the ring buffer. */ + int hv_ringbuffer_write(struct vmbus_channel *channel, +- const struct kvec *kv_list, u32 kv_count) ++ const struct kvec *kv_list, u32 kv_count, ++ u64 requestid) + { + int i; + u32 bytes_avail_towrite; +@@ -271,6 +272,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, + u64 prev_indices; + unsigned long flags; + struct hv_ring_buffer_info *outring_info = &channel->outbound; ++ struct vmpacket_descriptor *desc = kv_list[0].iov_base; ++ u64 rqst_id = VMBUS_NO_RQSTOR; + + if (channel->rescind) + return -ENODEV; +@@ -313,6 +316,23 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, + kv_list[i].iov_len); + } + ++ /* ++ * Allocate the request ID after the data has been copied into the ++ * ring buffer. Once this request ID is allocated, the completion ++ * path could find the data and free it. ++ */ ++ ++ if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) { ++ rqst_id = vmbus_next_request_id(&channel->requestor, requestid); ++ if (rqst_id == VMBUS_RQST_ERROR) { ++ spin_unlock_irqrestore(&outring_info->ring_lock, flags); ++ pr_err("No request id available\n"); ++ return -EAGAIN; ++ } ++ } ++ desc = hv_get_ring_buffer(outring_info) + old_write; ++ desc->trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? 
requestid : rqst_id; ++ + /* Set previous packet start */ + prev_indices = hv_get_ring_bufferindices(outring_info); + +@@ -332,8 +352,13 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, + + hv_signal_on_write(old_write, channel); + +- if (channel->rescind) ++ if (channel->rescind) { ++ if (rqst_id != VMBUS_NO_RQSTOR) { ++ /* Reclaim request ID to avoid leak of IDs */ ++ vmbus_request_addr(&channel->requestor, rqst_id); ++ } + return -ENODEV; ++ } + + return 0; + } +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index eada4d8d65879..4cb65a79d92f6 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -764,6 +764,22 @@ enum vmbus_device_type { + HV_UNKNOWN, + }; + ++/* ++ * Provides request ids for VMBus. Encapsulates guest memory ++ * addresses and stores the next available slot in req_arr ++ * to generate new ids in constant time. ++ */ ++struct vmbus_requestor { ++ u64 *req_arr; ++ unsigned long *req_bitmap; /* is a given slot available? */ ++ u32 size; ++ u64 next_request_id; ++ spinlock_t req_lock; /* provides atomicity */ ++}; ++ ++#define VMBUS_NO_RQSTOR U64_MAX ++#define VMBUS_RQST_ERROR (U64_MAX - 1) ++ + struct vmbus_device { + u16 dev_type; + guid_t guid; +@@ -988,8 +1004,14 @@ struct vmbus_channel { + u32 fuzz_testing_interrupt_delay; + u32 fuzz_testing_message_delay; + ++ /* request/transaction ids for VMBus */ ++ struct vmbus_requestor requestor; ++ u32 rqstor_size; + }; + ++u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); ++u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id); ++ + static inline bool is_hvsock_channel(const struct vmbus_channel *c) + { + return !!(c->offermsg.offer.chn_flags & +-- +2.43.0 + diff --git a/queue-5.10/drivers-hv-vmbus-drop-error-message-when-no-request-.patch b/queue-5.10/drivers-hv-vmbus-drop-error-message-when-no-request-.patch new file mode 100644 index 00000000000..f443a162a93 --- /dev/null +++ 
b/queue-5.10/drivers-hv-vmbus-drop-error-message-when-no-request-.patch @@ -0,0 +1,40 @@ +From fa8553d3a820c2eb5fe34180444ba299b6087594 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 1 Mar 2021 20:13:48 +0100 +Subject: Drivers: hv: vmbus: Drop error message when 'No request id available' + +From: Andrea Parri (Microsoft) + +[ Upstream commit 0c85c54bf7faeb80c6b76901ed77d93acef0207d ] + +Running out of request IDs on a channel essentially produces the same +effect as running out of space in the ring buffer, in that -EAGAIN is +returned. The error message in hv_ringbuffer_write() should either be +dropped (since we don't output a message when the ring buffer is full) +or be made conditional/debug-only. + +Suggested-by: Michael Kelley +Signed-off-by: Andrea Parri (Microsoft) +Fixes: e8b7db38449ac ("Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening") +Link: https://lore.kernel.org/r/20210301191348.196485-1-parri.andrea@gmail.com +Signed-off-by: Wei Liu +Signed-off-by: Sasha Levin +--- + drivers/hv/ring_buffer.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c +index a0ba6ac487368..a49cc69c56af0 100644 +--- a/drivers/hv/ring_buffer.c ++++ b/drivers/hv/ring_buffer.c +@@ -326,7 +326,6 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, + rqst_id = vmbus_next_request_id(&channel->requestor, requestid); + if (rqst_id == VMBUS_RQST_ERROR) { + spin_unlock_irqrestore(&outring_info->ring_lock, flags); +- pr_err("No request id available\n"); + return -EAGAIN; + } + } +-- +2.43.0 + diff --git a/queue-5.10/exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch b/queue-5.10/exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch new file mode 100644 index 00000000000..790e1bb6c4a --- /dev/null +++ b/queue-5.10/exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch @@ -0,0 +1,33 @@ +From 9cf15c9f4363b09b25014512da81a180f3e5e6c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 
Aug 2022 10:43:42 +0200 +Subject: exit: Fix typo in comment: s/sub-theads/sub-threads + +From: Ingo Molnar + +[ Upstream commit dcca34754a3f5290406403b8066e3b15dda9f4bf ] + +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar +Stable-dep-of: c1be35a16b2f ("exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)") +Signed-off-by: Sasha Levin +--- + kernel/exit.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/exit.c b/kernel/exit.c +index bacdaf980933b..c41bdc0a7f06b 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1105,7 +1105,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) + * p->signal fields because the whole thread group is dead + * and nobody can change them. + * +- * psig->stats_lock also protects us from our sub-theads ++ * psig->stats_lock also protects us from our sub-threads + * which can reap other children at the same time. Until + * we change k_getrusage()-like users to rely on this lock + * we have to take ->siglock as well. +-- +2.43.0 + diff --git a/queue-5.10/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch b/queue-5.10/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch new file mode 100644 index 00000000000..a9f4daa5158 --- /dev/null +++ b/queue-5.10/exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch @@ -0,0 +1,65 @@ +From 2b40d3238f9ea8a9cf9a61ce0cbec94ef5d2ced8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 16:34:00 +0100 +Subject: exit: wait_task_zombie: kill the no longer necessary + spin_lock_irq(siglock) + +From: Oleg Nesterov + +[ Upstream commit c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 ] + +After the recent changes nobody use siglock to read the values protected +by stats_lock, we can kill spin_lock_irq(¤t->sighand->siglock) and +update the comment. + +With this patch only __exit_signal() and thread_group_start_cputime() take +stats_lock under siglock. 
+ +Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com +Signed-off-by: Oleg Nesterov +Signed-off-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/exit.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +diff --git a/kernel/exit.c b/kernel/exit.c +index c41bdc0a7f06b..8f25abdd5fa7d 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1106,17 +1106,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) + * and nobody can change them. + * + * psig->stats_lock also protects us from our sub-threads +- * which can reap other children at the same time. Until +- * we change k_getrusage()-like users to rely on this lock +- * we have to take ->siglock as well. ++ * which can reap other children at the same time. + * + * We use thread_group_cputime_adjusted() to get times for + * the thread group, which consolidates times for all threads + * in the group including the group leader. 
+ */ + thread_group_cputime_adjusted(p, &tgutime, &tgstime); +- spin_lock_irq(¤t->sighand->siglock); +- write_seqlock(&psig->stats_lock); ++ write_seqlock_irq(&psig->stats_lock); + psig->cutime += tgutime + sig->cutime; + psig->cstime += tgstime + sig->cstime; + psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; +@@ -1139,8 +1136,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) + psig->cmaxrss = maxrss; + task_io_accounting_add(&psig->ioac, &p->ioac); + task_io_accounting_add(&psig->ioac, &sig->ioac); +- write_sequnlock(&psig->stats_lock); +- spin_unlock_irq(¤t->sighand->siglock); ++ write_sequnlock_irq(&psig->stats_lock); + } + + if (wo->wo_rusage) +-- +2.43.0 + diff --git a/queue-5.10/ext4-convert-to-exclusive-lock-while-inserting-delal.patch b/queue-5.10/ext4-convert-to-exclusive-lock-while-inserting-delal.patch new file mode 100644 index 00000000000..890a1d7ac0e --- /dev/null +++ b/queue-5.10/ext4-convert-to-exclusive-lock-while-inserting-delal.patch @@ -0,0 +1,113 @@ +From b26ac4d68bbe68214213c82814d4c7acd12c6a64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 27 Jan 2024 09:58:01 +0800 +Subject: ext4: convert to exclusive lock while inserting delalloc extents + +From: Zhang Yi + +[ Upstream commit acf795dc161f3cf481db20f05db4250714e375e5 ] + +ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem +when inserting delalloc extents, it could be raced by another querying +path of ext4_map_blocks() without i_rwsem, .e.g buffered read path. +Suppose we buffered read a file containing just a hole, and without any +cached extents tree, then it is raced by another delayed buffered write +to the same area or the near area belongs to the same hole, and the new +delalloc extent could be overwritten to a hole extent. 
+ + pread() pwrite() + filemap_read_folio() + ext4_mpage_readpages() + ext4_map_blocks() + down_read(i_data_sem) + ext4_ext_determine_hole() + //find hole + ext4_ext_put_gap_in_cache() + ext4_es_find_extent_range() + //no delalloc extent + ext4_da_map_blocks() + down_read(i_data_sem) + ext4_insert_delayed_block() + //insert delalloc extent + ext4_es_insert_extent() + //overwrite delalloc extent to hole + +This race could lead to inconsistent delalloc extents tree and +incorrect reserved space counter. Fix this by converting to hold +i_data_sem in exclusive mode when adding a new delalloc extent in +ext4_da_map_blocks(). + +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Yi +Suggested-by: Jan Kara +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 25 +++++++++++-------------- + 1 file changed, 11 insertions(+), 14 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 64162470a7e6c..8b48ed351c4b9 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1728,10 +1728,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + + /* Lookup extent status tree firstly */ + if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { +- if (ext4_es_is_hole(&es)) { +- down_read(&EXT4_I(inode)->i_data_sem); ++ if (ext4_es_is_hole(&es)) + goto add_delayed; +- } + + /* + * Delayed extent could be allocated by fallocate. 
+@@ -1773,8 +1771,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + retval = ext4_ext_map_blocks(NULL, inode, map, 0); + else + retval = ext4_ind_map_blocks(NULL, inode, map, 0); +- if (retval < 0) +- goto out_unlock; ++ if (retval < 0) { ++ up_read(&EXT4_I(inode)->i_data_sem); ++ return retval; ++ } + if (retval > 0) { + unsigned int status; + +@@ -1790,24 +1790,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); +- goto out_unlock; ++ up_read(&EXT4_I(inode)->i_data_sem); ++ return retval; + } ++ up_read(&EXT4_I(inode)->i_data_sem); + + add_delayed: +- /* +- * XXX: __block_prepare_write() unmaps passed block, +- * is it OK? +- */ ++ down_write(&EXT4_I(inode)->i_data_sem); + retval = ext4_insert_delayed_block(inode, map->m_lblk); ++ up_write(&EXT4_I(inode)->i_data_sem); + if (retval) +- goto out_unlock; ++ return retval; + + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); +- +-out_unlock: +- up_read((&EXT4_I(inode)->i_data_sem)); + return retval; + } + +-- +2.43.0 + diff --git a/queue-5.10/ext4-make-ext4_es_insert_extent-return-void.patch b/queue-5.10/ext4-make-ext4_es_insert_extent-return-void.patch new file mode 100644 index 00000000000..91789fecb14 --- /dev/null +++ b/queue-5.10/ext4-make-ext4_es_insert_extent-return-void.patch @@ -0,0 +1,158 @@ +From 5ad8f4e0d1ca2d96d546f15fefdd9659c3535249 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Apr 2023 11:38:45 +0800 +Subject: ext4: make ext4_es_insert_extent() return void + +From: Baokun Li + +[ Upstream commit 6c120399cde6b1b5cf65ce403765c579fb3d3e50 ] + +Now ext4_es_insert_extent() never return error, so make it return void. 
+ +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230424033846.4732-12-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents") +Signed-off-by: Sasha Levin +--- + fs/ext4/extents.c | 5 +++-- + fs/ext4/extents_status.c | 14 ++++++-------- + fs/ext4/extents_status.h | 6 +++--- + fs/ext4/inode.c | 21 ++++++--------------- + 4 files changed, 18 insertions(+), 28 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 68aa8760cb465..9e12592727914 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -3107,8 +3107,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) + if (ee_len == 0) + return 0; + +- return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, +- EXTENT_STATUS_WRITTEN); ++ ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, ++ EXTENT_STATUS_WRITTEN); ++ return 0; + } + + /* FIXME!! we need to try to merge to left or right after zero-out */ +diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c +index cccbdfd49a86b..f37e62546745b 100644 +--- a/fs/ext4/extents_status.c ++++ b/fs/ext4/extents_status.c +@@ -846,12 +846,10 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes, + /* + * ext4_es_insert_extent() adds information to an inode's extent + * status tree. +- * +- * Return 0 on success, error code on failure. 
+ */ +-int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, +- ext4_lblk_t len, ext4_fsblk_t pblk, +- unsigned int status) ++void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ++ ext4_lblk_t len, ext4_fsblk_t pblk, ++ unsigned int status) + { + struct extent_status newes; + ext4_lblk_t end = lblk + len - 1; +@@ -863,13 +861,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + bool revise_pending = false; + + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) +- return 0; ++ return; + + es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", + lblk, len, pblk, status, inode->i_ino); + + if (!len) +- return 0; ++ return; + + BUG_ON(end < lblk); + +@@ -938,7 +936,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + goto retry; + + ext4_es_print_tree(inode); +- return 0; ++ return; + } + + /* +diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h +index 4ec30a7982605..481ec4381bee6 100644 +--- a/fs/ext4/extents_status.h ++++ b/fs/ext4/extents_status.h +@@ -127,9 +127,9 @@ extern int __init ext4_init_es(void); + extern void ext4_exit_es(void); + extern void ext4_es_init_tree(struct ext4_es_tree *tree); + +-extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, +- ext4_lblk_t len, ext4_fsblk_t pblk, +- unsigned int status); ++extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ++ ext4_lblk_t len, ext4_fsblk_t pblk, ++ unsigned int status); + extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status); +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 365c4d3a434ab..ab2a7f9902887 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -589,10 +589,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, + ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, + map->m_lblk + map->m_len - 1)) + status |= EXTENT_STATUS_DELAYED; +- ret = 
ext4_es_insert_extent(inode, map->m_lblk, +- map->m_len, map->m_pblk, status); +- if (ret < 0) +- retval = ret; ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); + } + up_read((&EXT4_I(inode)->i_data_sem)); + +@@ -701,12 +699,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, + ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, + map->m_lblk + map->m_len - 1)) + status |= EXTENT_STATUS_DELAYED; +- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, +- map->m_pblk, status); +- if (ret < 0) { +- retval = ret; +- goto out_sem; +- } ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); + } + + out_sem: +@@ -1800,7 +1794,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + set_buffer_new(bh); + set_buffer_delay(bh); + } else if (retval > 0) { +- int ret; + unsigned int status; + + if (unlikely(retval != map->m_len)) { +@@ -1813,10 +1806,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + + status = map->m_flags & EXT4_MAP_UNWRITTEN ? + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; +- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, +- map->m_pblk, status); +- if (ret != 0) +- retval = ret; ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); + } + + out_unlock: +-- +2.43.0 + diff --git a/queue-5.10/ext4-refactor-ext4_da_map_blocks.patch b/queue-5.10/ext4-refactor-ext4_da_map_blocks.patch new file mode 100644 index 00000000000..522ae1d3a3f --- /dev/null +++ b/queue-5.10/ext4-refactor-ext4_da_map_blocks.patch @@ -0,0 +1,93 @@ +From b636ced6288f229dff9b524a82aa30d7cfa5e8f3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 27 Jan 2024 09:58:00 +0800 +Subject: ext4: refactor ext4_da_map_blocks() + +From: Zhang Yi + +[ Upstream commit 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 ] + +Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary +parameters and branches, no logic changes. 
+ +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 39 +++++++++++++++++---------------------- + 1 file changed, 17 insertions(+), 22 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index ab2a7f9902887..64162470a7e6c 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1729,7 +1729,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + /* Lookup extent status tree firstly */ + if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { + if (ext4_es_is_hole(&es)) { +- retval = 0; + down_read(&EXT4_I(inode)->i_data_sem); + goto add_delayed; + } +@@ -1774,26 +1773,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + retval = ext4_ext_map_blocks(NULL, inode, map, 0); + else + retval = ext4_ind_map_blocks(NULL, inode, map, 0); +- +-add_delayed: +- if (retval == 0) { +- int ret; +- +- /* +- * XXX: __block_prepare_write() unmaps passed block, +- * is it OK? +- */ +- +- ret = ext4_insert_delayed_block(inode, map->m_lblk); +- if (ret != 0) { +- retval = ret; +- goto out_unlock; +- } +- +- map_bh(bh, inode->i_sb, invalid_block); +- set_buffer_new(bh); +- set_buffer_delay(bh); +- } else if (retval > 0) { ++ if (retval < 0) ++ goto out_unlock; ++ if (retval > 0) { + unsigned int status; + + if (unlikely(retval != map->m_len)) { +@@ -1808,11 +1790,24 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); ++ goto out_unlock; + } + ++add_delayed: ++ /* ++ * XXX: __block_prepare_write() unmaps passed block, ++ * is it OK? 
++ */ ++ retval = ext4_insert_delayed_block(inode, map->m_lblk); ++ if (retval) ++ goto out_unlock; ++ ++ map_bh(bh, inode->i_sb, invalid_block); ++ set_buffer_new(bh); ++ set_buffer_delay(bh); ++ + out_unlock: + up_read((&EXT4_I(inode)->i_data_sem)); +- + return retval; + } + +-- +2.43.0 + diff --git a/queue-5.10/getrusage-add-the-signal_struct-sig-local-variable.patch b/queue-5.10/getrusage-add-the-signal_struct-sig-local-variable.patch new file mode 100644 index 00000000000..34eef5fef17 --- /dev/null +++ b/queue-5.10/getrusage-add-the-signal_struct-sig-local-variable.patch @@ -0,0 +1,93 @@ +From 2278229f5e1d78b6ef0a2938f41ed309435ff40d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Sep 2023 19:25:54 +0200 +Subject: getrusage: add the "signal_struct *sig" local variable + +From: Oleg Nesterov + +[ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ] + +No functional changes, cleanup/preparation. + +Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com +Signed-off-by: Oleg Nesterov +Cc: Eric W. 
Biederman +Signed-off-by: Andrew Morton +Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()") +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 37 +++++++++++++++++++------------------ + 1 file changed, 19 insertions(+), 18 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index bff14910b9262..8a53d858d7375 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1737,6 +1737,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + unsigned long flags; + u64 tgutime, tgstime, utime, stime; + unsigned long maxrss = 0; ++ struct signal_struct *sig = p->signal; + + memset((char *)r, 0, sizeof (*r)); + utime = stime = 0; +@@ -1744,7 +1745,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + if (who == RUSAGE_THREAD) { + task_cputime_adjusted(current, &utime, &stime); + accumulate_thread_rusage(p, r); +- maxrss = p->signal->maxrss; ++ maxrss = sig->maxrss; + goto out; + } + +@@ -1754,15 +1755,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + switch (who) { + case RUSAGE_BOTH: + case RUSAGE_CHILDREN: +- utime = p->signal->cutime; +- stime = p->signal->cstime; +- r->ru_nvcsw = p->signal->cnvcsw; +- r->ru_nivcsw = p->signal->cnivcsw; +- r->ru_minflt = p->signal->cmin_flt; +- r->ru_majflt = p->signal->cmaj_flt; +- r->ru_inblock = p->signal->cinblock; +- r->ru_oublock = p->signal->coublock; +- maxrss = p->signal->cmaxrss; ++ utime = sig->cutime; ++ stime = sig->cstime; ++ r->ru_nvcsw = sig->cnvcsw; ++ r->ru_nivcsw = sig->cnivcsw; ++ r->ru_minflt = sig->cmin_flt; ++ r->ru_majflt = sig->cmaj_flt; ++ r->ru_inblock = sig->cinblock; ++ r->ru_oublock = sig->coublock; ++ maxrss = sig->cmaxrss; + + if (who == RUSAGE_CHILDREN) + break; +@@ -1772,14 +1773,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; +- r->ru_nvcsw += p->signal->nvcsw; +- 
r->ru_nivcsw += p->signal->nivcsw; +- r->ru_minflt += p->signal->min_flt; +- r->ru_majflt += p->signal->maj_flt; +- r->ru_inblock += p->signal->inblock; +- r->ru_oublock += p->signal->oublock; +- if (maxrss < p->signal->maxrss) +- maxrss = p->signal->maxrss; ++ r->ru_nvcsw += sig->nvcsw; ++ r->ru_nivcsw += sig->nivcsw; ++ r->ru_minflt += sig->min_flt; ++ r->ru_majflt += sig->maj_flt; ++ r->ru_inblock += sig->inblock; ++ r->ru_oublock += sig->oublock; ++ if (maxrss < sig->maxrss) ++ maxrss = sig->maxrss; + t = p; + do { + accumulate_thread_rusage(t, r); +-- +2.43.0 + diff --git a/queue-5.10/getrusage-move-thread_group_cputime_adjusted-outside.patch b/queue-5.10/getrusage-move-thread_group_cputime_adjusted-outside.patch new file mode 100644 index 00000000000..5072d8e078a --- /dev/null +++ b/queue-5.10/getrusage-move-thread_group_cputime_adjusted-outside.patch @@ -0,0 +1,111 @@ +From fdc881c5376b0354fa1c63267602f1fe09e16092 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 16:50:50 +0100 +Subject: getrusage: move thread_group_cputime_adjusted() outside of + lock_task_sighand() + +From: Oleg Nesterov + +[ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ] + +Patch series "getrusage: use sig->stats_lock", v2. + +This patch (of 2): + +thread_group_cputime() does its own locking, we can safely shift +thread_group_cputime_adjusted() which does another for_each_thread loop +outside of ->siglock protected section. + +This is also preparation for the next patch which changes getrusage() to +use stats_lock instead of siglock, thread_group_cputime() takes the same +lock. With the current implementation recursive read_seqbegin_or_lock() +is fine, thread_group_cputime() can't enter the slow mode if the caller +holds stats_lock, yet this looks more safe and better performance-wise. 
+ +Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com +Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com +Signed-off-by: Oleg Nesterov +Reported-by: Dylan Hatch +Tested-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 34 +++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 15 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index 8a53d858d7375..26c8783bd0757 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1736,17 +1736,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + struct task_struct *t; + unsigned long flags; + u64 tgutime, tgstime, utime, stime; +- unsigned long maxrss = 0; ++ unsigned long maxrss; ++ struct mm_struct *mm; + struct signal_struct *sig = p->signal; + +- memset((char *)r, 0, sizeof (*r)); ++ memset(r, 0, sizeof(*r)); + utime = stime = 0; ++ maxrss = 0; + + if (who == RUSAGE_THREAD) { + task_cputime_adjusted(current, &utime, &stime); + accumulate_thread_rusage(p, r); + maxrss = sig->maxrss; +- goto out; ++ goto out_thread; + } + + if (!lock_task_sighand(p, &flags)) +@@ -1770,9 +1772,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + fallthrough; + + case RUSAGE_SELF: +- thread_group_cputime_adjusted(p, &tgutime, &tgstime); +- utime += tgutime; +- stime += tgstime; + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; +@@ -1792,19 +1791,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + } + unlock_task_sighand(p, &flags); + +-out: +- r->ru_utime = ns_to_kernel_old_timeval(utime); +- r->ru_stime = ns_to_kernel_old_timeval(stime); ++ if (who == RUSAGE_CHILDREN) ++ goto out_children; + +- if (who != RUSAGE_CHILDREN) { +- struct mm_struct *mm = get_task_mm(p); ++ thread_group_cputime_adjusted(p, &tgutime, &tgstime); ++ utime += tgutime; ++ stime += tgstime; + +- if (mm) { +- setmax_mm_hiwater_rss(&maxrss, mm); +- 
mmput(mm); +- } ++out_thread: ++ mm = get_task_mm(p); ++ if (mm) { ++ setmax_mm_hiwater_rss(&maxrss, mm); ++ mmput(mm); + } ++ ++out_children: + r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ ++ r->ru_utime = ns_to_kernel_old_timeval(utime); ++ r->ru_stime = ns_to_kernel_old_timeval(stime); + } + + SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) +-- +2.43.0 + diff --git a/queue-5.10/getrusage-use-__for_each_thread.patch b/queue-5.10/getrusage-use-__for_each_thread.patch new file mode 100644 index 00000000000..7c0d2bcaabf --- /dev/null +++ b/queue-5.10/getrusage-use-__for_each_thread.patch @@ -0,0 +1,43 @@ +From d4552b3a4300a1e6456d4a94e2501a8ab77c6b12 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Sep 2023 19:26:29 +0200 +Subject: getrusage: use __for_each_thread() + +From: Oleg Nesterov + +[ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ] + +do/while_each_thread should be avoided when possible. + +Plus this change allows to avoid lock_task_sighand(), we can use rcu +and/or sig->stats_lock instead. + +Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com +Signed-off-by: Oleg Nesterov +Cc: Eric W. 
Biederman +Signed-off-by: Andrew Morton +Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()") +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index 26c8783bd0757..f1ae8fa627145 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1780,10 +1780,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; +- t = p; +- do { ++ __for_each_thread(sig, t) + accumulate_thread_rusage(t, r); +- } while_each_thread(p, t); + break; + + default: +-- +2.43.0 + diff --git a/queue-5.10/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch b/queue-5.10/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch new file mode 100644 index 00000000000..5c26477fdde --- /dev/null +++ b/queue-5.10/getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch @@ -0,0 +1,92 @@ +From 130977df9b7119b933e5ca62435814492925b7dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 16:50:53 +0100 +Subject: getrusage: use sig->stats_lock rather than lock_task_sighand() + +From: Oleg Nesterov + +[ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ] + +lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call +getrusage() at the same time and the process has NR_THREADS, spin_lock_irq +will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. + +Change getrusage() to use sig->stats_lock, it was specifically designed +for this type of use. This way it runs lockless in the likely case. + +TODO: + - Change do_task_stat() to use sig->stats_lock too, then we can + remove spin_lock_irq(siglock) in wait_task_zombie(). + + - Turn sig->stats_lock into seqcount_rwlock_t, this way the + readers in the slow mode won't exclude each other. 
See + https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/ + + - stats_lock has to disable irqs because ->siglock can be taken + in irq context, it would be very nice to change __exit_signal() + to avoid the siglock->stats_lock dependency. + +Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com +Signed-off-by: Oleg Nesterov +Reported-by: Dylan Hatch +Tested-by: Dylan Hatch +Cc: Eric W. Biederman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + kernel/sys.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/kernel/sys.c b/kernel/sys.c +index f1ae8fa627145..efc213ae4c5ad 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1739,7 +1739,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + unsigned long maxrss; + struct mm_struct *mm; + struct signal_struct *sig = p->signal; ++ unsigned int seq = 0; + ++retry: + memset(r, 0, sizeof(*r)); + utime = stime = 0; + maxrss = 0; +@@ -1751,8 +1753,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + goto out_thread; + } + +- if (!lock_task_sighand(p, &flags)) +- return; ++ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + switch (who) { + case RUSAGE_BOTH: +@@ -1780,14 +1781,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; ++ ++ rcu_read_lock(); + __for_each_thread(sig, t) + accumulate_thread_rusage(t, r); ++ rcu_read_unlock(); ++ + break; + + default: + BUG(); + } +- unlock_task_sighand(p, &flags); ++ ++ if (need_seqretry(&sig->stats_lock, seq)) { ++ seq = 1; ++ goto retry; ++ } ++ done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + + if (who == RUSAGE_CHILDREN) + goto out_children; +-- +2.43.0 + diff --git a/queue-5.10/hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch b/queue-5.10/hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch new file mode 100644 index 
00000000000..efbd579af80 --- /dev/null +++ b/queue-5.10/hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch @@ -0,0 +1,60 @@ +From d716ca09e4a57e4809e656010300dcdb7f40d4cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Apr 2021 18:12:35 -0700 +Subject: hv_netvsc: Make netvsc/VF binding check both MAC and serial number + +From: Dexuan Cui + +[ Upstream commit 64ff412ad41fe3a5bf759ff4844dc1382176485c ] + +Currently the netvsc/VF binding logic only checks the PCI serial number. + +The Microsoft Azure Network Adapter (MANA) supports multiple net_device +interfaces (each such interface is called a "vPort", and has its unique +MAC address) which are backed by the same VF PCI device, so the binding +logic should check both the MAC address and the PCI serial number. + +The change should not break any other existing VF drivers, because +Hyper-V NIC SR-IOV implementation requires the netvsc network +interface and the VF network interface have the same MAC address. + +Co-developed-by: Haiyang Zhang +Signed-off-by: Haiyang Zhang +Co-developed-by: Shachar Raindel +Signed-off-by: Shachar Raindel +Acked-by: Stephen Hemminger +Signed-off-by: Dexuan Cui +Signed-off-by: David S. 
Miller +Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/netvsc_drv.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 057b1a9dde153..9ec1633b89b48 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -2310,8 +2310,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) + if (!ndev_ctx->vf_alloc) + continue; + +- if (ndev_ctx->vf_serial == serial) +- return hv_get_drvdata(ndev_ctx->device_ctx); ++ if (ndev_ctx->vf_serial != serial) ++ continue; ++ ++ ndev = hv_get_drvdata(ndev_ctx->device_ctx); ++ if (ndev->addr_len != vf_netdev->addr_len || ++ memcmp(ndev->perm_addr, vf_netdev->perm_addr, ++ ndev->addr_len) != 0) ++ continue; ++ ++ return ndev; ++ + } + + /* Fallback path to check synthetic vf with help of mac addr. +-- +2.43.0 + diff --git a/queue-5.10/hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch b/queue-5.10/hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch new file mode 100644 index 00000000000..c69442e94c2 --- /dev/null +++ b/queue-5.10/hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch @@ -0,0 +1,60 @@ +From 08cd6ba213b3786688214a60972f77235cc64410 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Jan 2021 16:53:43 -0800 +Subject: hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove + +From: Long Li + +[ Upstream commit 34b06a2eee44d469f2e2c013a83e6dac3aff6411 ] + +On VF hot remove, NETDEV_GOING_DOWN is sent to notify the VF is about to +go down. At this time, the VF is still sending/receiving traffic and we +request the VSP to switch datapath. + +On completion, the datapath is switched to synthetic and we can proceed +with VF hot remove. 
+ +Signed-off-by: Long Li +Reviewed-by: Haiyang Zhang +Signed-off-by: Jakub Kicinski +Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/netvsc_drv.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 57a5ec098e7e0..057b1a9dde153 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -2411,12 +2411,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) + * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network + * interface, there is only the CHANGE event and no UP or DOWN event. + */ +-static int netvsc_vf_changed(struct net_device *vf_netdev) ++static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) + { + struct net_device_context *net_device_ctx; + struct netvsc_device *netvsc_dev; + struct net_device *ndev; +- bool vf_is_up = netif_running(vf_netdev); ++ bool vf_is_up = false; ++ ++ if (event != NETDEV_GOING_DOWN) ++ vf_is_up = netif_running(vf_netdev); + + ndev = get_netvsc_byref(vf_netdev); + if (!ndev) +@@ -2762,7 +2765,8 @@ static int netvsc_netdev_event(struct notifier_block *this, + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: +- return netvsc_vf_changed(event_dev); ++ case NETDEV_GOING_DOWN: ++ return netvsc_vf_changed(event_dev, event); + default: + return NOTIFY_DONE; + } +-- +2.43.0 + diff --git a/queue-5.10/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch b/queue-5.10/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch new file mode 100644 index 00000000000..e0491cfc296 --- /dev/null +++ b/queue-5.10/hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch @@ -0,0 +1,184 @@ +From 7bd309d035dcf27d9ac09dda05158de11370e3c5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 20:40:38 -0800 +Subject: hv_netvsc: Register 
VF in netvsc_probe if NET_DEVICE_REGISTER missed + +From: Shradha Gupta + +[ Upstream commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 ] + +If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER +handler cannot perform VF register successfully as the register call +is received before netvsc_probe is finished. This is because we +register register_netdevice_notifier() very early( even before +vmbus_driver_register()). +To fix this, we try to register each such matching VF( if it is visible +as a netdevice) at the end of netvsc_probe. + +Cc: stable@vger.kernel.org +Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register") +Suggested-by: Dexuan Cui +Signed-off-by: Shradha Gupta +Reviewed-by: Haiyang Zhang +Reviewed-by: Dexuan Cui +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++-------- + 1 file changed, 62 insertions(+), 20 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index e8efcc6a0b05a..0fc0f9cb3f34b 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -44,6 +44,10 @@ + #define LINKCHANGE_INT (2 * HZ) + #define VF_TAKEOVER_INT (HZ / 10) + ++/* Macros to define the context of vf registration */ ++#define VF_REG_IN_PROBE 1 ++#define VF_REG_IN_NOTIFIER 2 ++ + static unsigned int ring_size __ro_after_init = 128; + module_param(ring_size, uint, 0444); + MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +@@ -2194,7 +2198,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) + } + + static int netvsc_vf_join(struct net_device *vf_netdev, +- struct net_device *ndev) ++ struct net_device *ndev, int context) + { + struct net_device_context *ndev_ctx = netdev_priv(ndev); + int ret; +@@ -2217,7 +2221,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, + goto upper_link_failed; + } + +- 
schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); ++ /* If this registration is called from probe context vf_takeover ++ * is taken care of later in probe itself. ++ */ ++ if (context == VF_REG_IN_NOTIFIER) ++ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + + call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); + +@@ -2355,7 +2363,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) + return NOTIFY_DONE; + } + +-static int netvsc_register_vf(struct net_device *vf_netdev) ++static int netvsc_register_vf(struct net_device *vf_netdev, int context) + { + struct net_device_context *net_device_ctx; + struct netvsc_device *netvsc_dev; +@@ -2395,7 +2403,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) + + netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); + +- if (netvsc_vf_join(vf_netdev, ndev) != 0) ++ if (netvsc_vf_join(vf_netdev, ndev, context) != 0) + return NOTIFY_DONE; + + dev_hold(vf_netdev); +@@ -2479,10 +2487,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) + return NOTIFY_OK; + } + ++static int check_dev_is_matching_vf(struct net_device *event_ndev) ++{ ++ /* Skip NetVSC interfaces */ ++ if (event_ndev->netdev_ops == &device_ops) ++ return -ENODEV; ++ ++ /* Avoid non-Ethernet type devices */ ++ if (event_ndev->type != ARPHRD_ETHER) ++ return -ENODEV; ++ ++ /* Avoid Vlan dev with same MAC registering as VF */ ++ if (is_vlan_dev(event_ndev)) ++ return -ENODEV; ++ ++ /* Avoid Bonding master dev with same MAC registering as VF */ ++ if (netif_is_bond_master(event_ndev)) ++ return -ENODEV; ++ ++ return 0; ++} ++ + static int netvsc_probe(struct hv_device *dev, + const struct hv_vmbus_device_id *dev_id) + { +- struct net_device *net = NULL; ++ struct net_device *net = NULL, *vf_netdev; + struct net_device_context *net_device_ctx; + struct netvsc_device_info *device_info = NULL; + struct netvsc_device *nvdev; +@@ -2590,6 +2619,30 @@ static int netvsc_probe(struct hv_device *dev, + 
} + + list_add(&net_device_ctx->list, &netvsc_dev_list); ++ ++ /* When the hv_netvsc driver is unloaded and reloaded, the ++ * NET_DEVICE_REGISTER for the vf device is replayed before probe ++ * is complete. This is because register_netdevice_notifier() gets ++ * registered before vmbus_driver_register() so that callback func ++ * is set before probe and we don't miss events like NETDEV_POST_INIT ++ * So, in this section we try to register the matching vf device that ++ * is present as a netdevice, knowing that its register call is not ++ * processed in the netvsc_netdev_notifier(as probing is progress and ++ * get_netvsc_byslot fails). ++ */ ++ for_each_netdev(dev_net(net), vf_netdev) { ++ ret = check_dev_is_matching_vf(vf_netdev); ++ if (ret != 0) ++ continue; ++ ++ if (net != get_netvsc_byslot(vf_netdev)) ++ continue; ++ ++ netvsc_prepare_bonding(vf_netdev); ++ netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); ++ __netvsc_vf_setup(net, vf_netdev); ++ break; ++ } + rtnl_unlock(); + + netvsc_devinfo_put(device_info); +@@ -2746,28 +2799,17 @@ static int netvsc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) + { + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); ++ int ret = 0; + +- /* Skip our own events */ +- if (event_dev->netdev_ops == &device_ops) +- return NOTIFY_DONE; +- +- /* Avoid non-Ethernet type devices */ +- if (event_dev->type != ARPHRD_ETHER) +- return NOTIFY_DONE; +- +- /* Avoid Vlan dev with same MAC registering as VF */ +- if (is_vlan_dev(event_dev)) +- return NOTIFY_DONE; +- +- /* Avoid Bonding master dev with same MAC registering as VF */ +- if (netif_is_bond_master(event_dev)) ++ ret = check_dev_is_matching_vf(event_dev); ++ if (ret != 0) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_POST_INIT: + return netvsc_prepare_bonding(event_dev); + case NETDEV_REGISTER: +- return netvsc_register_vf(event_dev); ++ return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); + case NETDEV_UNREGISTER: + 
return netvsc_unregister_vf(event_dev); + case NETDEV_UP: +-- +2.43.0 + diff --git a/queue-5.10/hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch b/queue-5.10/hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch new file mode 100644 index 00000000000..3435e06a4a9 --- /dev/null +++ b/queue-5.10/hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch @@ -0,0 +1,38 @@ +From 0d6995f74ccf64cb2ddb909e952411877f54d143 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 10 Oct 2021 13:03:28 +0900 +Subject: hv_netvsc: use netif_is_bond_master() instead of open code + +From: Juhee Kang + +[ Upstream commit c60882a4566a0a62dc3a40c85131103aad83dcb3 ] + +Use netif_is_bond_master() function instead of open code, which is +((event_dev->priv_flags & IFF_BONDING) && (event_dev->flags & IFF_MASTER)). +This patch doesn't change logic. + +Signed-off-by: Juhee Kang +Signed-off-by: David S. Miller +Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/netvsc_drv.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 9ec1633b89b48..e8efcc6a0b05a 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -2760,8 +2760,7 @@ static int netvsc_netdev_event(struct notifier_block *this, + return NOTIFY_DONE; + + /* Avoid Bonding master dev with same MAC registering as VF */ +- if ((event_dev->priv_flags & IFF_BONDING) && +- (event_dev->flags & IFF_MASTER)) ++ if (netif_is_bond_master(event_dev)) + return NOTIFY_DONE; + + switch (event) { +-- +2.43.0 + diff --git a/queue-5.10/hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch b/queue-5.10/hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch new file mode 100644 index 00000000000..1336e54d6f7 --- /dev/null +++ b/queue-5.10/hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch @@ -0,0 
+1,166 @@ +From 49485861d465362468d21d6dde9d62f83ed55c86 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Nov 2020 11:04:02 +0100 +Subject: hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus + hardening + +From: Andres Beltran + +[ Upstream commit 4d18fcc95f50950a99bd940d4e61a983f91d267a ] + +Currently, pointers to guest memory are passed to Hyper-V as +transaction IDs in netvsc. In the face of errors or malicious +behavior in Hyper-V, netvsc should not expose or trust the transaction +IDs returned by Hyper-V to be valid guest memory addresses. Instead, +use small integers generated by vmbus_requestor as requests +(transaction) IDs. + +Signed-off-by: Andres Beltran +Co-developed-by: Andrea Parri (Microsoft) +Signed-off-by: Andrea Parri (Microsoft) +Reviewed-by: Michael Kelley +Acked-by: Jakub Kicinski +Reviewed-by: Wei Liu +Cc: "David S. Miller" +Cc: Jakub Kicinski +Cc: netdev@vger.kernel.org +Link: https://lore.kernel.org/r/20201109100402.8946-4-parri.andrea@gmail.com +Signed-off-by: Wei Liu +Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/hyperv_net.h | 13 +++++++++++++ + drivers/net/hyperv/netvsc.c | 22 ++++++++++++++++------ + drivers/net/hyperv/rndis_filter.c | 1 + + include/linux/hyperv.h | 1 + + 4 files changed, 31 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 367878493e704..15652d7951f9e 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -847,6 +847,19 @@ struct nvsp_message { + + #define NETVSC_XDP_HDRM 256 + ++#define NETVSC_MIN_OUT_MSG_SIZE (sizeof(struct vmpacket_descriptor) + \ ++ sizeof(struct nvsp_message)) ++#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) ++ ++/* Estimated requestor size: ++ * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size ++ */ ++static inline u32 
netvsc_rqstor_size(unsigned long ringbytes) ++{ ++ return ringbytes / NETVSC_MIN_OUT_MSG_SIZE + ++ ringbytes / NETVSC_MIN_IN_MSG_SIZE; ++} ++ + #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \ + (offsetof(struct vmtransfer_page_packet_header, ranges) + \ + (rng_cnt) * sizeof(struct vmtransfer_page_range)) +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 3eae31c0f97a6..c9b73a0448813 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -50,7 +50,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) + + vmbus_sendpacket(dev->channel, init_pkt, + sizeof(struct nvsp_message), +- (unsigned long)init_pkt, ++ VMBUS_RQST_ID_NO_RESPONSE, + VM_PKT_DATA_INBAND, 0); + } + +@@ -163,7 +163,7 @@ static void netvsc_revoke_recv_buf(struct hv_device *device, + ret = vmbus_sendpacket(device->channel, + revoke_packet, + sizeof(struct nvsp_message), +- (unsigned long)revoke_packet, ++ VMBUS_RQST_ID_NO_RESPONSE, + VM_PKT_DATA_INBAND, 0); + /* If the failure is because the channel is rescinded; + * ignore the failure since we cannot send on a rescinded +@@ -213,7 +213,7 @@ static void netvsc_revoke_send_buf(struct hv_device *device, + ret = vmbus_sendpacket(device->channel, + revoke_packet, + sizeof(struct nvsp_message), +- (unsigned long)revoke_packet, ++ VMBUS_RQST_ID_NO_RESPONSE, + VM_PKT_DATA_INBAND, 0); + + /* If the failure is because the channel is rescinded; +@@ -557,7 +557,7 @@ static int negotiate_nvsp_ver(struct hv_device *device, + + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), +- (unsigned long)init_packet, ++ VMBUS_RQST_ID_NO_RESPONSE, + VM_PKT_DATA_INBAND, 0); + + return ret; +@@ -614,7 +614,7 @@ static int netvsc_connect_vsp(struct hv_device *device, + /* Send the init request */ + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), +- (unsigned long)init_packet, ++ VMBUS_RQST_ID_NO_RESPONSE, + VM_PKT_DATA_INBAND, 0); + if (ret != 0) + 
goto cleanup; +@@ -698,10 +698,19 @@ static void netvsc_send_tx_complete(struct net_device *ndev, + const struct vmpacket_descriptor *desc, + int budget) + { +- struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; + struct net_device_context *ndev_ctx = netdev_priv(ndev); ++ struct sk_buff *skb; + u16 q_idx = 0; + int queue_sends; ++ u64 cmd_rqst; ++ ++ cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id); ++ if (cmd_rqst == VMBUS_RQST_ERROR) { ++ netdev_err(ndev, "Incorrect transaction id\n"); ++ return; ++ } ++ ++ skb = (struct sk_buff *)(unsigned long)cmd_rqst; + + /* Notify the layer above us */ + if (likely(skb)) { +@@ -1530,6 +1539,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, + netvsc_poll, NAPI_POLL_WEIGHT); + + /* Open the channel */ ++ device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); + ret = vmbus_open(device->channel, netvsc_ring_bytes, + netvsc_ring_bytes, NULL, 0, + netvsc_channel_cb, net_device->chan_table); +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 90bc0008fa2fd..13f62950eeb9f 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1170,6 +1170,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) + /* Set the channel before opening.*/ + nvchan->channel = new_sc; + ++ new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); + ret = vmbus_open(new_sc, netvsc_ring_bytes, + netvsc_ring_bytes, NULL, 0, + netvsc_channel_cb, nvchan); +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 4cb65a79d92f6..2aaf450c8d800 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -779,6 +779,7 @@ struct vmbus_requestor { + + #define VMBUS_NO_RQSTOR U64_MAX + #define VMBUS_RQST_ERROR (U64_MAX - 1) ++#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2) + + struct vmbus_device { + u16 dev_type; +-- +2.43.0 + diff --git 
a/queue-5.10/hv_netvsc-wait-for-completion-on-request-switch_data.patch b/queue-5.10/hv_netvsc-wait-for-completion-on-request-switch_data.patch new file mode 100644 index 00000000000..3f27c93a04c --- /dev/null +++ b/queue-5.10/hv_netvsc-wait-for-completion-on-request-switch_data.patch @@ -0,0 +1,108 @@ +From d069a005c45770eced39a4cc7f4048713e0ec0dc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Jan 2021 16:53:42 -0800 +Subject: hv_netvsc: Wait for completion on request SWITCH_DATA_PATH + +From: Long Li + +[ Upstream commit 8b31f8c982b738e4130539e47f03967c599d8e22 ] + +The completion indicates if NVSP_MSG4_TYPE_SWITCH_DATA_PATH has been +processed by the VSP. The traffic is steered to VF or synthetic after we +receive this completion. + +Signed-off-by: Long Li +Reported-by: kernel test robot +Signed-off-by: Jakub Kicinski +Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") +Signed-off-by: Sasha Levin +--- + drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++++++++++++++--- + drivers/net/hyperv/netvsc_drv.c | 1 - + 2 files changed, 34 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index c9b73a0448813..03333a4136bf4 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -37,6 +37,10 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) + struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); + struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; + ++ /* Block sending traffic to VF if it's about to be gone */ ++ if (!vf) ++ net_device_ctx->data_path_is_vf = vf; ++ + memset(init_pkt, 0, sizeof(struct nvsp_message)); + init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH; + if (vf) +@@ -50,8 +54,11 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) + + vmbus_sendpacket(dev->channel, init_pkt, + sizeof(struct nvsp_message), +- VMBUS_RQST_ID_NO_RESPONSE, +- VM_PKT_DATA_INBAND, 
0); ++ (unsigned long)init_pkt, ++ VM_PKT_DATA_INBAND, ++ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); ++ wait_for_completion(&nv_dev->channel_init_wait); ++ net_device_ctx->data_path_is_vf = vf; + } + + /* Worker to setup sub channels on initial setup +@@ -757,8 +764,31 @@ static void netvsc_send_completion(struct net_device *ndev, + const struct vmpacket_descriptor *desc, + int budget) + { +- const struct nvsp_message *nvsp_packet = hv_pkt_data(desc); ++ const struct nvsp_message *nvsp_packet; + u32 msglen = hv_pkt_datalen(desc); ++ struct nvsp_message *pkt_rqst; ++ u64 cmd_rqst; ++ ++ /* First check if this is a VMBUS completion without data payload */ ++ if (!msglen) { ++ cmd_rqst = vmbus_request_addr(&incoming_channel->requestor, ++ (u64)desc->trans_id); ++ if (cmd_rqst == VMBUS_RQST_ERROR) { ++ netdev_err(ndev, "Invalid transaction id\n"); ++ return; ++ } ++ ++ pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst; ++ switch (pkt_rqst->hdr.msg_type) { ++ case NVSP_MSG4_TYPE_SWITCH_DATA_PATH: ++ complete(&net_device->channel_init_wait); ++ break; ++ ++ default: ++ netdev_err(ndev, "Unexpected VMBUS completion!!\n"); ++ } ++ return; ++ } + + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { +@@ -766,6 +796,7 @@ static void netvsc_send_completion(struct net_device *ndev, + return; + } + ++ nvsp_packet = hv_pkt_data(desc); + switch (nvsp_packet->hdr.msg_type) { + case NVSP_MSG_TYPE_INIT_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 790bf750281ad..57a5ec098e7e0 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -2429,7 +2429,6 @@ static int netvsc_vf_changed(struct net_device *vf_netdev) + + if (net_device_ctx->data_path_is_vf == vf_is_up) + return NOTIFY_OK; +- net_device_ctx->data_path_is_vf = vf_is_up; + + if (vf_is_up && !net_device_ctx->vf_alloc) { + 
netdev_info(ndev, "Waiting for the VF association from host\n"); +-- +2.43.0 + diff --git a/queue-5.10/lsm-fix-default-return-value-of-the-socket_getpeerse.patch b/queue-5.10/lsm-fix-default-return-value-of-the-socket_getpeerse.patch new file mode 100644 index 00000000000..88375c8b34f --- /dev/null +++ b/queue-5.10/lsm-fix-default-return-value-of-the-socket_getpeerse.patch @@ -0,0 +1,91 @@ +From 412604ade6d81740b674c81ad30bd5d459553e94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jan 2024 19:45:31 +0100 +Subject: lsm: fix default return value of the socket_getpeersec_*() hooks + +From: Ondrej Mosnacek + +[ Upstream commit 5a287d3d2b9de2b3e747132c615599907ba5c3c1 ] + +For these hooks the true "neutral" value is -EOPNOTSUPP, which is +currently what is returned when no LSM provides this hook and what LSMs +return when there is no security context set on the socket. Correct the +value in <linux/lsm_hook_defs.h> and adjust the dispatch functions in +security/security.c to avoid issues when the BPF LSM is enabled. 
+ +Cc: stable@vger.kernel.org +Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") +Signed-off-by: Ondrej Mosnacek +[PM: subject line tweak] +Signed-off-by: Paul Moore +Signed-off-by: Sasha Levin +--- + include/linux/lsm_hook_defs.h | 4 ++-- + security/security.c | 31 +++++++++++++++++++++++++++---- + 2 files changed, 29 insertions(+), 6 deletions(-) + +diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h +index 9f550eab8ebdb..07abcd384975b 100644 +--- a/include/linux/lsm_hook_defs.h ++++ b/include/linux/lsm_hook_defs.h +@@ -293,9 +293,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) + LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) + LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) + LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) +-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, ++LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, + sockptr_t optval, sockptr_t optlen, unsigned int len) +-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, ++LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, + struct sk_buff *skb, u32 *secid) + LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) + LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) +diff --git a/security/security.c b/security/security.c +index e9dcde3c4f14b..0bbcb100ba8e9 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -2227,14 +2227,37 @@ EXPORT_SYMBOL(security_sock_rcv_skb); + int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, + sockptr_t optlen, unsigned int len) + { +- return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, +- optval, optlen, len); ++ struct security_hook_list *hp; ++ int rc; ++ ++ /* ++ * Only one module will provide a security context. 
++ */ ++ hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream, ++ list) { ++ rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen, ++ len); ++ if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream)) ++ return rc; ++ } ++ return LSM_RET_DEFAULT(socket_getpeersec_stream); + } + + int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) + { +- return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, +- skb, secid); ++ struct security_hook_list *hp; ++ int rc; ++ ++ /* ++ * Only one module will provide a security context. ++ */ ++ hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram, ++ list) { ++ rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid); ++ if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram)) ++ return rc; ++ } ++ return LSM_RET_DEFAULT(socket_getpeersec_dgram); + } + EXPORT_SYMBOL(security_socket_getpeersec_dgram); + +-- +2.43.0 + diff --git a/queue-5.10/lsm-make-security_socket_getpeersec_stream-sockptr_t.patch b/queue-5.10/lsm-make-security_socket_getpeersec_stream-sockptr_t.patch new file mode 100644 index 00000000000..21edaccdc91 --- /dev/null +++ b/queue-5.10/lsm-make-security_socket_getpeersec_stream-sockptr_t.patch @@ -0,0 +1,282 @@ +From d0ab4effe8f2356e5ad02e4c5d59527127721934 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Oct 2022 12:31:21 -0400 +Subject: lsm: make security_socket_getpeersec_stream() sockptr_t safe + +From: Paul Moore + +[ Upstream commit b10b9c342f7571f287fd422be5d5c0beb26ba974 ] + +Commit 4ff09db1b79b ("bpf: net: Change sk_getsockopt() to take the +sockptr_t argument") made it possible to call sk_getsockopt() +with both user and kernel address space buffers through the use of +the sockptr_t type. 
Unfortunately at the time of conversion the +security_socket_getpeersec_stream() LSM hook was written to only +accept userspace buffers, and in a desire to avoid having to change +the LSM hook the commit author simply passed the sockptr_t's +userspace buffer pointer. Since the only sk_getsockopt() callers +at the time of conversion which used kernel sockptr_t buffers did +not allow SO_PEERSEC, and hence the +security_socket_getpeersec_stream() hook, this was acceptable but +also very fragile as future changes presented the possibility of +silently passing kernel space pointers to the LSM hook. + +There are several ways to protect against this, including careful +code review of future commits, but since relying on code review to +catch bugs is a recipe for disaster and the upstream eBPF maintainer +is "strongly against defensive programming", this patch updates the +LSM hook, and all of the implementations to support sockptr_t and +safely handle both user and kernel space buffers. + +Acked-by: Casey Schaufler +Acked-by: John Johansen +Signed-off-by: Paul Moore +Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks") +Signed-off-by: Sasha Levin +--- + include/linux/lsm_hook_defs.h | 2 +- + include/linux/lsm_hooks.h | 4 ++-- + include/linux/security.h | 11 +++++++---- + net/core/sock.c | 3 ++- + security/apparmor/lsm.c | 29 +++++++++++++---------------- + security/security.c | 6 +++--- + security/selinux/hooks.c | 13 ++++++------- + security/smack/smack_lsm.c | 19 ++++++++++--------- + 8 files changed, 44 insertions(+), 43 deletions(-) + +diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h +index 92a76ce0c382d..9f550eab8ebdb 100644 +--- a/include/linux/lsm_hook_defs.h ++++ b/include/linux/lsm_hook_defs.h +@@ -294,7 +294,7 @@ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) + LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) + LSM_HOOK(int, 0, 
socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) + LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +- char __user *optval, int __user *optlen, unsigned len) ++ sockptr_t optval, sockptr_t optlen, unsigned int len) + LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, + struct sk_buff *skb, u32 *secid) + LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) +diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h +index 64cdf4d7bfb30..bbf9c8c7bd9c5 100644 +--- a/include/linux/lsm_hooks.h ++++ b/include/linux/lsm_hooks.h +@@ -926,8 +926,8 @@ + * SO_GETPEERSEC. For tcp sockets this can be meaningful if the + * socket is associated with an ipsec SA. + * @sock is the local socket. +- * @optval userspace memory where the security state is to be copied. +- * @optlen userspace int where the module should copy the actual length ++ * @optval memory where the security state is to be copied. ++ * @optlen memory where the module should copy the actual length + * of the security state. + * @len as input is the maximum length to copy to userspace provided + * by the caller. 
+diff --git a/include/linux/security.h b/include/linux/security.h +index e388b1666bcfc..5b61aa19fac66 100644 +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + + struct linux_binprm; + struct cred; +@@ -1366,8 +1367,8 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname); + int security_socket_setsockopt(struct socket *sock, int level, int optname); + int security_socket_shutdown(struct socket *sock, int how); + int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb); +-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, +- int __user *optlen, unsigned len); ++int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, ++ sockptr_t optlen, unsigned int len); + int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid); + int security_sk_alloc(struct sock *sk, int family, gfp_t priority); + void security_sk_free(struct sock *sk); +@@ -1501,8 +1502,10 @@ static inline int security_sock_rcv_skb(struct sock *sk, + return 0; + } + +-static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, +- int __user *optlen, unsigned len) ++static inline int security_socket_getpeersec_stream(struct socket *sock, ++ sockptr_t optval, ++ sockptr_t optlen, ++ unsigned int len) + { + return -ENOPROTOOPT; + } +diff --git a/net/core/sock.c b/net/core/sock.c +index 42da46965b16f..016c0b9e01b70 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1503,7 +1503,8 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, + break; + + case SO_PEERSEC: +- return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len); ++ return security_socket_getpeersec_stream(sock, ++ optval, optlen, len); + + case SO_MARK: + v.val = sk->sk_mark; +diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c +index 585edcc6814d2..052f1b920e43f 100644 +--- 
a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -1070,11 +1070,10 @@ static struct aa_label *sk_peer_label(struct sock *sk) + * Note: for tcp only valid if using ipsec or cipso on lan + */ + static int apparmor_socket_getpeersec_stream(struct socket *sock, +- char __user *optval, +- int __user *optlen, ++ sockptr_t optval, sockptr_t optlen, + unsigned int len) + { +- char *name; ++ char *name = NULL; + int slen, error = 0; + struct aa_label *label; + struct aa_label *peer; +@@ -1091,23 +1090,21 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, + /* don't include terminating \0 in slen, it breaks some apps */ + if (slen < 0) { + error = -ENOMEM; +- } else { +- if (slen > len) { +- error = -ERANGE; +- } else if (copy_to_user(optval, name, slen)) { +- error = -EFAULT; +- goto out; +- } +- if (put_user(slen, optlen)) +- error = -EFAULT; +-out: +- kfree(name); +- ++ goto done; ++ } ++ if (slen > len) { ++ error = -ERANGE; ++ goto done_len; + } + ++ if (copy_to_sockptr(optval, name, slen)) ++ error = -EFAULT; ++done_len: ++ if (copy_to_sockptr(optlen, &slen, sizeof(slen))) ++ error = -EFAULT; + done: + end_current_label_crit_section(label); +- ++ kfree(name); + return error; + } + +diff --git a/security/security.c b/security/security.c +index 269c3965393f4..e9dcde3c4f14b 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -2224,11 +2224,11 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) + } + EXPORT_SYMBOL(security_sock_rcv_skb); + +-int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, +- int __user *optlen, unsigned len) ++int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, ++ sockptr_t optlen, unsigned int len) + { + return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, +- optval, optlen, len); ++ optval, optlen, len); + } + + int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) +diff --git 
a/security/selinux/hooks.c b/security/selinux/hooks.c +index 50d3ddfe15fd1..46c00a68bb4bd 100644 +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -5110,11 +5110,12 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) + return err; + } + +-static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval, +- int __user *optlen, unsigned len) ++static int selinux_socket_getpeersec_stream(struct socket *sock, ++ sockptr_t optval, sockptr_t optlen, ++ unsigned int len) + { + int err = 0; +- char *scontext; ++ char *scontext = NULL; + u32 scontext_len; + struct sk_security_struct *sksec = sock->sk->sk_security; + u32 peer_sid = SECSID_NULL; +@@ -5130,17 +5131,15 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op + &scontext_len); + if (err) + return err; +- + if (scontext_len > len) { + err = -ERANGE; + goto out_len; + } + +- if (copy_to_user(optval, scontext, scontext_len)) ++ if (copy_to_sockptr(optval, scontext, scontext_len)) + err = -EFAULT; +- + out_len: +- if (put_user(scontext_len, optlen)) ++ if (copy_to_sockptr(optlen, &scontext_len, sizeof(scontext_len))) + err = -EFAULT; + kfree(scontext); + return err; +diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c +index e1669759403a6..5388f143eecd8 100644 +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -4022,12 +4022,12 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) + * returns zero on success, an error code otherwise + */ + static int smack_socket_getpeersec_stream(struct socket *sock, +- char __user *optval, +- int __user *optlen, unsigned len) ++ sockptr_t optval, sockptr_t optlen, ++ unsigned int len) + { + struct socket_smack *ssp; + char *rcp = ""; +- int slen = 1; ++ u32 slen = 1; + int rc = 0; + + ssp = sock->sk->sk_security; +@@ -4035,15 +4035,16 @@ static int smack_socket_getpeersec_stream(struct socket *sock, + rcp = 
ssp->smk_packet->smk_known; + slen = strlen(rcp) + 1; + } +- +- if (slen > len) ++ if (slen > len) { + rc = -ERANGE; +- else if (copy_to_user(optval, rcp, slen) != 0) +- rc = -EFAULT; ++ goto out_len; ++ } + +- if (put_user(slen, optlen) != 0) ++ if (copy_to_sockptr(optval, rcp, slen)) ++ rc = -EFAULT; ++out_len: ++ if (copy_to_sockptr(optlen, &slen, sizeof(slen))) + rc = -EFAULT; +- + return rc; + } + +-- +2.43.0 + diff --git a/queue-5.10/mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch b/queue-5.10/mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch new file mode 100644 index 00000000000..d7463639fbf --- /dev/null +++ b/queue-5.10/mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch @@ -0,0 +1,191 @@ +From 60e180797025bd86a8ca15068002096dbc3f63d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Feb 2021 12:09:54 -0800 +Subject: mm/hugetlb: change hugetlb_reserve_pages() to type bool + +From: Mike Kravetz + +[ Upstream commit 33b8f84a4ee78491a8f4f9e4c5520c9da4a10983 ] + +While reviewing a bug in hugetlb_reserve_pages, it was noticed that all +callers ignore the return value. Any failure is considered an ENOMEM +error by the callers. + +Change the function to be of type bool. The function will return true if +the reservation was successful, false otherwise. Callers currently assume +a zero return code indicates success. Change the callers to look for true +to indicate success. No functional change, only code cleanup. 
+ +Link: https://lkml.kernel.org/r/20201221192542.15732-1-mike.kravetz@oracle.com +Signed-off-by: Mike Kravetz +Reviewed-by: Matthew Wilcox (Oracle) +Cc: David Hildenbrand +Cc: Dan Carpenter +Cc: Michal Hocko +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: e656c7a9e596 ("mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE") +Signed-off-by: Sasha Levin +--- + fs/hugetlbfs/inode.c | 4 ++-- + include/linux/hugetlb.h | 2 +- + mm/hugetlb.c | 37 ++++++++++++++----------------------- + 3 files changed, 17 insertions(+), 26 deletions(-) + +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index a0edd4b8fa189..c3e9fa7ce75f7 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -176,7 +176,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) + file_accessed(file); + + ret = -ENOMEM; +- if (hugetlb_reserve_pages(inode, ++ if (!hugetlb_reserve_pages(inode, + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma, + vma->vm_flags)) +@@ -1500,7 +1500,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, + inode->i_size = size; + clear_nlink(inode); + +- if (hugetlb_reserve_pages(inode, 0, ++ if (!hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) + file = ERR_PTR(-ENOMEM); +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index 99b73fc4a8246..90c66b9458c31 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -140,7 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep); +-int hugetlb_reserve_pages(struct inode *inode, long from, long to, ++bool hugetlb_reserve_pages(struct inode *inode, long from, long to, + struct vm_area_struct *vma, + vm_flags_t vm_flags); + long hugetlb_unreserve_pages(struct inode *inode, long start, long end, +diff --git 
a/mm/hugetlb.c b/mm/hugetlb.c +index 81949f6d29af5..02b7c8f9b0e87 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5108,12 +5108,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, + return pages << h->order; + } + +-int hugetlb_reserve_pages(struct inode *inode, ++/* Return true if reservation was successful, false otherwise. */ ++bool hugetlb_reserve_pages(struct inode *inode, + long from, long to, + struct vm_area_struct *vma, + vm_flags_t vm_flags) + { +- long ret, chg, add = -1; ++ long chg, add = -1; + struct hstate *h = hstate_inode(inode); + struct hugepage_subpool *spool = subpool_inode(inode); + struct resv_map *resv_map; +@@ -5123,7 +5124,7 @@ int hugetlb_reserve_pages(struct inode *inode, + /* This should never happen */ + if (from > to) { + VM_WARN(1, "%s called with a negative range\n", __func__); +- return -EINVAL; ++ return false; + } + + /* +@@ -5132,7 +5133,7 @@ int hugetlb_reserve_pages(struct inode *inode, + * without using reserves + */ + if (vm_flags & VM_NORESERVE) +- return 0; ++ return true; + + /* + * Shared mappings base their reservation on the number of pages that +@@ -5154,7 +5155,7 @@ int hugetlb_reserve_pages(struct inode *inode, + /* Private mapping. 
*/ + resv_map = resv_map_alloc(); + if (!resv_map) +- return -ENOMEM; ++ return false; + + chg = to - from; + +@@ -5162,18 +5163,12 @@ int hugetlb_reserve_pages(struct inode *inode, + set_vma_resv_flags(vma, HPAGE_RESV_OWNER); + } + +- if (chg < 0) { +- ret = chg; ++ if (chg < 0) + goto out_err; +- } +- +- ret = hugetlb_cgroup_charge_cgroup_rsvd( +- hstate_index(h), chg * pages_per_huge_page(h), &h_cg); + +- if (ret < 0) { +- ret = -ENOMEM; ++ if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h), ++ chg * pages_per_huge_page(h), &h_cg) < 0) + goto out_err; +- } + + if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) { + /* For private mappings, the hugetlb_cgroup uncharge info hangs +@@ -5188,19 +5183,15 @@ int hugetlb_reserve_pages(struct inode *inode, + * reservations already in place (gbl_reserve). + */ + gbl_reserve = hugepage_subpool_get_pages(spool, chg); +- if (gbl_reserve < 0) { +- ret = -ENOSPC; ++ if (gbl_reserve < 0) + goto out_uncharge_cgroup; +- } + + /* + * Check enough hugepages are available for the reservation. + * Hand the pages back to the subpool if there are not + */ +- ret = hugetlb_acct_memory(h, gbl_reserve); +- if (ret < 0) { ++ if (hugetlb_acct_memory(h, gbl_reserve) < 0) + goto out_put_pages; +- } + + /* + * Account for the reservations made. 
Shared mappings record regions +@@ -5218,7 +5209,6 @@ int hugetlb_reserve_pages(struct inode *inode, + + if (unlikely(add < 0)) { + hugetlb_acct_memory(h, -gbl_reserve); +- ret = add; + goto out_put_pages; + } else if (unlikely(chg > add)) { + /* +@@ -5251,7 +5241,8 @@ int hugetlb_reserve_pages(struct inode *inode, + hugetlb_cgroup_put_rsvd_cgroup(h_cg); + } + } +- return 0; ++ return true; ++ + out_put_pages: + /* put back original number of pages, chg */ + (void)hugepage_subpool_put_pages(spool, chg); +@@ -5267,7 +5258,7 @@ int hugetlb_reserve_pages(struct inode *inode, + region_abort(resv_map, from, to, regions_needed); + if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + kref_put(&resv_map->refs, resv_map_release); +- return ret; ++ return false; + } + + long hugetlb_unreserve_pages(struct inode *inode, long start, long end, +-- +2.43.0 + diff --git a/queue-5.10/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch b/queue-5.10/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch new file mode 100644 index 00000000000..e5c3bb21dcf --- /dev/null +++ b/queue-5.10/mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch @@ -0,0 +1,108 @@ +From 248455f8b451d2ac07c8a295e2686f1524b80383 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 12:04:42 -0800 +Subject: mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE + +From: Prakash Sangappa + +[ Upstream commit e656c7a9e59607d1672d85ffa9a89031876ffe67 ] + +For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in +shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages +are not reserved. However when the shared memory is attached with the +shmat() call the hugetlb pages are getting reserved incorrectly for +SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug. + +------------------------------- +Following test shows the issue. 
+ +$cat shmhtb.c + +int main() +{ + int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE; + int shmid; + + shmid = shmget(SKEY, SHMSZ, shmflags); + if (shmid < 0) + { + printf("shmat: shmget() failed, %d\n", errno); + return 1; + } + printf("After shmget()\n"); + system("cat /proc/meminfo | grep -i hugepages_"); + + shmat(shmid, NULL, 0); + printf("\nAfter shmat()\n"); + system("cat /proc/meminfo | grep -i hugepages_"); + + shmctl(shmid, IPC_RMID, NULL); + return 0; +} + + #sysctl -w vm.nr_hugepages=20 + #./shmhtb + +After shmget() +HugePages_Total: 20 +HugePages_Free: 20 +HugePages_Rsvd: 0 +HugePages_Surp: 0 + +After shmat() +HugePages_Total: 20 +HugePages_Free: 20 +HugePages_Rsvd: 5 <-- +HugePages_Surp: 0 +-------------------------------- + +Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared +memory in the shmat() call. + +Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com +Signed-off-by: Prakash Sangappa +Acked-by: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/hugetlbfs/inode.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index c3e9fa7ce75f7..bf3cda4989623 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -135,6 +135,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) + loff_t len, vma_len; + int ret; + struct hstate *h = hstate_file(file); ++ vm_flags_t vm_flags; + + /* + * vma address alignment (but not the pgoff alignment) has +@@ -176,10 +177,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) + file_accessed(file); + + ret = -ENOMEM; ++ ++ vm_flags = vma->vm_flags; ++ /* ++ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip ++ * reserving here. Note: only for SHM hugetlbfs file, the inode ++ * flag S_PRIVATE is set. 
++ */ ++ if (inode->i_flags & S_PRIVATE) ++ vm_flags |= VM_NORESERVE; ++ + if (!hugetlb_reserve_pages(inode, + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma, +- vma->vm_flags)) ++ vm_flags)) + goto out; + + ret = 0; +-- +2.43.0 + diff --git a/queue-5.10/net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch b/queue-5.10/net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch new file mode 100644 index 00000000000..a092a02818a --- /dev/null +++ b/queue-5.10/net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch @@ -0,0 +1,80 @@ +From 9b1c36493c827100b8aa5b3dd82cdd808f29d801 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Sep 2022 17:27:56 -0700 +Subject: net: Change sock_getsockopt() to take the sk ptr instead of the sock + ptr + +From: Martin KaFai Lau + +[ Upstream commit ba74a7608dc12fbbd8ea36e660087f08a81ef26a ] + +A latter patch refactors bpf_getsockopt(SOL_SOCKET) with the +sock_getsockopt() to avoid code duplication and code +drift between the two duplicates. + +The current sock_getsockopt() takes sock ptr as the argument. +The very first thing of this function is to get back the sk ptr +by 'sk = sock->sk'. + +bpf_getsockopt() could be called when the sk does not have +the sock ptr created. Meaning sk->sk_socket is NULL. For example, +when a passive tcp connection has just been established but has yet +been accept()-ed. Thus, it cannot use the sock_getsockopt(sk->sk_socket) +or else it will pass a NULL ptr. + +This patch moves all sock_getsockopt implementation to the newly +added sk_getsockopt(). The new sk_getsockopt() takes a sk ptr +and immediately gets the sock ptr by 'sock = sk->sk_socket' + +The existing sock_getsockopt(sock) is changed to call +sk_getsockopt(sock->sk). All existing callers have both sock->sk +and sk->sk_socket pointer. + +The latter patch will make bpf_getsockopt(SOL_SOCKET) call +sk_getsockopt(sk) directly. 
The bpf_getsockopt(SOL_SOCKET) does +not use the optnames that require sk->sk_socket, so it will +be safe. + +Signed-off-by: Martin KaFai Lau +Link: https://lore.kernel.org/r/20220902002756.2887884-1-kafai@fb.com +Signed-off-by: Alexei Starovoitov +Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks") +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index 769e969cd1dc5..95559d088a169 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1293,10 +1293,10 @@ static int groups_to_user(gid_t __user *dst, const struct group_info *src) + return 0; + } + +-int sock_getsockopt(struct socket *sock, int level, int optname, +- char __user *optval, int __user *optlen) ++static int sk_getsockopt(struct sock *sk, int level, int optname, ++ char __user *optval, int __user *optlen) + { +- struct sock *sk = sock->sk; ++ struct socket *sock = sk->sk_socket; + + union { + int val; +@@ -1633,6 +1633,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + return 0; + } + ++int sock_getsockopt(struct socket *sock, int level, int optname, ++ char __user *optval, int __user *optlen) ++{ ++ return sk_getsockopt(sock->sk, level, optname, optval, optlen); ++} ++ + /* + * Initialize an sk_lock. 
+ * +-- +2.43.0 + diff --git a/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch b/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch new file mode 100644 index 00000000000..704cb378f90 --- /dev/null +++ b/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch @@ -0,0 +1,73 @@ +From 37e6ea8769930834d3f74552c946172c8ef147e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 22 May 2022 12:34:38 -0400 +Subject: NFSD: Add documenting comment for nfsd4_release_lockowner() + +From: Chuck Lever + +[ Upstream commit 043862b09cc00273e35e6c3a6389957953a34207 ] + +And return explicit nfserr values that match what is documented in the +new comment / API contract. + +Signed-off-by: Chuck Lever +Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 1b40b2197ce66..b6480be7b5e6a 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7107,6 +7107,23 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + return status; + } + ++/** ++ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations ++ * @rqstp: RPC transaction ++ * @cstate: NFSv4 COMPOUND state ++ * @u: RELEASE_LOCKOWNER arguments ++ * ++ * The lockowner's so_count is bumped when a lock record is added ++ * or when copying a conflicting lock. The latter case is brief, ++ * but can lead to fleeting false positives when looking for ++ * locks-in-use. 
++ * ++ * Return values: ++ * %nfs_ok: lockowner released or not found ++ * %nfserr_locks_held: lockowner still in use ++ * %nfserr_stale_clientid: clientid no longer active ++ * %nfserr_expired: clientid not recognized ++ */ + __be32 + nfsd4_release_lockowner(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, +@@ -7133,7 +7150,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); + if (!lo) { + spin_unlock(&clp->cl_lock); +- return status; ++ return nfs_ok; + } + if (atomic_read(&lo->lo_owner.so_count) != 2) { + spin_unlock(&clp->cl_lock); +@@ -7149,11 +7166,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + put_ol_stateid_locked(stp, &reaplist); + } + spin_unlock(&clp->cl_lock); ++ + free_ol_stateid_reaplist(&reaplist); + remove_blocked_locks(lo); + nfs4_put_stateowner(&lo->lo_owner); +- +- return status; ++ return nfs_ok; + } + + static inline struct nfs4_client_reclaim * +-- +2.43.0 + diff --git a/queue-5.10/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch b/queue-5.10/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch new file mode 100644 index 00000000000..2fbf9296dff --- /dev/null +++ b/queue-5.10/nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch @@ -0,0 +1,97 @@ +From 72721cc9efa6c9217b39095adbc30d0efd0b10db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Feb 2024 13:22:39 +1100 +Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb() + +From: NeilBrown + +[ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ] + +A recent change to check_for_locks() changed it to take ->flc_lock while +holding ->fi_lock. This creates a lock inversion (reported by lockdep) +because there is a case where ->fi_lock is taken while holding +->flc_lock. + +->flc_lock is held across ->fl_lmops callbacks, and +nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However +it doesn't need to. 
+ +Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each +delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list +and so needed the lock. Since then it doesn't walk the list and doesn't +need the lock. + +Two actions are performed under the lock. One is to call +nfsd_break_one_deleg which calls nfsd4_run_cb(). These doesn't act on +the nfs4_file at all, so don't need the lock. + +The other is to set ->fi_had_conflict which is in the nfs4_file. +This field is only ever set here (except when initialised to false) +so there is no possible problem will multiple threads racing when +setting it. + +The field is tested twice in nfs4_set_delegation(). The first test does +not hold a lock and is documented as an opportunistic optimisation, so +it doesn't impose any need to hold ->fi_lock while setting +->fi_had_conflict. + +The second test in nfs4_set_delegation() *is* make under ->fi_lock, so +removing the locking when ->fi_had_conflict is set could make a change. +The change could only be interesting if ->fi_had_conflict tested as +false even though nfsd_break_one_deleg() ran before ->fi_lock was +unlocked. i.e. while hash_delegation_locked() was running. +As hash_delegation_lock() doesn't interact in any way with nfs4_run_cb() +there can be no importance to this interaction. + +So this patch removes the locking from nfsd_break_one_deleg() and moves +the final test on ->fi_had_conflict out of the locked region to make it +clear that locking isn't important to the test. It is still tested +*after* vfs_setlease() has succeeded. This might be significant and as +vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called +under ->flc_lock this "after" is a true ordering provided by a spinlock. 
+ +Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 16b073c637986..7ff1f85f1dd9a 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -4617,10 +4617,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) + */ + fl->fl_break_time = 0; + +- spin_lock(&fp->fi_lock); + fp->fi_had_conflict = true; + nfsd_break_one_deleg(dp); +- spin_unlock(&fp->fi_lock); + return ret; + } + +@@ -5049,12 +5047,13 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, + if (status) + goto out_clnt_odstate; + ++ status = -EAGAIN; ++ if (fp->fi_had_conflict) ++ goto out_unlock; ++ + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); +- if (fp->fi_had_conflict) +- status = -EAGAIN; +- else +- status = hash_delegation_locked(dp, fp); ++ status = hash_delegation_locked(dp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + +-- +2.43.0 + diff --git a/queue-5.10/nfsd-fix-release_lockowner.patch b/queue-5.10/nfsd-fix-release_lockowner.patch new file mode 100644 index 00000000000..2b255c7fb62 --- /dev/null +++ b/queue-5.10/nfsd-fix-release_lockowner.patch @@ -0,0 +1,149 @@ +From 094bb06a555bffa2d5058ea6657fea919095e171 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 14:58:16 +1100 +Subject: nfsd: fix RELEASE_LOCKOWNER + +From: NeilBrown + +[ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ] + +The test on so_count in nfsd4_release_lockowner() is nonsense and +harmful. Revert to using check_for_locks(), changing that to not sleep. + +First: harmful. +As is documented in the kdoc comment for nfsd4_release_lockowner(), the +test on so_count can transiently return a false positive resulting in a +return of NFS4ERR_LOCKS_HELD when in fact no locks are held. 
This is +clearly a protocol violation and with the Linux NFS client it can cause +incorrect behaviour. + +If RELEASE_LOCKOWNER is sent while some other thread is still +processing a LOCK request which failed because, at the time that request +was received, the given owner held a conflicting lock, then the nfsd +thread processing that LOCK request can hold a reference (conflock) to +the lock owner that causes nfsd4_release_lockowner() to return an +incorrect error. + +The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it +never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so +it knows that the error is impossible. It assumes the lock owner was in +fact released so it feels free to use the same lock owner identifier in +some later locking request. + +When it does reuse a lock owner identifier for which a previous RELEASE +failed, it will naturally use a lock_seqid of zero. However the server, +which didn't release the lock owner, will expect a larger lock_seqid and +so will respond with NFS4ERR_BAD_SEQID. + +So clearly it is harmful to allow a false positive, which testing +so_count allows. + +The test is nonsense because ... well... it doesn't mean anything. + +so_count is the sum of three different counts. +1/ the set of states listed on so_stateids +2/ the set of active vfs locks owned by any of those states +3/ various transient counts such as for conflicting locks. + +When it is tested against '2' it is clear that one of these is the +transient reference obtained by find_lockowner_str_locked(). It is not +clear what the other one is expected to be. + +In practice, the count is often 2 because there is precisely one state +on so_stateids. If there were more, this would fail. + +In my testing I see two circumstances when RELEASE_LOCKOWNER is called. +In one case, CLOSE is called before RELEASE_LOCKOWNER. 
That results in +all the lock states being removed, and so the lockowner being discarded +(it is removed when there are no more references which usually happens +when the lock state is discarded). When nfsd4_release_lockowner() finds +that the lock owner doesn't exist, it returns success. + +The other case shows an so_count of '2' and precisely one state listed +in so_stateid. It appears that the Linux client uses a separate lock +owner for each file resulting in one lock state per lock owner, so this +test on '2' is safe. For another client it might not be safe. + +So this patch changes check_for_locks() to use the (newish) +find_any_file_locked() so that it doesn't take a reference on the +nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With +this check is it safe to restore the use of check_for_locks() rather +than testing so_count against the mysterious '2'. + +Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()") +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org # v6.2+ +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index b6480be7b5e6a..16b073c637986 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7080,14 +7080,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + { + struct file_lock *fl; + int status = false; +- struct nfsd_file *nf = find_any_file(fp); ++ struct nfsd_file *nf; + struct inode *inode; + struct file_lock_context *flctx; + ++ spin_lock(&fp->fi_lock); ++ nf = find_any_file_locked(fp); + if (!nf) { + /* Any valid lock stateid should have some sort of access */ + WARN_ON_ONCE(1); +- return status; ++ goto out; + } + + inode = locks_inode(nf->nf_file); +@@ -7103,7 +7105,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + } + 
spin_unlock(&flctx->flc_lock); + } +- nfsd_file_put(nf); ++out: ++ spin_unlock(&fp->fi_lock); + return status; + } + +@@ -7113,10 +7116,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + * @cstate: NFSv4 COMPOUND state + * @u: RELEASE_LOCKOWNER arguments + * +- * The lockowner's so_count is bumped when a lock record is added +- * or when copying a conflicting lock. The latter case is brief, +- * but can lead to fleeting false positives when looking for +- * locks-in-use. ++ * Check if theree are any locks still held and if not - free the lockowner ++ * and any lock state that is owned. + * + * Return values: + * %nfs_ok: lockowner released or not found +@@ -7152,10 +7153,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + spin_unlock(&clp->cl_lock); + return nfs_ok; + } +- if (atomic_read(&lo->lo_owner.so_count) != 2) { +- spin_unlock(&clp->cl_lock); +- nfs4_put_stateowner(&lo->lo_owner); +- return nfserr_locks_held; ++ ++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { ++ if (check_for_locks(stp->st_stid.sc_file, lo)) { ++ spin_unlock(&clp->cl_lock); ++ nfs4_put_stateowner(&lo->lo_owner); ++ return nfserr_locks_held; ++ } + } + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { +-- +2.43.0 + diff --git a/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch b/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch new file mode 100644 index 00000000000..202c3d41083 --- /dev/null +++ b/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch @@ -0,0 +1,86 @@ +From 73f6ed66fe4fc6f405e3d72225f9d5cc964c5265 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 22 May 2022 12:07:18 -0400 +Subject: NFSD: Modernize nfsd4_release_lockowner() + +From: Chuck Lever + +[ Upstream commit bd8fdb6e545f950f4654a9a10d7e819ad48146e5 ] + +Refactor: Use existing helpers that other lock operations use. 
This +change removes several automatic variables, so re-organize the +variable declarations for readability. + +Signed-off-by: Chuck Lever +Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 36 +++++++++++------------------------- + 1 file changed, 11 insertions(+), 25 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index d402ca0b535f0..1b40b2197ce66 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7113,16 +7113,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + union nfsd4_op_u *u) + { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; ++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + clientid_t *clid = &rlockowner->rl_clientid; +- struct nfs4_stateowner *sop; +- struct nfs4_lockowner *lo = NULL; + struct nfs4_ol_stateid *stp; +- struct xdr_netobj *owner = &rlockowner->rl_owner; +- unsigned int hashval = ownerstr_hashval(owner); +- __be32 status; +- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); ++ struct nfs4_lockowner *lo; + struct nfs4_client *clp; +- LIST_HEAD (reaplist); ++ LIST_HEAD(reaplist); ++ __be32 status; + + dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", + clid->cl_boot, clid->cl_id); +@@ -7130,30 +7127,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + status = lookup_clientid(clid, cstate, nn, false); + if (status) + return status; +- + clp = cstate->clp; +- /* Find the matching lock stateowner */ +- spin_lock(&clp->cl_lock); +- list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], +- so_strhash) { + +- if (sop->so_is_open_owner || !same_owner_str(sop, owner)) +- continue; +- +- if (atomic_read(&sop->so_count) != 1) { +- spin_unlock(&clp->cl_lock); +- return nfserr_locks_held; +- } +- +- lo = lockowner(sop); +- nfs4_get_stateowner(sop); +- break; +- } ++ spin_lock(&clp->cl_lock); ++ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); + if (!lo) { + 
spin_unlock(&clp->cl_lock); + return status; + } +- ++ if (atomic_read(&lo->lo_owner.so_count) != 2) { ++ spin_unlock(&clp->cl_lock); ++ nfs4_put_stateowner(&lo->lo_owner); ++ return nfserr_locks_held; ++ } + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = list_first_entry(&lo->lo_owner.so_stateids, +-- +2.43.0 + diff --git a/queue-5.10/regmap-add-bulk-read-write-callbacks-into-regmap_con.patch b/queue-5.10/regmap-add-bulk-read-write-callbacks-into-regmap_con.patch new file mode 100644 index 00000000000..f40a145e263 --- /dev/null +++ b/queue-5.10/regmap-add-bulk-read-write-callbacks-into-regmap_con.patch @@ -0,0 +1,300 @@ +From 1cac9c5509fd16616dd5ba9dbaa2ab787b4f5b89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 30 Apr 2022 04:51:44 +0200 +Subject: regmap: Add bulk read/write callbacks into regmap_config + +From: Marek Vasut + +[ Upstream commit d77e745613680c54708470402e2b623dcd769681 ] + +Currently the regmap_config structure only allows the user to implement +single element register read/write using .reg_read/.reg_write callbacks. +The regmap_bus already implements bulk counterparts of both, and is being +misused as a workaround for the missing bulk read/write callbacks in +regmap_config by a couple of drivers. To stop this misuse, add the bulk +read/write callbacks to regmap_config and call them from the regmap core +code. 
+ +Signed-off-by: Marek Vasut +Cc: Jagan Teki +Cc: Mark Brown +Cc: Maxime Ripard +Cc: Robert Foss +Cc: Sam Ravnborg +Cc: Thomas Zimmermann +To: dri-devel@lists.freedesktop.org +Link: https://lore.kernel.org/r/20220430025145.640305-1-marex@denx.de +Signed-off-by: Mark Brown +Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") +Signed-off-by: Sasha Levin +--- + drivers/base/regmap/internal.h | 4 ++ + drivers/base/regmap/regmap.c | 76 ++++++++++++++++++---------------- + include/linux/regmap.h | 12 ++++++ + 3 files changed, 56 insertions(+), 36 deletions(-) + +diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h +index 0097696c31de2..2720d8d7bbfc9 100644 +--- a/drivers/base/regmap/internal.h ++++ b/drivers/base/regmap/internal.h +@@ -104,6 +104,10 @@ struct regmap { + int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); ++ /* Bulk read/write */ ++ int (*read)(void *context, const void *reg_buf, size_t reg_size, ++ void *val_buf, size_t val_size); ++ int (*write)(void *context, const void *data, size_t count); + + bool defer_caching; + +diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c +index 8f39aacdad0dc..2dfd6aa600450 100644 +--- a/drivers/base/regmap/regmap.c ++++ b/drivers/base/regmap/regmap.c +@@ -800,12 +800,15 @@ struct regmap *__regmap_init(struct device *dev, + map->reg_stride_order = ilog2(map->reg_stride); + else + map->reg_stride_order = -1; +- map->use_single_read = config->use_single_read || !bus || !bus->read; +- map->use_single_write = config->use_single_write || !bus || !bus->write; +- map->can_multi_write = config->can_multi_write && bus && bus->write; ++ map->use_single_read = config->use_single_read || !(config->read || (bus && bus->read)); ++ map->use_single_write = config->use_single_write || !(config->write || (bus && bus->write)); ++ 
map->can_multi_write = config->can_multi_write && (config->write || (bus && bus->write)); + if (bus) { + map->max_raw_read = bus->max_raw_read; + map->max_raw_write = bus->max_raw_write; ++ } else if (config->max_raw_read && config->max_raw_write) { ++ map->max_raw_read = config->max_raw_read; ++ map->max_raw_write = config->max_raw_write; + } + map->dev = dev; + map->bus = bus; +@@ -839,7 +842,16 @@ struct regmap *__regmap_init(struct device *dev, + map->read_flag_mask = bus->read_flag_mask; + } + +- if (!bus) { ++ if (config && config->read && config->write) { ++ map->reg_read = _regmap_bus_read; ++ ++ /* Bulk read/write */ ++ map->read = config->read; ++ map->write = config->write; ++ ++ reg_endian = REGMAP_ENDIAN_NATIVE; ++ val_endian = REGMAP_ENDIAN_NATIVE; ++ } else if (!bus) { + map->reg_read = config->reg_read; + map->reg_write = config->reg_write; + map->reg_update_bits = config->reg_update_bits; +@@ -856,10 +868,13 @@ struct regmap *__regmap_init(struct device *dev, + } else { + map->reg_read = _regmap_bus_read; + map->reg_update_bits = bus->reg_update_bits; +- } ++ /* Bulk read/write */ ++ map->read = bus->read; ++ map->write = bus->write; + +- reg_endian = regmap_get_reg_endian(bus, config); +- val_endian = regmap_get_val_endian(dev, bus, config); ++ reg_endian = regmap_get_reg_endian(bus, config); ++ val_endian = regmap_get_val_endian(dev, bus, config); ++ } + + switch (config->reg_bits + map->reg_shift) { + case 2: +@@ -1628,8 +1643,6 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, + size_t len; + int i; + +- WARN_ON(!map->bus); +- + /* Check for unwritable or noinc registers in range + * before we start + */ +@@ -1711,7 +1724,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, + val = work_val; + } + +- if (map->async && map->bus->async_write) { ++ if (map->async && map->bus && map->bus->async_write) { + struct regmap_async *async; + + trace_regmap_async_write_start(map, reg, val_len); +@@ -1779,10 
+1792,10 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, + * write. + */ + if (val == work_val) +- ret = map->bus->write(map->bus_context, map->work_buf, +- map->format.reg_bytes + +- map->format.pad_bytes + +- val_len); ++ ret = map->write(map->bus_context, map->work_buf, ++ map->format.reg_bytes + ++ map->format.pad_bytes + ++ val_len); + else if (map->bus->gather_write) + ret = map->bus->gather_write(map->bus_context, map->work_buf, + map->format.reg_bytes + +@@ -1801,7 +1814,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, + memcpy(buf, map->work_buf, map->format.reg_bytes); + memcpy(buf + map->format.reg_bytes + map->format.pad_bytes, + val, val_len); +- ret = map->bus->write(map->bus_context, buf, len); ++ ret = map->write(map->bus_context, buf, len); + + kfree(buf); + } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) { +@@ -1858,7 +1871,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, + struct regmap_range_node *range; + struct regmap *map = context; + +- WARN_ON(!map->bus || !map->format.format_write); ++ WARN_ON(!map->format.format_write); + + range = _regmap_range_lookup(map, reg); + if (range) { +@@ -1871,8 +1884,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, + + trace_regmap_hw_write_start(map, reg, 1); + +- ret = map->bus->write(map->bus_context, map->work_buf, +- map->format.buf_size); ++ ret = map->write(map->bus_context, map->work_buf, map->format.buf_size); + + trace_regmap_hw_write_done(map, reg, 1); + +@@ -1892,7 +1904,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, + { + struct regmap *map = context; + +- WARN_ON(!map->bus || !map->format.format_val); ++ WARN_ON(!map->format.format_val); + + map->format.format_val(map->work_buf + map->format.reg_bytes + + map->format.pad_bytes, val, 0); +@@ -1906,7 +1918,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, + + static inline 
void *_regmap_map_get_context(struct regmap *map) + { +- return (map->bus) ? map : map->bus_context; ++ return (map->bus || (!map->bus && map->read)) ? map : map->bus_context; + } + + int _regmap_write(struct regmap *map, unsigned int reg, +@@ -2313,7 +2325,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, + u8 = buf; + *u8 |= map->write_flag_mask; + +- ret = map->bus->write(map->bus_context, buf, len); ++ ret = map->write(map->bus_context, buf, len); + + kfree(buf); + +@@ -2619,9 +2631,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, + struct regmap_range_node *range; + int ret; + +- WARN_ON(!map->bus); +- +- if (!map->bus || !map->bus->read) ++ if (!map->read) + return -EINVAL; + + range = _regmap_range_lookup(map, reg); +@@ -2637,9 +2647,9 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, + map->read_flag_mask); + trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); + +- ret = map->bus->read(map->bus_context, map->work_buf, +- map->format.reg_bytes + map->format.pad_bytes, +- val, val_len); ++ ret = map->read(map->bus_context, map->work_buf, ++ map->format.reg_bytes + map->format.pad_bytes, ++ val, val_len); + + trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); + +@@ -2750,8 +2760,6 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, + unsigned int v; + int ret, i; + +- if (!map->bus) +- return -EINVAL; + if (val_len % map->format.val_bytes) + return -EINVAL; + if (!IS_ALIGNED(reg, map->reg_stride)) +@@ -2766,7 +2774,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, + size_t chunk_count, chunk_bytes; + size_t chunk_regs = val_count; + +- if (!map->bus->read) { ++ if (!map->read) { + ret = -ENOTSUPP; + goto out; + } +@@ -2826,7 +2834,7 @@ EXPORT_SYMBOL_GPL(regmap_raw_read); + * @val: Pointer to data buffer + * @val_len: Length of output buffer in bytes. 
+ * +- * The regmap API usually assumes that bulk bus read operations will read a ++ * The regmap API usually assumes that bulk read operations will read a + * range of registers. Some devices have certain registers for which a read + * operation read will read from an internal FIFO. + * +@@ -2844,10 +2852,6 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, + size_t read_len; + int ret; + +- if (!map->bus) +- return -EINVAL; +- if (!map->bus->read) +- return -ENOTSUPP; + if (val_len % map->format.val_bytes) + return -EINVAL; + if (!IS_ALIGNED(reg, map->reg_stride)) +@@ -2961,7 +2965,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, + if (val_count == 0) + return -EINVAL; + +- if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { ++ if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { + ret = regmap_raw_read(map, reg, val, val_bytes * val_count); + if (ret != 0) + return ret; +diff --git a/include/linux/regmap.h b/include/linux/regmap.h +index d6f0d876fa424..83a7485de78fb 100644 +--- a/include/linux/regmap.h ++++ b/include/linux/regmap.h +@@ -294,6 +294,12 @@ typedef void (*regmap_unlock)(void *); + * if the function require special handling with lock and reg + * handling and the operation cannot be represented as a simple + * update_bits operation on a bus such as SPI, I2C, etc. ++ * @read: Optional callback that if filled will be used to perform all the ++ * bulk reads from the registers. Data is returned in the buffer used ++ * to transmit data. ++ * @write: Same as above for writing. ++ * @max_raw_read: Max raw read size that can be used on the device. ++ * @max_raw_write: Max raw write size that can be used on the device. + * @fast_io: Register IO is fast. Use a spinlock instead of a mutex + * to perform locking. This field is ignored if custom lock/unlock + * functions are used (see fields lock/unlock of struct regmap_config). 
+@@ -373,6 +379,12 @@ struct regmap_config { + int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); ++ /* Bulk read/write */ ++ int (*read)(void *context, const void *reg_buf, size_t reg_size, ++ void *val_buf, size_t val_size); ++ int (*write)(void *context, const void *data, size_t count); ++ size_t max_raw_read; ++ size_t max_raw_write; + + bool fast_io; + +-- +2.43.0 + diff --git a/queue-5.10/regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch b/queue-5.10/regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch new file mode 100644 index 00000000000..2d8fba10d71 --- /dev/null +++ b/queue-5.10/regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch @@ -0,0 +1,65 @@ +From 6e5147c99310b8ead55ed6c777a40e69c6c04a3d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Nov 2021 16:00:40 +0100 +Subject: regmap: allow to define reg_update_bits for no bus configuration + +From: Ansuel Smith + +[ Upstream commit 02d6fdecb9c38de19065f6bed8d5214556fd061d ] + +Some device requires a special handling for reg_update_bits and can't use +the normal regmap read write logic. An example is when locking is +handled by the device and rmw operations requires to do atomic operations. +Allow to declare a dedicated function in regmap_config for +reg_update_bits in no bus configuration. 
+ +Signed-off-by: Ansuel Smith +Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com +Signed-off-by: Mark Brown +Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") +Signed-off-by: Sasha Levin +--- + drivers/base/regmap/regmap.c | 1 + + include/linux/regmap.h | 7 +++++++ + 2 files changed, 8 insertions(+) + +diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c +index 7bc603145bd98..8f39aacdad0dc 100644 +--- a/drivers/base/regmap/regmap.c ++++ b/drivers/base/regmap/regmap.c +@@ -842,6 +842,7 @@ struct regmap *__regmap_init(struct device *dev, + if (!bus) { + map->reg_read = config->reg_read; + map->reg_write = config->reg_write; ++ map->reg_update_bits = config->reg_update_bits; + + map->defer_caching = false; + goto skip_format_initialization; +diff --git a/include/linux/regmap.h b/include/linux/regmap.h +index e7834d98207f7..d6f0d876fa424 100644 +--- a/include/linux/regmap.h ++++ b/include/linux/regmap.h +@@ -289,6 +289,11 @@ typedef void (*regmap_unlock)(void *); + * read operation on a bus such as SPI, I2C, etc. Most of the + * devices do not need this. + * @reg_write: Same as above for writing. ++ * @reg_update_bits: Optional callback that if filled will be used to perform ++ * all the update_bits(rmw) operation. Should only be provided ++ * if the function require special handling with lock and reg ++ * handling and the operation cannot be represented as a simple ++ * update_bits operation on a bus such as SPI, I2C, etc. + * @fast_io: Register IO is fast. Use a spinlock instead of a mutex + * to perform locking. This field is ignored if custom lock/unlock + * functions are used (see fields lock/unlock of struct regmap_config). 
+@@ -366,6 +371,8 @@ struct regmap_config { + + int (*reg_read)(void *context, unsigned int reg, unsigned int *val); + int (*reg_write)(void *context, unsigned int reg, unsigned int val); ++ int (*reg_update_bits)(void *context, unsigned int reg, ++ unsigned int mask, unsigned int val); + + bool fast_io; + +-- +2.43.0 + diff --git a/queue-5.10/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch b/queue-5.10/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch new file mode 100644 index 00000000000..71507932942 --- /dev/null +++ b/queue-5.10/selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch @@ -0,0 +1,61 @@ +From 10bf73f868331f0eee3a96f64b8c5a59c14151c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 Jan 2024 06:14:29 -0700 +Subject: selftests: mm: fix map_hugetlb failure on 64K page size systems + +From: Nico Pache + +[ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ] + +On systems with 64k page size and 512M huge page sizes, the allocation and +test succeeds but errors out at the munmap. As the comment states, munmap +will failure if its not HUGEPAGE aligned. This is due to the length of +the mapping being 1/2 the size of the hugepage causing the munmap to not +be hugepage aligned. Fix this by making the mapping length the full +hugepage if the hugepage is larger than the length of the mapping. 
+ +Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com +Signed-off-by: Nico Pache +Cc: Donet Tom +Cc: Shuah Khan +Cc: Christophe Leroy +Cc: Michael Ellerman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c +index 312889edb84ab..c65c55b7a789f 100644 +--- a/tools/testing/selftests/vm/map_hugetlb.c ++++ b/tools/testing/selftests/vm/map_hugetlb.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include "vm_util.h" + + #define LENGTH (256UL*1024*1024) + #define PROTECTION (PROT_READ | PROT_WRITE) +@@ -70,10 +71,16 @@ int main(int argc, char **argv) + { + void *addr; + int ret; ++ size_t hugepage_size; + size_t length = LENGTH; + int flags = FLAGS; + int shift = 0; + ++ hugepage_size = default_huge_page_size(); ++ /* munmap with fail if the length is not page aligned */ ++ if (hugepage_size > length) ++ length = hugepage_size; ++ + if (argc > 1) + length = atol(argv[1]) << 20; + if (argc > 2) { +-- +2.43.0 + diff --git a/queue-5.10/selftests-mm-switch-to-bash-from-sh.patch b/queue-5.10/selftests-mm-switch-to-bash-from-sh.patch new file mode 100644 index 00000000000..e5ddb93800c --- /dev/null +++ b/queue-5.10/selftests-mm-switch-to-bash-from-sh.patch @@ -0,0 +1,58 @@ +From 94d5ee7a0a350e94c7ba7c41e284fd21934fa921 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 16 Jan 2024 14:04:54 +0500 +Subject: selftests/mm: switch to bash from sh + +From: Muhammad Usama Anjum + +[ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ] + +Running charge_reserved_hugetlb.sh generates errors if sh is set to +dash: + +./charge_reserved_hugetlb.sh: 9: [[: not found +./charge_reserved_hugetlb.sh: 19: [[: not found +./charge_reserved_hugetlb.sh: 27: [[: not found +./charge_reserved_hugetlb.sh: 37: [[: not found +./charge_reserved_hugetlb.sh: 45: 
Syntax error: "(" unexpected + +Switch to using /bin/bash instead of /bin/sh. Make the switch for +write_hugetlb_memory.sh as well which is called from +charge_reserved_hugetlb.sh. + +Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com +Signed-off-by: Muhammad Usama Anjum +Cc: Muhammad Usama Anjum +Cc: Shuah Khan +Cc: David Laight +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +- + tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +index 7536ff2f890a1..d0107f8ae6213 100644 +--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh ++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + + set -e +diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh +index 70a02301f4c27..3d2d2eb9d6fff 100644 +--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh ++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + + set -e +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch b/queue-5.10/serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch new file mode 100644 index 00000000000..94519ae1dde --- /dev/null +++ b/queue-5.10/serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch @@ -0,0 +1,75 @@ +From 0545f60f222499c7ab4f10ef0b2cd8a6c17cf290 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 16 Jan 2024 16:30:00 -0500 +Subject: serial: max310x: fail probe if clock crystal is unstable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hugo Villeneuve 
+ +[ Upstream commit 8afa6c6decea37e7cb473d2c60473f37f46cea35 ] + +A stable clock is really required in order to use this UART, so log an +error message and bail out if the chip reports that the clock is not +stable. + +Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") +Cc: stable@vger.kernel.org +Suggested-by: Jan Kundrát +Link: https://www.spinics.net/lists/linux-serial/msg35773.html +Signed-off-by: Hugo Villeneuve +Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index 0e0f778d75cd4..bbf45c0626681 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) + return 1; + } + +-static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, ++static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, + unsigned long freq, bool xtal) + { + unsigned int div, clksrc, pllcfg = 0; +@@ -626,7 +626,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) +- dev_warn(dev, "clock is not stable yet\n"); ++ return dev_err_probe(dev, -EAGAIN, ++ "clock is not stable\n"); + } + + return bestfreq; +@@ -1266,7 +1267,7 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + { + int i, ret, fmin, fmax, freq; + struct max310x_port *s; +- u32 uartclk = 0; ++ s32 uartclk = 0; + bool xtal; + + if (IS_ERR(regmap)) +@@ -1350,6 +1351,11 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + } + + uartclk = max310x_set_ref_clk(dev, s, freq, xtal); ++ if (uartclk < 0) { ++ ret = uartclk; ++ goto out_uart; ++ } ++ + 
dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); + + for (i = 0; i < devtype->nr; i++) { +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-fix-io-data-corruption-in-batched-ope.patch b/queue-5.10/serial-max310x-fix-io-data-corruption-in-batched-ope.patch new file mode 100644 index 00000000000..58b0e46553b --- /dev/null +++ b/queue-5.10/serial-max310x-fix-io-data-corruption-in-batched-ope.patch @@ -0,0 +1,99 @@ +From 32f4aa03c524cbbfdbcdc089e09b2ad40769388d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Apr 2023 22:14:23 +0200 +Subject: serial: max310x: fix IO data corruption in batched operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jan Kundrát + +[ Upstream commit 3f42b142ea1171967e40e10e4b0241c0d6d28d41 ] + +After upgrading from 5.16 to 6.1, our board with a MAX14830 started +producing lots of garbage data over UART. Bisection pointed out commit +285e76fc049c as the culprit. That patch tried to replace hand-written +code which I added in 2b4bac48c1084 ("serial: max310x: Use batched reads +when reasonably safe") with the generic regmap infrastructure for +batched operations. + +Unfortunately, the `regmap_raw_read` and `regmap_raw_write` which were +used are actually functions which perform IO over *multiple* registers. +That's not what is needed for accessing these Tx/Rx FIFOs; the +appropriate functions are the `_noinc_` versions, not the `_raw_` ones. + +Fix this regression by using `regmap_noinc_read()` and +`regmap_noinc_write()` along with the necessary `regmap_config` setup; +with this patch in place, our board communicates happily again. Since +our board uses SPI for talking to this chip, the I2C part is completely +untested. 
+ +Fixes: 285e76fc049c ("serial: max310x: use regmap methods for SPI batch operations") +Cc: stable@vger.kernel.org +Reviewed-by: Andy Shevchenko +Signed-off-by: Jan Kundrát +Link: https://lore.kernel.org/r/79db8e82aadb0e174bc82b9996423c3503c8fb37.1680732084.git.jan.kundrat@cesnet.cz +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index ed1aaa19854fd..2f88eae8a55a1 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -533,6 +533,11 @@ static bool max310x_reg_precious(struct device *dev, unsigned int reg) + return false; + } + ++static bool max310x_reg_noinc(struct device *dev, unsigned int reg) ++{ ++ return reg == MAX310X_RHR_REG; ++} ++ + static int max310x_set_baud(struct uart_port *port, int baud) + { + unsigned int mode = 0, div = 0, frac = 0, c = 0, F = 0; +@@ -667,14 +672,14 @@ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int + { + struct max310x_one *one = to_max310x_port(port); + +- regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len); ++ regmap_noinc_write(one->regmap, MAX310X_THR_REG, txbuf, len); + } + + static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len) + { + struct max310x_one *one = to_max310x_port(port); + +- regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len); ++ regmap_noinc_read(one->regmap, MAX310X_RHR_REG, rxbuf, len); + } + + static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen) +@@ -1508,6 +1513,10 @@ static struct regmap_config regcfg = { + .writeable_reg = max310x_reg_writeable, + .volatile_reg = max310x_reg_volatile, + .precious_reg = max310x_reg_precious, ++ .writeable_noinc_reg = max310x_reg_noinc, ++ .readable_noinc_reg = max310x_reg_noinc, ++ .max_raw_read = MAX310X_FIFO_SIZE, ++ .max_raw_write = 
MAX310X_FIFO_SIZE, + }; + + #ifdef CONFIG_SPI_MASTER +@@ -1593,6 +1602,10 @@ static struct regmap_config regcfg_i2c = { + .volatile_reg = max310x_reg_volatile, + .precious_reg = max310x_reg_precious, + .max_register = MAX310X_I2C_REVID_EXTREG, ++ .writeable_noinc_reg = max310x_reg_noinc, ++ .readable_noinc_reg = max310x_reg_noinc, ++ .max_raw_read = MAX310X_FIFO_SIZE, ++ .max_raw_write = MAX310X_FIFO_SIZE, + }; + + static const struct max310x_if_cfg max310x_i2c_if_cfg = { +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-implement-i2c-support.patch b/queue-5.10/serial-max310x-implement-i2c-support.patch new file mode 100644 index 00000000000..a528e8dc243 --- /dev/null +++ b/queue-5.10/serial-max310x-implement-i2c-support.patch @@ -0,0 +1,270 @@ +From fdcfee2740c6daddaa534df38a219b30e9f9f638 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 Jun 2022 17:46:59 +0300 +Subject: serial: max310x: implement I2C support + +From: Cosmin Tanislav + +[ Upstream commit 2e1f2d9a9bdbe12ee475c82a45ac46a278e8049a ] + +I2C implementation on this chip has a few key differences +compared to SPI, as described in previous patches. + * extended register space access needs no extra logic + * slave address is used to select which UART to communicate + with + +To accommodate these differences, add an I2C interface config, +set the RevID register address and implement an empty method +for setting the GlobalCommand register, since no special handling +is needed for the extended register space. + +To handle the port-specific slave address, create an I2C dummy +device for each port, except the base one (UART0), which is +expected to be the one specified in firmware, and create a +regmap for each I2C device. +Add minimum and maximum slave addresses to each devtype for +sanity checking. 
+ +Also, use a separate regmap config with no write_flag_mask, +since I2C has a R/W bit in its slave address, and set the +max register to the address of the RevID register, since the +extended register space needs no extra logic. + +Finally, add the I2C driver. + +Reviewed-by: Andy Shevchenko +Signed-off-by: Cosmin Tanislav +Link: https://lore.kernel.org/r/20220605144659.4169853-5-demonsingur@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/Kconfig | 1 + + drivers/tty/serial/max310x.c | 135 ++++++++++++++++++++++++++++++++++- + 2 files changed, 135 insertions(+), 1 deletion(-) + +diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig +index 28f22e58639c6..bd30ae9751bf5 100644 +--- a/drivers/tty/serial/Kconfig ++++ b/drivers/tty/serial/Kconfig +@@ -343,6 +343,7 @@ config SERIAL_MAX310X + depends on SPI_MASTER + select SERIAL_CORE + select REGMAP_SPI if SPI_MASTER ++ select REGMAP_I2C if I2C + help + This selects support for an advanced UART from Maxim (Dallas). + Supported ICs are MAX3107, MAX3108, MAX3109, MAX14830. 
+diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index b90281ac54c85..ed1aaa19854fd 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -73,6 +74,7 @@ + + /* Extended registers */ + #define MAX310X_SPI_REVID_EXTREG MAX310X_REG_05 /* Revision ID */ ++#define MAX310X_I2C_REVID_EXTREG (0x25) /* Revision ID */ + + /* IRQ register bits */ + #define MAX310X_IRQ_LSR_BIT (1 << 0) /* LSR interrupt */ +@@ -260,6 +262,10 @@ struct max310x_if_cfg { + }; + + struct max310x_devtype { ++ struct { ++ unsigned short min; ++ unsigned short max; ++ } slave_addr; + char name[9]; + int nr; + u8 mode1; +@@ -431,6 +437,10 @@ static const struct max310x_devtype max3107_devtype = { + .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT | MAX310X_MODE1_IRQSEL_BIT, + .detect = max3107_detect, + .power = max310x_power, ++ .slave_addr = { ++ .min = 0x2c, ++ .max = 0x2f, ++ }, + }; + + static const struct max310x_devtype max3108_devtype = { +@@ -439,6 +449,10 @@ static const struct max310x_devtype max3108_devtype = { + .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT, + .detect = max3108_detect, + .power = max310x_power, ++ .slave_addr = { ++ .min = 0x60, ++ .max = 0x6f, ++ }, + }; + + static const struct max310x_devtype max3109_devtype = { +@@ -447,6 +461,10 @@ static const struct max310x_devtype max3109_devtype = { + .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT, + .detect = max3109_detect, + .power = max310x_power, ++ .slave_addr = { ++ .min = 0x60, ++ .max = 0x6f, ++ }, + }; + + static const struct max310x_devtype max14830_devtype = { +@@ -455,6 +473,10 @@ static const struct max310x_devtype max14830_devtype = { + .mode1 = MAX310X_MODE1_IRQSEL_BIT, + .detect = max14830_detect, + .power = max14830_power, ++ .slave_addr = { ++ .min = 0x60, ++ .max = 0x6f, ++ }, + }; + + static bool max310x_reg_writeable(struct device *dev, unsigned int reg) +@@ -1557,6 +1579,97 @@ static struct 
spi_driver max310x_spi_driver = { + }; + #endif + ++#ifdef CONFIG_I2C ++static int max310x_i2c_extended_reg_enable(struct device *dev, bool enable) ++{ ++ return 0; ++} ++ ++static struct regmap_config regcfg_i2c = { ++ .reg_bits = 8, ++ .val_bits = 8, ++ .cache_type = REGCACHE_RBTREE, ++ .writeable_reg = max310x_reg_writeable, ++ .volatile_reg = max310x_reg_volatile, ++ .precious_reg = max310x_reg_precious, ++ .max_register = MAX310X_I2C_REVID_EXTREG, ++}; ++ ++static const struct max310x_if_cfg max310x_i2c_if_cfg = { ++ .extended_reg_enable = max310x_i2c_extended_reg_enable, ++ .rev_id_reg = MAX310X_I2C_REVID_EXTREG, ++}; ++ ++static unsigned short max310x_i2c_slave_addr(unsigned short addr, ++ unsigned int nr) ++{ ++ /* ++ * For MAX14830 and MAX3109, the slave address depends on what the ++ * A0 and A1 pins are tied to. ++ * See Table I2C Address Map of the datasheet. ++ * Based on that table, the following formulas were determined. ++ * UART1 - UART0 = 0x10 ++ * UART2 - UART1 = 0x20 + 0x10 ++ * UART3 - UART2 = 0x10 ++ */ ++ ++ addr -= nr * 0x10; ++ ++ if (nr >= 2) ++ addr -= 0x20; ++ ++ return addr; ++} ++ ++static int max310x_i2c_probe(struct i2c_client *client) ++{ ++ const struct max310x_devtype *devtype = ++ device_get_match_data(&client->dev); ++ struct i2c_client *port_client; ++ struct regmap *regmaps[4]; ++ unsigned int i; ++ u8 port_addr; ++ ++ if (client->addr < devtype->slave_addr.min || ++ client->addr > devtype->slave_addr.max) ++ return dev_err_probe(&client->dev, -EINVAL, ++ "Slave addr 0x%x outside of range [0x%x, 0x%x]\n", ++ client->addr, devtype->slave_addr.min, ++ devtype->slave_addr.max); ++ ++ regmaps[0] = devm_regmap_init_i2c(client, ®cfg_i2c); ++ ++ for (i = 1; i < devtype->nr; i++) { ++ port_addr = max310x_i2c_slave_addr(client->addr, i); ++ port_client = devm_i2c_new_dummy_device(&client->dev, ++ client->adapter, ++ port_addr); ++ ++ regmaps[i] = devm_regmap_init_i2c(port_client, ®cfg_i2c); ++ } ++ ++ return max310x_probe(&client->dev, 
devtype, &max310x_i2c_if_cfg, ++ regmaps, client->irq); ++} ++ ++static int max310x_i2c_remove(struct i2c_client *client) ++{ ++ max310x_remove(&client->dev); ++ ++ return 0; ++} ++ ++static struct i2c_driver max310x_i2c_driver = { ++ .driver = { ++ .name = MAX310X_NAME, ++ .of_match_table = max310x_dt_ids, ++ .pm = &max310x_pm_ops, ++ }, ++ .probe_new = max310x_i2c_probe, ++ .remove = max310x_i2c_remove, ++}; ++#endif ++ + static int __init max310x_uart_init(void) + { + int ret; +@@ -1570,15 +1683,35 @@ static int __init max310x_uart_init(void) + #ifdef CONFIG_SPI_MASTER + ret = spi_register_driver(&max310x_spi_driver); + if (ret) +- uart_unregister_driver(&max310x_uart); ++ goto err_spi_register; ++#endif ++ ++#ifdef CONFIG_I2C ++ ret = i2c_add_driver(&max310x_i2c_driver); ++ if (ret) ++ goto err_i2c_register; + #endif + ++ return 0; ++ ++#ifdef CONFIG_I2C ++err_i2c_register: ++ spi_unregister_driver(&max310x_spi_driver); ++#endif ++ ++err_spi_register: ++ uart_unregister_driver(&max310x_uart); ++ + return ret; + } + module_init(max310x_uart_init); + + static void __exit max310x_uart_exit(void) + { ++#ifdef CONFIG_I2C ++ i2c_del_driver(&max310x_i2c_driver); ++#endif ++ + #ifdef CONFIG_SPI_MASTER + spi_unregister_driver(&max310x_spi_driver); + #endif +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-make-accessing-revision-id-interface-.patch b/queue-5.10/serial-max310x-make-accessing-revision-id-interface-.patch new file mode 100644 index 00000000000..bb041345e9a --- /dev/null +++ b/queue-5.10/serial-max310x-make-accessing-revision-id-interface-.patch @@ -0,0 +1,157 @@ +From 6bb1445dd5bb1d4549f294e07cad6f3dd9b5d476 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 Jun 2022 17:46:58 +0300 +Subject: serial: max310x: make accessing revision id interface-agnostic + +From: Cosmin Tanislav + +[ Upstream commit b3883ab5e95713e479f774ea68be275413e8e5b2 ] + +SPI can only use 5 address bits, since one bit is reserved for +specifying R/W and 2 bits are used to 
specify the UART port. +To access registers that have addresses past 0x1F, an extended +register space can be enabled by writing to the GlobalCommand +register (address 0x1F). + +I2C uses 8 address bits. The R/W bit is placed in the slave +address, and so is the UART port. Because of this, registers +that have addresses higher than 0x1F can be accessed normally. + +To access the RevID register, on SPI, 0xCE must be written to +the 0x1F address to enable the extended register space, after +which the RevID register is accessible at address 0x5. 0xCD +must be written to the 0x1F address to disable the extended +register space. + +On I2C, the RevID register is accessible at address 0x25. + +Create an interface config struct, and add a method for +toggling the extended register space and a member for the RevId +register address. Implement these for SPI. + +Reviewed-by: Andy Shevchenko +Signed-off-by: Cosmin Tanislav +Link: https://lore.kernel.org/r/20220605144659.4169853-4-demonsingur@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++++++--------- + 1 file changed, 30 insertions(+), 10 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index a09ec46e0310d..b90281ac54c85 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -72,7 +72,7 @@ + #define MAX310X_GLOBALCMD_REG MAX310X_REG_1F /* Global Command (WO) */ + + /* Extended registers */ +-#define MAX310X_REVID_EXTREG MAX310X_REG_05 /* Revision ID */ ++#define MAX310X_SPI_REVID_EXTREG MAX310X_REG_05 /* Revision ID */ + + /* IRQ register bits */ + #define MAX310X_IRQ_LSR_BIT (1 << 0) /* LSR interrupt */ +@@ -253,6 +253,12 @@ + #define MAX14830_BRGCFG_CLKDIS_BIT (1 << 6) /* Clock Disable */ + #define MAX14830_REV_ID (0xb0) + ++struct max310x_if_cfg { ++ int 
(*extended_reg_enable)(struct device *dev, bool enable); ++ ++ unsigned int rev_id_reg; ++}; ++ + struct max310x_devtype { + char name[9]; + int nr; +@@ -275,6 +281,7 @@ struct max310x_one { + + struct max310x_port { + const struct max310x_devtype *devtype; ++ const struct max310x_if_cfg *if_cfg; + struct regmap *regmap; + struct clk *clk; + #ifdef CONFIG_GPIOLIB +@@ -364,13 +371,12 @@ static int max3109_detect(struct device *dev) + unsigned int val = 0; + int ret; + +- ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, +- MAX310X_EXTREG_ENBL); ++ ret = s->if_cfg->extended_reg_enable(dev, true); + if (ret) + return ret; + +- regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val); +- regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL); ++ regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val); ++ s->if_cfg->extended_reg_enable(dev, false); + if (((val & MAX310x_REV_MASK) != MAX3109_REV_ID)) { + dev_err(dev, + "%s ID 0x%02x does not match\n", s->devtype->name, val); +@@ -395,13 +401,12 @@ static int max14830_detect(struct device *dev) + unsigned int val = 0; + int ret; + +- ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, +- MAX310X_EXTREG_ENBL); ++ ret = s->if_cfg->extended_reg_enable(dev, true); + if (ret) + return ret; + +- regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val); +- regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL); ++ regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val); ++ s->if_cfg->extended_reg_enable(dev, false); + if (((val & MAX310x_REV_MASK) != MAX14830_REV_ID)) { + dev_err(dev, + "%s ID 0x%02x does not match\n", s->devtype->name, val); +@@ -1250,6 +1255,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, + #endif + + static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype, ++ const struct max310x_if_cfg *if_cfg, + struct regmap *regmaps[], int irq) + { + int i, ret, fmin, fmax, freq; +@@ -1313,6 +1319,7 @@ static int max310x_probe(struct device *dev, 
const struct max310x_devtype *devty + + s->regmap = regmaps[0]; + s->devtype = devtype; ++ s->if_cfg = if_cfg; + dev_set_drvdata(dev, s); + + /* Check device to ensure we are talking to what we expect */ +@@ -1482,6 +1489,19 @@ static struct regmap_config regcfg = { + }; + + #ifdef CONFIG_SPI_MASTER ++static int max310x_spi_extended_reg_enable(struct device *dev, bool enable) ++{ ++ struct max310x_port *s = dev_get_drvdata(dev); ++ ++ return regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, ++ enable ? MAX310X_EXTREG_ENBL : MAX310X_EXTREG_DSBL); ++} ++ ++static const struct max310x_if_cfg __maybe_unused max310x_spi_if_cfg = { ++ .extended_reg_enable = max310x_spi_extended_reg_enable, ++ .rev_id_reg = MAX310X_SPI_REVID_EXTREG, ++}; ++ + static int max310x_spi_probe(struct spi_device *spi) + { + const struct max310x_devtype *devtype; +@@ -1508,7 +1528,7 @@ static int max310x_spi_probe(struct spi_device *spi) + regmaps[i] = devm_regmap_init_spi(spi, ®cfg); + } + +- return max310x_probe(&spi->dev, devtype, regmaps, spi->irq); ++ return max310x_probe(&spi->dev, devtype, &max310x_spi_if_cfg, regmaps, spi->irq); + } + + static int max310x_spi_remove(struct spi_device *spi) +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-make-use-of-device-properties.patch b/queue-5.10/serial-max310x-make-use-of-device-properties.patch new file mode 100644 index 00000000000..a76dfdabff9 --- /dev/null +++ b/queue-5.10/serial-max310x-make-use-of-device-properties.patch @@ -0,0 +1,97 @@ +From 866a5aceec6bce05749d0a998ceb472da9dd98b4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Oct 2020 11:46:34 +0300 +Subject: serial: max310x: Make use of device properties + +From: Andy Shevchenko + +[ Upstream commit c808fab604ca62cff19ee6b261211483830807aa ] + +Device property API allows to gather device resources from different sources, +such as ACPI. Convert the drivers to unleash the power of device property API. 
+ +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20201007084635.594991-1-andy.shevchenko@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 27 +++++++++------------------ + 1 file changed, 9 insertions(+), 18 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index bbf45c0626681..8d42c537ee5ea 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -15,8 +15,8 @@ + #include + #include + #include +-#include +-#include ++#include ++#include + #include + #include + #include +@@ -271,7 +271,7 @@ struct max310x_one { + container_of(_port, struct max310x_one, port) + + struct max310x_port { +- struct max310x_devtype *devtype; ++ const struct max310x_devtype *devtype; + struct regmap *regmap; + struct clk *clk; + #ifdef CONFIG_GPIOLIB +@@ -1262,7 +1262,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, + } + #endif + +-static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, ++static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype, + struct regmap *regmap, int irq) + { + int i, ret, fmin, fmax, freq; +@@ -1488,7 +1488,7 @@ static struct regmap_config regcfg = { + #ifdef CONFIG_SPI_MASTER + static int max310x_spi_probe(struct spi_device *spi) + { +- struct max310x_devtype *devtype; ++ const struct max310x_devtype *devtype; + struct regmap *regmap; + int ret; + +@@ -1500,18 +1500,9 @@ static int max310x_spi_probe(struct spi_device *spi) + if (ret) + return ret; + +- if (spi->dev.of_node) { +- const struct of_device_id *of_id = +- of_match_device(max310x_dt_ids, &spi->dev); +- if (!of_id) +- return -ENODEV; +- +- devtype = (struct max310x_devtype *)of_id->data; +- } else { +- const struct spi_device_id *id_entry = spi_get_device_id(spi); +- +- devtype = 
(struct max310x_devtype *)id_entry->driver_data; +- } ++ devtype = device_get_match_data(&spi->dev); ++ if (!devtype) ++ devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data; + + regcfg.max_register = devtype->nr * 0x20 - 1; + regmap = devm_regmap_init_spi(spi, ®cfg); +@@ -1536,7 +1527,7 @@ MODULE_DEVICE_TABLE(spi, max310x_id_table); + static struct spi_driver max310x_spi_driver = { + .driver = { + .name = MAX310X_NAME, +- .of_match_table = of_match_ptr(max310x_dt_ids), ++ .of_match_table = max310x_dt_ids, + .pm = &max310x_pm_ops, + }, + .probe = max310x_spi_probe, +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-prevent-infinite-while-loop-in-port-s.patch b/queue-5.10/serial-max310x-prevent-infinite-while-loop-in-port-s.patch new file mode 100644 index 00000000000..552a5340cb5 --- /dev/null +++ b/queue-5.10/serial-max310x-prevent-infinite-while-loop-in-port-s.patch @@ -0,0 +1,76 @@ +From 86c63668f8208c860ff435e1ce718253043893a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 16 Jan 2024 16:30:01 -0500 +Subject: serial: max310x: prevent infinite while() loop in port startup + +From: Hugo Villeneuve + +[ Upstream commit b35f8dbbce818b02c730dc85133dc7754266e084 ] + +If there is a problem after resetting a port, the do/while() loop that +checks the default value of DIVLSB register may run forever and spam the +I2C bus. + +Add a delay before each read of DIVLSB, and a maximum number of tries to +prevent that situation from happening. + +Also fail probe if port reset is unsuccessful. 
+ +Fixes: 10d8b34a4217 ("serial: max310x: Driver rework") +Cc: stable@vger.kernel.org +Signed-off-by: Hugo Villeneuve +Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index 80298a5714bcb..978d9d93127e5 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -235,6 +235,10 @@ + #define MAX310x_REV_MASK (0xf8) + #define MAX310X_WRITE_BIT 0x80 + ++/* Port startup definitions */ ++#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ ++#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ ++ + /* Crystal-related definitions */ + #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ + #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ +@@ -1316,6 +1320,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + goto out_clk; + + for (i = 0; i < devtype->nr; i++) { ++ bool started = false; ++ unsigned int try = 0, val = 0; ++ + /* Reset port */ + regmap_write(regmaps[i], MAX310X_MODE2_REG, + MAX310X_MODE2_RST_BIT); +@@ -1324,8 +1331,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + + /* Wait for port startup */ + do { +- regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); +- } while (ret != 0x01); ++ msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); ++ regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); ++ ++ if (val == 0x01) ++ started = true; ++ } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); ++ ++ if (!started) { ++ ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); ++ goto out_uart; ++ } + + regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); + } +-- +2.43.0 + diff --git 
a/queue-5.10/serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch b/queue-5.10/serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch new file mode 100644 index 00000000000..39f1fae33b6 --- /dev/null +++ b/queue-5.10/serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch @@ -0,0 +1,113 @@ +From fbb1b8f42c124d4d27ab9da383decb1ce9c6fe16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 May 2021 20:29:30 +0300 +Subject: serial: max310x: Try to get crystal clock rate from property + +From: Andy Shevchenko + +[ Upstream commit d4d6f03c4fb3a91dadfe147b47edd40e4d7e4d36 ] + +In some configurations, mainly ACPI-based, the clock frequency of the device +is supplied by very well established 'clock-frequency' property. Hence, try +to get it from the property at last if no other providers are available. + +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20210517172930.83353-1-andriy.shevchenko@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++------------- + 1 file changed, 26 insertions(+), 14 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index 8bf3c5ab59431..0e0f778d75cd4 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) + return 1; + } + +-static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s, ++static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, + unsigned long freq, bool xtal) + { + unsigned int div, clksrc, pllcfg = 0; +@@ -629,7 +629,7 @@ static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s, + dev_warn(dev, "clock is not stable yet\n"); + } + +- return (int)bestfreq; ++ return bestfreq; + } + + static void max310x_batch_write(struct 
uart_port *port, u8 *txbuf, unsigned int len) +@@ -1264,9 +1264,10 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, + static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + struct regmap *regmap, int irq) + { +- int i, ret, fmin, fmax, freq, uartclk; ++ int i, ret, fmin, fmax, freq; + struct max310x_port *s; +- bool xtal = false; ++ u32 uartclk = 0; ++ bool xtal; + + if (IS_ERR(regmap)) + return PTR_ERR(regmap); +@@ -1278,24 +1279,20 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + return -ENOMEM; + } + ++ /* Always ask for fixed clock rate from a property. */ ++ device_property_read_u32(dev, "clock-frequency", &uartclk); ++ + s->clk = devm_clk_get_optional(dev, "osc"); + if (IS_ERR(s->clk)) + return PTR_ERR(s->clk); + if (s->clk) { +- fmin = 500000; +- fmax = 35000000; ++ xtal = false; + } else { + s->clk = devm_clk_get_optional(dev, "xtal"); + if (IS_ERR(s->clk)) + return PTR_ERR(s->clk); +- if (s->clk) { +- fmin = 1000000; +- fmax = 4000000; +- xtal = true; +- } else { +- dev_err(dev, "Cannot get clock\n"); +- return -EINVAL; +- } ++ ++ xtal = true; + } + + ret = clk_prepare_enable(s->clk); +@@ -1303,6 +1300,21 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + return ret; + + freq = clk_get_rate(s->clk); ++ if (freq == 0) ++ freq = uartclk; ++ if (freq == 0) { ++ dev_err(dev, "Cannot get clock rate\n"); ++ return -EINVAL; ++ } ++ ++ if (xtal) { ++ fmin = 1000000; ++ fmax = 4000000; ++ } else { ++ fmin = 500000; ++ fmax = 35000000; ++ } ++ + /* Check frequency limits */ + if (freq < fmin || freq > fmax) { + ret = -ERANGE; +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-unprepare-and-disable-clock-in-error-.patch b/queue-5.10/serial-max310x-unprepare-and-disable-clock-in-error-.patch new file mode 100644 index 00000000000..38a2febf425 --- /dev/null +++ b/queue-5.10/serial-max310x-unprepare-and-disable-clock-in-error-.patch @@ -0,0 
+1,40 @@ +From 96e94efbea517370c304ba5f5f0689deaefd232a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Jun 2021 18:37:33 +0300 +Subject: serial: max310x: Unprepare and disable clock in error path + +From: Andy Shevchenko + +[ Upstream commit 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 ] + +In one error case the clock may be left prepared and enabled. +Unprepare and disable clock in that case to balance state of +the hardware. + +Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property") +Reported-by: Dan Carpenter +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index 978d9d93127e5..a09ec46e0310d 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + freq = uartclk; + if (freq == 0) { + dev_err(dev, "Cannot get clock rate\n"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out_clk; + } + + if (xtal) { +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-use-a-separate-regmap-for-each-port.patch b/queue-5.10/serial-max310x-use-a-separate-regmap-for-each-port.patch new file mode 100644 index 00000000000..6359b4bc639 --- /dev/null +++ b/queue-5.10/serial-max310x-use-a-separate-regmap-for-each-port.patch @@ -0,0 +1,243 @@ +From bd91908dc08161b09b7f50372e564bfa3381c1b4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 Jun 2022 17:46:57 +0300 +Subject: serial: max310x: use a separate regmap for each port + +From: Cosmin Tanislav + +[ Upstream commit 6ef281daf020592c219fa91780abc381c6c20db5 ] + +The driver currently does manual register manipulation in +multiple places to talk to a specific UART port. 
+ +In order to talk to a specific UART port over SPI, the bits U1 +and U0 of the register address can be set, as explained in the +Command byte configuration section of the datasheet. + +Make this more elegant by creating regmaps for each UART port +and setting the read_flag_mask and write_flag_mask +accordingly. + +All communcations regarding global registers are done on UART +port 0, so replace the global regmap entirely with the port 0 +regmap. + +Also, remove the 0x1f masks from reg_writeable(), reg_volatile() +and reg_precious() methods, since setting the U1 and U0 bits of +the register address happens inside the regmap core now. + +Reviewed-by: Andy Shevchenko +Signed-off-by: Cosmin Tanislav +Link: https://lore.kernel.org/r/20220605144659.4169853-3-demonsingur@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 68 +++++++++++++++++++----------------- + 1 file changed, 36 insertions(+), 32 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index c0fa4ad104774..80298a5714bcb 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -262,6 +262,7 @@ struct max310x_one { + struct work_struct tx_work; + struct work_struct md_work; + struct work_struct rs_work; ++ struct regmap *regmap; + + u8 rx_buf[MAX310X_FIFO_SIZE]; + }; +@@ -291,26 +292,26 @@ static DECLARE_BITMAP(max310x_lines, MAX310X_UART_NRMAX); + + static u8 max310x_port_read(struct uart_port *port, u8 reg) + { +- struct max310x_port *s = dev_get_drvdata(port->dev); ++ struct max310x_one *one = to_max310x_port(port); + unsigned int val = 0; + +- regmap_read(s->regmap, port->iobase + reg, &val); ++ regmap_read(one->regmap, reg, &val); + + return val; + } + + static void max310x_port_write(struct uart_port *port, u8 reg, u8 val) + { +- struct max310x_port *s = dev_get_drvdata(port->dev); ++ 
struct max310x_one *one = to_max310x_port(port); + +- regmap_write(s->regmap, port->iobase + reg, val); ++ regmap_write(one->regmap, reg, val); + } + + static void max310x_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val) + { +- struct max310x_port *s = dev_get_drvdata(port->dev); ++ struct max310x_one *one = to_max310x_port(port); + +- regmap_update_bits(s->regmap, port->iobase + reg, mask, val); ++ regmap_update_bits(one->regmap, reg, mask, val); + } + + static int max3107_detect(struct device *dev) +@@ -449,7 +450,7 @@ static const struct max310x_devtype max14830_devtype = { + + static bool max310x_reg_writeable(struct device *dev, unsigned int reg) + { +- switch (reg & 0x1f) { ++ switch (reg) { + case MAX310X_IRQSTS_REG: + case MAX310X_LSR_IRQSTS_REG: + case MAX310X_SPCHR_IRQSTS_REG: +@@ -466,7 +467,7 @@ static bool max310x_reg_writeable(struct device *dev, unsigned int reg) + + static bool max310x_reg_volatile(struct device *dev, unsigned int reg) + { +- switch (reg & 0x1f) { ++ switch (reg) { + case MAX310X_RHR_REG: + case MAX310X_IRQSTS_REG: + case MAX310X_LSR_IRQSTS_REG: +@@ -488,7 +489,7 @@ static bool max310x_reg_volatile(struct device *dev, unsigned int reg) + + static bool max310x_reg_precious(struct device *dev, unsigned int reg) + { +- switch (reg & 0x1f) { ++ switch (reg) { + case MAX310X_RHR_REG: + case MAX310X_IRQSTS_REG: + case MAX310X_SPCHR_IRQSTS_REG: +@@ -633,18 +634,16 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, + + static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len) + { +- struct max310x_port *s = dev_get_drvdata(port->dev); +- u8 reg = port->iobase + MAX310X_THR_REG; ++ struct max310x_one *one = to_max310x_port(port); + +- regmap_raw_write(s->regmap, reg, txbuf, len); ++ regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len); + } + + static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len) + { +- struct max310x_port *s = 
dev_get_drvdata(port->dev); +- u8 reg = port->iobase + MAX310X_RHR_REG; ++ struct max310x_one *one = to_max310x_port(port); + +- regmap_raw_read(s->regmap, reg, rxbuf, len); ++ regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len); + } + + static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen) +@@ -1247,15 +1246,16 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, + #endif + + static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype, +- struct regmap *regmap, int irq) ++ struct regmap *regmaps[], int irq) + { + int i, ret, fmin, fmax, freq; + struct max310x_port *s; + s32 uartclk = 0; + bool xtal; + +- if (IS_ERR(regmap)) +- return PTR_ERR(regmap); ++ for (i = 0; i < devtype->nr; i++) ++ if (IS_ERR(regmaps[i])) ++ return PTR_ERR(regmaps[i]); + + /* Alloc port structure */ + s = devm_kzalloc(dev, struct_size(s, p, devtype->nr), GFP_KERNEL); +@@ -1306,7 +1306,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + goto out_clk; + } + +- s->regmap = regmap; ++ s->regmap = regmaps[0]; + s->devtype = devtype; + dev_set_drvdata(dev, s); + +@@ -1316,22 +1316,18 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + goto out_clk; + + for (i = 0; i < devtype->nr; i++) { +- unsigned int offs = i << 5; +- + /* Reset port */ +- regmap_write(s->regmap, MAX310X_MODE2_REG + offs, ++ regmap_write(regmaps[i], MAX310X_MODE2_REG, + MAX310X_MODE2_RST_BIT); + /* Clear port reset */ +- regmap_write(s->regmap, MAX310X_MODE2_REG + offs, 0); ++ regmap_write(regmaps[i], MAX310X_MODE2_REG, 0); + + /* Wait for port startup */ + do { +- regmap_read(s->regmap, +- MAX310X_BRGDIVLSB_REG + offs, &ret); ++ regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); + } while (ret != 0x01); + +- regmap_write(s->regmap, MAX310X_MODE1_REG + offs, +- devtype->mode1); ++ regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); + } + + uartclk = 
max310x_set_ref_clk(dev, s, freq, xtal); +@@ -1359,11 +1355,13 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + s->p[i].port.fifosize = MAX310X_FIFO_SIZE; + s->p[i].port.flags = UPF_FIXED_TYPE | UPF_LOW_LATENCY; + s->p[i].port.iotype = UPIO_PORT; +- s->p[i].port.iobase = i * 0x20; ++ s->p[i].port.iobase = i; + s->p[i].port.membase = (void __iomem *)~0; + s->p[i].port.uartclk = uartclk; + s->p[i].port.rs485_config = max310x_rs485_config; + s->p[i].port.ops = &max310x_ops; ++ s->p[i].regmap = regmaps[i]; ++ + /* Disable all interrupts */ + max310x_port_write(&s->p[i].port, MAX310X_IRQEN_REG, 0); + /* Clear IRQ status register */ +@@ -1460,6 +1458,7 @@ static struct regmap_config regcfg = { + .val_bits = 8, + .write_flag_mask = MAX310X_WRITE_BIT, + .cache_type = REGCACHE_RBTREE, ++ .max_register = MAX310X_REG_1F, + .writeable_reg = max310x_reg_writeable, + .volatile_reg = max310x_reg_volatile, + .precious_reg = max310x_reg_precious, +@@ -1469,7 +1468,8 @@ static struct regmap_config regcfg = { + static int max310x_spi_probe(struct spi_device *spi) + { + const struct max310x_devtype *devtype; +- struct regmap *regmap; ++ struct regmap *regmaps[4]; ++ unsigned int i; + int ret; + + /* Setup SPI bus */ +@@ -1484,10 +1484,14 @@ static int max310x_spi_probe(struct spi_device *spi) + if (!devtype) + devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data; + +- regcfg.max_register = devtype->nr * 0x20 - 1; +- regmap = devm_regmap_init_spi(spi, ®cfg); ++ for (i = 0; i < devtype->nr; i++) { ++ u8 port_mask = i * 0x20; ++ regcfg.read_flag_mask = port_mask; ++ regcfg.write_flag_mask = port_mask | MAX310X_WRITE_BIT; ++ regmaps[i] = devm_regmap_init_spi(spi, ®cfg); ++ } + +- return max310x_probe(&spi->dev, devtype, regmap, spi->irq); ++ return max310x_probe(&spi->dev, devtype, regmaps, spi->irq); + } + + static int max310x_spi_remove(struct spi_device *spi) +-- +2.43.0 + diff --git 
a/queue-5.10/serial-max310x-use-devm_clk_get_optional-to-get-the-.patch b/queue-5.10/serial-max310x-use-devm_clk_get_optional-to-get-the-.patch new file mode 100644 index 00000000000..b0cb6e82bf5 --- /dev/null +++ b/queue-5.10/serial-max310x-use-devm_clk_get_optional-to-get-the-.patch @@ -0,0 +1,77 @@ +From a4b4ca6718f9264e4ceb95efdbc44618ef8102ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Oct 2020 11:46:35 +0300 +Subject: serial: max310x: Use devm_clk_get_optional() to get the input clock + +From: Andy Shevchenko + +[ Upstream commit 974e454d6f96da0c0ab1b4115b92587dd9406f6a ] + +Simplify the code which fetches the input clock by using +devm_clk_get_optional(). If no input clock is present +devm_clk_get_optional() will return NULL instead of an error +which matches the behavior of the old code. + +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20201007084635.594991-2-andy.shevchenko@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index 14537878f9855..8bf3c5ab59431 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -1265,7 +1265,6 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + struct regmap *regmap, int irq) + { + int i, ret, fmin, fmax, freq, uartclk; +- struct clk *clk_osc, *clk_xtal; + struct max310x_port *s; + bool xtal = false; + +@@ -1279,23 +1278,24 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, + return -ENOMEM; + } + +- clk_osc = devm_clk_get(dev, "osc"); +- clk_xtal = devm_clk_get(dev, "xtal"); +- if (!IS_ERR(clk_osc)) { +- s->clk = clk_osc; ++ s->clk = devm_clk_get_optional(dev, "osc"); ++ if (IS_ERR(s->clk)) ++ 
return PTR_ERR(s->clk); ++ if (s->clk) { + fmin = 500000; + fmax = 35000000; +- } else if (!IS_ERR(clk_xtal)) { +- s->clk = clk_xtal; +- fmin = 1000000; +- fmax = 4000000; +- xtal = true; +- } else if (PTR_ERR(clk_osc) == -EPROBE_DEFER || +- PTR_ERR(clk_xtal) == -EPROBE_DEFER) { +- return -EPROBE_DEFER; + } else { +- dev_err(dev, "Cannot get clock\n"); +- return -EINVAL; ++ s->clk = devm_clk_get_optional(dev, "xtal"); ++ if (IS_ERR(s->clk)) ++ return PTR_ERR(s->clk); ++ if (s->clk) { ++ fmin = 1000000; ++ fmax = 4000000; ++ xtal = true; ++ } else { ++ dev_err(dev, "Cannot get clock\n"); ++ return -EINVAL; ++ } + } + + ret = clk_prepare_enable(s->clk); +-- +2.43.0 + diff --git a/queue-5.10/serial-max310x-use-regmap-methods-for-spi-batch-oper.patch b/queue-5.10/serial-max310x-use-regmap-methods-for-spi-batch-oper.patch new file mode 100644 index 00000000000..c10a321115a --- /dev/null +++ b/queue-5.10/serial-max310x-use-regmap-methods-for-spi-batch-oper.patch @@ -0,0 +1,93 @@ +From 9ef224786adcc93be42e08d3aaf13a93699d18b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 Jun 2022 17:46:56 +0300 +Subject: serial: max310x: use regmap methods for SPI batch operations + +From: Cosmin Tanislav + +[ Upstream commit 285e76fc049c4d32c772eea9460a7ef28a193802 ] + +The SPI batch read/write operations can be implemented as simple +regmap raw read and write, which will also try to do a gather +write just as it is done here. + +Use the regmap raw read and write methods. 
+ +Reviewed-by: Andy Shevchenko +Signed-off-by: Cosmin Tanislav +Link: https://lore.kernel.org/r/20220605144659.4169853-2-demonsingur@gmail.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup") +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/max310x.c | 36 ++++++++---------------------------- + 1 file changed, 8 insertions(+), 28 deletions(-) + +diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c +index 8d42c537ee5ea..c0fa4ad104774 100644 +--- a/drivers/tty/serial/max310x.c ++++ b/drivers/tty/serial/max310x.c +@@ -263,8 +263,6 @@ struct max310x_one { + struct work_struct md_work; + struct work_struct rs_work; + +- u8 wr_header; +- u8 rd_header; + u8 rx_buf[MAX310X_FIFO_SIZE]; + }; + #define to_max310x_port(_port) \ +@@ -635,32 +633,18 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, + + static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len) + { +- struct max310x_one *one = to_max310x_port(port); +- struct spi_transfer xfer[] = { +- { +- .tx_buf = &one->wr_header, +- .len = sizeof(one->wr_header), +- }, { +- .tx_buf = txbuf, +- .len = len, +- } +- }; +- spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer)); ++ struct max310x_port *s = dev_get_drvdata(port->dev); ++ u8 reg = port->iobase + MAX310X_THR_REG; ++ ++ regmap_raw_write(s->regmap, reg, txbuf, len); + } + + static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len) + { +- struct max310x_one *one = to_max310x_port(port); +- struct spi_transfer xfer[] = { +- { +- .tx_buf = &one->rd_header, +- .len = sizeof(one->rd_header), +- }, { +- .rx_buf = rxbuf, +- .len = len, +- } +- }; +- spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer)); ++ struct max310x_port *s = dev_get_drvdata(port->dev); ++ u8 reg = port->iobase + MAX310X_RHR_REG; ++ ++ regmap_raw_read(s->regmap, reg, rxbuf, len); + } + + static 
void max310x_handle_rx(struct uart_port *port, unsigned int rxlen) +@@ -1390,10 +1374,6 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty + INIT_WORK(&s->p[i].md_work, max310x_md_proc); + /* Initialize queue for changing RS485 mode */ + INIT_WORK(&s->p[i].rs_work, max310x_rs_proc); +- /* Initialize SPI-transfer buffers */ +- s->p[i].wr_header = (s->p[i].port.iobase + MAX310X_THR_REG) | +- MAX310X_WRITE_BIT; +- s->p[i].rd_header = (s->p[i].port.iobase + MAX310X_RHR_REG); + + /* Register port */ + ret = uart_add_one_port(&max310x_uart, &s->p[i].port); +-- +2.43.0 + diff --git a/queue-5.10/series b/queue-5.10/series index 2e25614686e..c7142614720 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -28,3 +28,51 @@ netrom-fix-a-data-race-around-sysctl_netrom_transpor.patch-9832 netrom-fix-a-data-race-around-sysctl_netrom_routing_.patch netrom-fix-a-data-race-around-sysctl_netrom_link_fai.patch netrom-fix-data-races-around-sysctl_net_busy_read.patch +nfsd-modernize-nfsd4_release_lockowner.patch +nfsd-add-documenting-comment-for-nfsd4_release_locko.patch +nfsd-fix-release_lockowner.patch +selftests-mm-switch-to-bash-from-sh.patch +selftests-mm-fix-map_hugetlb-failure-on-64k-page-siz.patch +um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch +um-fix-adding-no-pie-for-clang.patch +xhci-remove-extra-loop-in-interrupt-context.patch +xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch +xhci-process-isoc-td-properly-when-there-was-a-trans.patch +xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch +serial-max310x-use-devm_clk_get_optional-to-get-the-.patch +serial-max310x-try-to-get-crystal-clock-rate-from-pr.patch +serial-max310x-fail-probe-if-clock-crystal-is-unstab.patch +serial-max310x-make-use-of-device-properties.patch +serial-max310x-use-regmap-methods-for-spi-batch-oper.patch +serial-max310x-use-a-separate-regmap-for-each-port.patch +serial-max310x-prevent-infinite-while-loop-in-port-s.patch 
+net-change-sock_getsockopt-to-take-the-sk-ptr-instea.patch +bpf-net-change-sk_getsockopt-to-take-the-sockptr_t-a.patch +lsm-make-security_socket_getpeersec_stream-sockptr_t.patch +lsm-fix-default-return-value-of-the-socket_getpeerse.patch +ext4-make-ext4_es_insert_extent-return-void.patch +ext4-refactor-ext4_da_map_blocks.patch +ext4-convert-to-exclusive-lock-while-inserting-delal.patch +drivers-hv-vmbus-add-vmbus_requestor-data-structure-.patch +hv_netvsc-use-vmbus_requestor-to-generate-transactio.patch +hv_netvsc-wait-for-completion-on-request-switch_data.patch +hv_netvsc-process-netdev_going_down-on-vf-hot-remove.patch +hv_netvsc-make-netvsc-vf-binding-check-both-mac-and-.patch +hv_netvsc-use-netif_is_bond_master-instead-of-open-c.patch +hv_netvsc-register-vf-in-netvsc_probe-if-net_device_.patch +mm-hugetlb-change-hugetlb_reserve_pages-to-type-bool.patch +mm-hugetlb-pages-should-not-be-reserved-by-shmat-if-.patch +getrusage-add-the-signal_struct-sig-local-variable.patch +getrusage-move-thread_group_cputime_adjusted-outside.patch +getrusage-use-__for_each_thread.patch +getrusage-use-sig-stats_lock-rather-than-lock_task_s.patch +exit-fix-typo-in-comment-s-sub-theads-sub-threads.patch +exit-wait_task_zombie-kill-the-no-longer-necessary-s.patch +serial-max310x-unprepare-and-disable-clock-in-error-.patch +drivers-hv-vmbus-drop-error-message-when-no-request-.patch +nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch +regmap-allow-to-define-reg_update_bits-for-no-bus-co.patch +regmap-add-bulk-read-write-callbacks-into-regmap_con.patch +serial-max310x-make-accessing-revision-id-interface-.patch +serial-max310x-implement-i2c-support.patch +serial-max310x-fix-io-data-corruption-in-batched-ope.patch diff --git a/queue-5.10/um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch b/queue-5.10/um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch new file mode 100644 index 00000000000..105f6bea810 --- /dev/null +++ 
b/queue-5.10/um-allow-not-setting-extra-rpaths-in-the-linux-binar.patch @@ -0,0 +1,82 @@ +From a62300c8e6ec3597b4c1f4360679e84b12a3a1e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Mar 2021 14:02:37 +0100 +Subject: um: allow not setting extra rpaths in the linux binary + +From: Johannes Berg + +[ Upstream commit 386093c68ba3e8bcfe7f46deba901e0e80713c29 ] + +There doesn't seem to be any reason for the rpath being set in +the binaries, at on systems that I tested on. On the other hand, +setting rpath is actually harming binaries in some cases, e.g. +if using nix-based compilation environments where /lib & /lib64 +are not part of the actual environment. + +Add a new Kconfig option (under EXPERT, for less user confusion) +that allows disabling the rpath additions. + +Signed-off-by: Johannes Berg +Signed-off-by: Richard Weinberger +Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") +Signed-off-by: Sasha Levin +--- + arch/um/Kconfig | 13 +++++++++++++ + arch/um/Makefile | 3 ++- + arch/x86/Makefile.um | 2 +- + 3 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/arch/um/Kconfig b/arch/um/Kconfig +index eb1c6880bde49..20264b47dcffc 100644 +--- a/arch/um/Kconfig ++++ b/arch/um/Kconfig +@@ -92,6 +92,19 @@ config LD_SCRIPT_DYN + depends on !LD_SCRIPT_STATIC + select MODULE_REL_CRCS if MODVERSIONS + ++config LD_SCRIPT_DYN_RPATH ++ bool "set rpath in the binary" if EXPERT ++ default y ++ depends on LD_SCRIPT_DYN ++ help ++ Add /lib (and /lib64 for 64-bit) to the linux binary's rpath ++ explicitly. ++ ++ You may need to turn this off if compiling for nix systems ++ that have their libraries in random /nix directories and ++ might otherwise unexpected use libraries from /lib or /lib64 ++ instead of the desired ones. 
++ + config HOSTFS + tristate "Host filesystem" + help +diff --git a/arch/um/Makefile b/arch/um/Makefile +index 56e5320da7624..4211e23a2f68f 100644 +--- a/arch/um/Makefile ++++ b/arch/um/Makefile +@@ -118,7 +118,8 @@ archprepare: + $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h + + LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static +-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie) ++LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) ++LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib + + CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ + -fno-stack-protector $(call cc-option, -fno-stack-protector-all) +diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um +index 1db7913795f51..b3c1ae084180d 100644 +--- a/arch/x86/Makefile.um ++++ b/arch/x86/Makefile.um +@@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64 + + # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. + +-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 ++LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64 + LINK-y += -m64 + + endif +-- +2.43.0 + diff --git a/queue-5.10/um-fix-adding-no-pie-for-clang.patch b/queue-5.10/um-fix-adding-no-pie-for-clang.patch new file mode 100644 index 00000000000..5e2a36c6357 --- /dev/null +++ b/queue-5.10/um-fix-adding-no-pie-for-clang.patch @@ -0,0 +1,68 @@ +From e9d85964ffc6c7b86d09408ccde4e85c228101ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 15:59:54 -0700 +Subject: um: Fix adding '-no-pie' for clang + +From: Nathan Chancellor + +[ Upstream commit 846cfbeed09b45d985079a9173cf390cc053715b ] + +The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link +vmlinux with -no-pie") added the compiler linker flag '-no-pie' via +cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer. 
+ +While this works for GCC, this does not work for clang because cc-option +uses '-c', which stops the pipeline right before linking, so '-no-pie' +is unconsumed and clang warns, causing cc-option to fail just as it +would if the option was entirely unsupported: + + $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null + clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument] + +A recent version of clang exposes this because it generates a relocation +under '-mcmodel=large' that is not supported in PIE mode: + + /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE + /usr/sbin/ld: failed to set dynamic section sizes: bad value + clang: error: linker command failed with exit code 1 (use -v to see invocation) + +Remove the cc-option check altogether. It is wasteful to invoke the +compiler to check for '-no-pie' because only one supported compiler +version does not support it, GCC 5.x (as it is supported with the +minimum version of clang and GCC 6.1.0+). Use a combination of the +gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add +'-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all +compilers that support this. Furthermore, using gcc-min-version can help +turn this back into + + LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie + +when the minimum version of GCC is bumped past 6.1.0. 
+ +Cc: stable@vger.kernel.org +Closes: https://github.com/ClangBuiltLinux/linux/issues/1982 +Signed-off-by: Nathan Chancellor +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + arch/um/Makefile | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/arch/um/Makefile b/arch/um/Makefile +index 4211e23a2f68f..81d35b1f315ae 100644 +--- a/arch/um/Makefile ++++ b/arch/um/Makefile +@@ -118,7 +118,9 @@ archprepare: + $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h + + LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static +-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) ++ifdef CONFIG_LD_SCRIPT_DYN ++LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie ++endif + LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib + + CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ +-- +2.43.0 + diff --git a/queue-5.10/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch b/queue-5.10/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch new file mode 100644 index 00000000000..2ee773747f8 --- /dev/null +++ b/queue-5.10/xhci-handle-isoc-babble-and-buffer-overrun-events-pr.patch @@ -0,0 +1,57 @@ +From 1750a9b03f733605bfe32a0394c292f09ec37f8c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Jan 2024 17:27:37 +0200 +Subject: xhci: handle isoc Babble and Buffer Overrun events properly + +From: Michal Pecio + +[ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ] + +xHCI 4.9 explicitly forbids assuming that the xHC has released its +ownership of a multi-TRB TD when it reports an error on one of the +early TRBs. Yet the driver makes such assumption and releases the TD, +allowing the remaining TRBs to be freed or overwritten by new TDs. + +The xHC should also report completion of the final TRB due to its IOC +flag being set by us, regardless of prior errors. 
This event cannot +be recognized if the TD has already been freed earlier, resulting in +"Transfer event TRB DMA ptr not part of current TD" error message. + +Fix this by reusing the logic for processing isoc Transaction Errors. +This also handles hosts which fail to report the final completion. + +Fix transfer length reporting on Babble errors. They may be caused by +device malfunction, no guarantee that the buffer has been filled. + +Signed-off-by: Michal Pecio +Cc: stable@vger.kernel.org +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-ring.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index 883cf477a70b9..4fa387e447f08 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2262,9 +2262,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + case COMP_BANDWIDTH_OVERRUN_ERROR: + frame->status = -ECOMM; + break; +- case COMP_ISOCH_BUFFER_OVERRUN: + case COMP_BABBLE_DETECTED_ERROR: ++ sum_trbs_for_length = true; ++ fallthrough; ++ case COMP_ISOCH_BUFFER_OVERRUN: + frame->status = -EOVERFLOW; ++ if (ep_trb != td->last_trb) ++ td->error_mid_td = true; + break; + case COMP_INCOMPATIBLE_DEVICE_ERROR: + case COMP_STALL_ERROR: +-- +2.43.0 + diff --git a/queue-5.10/xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch b/queue-5.10/xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch new file mode 100644 index 00000000000..b13aea678c6 --- /dev/null +++ b/queue-5.10/xhci-prevent-double-fetch-of-transfer-and-transfer-e.patch @@ -0,0 +1,143 @@ +From 9c80999c4b17a687981d277bb41ed65de6e95686 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Apr 2021 10:02:08 +0300 +Subject: xhci: prevent double-fetch of transfer and transfer event TRBs + +From: Mathias 
Nyman + +[ Upstream commit e9fcb07704fcef6fa6d0333fd2b3a62442eaf45b ] + +The same values are parsed several times from transfer and event +TRBs by different functions in the same call path, all while processing +one transfer event. + +As the TRBs are in DMA memory and can be accessed by the xHC host we want +to avoid this to prevent double-fetch issues. + +To resolve this pass the already parsed values to the different functions +in the path of parsing a transfer event + +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20210406070208.3406266-5-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-ring.c | 42 ++++++++++++++++-------------------- + 1 file changed, 19 insertions(+), 23 deletions(-) + +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index b814dc07116da..62d92da7016e7 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2059,16 +2059,13 @@ int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code) + return 0; + } + +-static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, +- struct xhci_transfer_event *event, struct xhci_virt_ep *ep) ++static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, ++ struct xhci_ring *ep_ring, struct xhci_td *td, ++ u32 trb_comp_code) + { + struct xhci_ep_ctx *ep_ctx; +- struct xhci_ring *ep_ring; +- u32 trb_comp_code; + +- ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); + ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index); +- trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); + + if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID || + trb_comp_code == COMP_STOPPED || +@@ -2126,9 +2123,9 @@ static int sum_trb_lengths(struct xhci_hcd *xhci, struct xhci_ring *ring, + /* + * Process control tds, update 
urb status and actual_length. + */ +-static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, +- union xhci_trb *ep_trb, struct xhci_transfer_event *event, +- struct xhci_virt_ep *ep) ++static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, ++ struct xhci_ring *ep_ring, struct xhci_td *td, ++ union xhci_trb *ep_trb, struct xhci_transfer_event *event) + { + struct xhci_ep_ctx *ep_ctx; + u32 trb_comp_code; +@@ -2216,15 +2213,15 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, + td->urb->actual_length = requested; + + finish_td: +- return finish_td(xhci, td, event, ep); ++ return finish_td(xhci, ep, ep_ring, td, trb_comp_code); + } + + /* + * Process isochronous tds, update urb packet status and actual_length. + */ +-static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, +- union xhci_trb *ep_trb, struct xhci_transfer_event *event, +- struct xhci_virt_ep *ep) ++static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, ++ struct xhci_ring *ep_ring, struct xhci_td *td, ++ union xhci_trb *ep_trb, struct xhci_transfer_event *event) + { + struct urb_priv *urb_priv; + int idx; +@@ -2301,7 +2298,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, + + td->urb->actual_length += frame->actual_length; + +- return finish_td(xhci, td, event, ep); ++ return finish_td(xhci, ep, ep_ring, td, trb_comp_code); + } + + static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, +@@ -2333,17 +2330,15 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, + /* + * Process bulk and interrupt tds, update urb status and actual_length. 
+ */ +-static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, +- union xhci_trb *ep_trb, struct xhci_transfer_event *event, +- struct xhci_virt_ep *ep) ++static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, ++ struct xhci_ring *ep_ring, struct xhci_td *td, ++ union xhci_trb *ep_trb, struct xhci_transfer_event *event) + { + struct xhci_slot_ctx *slot_ctx; +- struct xhci_ring *ep_ring; + u32 trb_comp_code; + u32 remaining, requested, ep_trb_len; + + slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx); +- ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); + trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); + remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); + ep_trb_len = TRB_LEN(le32_to_cpu(ep_trb->generic.field[2])); +@@ -2403,7 +2398,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, + remaining); + td->urb->actual_length = 0; + } +- return finish_td(xhci, td, event, ep); ++ ++ return finish_td(xhci, ep, ep_ring, td, trb_comp_code); + } + + /* +@@ -2754,11 +2750,11 @@ static int handle_tx_event(struct xhci_hcd *xhci, + + /* update the urb's actual_length and give back to the core */ + if (usb_endpoint_xfer_control(&td->urb->ep->desc)) +- process_ctrl_td(xhci, td, ep_trb, event, ep); ++ process_ctrl_td(xhci, ep, ep_ring, td, ep_trb, event); + else if (usb_endpoint_xfer_isoc(&td->urb->ep->desc)) +- process_isoc_td(xhci, td, ep_trb, event, ep); ++ process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); + else +- process_bulk_intr_td(xhci, td, ep_trb, event, ep); ++ process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + cleanup: + handling_skipped_tds = ep->skip && + trb_comp_code != COMP_MISSED_SERVICE_ERROR && +-- +2.43.0 + diff --git a/queue-5.10/xhci-process-isoc-td-properly-when-there-was-a-trans.patch b/queue-5.10/xhci-process-isoc-td-properly-when-there-was-a-trans.patch new file mode 100644 index 00000000000..cefcfc84166 --- /dev/null 
+++ b/queue-5.10/xhci-process-isoc-td-properly-when-there-was-a-trans.patch @@ -0,0 +1,186 @@ +From 6bece1cf082431fcec40f34122386e466f10b62f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Jan 2024 17:27:36 +0200 +Subject: xhci: process isoc TD properly when there was a transaction error mid + TD. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mathias Nyman + +[ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ] + +The last TRB of a isoc TD might not trigger an event if there was +an error event for a TRB mid TD. This is seen on a NEC Corporation +uPD720200 USB 3.0 Host + +After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 +generate events for passed TRBs with IOC flag set if it proceeds to the +next TD. This event is either a copy of the original error, or a +"success" transfer event. + +If that event is missing then the driver and xHC host get out of sync as +the driver is still expecting a transfer event for that first TD, while +xHC host is already sending events for the next TD in the list. +This leads to +"Transfer event TRB DMA ptr not part of current TD" messages. + +As a solution we tag the isoc TDs that get error events mid TD. +If an event doesn't match the first TD, then check if the tag is +set, and event points to the next TD. +In that case give back the fist TD and process the next TD normally + +Make sure TD status and transferred length stay valid in both cases +with and without final TD completion event. 
+ +Reported-by: Michał Pecio +Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ +Tested-by: Michał Pecio +Cc: stable@vger.kernel.org +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- + drivers/usb/host/xhci.h | 1 + + 2 files changed, 61 insertions(+), 14 deletions(-) + +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index 62d92da7016e7..883cf477a70b9 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2244,6 +2244,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + /* handle completion code */ + switch (trb_comp_code) { + case COMP_SUCCESS: ++ /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ ++ if (td->error_mid_td) ++ break; + if (remaining) { + frame->status = short_framestatus; + if (xhci->quirks & XHCI_TRUST_TX_LENGTH) +@@ -2269,8 +2272,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + break; + case COMP_USB_TRANSACTION_ERROR: + frame->status = -EPROTO; ++ sum_trbs_for_length = true; + if (ep_trb != td->last_trb) +- return 0; ++ td->error_mid_td = true; + break; + case COMP_STOPPED: + sum_trbs_for_length = true; +@@ -2290,6 +2294,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + break; + } + ++ if (td->urb_length_set) ++ goto finish_td; ++ + if (sum_trbs_for_length) + frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + + ep_trb_len - remaining; +@@ -2298,6 +2305,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + + td->urb->actual_length += frame->actual_length; + ++finish_td: ++ /* Don't give back TD yet if we encountered an error mid TD */ ++ if (td->error_mid_td && ep_trb != td->last_trb) { ++ xhci_dbg(xhci, "Error mid 
isoc TD, wait for final completion event\n"); ++ td->urb_length_set = true; ++ return 0; ++ } ++ + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); + } + +@@ -2684,17 +2699,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, + } + + if (!ep_seg) { +- if (!ep->skip || +- !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { +- /* Some host controllers give a spurious +- * successful event after a short transfer. +- * Ignore it. +- */ +- if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && +- ep_ring->last_td_was_short) { +- ep_ring->last_td_was_short = false; +- goto cleanup; ++ ++ if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { ++ skip_isoc_td(xhci, td, ep, status); ++ goto cleanup; ++ } ++ ++ /* ++ * Some hosts give a spurious success event after a short ++ * transfer. Ignore it. ++ */ ++ if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && ++ ep_ring->last_td_was_short) { ++ ep_ring->last_td_was_short = false; ++ goto cleanup; ++ } ++ ++ /* ++ * xhci 4.10.2 states isoc endpoints should continue ++ * processing the next TD if there was an error mid TD. ++ * So host like NEC don't generate an event for the last ++ * isoc TRB even if the IOC flag is set. ++ * xhci 4.9.1 states that if there are errors in mult-TRB ++ * TDs xHC should generate an error for that TRB, and if xHC ++ * proceeds to the next TD it should genete an event for ++ * any TRB with IOC flag on the way. Other host follow this. ++ * So this event might be for the next TD. 
++ */ ++ if (td->error_mid_td && ++ !list_is_last(&td->td_list, &ep_ring->td_list)) { ++ struct xhci_td *td_next = list_next_entry(td, td_list); ++ ++ ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, ++ td_next->last_trb, ep_trb_dma, false); ++ if (ep_seg) { ++ /* give back previous TD, start handling new */ ++ xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); ++ ep_ring->dequeue = td->last_trb; ++ ep_ring->deq_seg = td->last_trb_seg; ++ inc_deq(xhci, ep_ring); ++ xhci_td_cleanup(xhci, td, ep_ring, td->status); ++ td = td_next; + } ++ } ++ ++ if (!ep_seg) { + /* HC is busted, give up! */ + xhci_err(xhci, + "ERROR Transfer event TRB DMA ptr not " +@@ -2706,9 +2755,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, + ep_trb_dma, true); + return -ESHUTDOWN; + } +- +- skip_isoc_td(xhci, td, ep, status); +- goto cleanup; + } + if (trb_comp_code == COMP_SHORT_PACKET) + ep_ring->last_td_was_short = true; +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 85ab213c7940a..5a8443f6ed703 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1554,6 +1554,7 @@ struct xhci_td { + struct xhci_segment *bounce_seg; + /* actual_length of the URB has already been set */ + bool urb_length_set; ++ bool error_mid_td; + unsigned int num_trbs; + }; + +-- +2.43.0 + diff --git a/queue-5.10/xhci-remove-extra-loop-in-interrupt-context.patch b/queue-5.10/xhci-remove-extra-loop-in-interrupt-context.patch new file mode 100644 index 00000000000..89b8d454b4c --- /dev/null +++ b/queue-5.10/xhci-remove-extra-loop-in-interrupt-context.patch @@ -0,0 +1,143 @@ +From dcd48a2dfd74f9eb405b0181d978261958264b64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Jan 2021 15:00:28 +0200 +Subject: xhci: remove extra loop in interrupt context + +From: Mathias Nyman + +[ Upstream commit 55f6153d8cc8eff0852d108f80087fdf41dc2169 ] + +When finishing a TD we walk the endpoint dequeue trb pointer +until it matches the last TRB of the 
TD. + +TDs can contain over 100 TRBs, meaning we call a function 100 times, +do a few comparisons and increase a couple values for each of these calls, +all in interrupt context. + +This can all be avoided by adding a pointer to the last TRB segment, and +a number of TRBs in the TD. So instead of walking through each TRB just +set the new dequeue segment, pointer, and number of free TRBs directly. + +Getting rid of the while loop also reduces the risk of getting stuck in a +infinite loop in the interrupt handler. Loop relied on valid matching +dequeue and last_trb values to break. + +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20210129130044.206855-12-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") +Signed-off-by: Sasha Levin +--- + drivers/usb/host/xhci-ring.c | 21 ++++++++++++++------- + drivers/usb/host/xhci.h | 2 ++ + 2 files changed, 16 insertions(+), 7 deletions(-) + +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index eb70f07e3623a..b814dc07116da 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -2099,8 +2099,9 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, + EP_HARD_RESET); + } else { + /* Update ring dequeue pointer */ +- while (ep_ring->dequeue != td->last_trb) +- inc_deq(xhci, ep_ring); ++ ep_ring->dequeue = td->last_trb; ++ ep_ring->deq_seg = td->last_trb_seg; ++ ep_ring->num_trbs_free += td->num_trbs - 1; + inc_deq(xhci, ep_ring); + } + +@@ -2321,8 +2322,9 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, + frame->actual_length = 0; + + /* Update ring dequeue pointer */ +- while (ep->ring->dequeue != td->last_trb) +- inc_deq(xhci, ep->ring); ++ ep->ring->dequeue = td->last_trb; ++ ep->ring->deq_seg = td->last_trb_seg; ++ ep->ring->num_trbs_free += td->num_trbs - 1; + inc_deq(xhci, ep->ring); + + return 
xhci_td_cleanup(xhci, td, ep->ring, status); +@@ -3487,7 +3489,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + field |= TRB_IOC; + more_trbs_coming = false; + td->last_trb = ring->enqueue; +- ++ td->last_trb_seg = ring->enq_seg; + if (xhci_urb_suitable_for_idt(urb)) { + memcpy(&send_addr, urb->transfer_buffer, + trb_buff_len); +@@ -3513,7 +3515,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + upper_32_bits(send_addr), + length_field, + field); +- ++ td->num_trbs++; + addr += trb_buff_len; + sent_len = trb_buff_len; + +@@ -3537,8 +3539,10 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + urb_priv->td[1].last_trb = ring->enqueue; ++ urb_priv->td[1].last_trb_seg = ring->enq_seg; + field = TRB_TYPE(TRB_NORMAL) | ring->cycle_state | TRB_IOC; + queue_trb(xhci, ring, 0, 0, 0, TRB_INTR_TARGET(0), field); ++ urb_priv->td[1].num_trbs++; + } + + check_trb_math(urb, enqd_len); +@@ -3589,6 +3593,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + + urb_priv = urb->hcpriv; + td = &urb_priv->td[0]; ++ td->num_trbs = num_trbs; + + /* + * Don't give the first TRB to the hardware (by toggling the cycle bit) +@@ -3661,6 +3666,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + + /* Save the DMA address of the last TRB in the TD */ + td->last_trb = ep_ring->enqueue; ++ td->last_trb_seg = ep_ring->enq_seg; + + /* Queue status TRB - see Table 7 and sections 4.11.2.2 and 6.4.1.2.3 */ + /* If the device sent data, the status stage is an OUT transfer */ +@@ -3905,7 +3911,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + goto cleanup; + } + td = &urb_priv->td[i]; +- ++ td->num_trbs = trbs_per_td; + /* use SIA as default, if frame id is used overwrite it */ + sia_frame_id = TRB_SIA; + if (!(urb->transfer_flags & URB_ISO_ASAP) && +@@ -3948,6 +3954,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t 
mem_flags, + } else { + more_trbs_coming = false; + td->last_trb = ep_ring->enqueue; ++ td->last_trb_seg = ep_ring->enq_seg; + field |= TRB_IOC; + if (trb_block_event_intr(xhci, num_tds, i)) + field |= TRB_BEI; +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index bb3c362a194b2..85ab213c7940a 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1550,9 +1550,11 @@ struct xhci_td { + struct xhci_segment *start_seg; + union xhci_trb *first_trb; + union xhci_trb *last_trb; ++ struct xhci_segment *last_trb_seg; + struct xhci_segment *bounce_seg; + /* actual_length of the URB has already been set */ + bool urb_length_set; ++ unsigned int num_trbs; + }; + + /* xHCI command default timeout value */ +-- +2.43.0 +