From: Linus Torvalds Date: Tue, 16 Jun 2026 07:23:59 +0000 (+0530) Subject: Merge tag 'for-7.2/io_uring-20260615' of git://git.kernel.org/pub/scm/linux/kernel... X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9b40ba14edcdf70240af8114092a76f75f070774;p=thirdparty%2Flinux.git Merge tag 'for-7.2/io_uring-20260615' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux Pull io_uring updates from Jens Axboe: - Rework the task_work infrastructure. Both the local (DEFER_TASKRUN) and the normal (tctx) task_work lists were llist based, which is LIFO ordered, and hence each run had to do an O(n) list reversal pass first to restore queue order. Additionally, to cap the amount of task_work run, each method needed a retry list as well. Add a lockless MPCS FIFO queue (based on Dmitry Vyukov's intrusive MPSC algorithm) and switch both task_work lists to it. It performs better than llists and we can then also ditch the retry lists as well as entries are popped one-at-the-time. On top of those changes, run the tctx fallback task_work directly and remove the now-unused per-ctx fallback machinery entirely. - zcrx user notifications. Add a mechanism for zcrx to communicate conditions back to userspace via a dedicated CQE, with the initial users being notification on running out of buffers and on a frag copy fallback, plus shared-memory notification statistics. Alongside that, a series of zcrx reliability and cleanup fixes: more reliable scrubbing, poisoning pointers on unregistration, dropping an extra ifq close, adding a ctx back-pointer, reordering fd allocation in the export path, and killing a dead 'sock' member. - Allow using io_uring registered buffers for plain SEND and RECV, not just for the zero-copy send path. This enables targets like ublk's NBD backend to push/pull IO data directly to/from a registered buffer over a plain send/recv on a TCP socket. - Registered buffer improvements: account huge pages correctly, bump the io_mapped_ubuf length field to size_t, and raise the previous 1GB registered buffer size limit. - Restrict the ctx access exposed to io_uring BPF struct_ops programs by handing them an opaque type rather than the full io_ring_ctx, and add a separate MAINTAINERS entry for the bpf-ops code. - Allow opcode filtering on IORING_OP_CONNECT. - Validate ring-provided buffer addresses with access_ok(), and align the legacy buffer add limit with MAX_BIDS_PER_BGID. - Various other cleanups and minor fixes, including avoiding msghdr async data on connect/bind, dropping async_size for OP_LISTEN, making the POLL_FIRST receive side checks consistent, re-checking IO_WQ_BIT_EXIT for each linked work item, and using trace_call__##name() at guarded tracepoint call sites. * tag 'for-7.2/io_uring-20260615' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (31 commits) io_uring/bpf-ops: add a separate maintainer entry io_uring/net: make POLL_FIRST receive side checks consistent io_uring: remove the per-ctx fallback task_work machinery io_uring: run the tctx task_work fallback directly io_uring: switch normal task_work to a mpscq io_uring: switch local task_work to a mpscq io_uring/mpscq: add lockless multi-producer, single-consumer FIFO queue io_uring: grab RCU read lock marking task run io_uring/zcrx: kill dead 'sock' member in struct io_zcrx_args io_uring/kbuf: validate ring provided buffer addresses with access_ok() io_uring/net: support registered buffer for plain send and recv io_uring/nop: Drop a wrong comment in struct io_nop io_uring/net: Remove async_size for OP_LISTEN io_uring/net: Avoid msghdr on op_connect/op_bind async data io_uring/bpf-ops: restrict ctx access to BPF io_uring/io-wq: re-check IO_WQ_BIT_EXIT for each linked work item io_uring/kbuf: align legacy buffer add limit with MAX_BIDS_PER_BGID io_uring/zcrx: add shared-memory notification statistics io_uring/zcrx: notify user on frag copy fallback io_uring/zcrx: notify user when out of buffers ... --- 9b40ba14edcdf70240af8114092a76f75f070774 diff --cc io_uring/cancel.c index 4aa3103ba9c39,b0259e74f6784..8c6fa6f367e4f --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@@ -561,12 -561,10 +561,10 @@@ __cold bool io_uring_try_cancel_request ret |= io_waitid_remove_all(ctx, tctx, cancel_all); ret |= io_futex_remove_all(ctx, tctx, cancel_all); ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all); - mutex_unlock(&ctx->uring_lock); ret |= io_kill_timeouts(ctx, tctx, cancel_all); + mutex_unlock(&ctx->uring_lock); if (tctx) ret |= io_run_task_work() > 0; - else - ret |= flush_delayed_work(&ctx->fallback_work); return ret; } diff --cc io_uring/net.c index 34ab1f97cc5a3,7deb62e3b4c0c..00a7df803b99b --- a/io_uring/net.c +++ b/io_uring/net.c @@@ -1800,29 -1879,11 +1880,29 @@@ out return IOU_COMPLETE; } +/* + * Check if bind request would potentially end up with filename_create(), + * which in turn end up in mnt_want_write() which will grab the fs + * percpu start write sem. This can trigger a lockdep warning. + */ - static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len) ++static int io_bind_file_create(const struct sockaddr_storage *addr, int addr_len) +{ + const struct sockaddr_un *sun; + - if (io->addr.ss_family != AF_UNIX) ++ if (addr->ss_family != AF_UNIX) + return 0; + if (addr_len <= offsetof(struct sockaddr_un, sun_path)) + return 0; - sun = (const struct sockaddr_un *) &io->addr; ++ sun = (const struct sockaddr_un *) addr; + return sun->sun_path[0] != '\0'; +} + int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); struct sockaddr __user *uaddr; - struct io_async_msghdr *io; + struct sockaddr_storage *addr; + int ret; if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; @@@ -1830,17 -1891,13 +1910,18 @@@ uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); bind->addr_len = READ_ONCE(sqe->addr2); - io = io_msg_alloc_async(req); - if (unlikely(!io)) + addr = io_uring_alloc_async_data(NULL, req); + if (unlikely(!addr)) return -ENOMEM; - ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); - return move_addr_to_kernel(uaddr, bind->addr_len, addr); ++ ret = move_addr_to_kernel(uaddr, bind->addr_len, addr); + if (unlikely(ret)) + return ret; - if (io_bind_file_create(io, bind->addr_len)) ++ if (io_bind_file_create(addr, bind->addr_len)) + req->flags |= REQ_F_FORCE_ASYNC; + return 0; } + int io_bind(struct io_kiocb *req, unsigned int issue_flags) { struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);