return 0;
}
+/* Lock two rings at once. The rings must be different! */
+static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
+{
+ if (ctx1 > ctx2)
+ swap(ctx1, ctx2);
+ mutex_lock(&ctx1->uring_lock);
+ mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING);
+}
+
+/* Both rings are locked by the caller. */
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
struct io_uring_clone_buffers *arg)
{
int i, ret, off, nr;
unsigned int nbufs;
+ lockdep_assert_held(&ctx->uring_lock);
+ lockdep_assert_held(&src_ctx->uring_lock);
+
/*
* Accounting state is shared between the two rings; that only works if
* both rings are accounted towards the same counters.
if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
return -EBUSY;
- nbufs = READ_ONCE(src_ctx->buf_table.nr);
+ nbufs = src_ctx->buf_table.nr;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
}
}
- /*
- * Drop our own lock here. We'll setup the data we need and reference
- * the source buffers, then re-grab, check, and assign at the end.
- */
- mutex_unlock(&ctx->uring_lock);
-
- mutex_lock(&src_ctx->uring_lock);
ret = -ENXIO;
nbufs = src_ctx->buf_table.nr;
if (!nbufs)
- goto out_unlock;
+ goto out_free;
ret = -EINVAL;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
- goto out_unlock;
+ goto out_free;
ret = -EOVERFLOW;
if (check_add_overflow(arg->nr, arg->src_off, &off))
- goto out_unlock;
+ goto out_free;
if (off > nbufs)
- goto out_unlock;
+ goto out_free;
off = arg->dst_off;
i = arg->src_off;
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out_free;
}
refcount_inc(&src_node->buf->refs);
i++;
}
- /* Have a ref on the bufs now, drop src lock and re-grab our own lock */
- mutex_unlock(&src_ctx->uring_lock);
- mutex_lock(&ctx->uring_lock);
-
/*
* If asked for replace, put the old table. data->nodes[] holds both
* old and new nodes at this point.
io_rsrc_data_free(ctx, &ctx->buf_table);
/*
- * ctx->buf_table should be empty now - either the contents are being
- * replaced and we just freed the table, or someone raced setting up
- * a buffer table while the clone was happening. If not empty, fall
- * through to failure handling.
+ * ctx->buf_table must be empty now - either the contents are being
+ * replaced and we just freed the table, or the contents are being
+ * copied to a ring that does not have buffers yet (checked at function
+ * entry).
*/
- if (!ctx->buf_table.nr) {
- ctx->buf_table = data;
- return 0;
- }
+ WARN_ON_ONCE(ctx->buf_table.nr);
+ ctx->buf_table = data;
+ return 0;
- mutex_unlock(&ctx->uring_lock);
- mutex_lock(&src_ctx->uring_lock);
- /* someone raced setting up buffers, dump ours */
- ret = -EBUSY;
-out_unlock:
+out_free:
io_rsrc_data_free(ctx, &data);
- mutex_unlock(&src_ctx->uring_lock);
- mutex_lock(&ctx->uring_lock);
return ret;
}
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_clone_buffers buf;
+ struct io_ring_ctx *src_ctx;
bool registered_src;
struct file *file;
int ret;
file = io_uring_register_get_file(buf.src_fd, registered_src);
if (IS_ERR(file))
return PTR_ERR(file);
- ret = io_clone_buffers(ctx, file->private_data, &buf);
+
+ src_ctx = file->private_data;
+ if (src_ctx != ctx) {
+ mutex_unlock(&ctx->uring_lock);
+ lock_two_rings(ctx, src_ctx);
+ }
+
+ ret = io_clone_buffers(ctx, src_ctx, &buf);
+
+ if (src_ctx != ctx)
+ mutex_unlock(&src_ctx->uring_lock);
+
if (!registered_src)
fput(file);
return ret;