io_uring: user registered clockid for wait timeouts
author Pavel Begunkov <asml.silence@gmail.com>
Wed, 7 Aug 2024 14:18:14 +0000 (15:18 +0100)
committer Jens Axboe <axboe@kernel.dk>
Sun, 25 Aug 2024 14:27:01 +0000 (08:27 -0600)
Add a new registration opcode, IORING_REGISTER_CLOCK, which lets the
user select which clock id to use for CQ wait timeouts. Only a subset
of POSIX clocks is allowed; currently CLOCK_MONOTONIC and CLOCK_BOOTTIME
are supported.

Suggested-by: Lewis Baker <lewissbaker@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
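
A minimal userspace sketch of the new opcode, using the raw io_uring_register(2)
syscall rather than any liburing wrapper. The helper name ring_register_clock()
is hypothetical, the ring fd is assumed to come from io_uring_setup(2), and the
uapi headers are assumed to already carry IORING_REGISTER_CLOCK and
struct io_uring_clock_register from this patch:

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical helper: switch CQ wait timeouts to the given clock id. */
static int ring_register_clock(int ring_fd, __u32 clockid)
{
	struct io_uring_clock_register reg;

	memset(&reg, 0, sizeof(reg));	/* reserved fields must be zero */
	reg.clockid = clockid;

	/* nr_args must be 0; only the struct pointer is passed */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_CLOCK, &reg, 0);
}

Called as, e.g., ring_register_clock(ring_fd, CLOCK_BOOTTIME) before waiting, so
that time spent in suspend counts against the timeout; any clock id other than
CLOCK_MONOTONIC or CLOCK_BOOTTIME is rejected with EINVAL by io_register_clock()
below.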
include/linux/io_uring_types.h
include/uapi/linux/io_uring.h
io_uring/io_uring.c
io_uring/io_uring.h
io_uring/napi.c
io_uring/register.c

index 3315005df117b8b1c5ddb1fab39db6c77bd7af81..4b9ba523978d203ae23fb4ec9622d6e4e35a5e36 100644 (file)
@@ -239,6 +239,9 @@ struct io_ring_ctx {
                struct io_rings         *rings;
                struct percpu_ref       refs;
 
+               clockid_t               clockid;
+               enum tk_offsets         clock_offset;
+
                enum task_work_notify_mode      notify_method;
                unsigned                        sq_thread_idle;
        } ____cacheline_aligned_in_smp;
index 6a81f55fcd0d27a01cf38f63db9bafbd93594cab..7af716136df93a33265b96460a82ba20a99d7b56 100644 (file)
@@ -596,6 +596,8 @@ enum io_uring_register_op {
        IORING_REGISTER_NAPI                    = 27,
        IORING_UNREGISTER_NAPI                  = 28,
 
+       IORING_REGISTER_CLOCK                   = 29,
+
        /* this goes last */
        IORING_REGISTER_LAST,
 
@@ -676,6 +678,11 @@ struct io_uring_restriction {
        __u32 resv2[3];
 };
 
+struct io_uring_clock_register {
+       __u32   clockid;
+       __u32   __resv[3];
+};
+
 struct io_uring_buf {
        __u64   addr;
        __u32   len;
index 5282f988744093e100e72080287ac7c5a34bbc52..20229e72b65cafb73a273ecac3eebfa5bd8650bf 100644 (file)
@@ -2377,7 +2377,8 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
        ret = 0;
        if (iowq->timeout == KTIME_MAX)
                schedule();
-       else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
+       else if (!schedule_hrtimeout_range_clock(&iowq->timeout, 0,
+                                                HRTIMER_MODE_ABS, ctx->clockid))
                ret = -ETIME;
        current->in_iowait = 0;
        return ret;
@@ -2422,7 +2423,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
                iowq.timeout = timespec64_to_ktime(ts);
                if (!(flags & IORING_ENTER_ABS_TIMER))
-                       iowq.timeout = ktime_add(iowq.timeout, ktime_get());
+                       iowq.timeout = ktime_add(iowq.timeout, io_get_time(ctx));
        }
 
        if (sig) {
@@ -3424,6 +3425,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
        if (!ctx)
                return -ENOMEM;
 
+       ctx->clockid = CLOCK_MONOTONIC;
+       ctx->clock_offset = 0;
+
        if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
            !(ctx->flags & IORING_SETUP_IOPOLL) &&
            !(ctx->flags & IORING_SETUP_SQPOLL))
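
The hunk above anchors relative timeouts with io_get_time(ctx) and leaves absolute
ones untouched when IORING_ENTER_ABS_TIMER is set, so an absolute deadline is
interpreted on the registered clock. A hedged userspace sketch of that path,
assuming IORING_ENTER_ABS_TIMER from the surrounding patch series plus the
existing IORING_ENTER_EXT_ARG mechanism, with wait_cqe_until() as a hypothetical
helper for a ring that has already registered CLOCK_BOOTTIME:

#include <linux/io_uring.h>
#include <linux/time_types.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical helper: wait for one CQE until an absolute deadline taken on
 * the clock registered for this ring (here CLOCK_BOOTTIME). */
static int wait_cqe_until(int ring_fd, const struct __kernel_timespec *deadline)
{
	struct io_uring_getevents_arg arg;

	memset(&arg, 0, sizeof(arg));	/* no sigmask; pad must stay zero */
	arg.ts = (uintptr_t)deadline;	/* __u64 field holds a user pointer */

	return syscall(__NR_io_uring_enter, ring_fd, 0, 1,
		       IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG |
		       IORING_ENTER_ABS_TIMER,
		       &arg, sizeof(arg));
}

The deadline would typically be built from clock_gettime(CLOCK_BOOTTIME, ...)
plus the desired interval; without IORING_ENTER_ABS_TIMER the same timespec is
treated as relative and, per the hunk above, converted to an absolute value with
io_get_time(ctx).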
index c2acf6180845db760c3ae4917ff8227302139e95..9935819f12b711533d19c25cc9b2266b75fdf653 100644 (file)
@@ -437,6 +437,14 @@ static inline bool io_file_can_poll(struct io_kiocb *req)
        return false;
 }
 
+static inline ktime_t io_get_time(struct io_ring_ctx *ctx)
+{
+       if (ctx->clockid == CLOCK_MONOTONIC)
+               return ktime_get();
+
+       return ktime_get_with_offset(ctx->clock_offset);
+}
+
 enum {
        IO_CHECK_CQ_OVERFLOW_BIT,
        IO_CHECK_CQ_DROPPED_BIT,
index d78fcbecdd27dbab334b760d5b2021a66a293d10..d0cf694d0172d4f10f30f541e69b3924420db60f 100644 (file)
@@ -283,7 +283,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
 
        iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
        if (iowq->timeout != KTIME_MAX) {
-               ktime_t dt = ktime_sub(iowq->timeout, ktime_get());
+               ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
 
                iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
        }
index e3c20be5a198033fbb2c1289f8b901068918bbff..57cb85c42526b0074bbfa5c8159e30f8b221770a 100644 (file)
@@ -335,6 +335,31 @@ err:
        return ret;
 }
 
+static int io_register_clock(struct io_ring_ctx *ctx,
+                            struct io_uring_clock_register __user *arg)
+{
+       struct io_uring_clock_register reg;
+
+       if (copy_from_user(&reg, arg, sizeof(reg)))
+               return -EFAULT;
+       if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
+               return -EINVAL;
+
+       switch (reg.clockid) {
+       case CLOCK_MONOTONIC:
+               ctx->clock_offset = 0;
+               break;
+       case CLOCK_BOOTTIME:
+               ctx->clock_offset = TK_OFFS_BOOT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       ctx->clockid = reg.clockid;
+       return 0;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
        __releases(ctx->uring_lock)
@@ -511,6 +536,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                        break;
                ret = io_unregister_napi(ctx, arg);
                break;
+       case IORING_REGISTER_CLOCK:
+               ret = -EINVAL;
+               if (!arg || nr_args)
+                       break;
+               ret = io_register_clock(ctx, arg);
+               break;
        default:
                ret = -EINVAL;
                break;