Only meaningful for asynchronous I/O, and only if the entire I/O can
be issued as a single ``struct bio``.
- * ``IOCB_DIO_CALLER_COMP``: Try to run I/O completion from the caller's
- process context.
- See ``linux/fs.h`` for more details.
-
Filesystems should call ``iomap_dio_rw`` from ``->read_iter`` and
``->write_iter``, and set ``FMODE_CAN_ODIRECT`` in the ``->open``
function for the file.
!(file->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
- /*
- * Stacked filesystems don't support deferred completions, don't copy
- * this property in case it is set by the issuer.
- */
- flags &= ~IOCB_DIO_CALLER_COMP;
-
old_cred = override_creds(ctx->cred);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(flags);
* Private flags for iomap_dio, must not overlap with the public ones in
* iomap.h:
*/
-#define IOMAP_DIO_NO_INVALIDATE (1U << 25)
-#define IOMAP_DIO_CALLER_COMP (1U << 26)
+#define IOMAP_DIO_NO_INVALIDATE (1U << 26)
#define IOMAP_DIO_INLINE_COMP (1U << 27)
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
#define IOMAP_DIO_NEED_SYNC (1U << 29)
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
-static ssize_t iomap_dio_deferred_complete(void *data)
-{
- return iomap_dio_complete(data);
-}
-
static void iomap_dio_complete_work(struct work_struct *work)
{
struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
} else if (dio->flags & IOMAP_DIO_INLINE_COMP) {
WRITE_ONCE(iocb->private, NULL);
iomap_dio_complete_work(&dio->aio.work);
- } else if (dio->flags & IOMAP_DIO_CALLER_COMP) {
- /*
- * If this dio is flagged with IOMAP_DIO_CALLER_COMP, then
- * schedule our completion that way to avoid an async punt to a
- * workqueue.
- */
- /* only polled IO cares about private cleared */
- iocb->private = dio;
- iocb->dio_complete = iomap_dio_deferred_complete;
-
- /*
- * Invoke ->ki_complete() directly. We've assigned our
- * dio_complete callback handler, and since the issuer set
- * IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
- * notice ->dio_complete being set and will defer calling that
- * handler until it can be done from a safe task context.
- *
- * Note that the 'res' being passed in here is not important
- * for this case. The actual completion value of the request
- * will be gotten from dio_complete when that is run by the
- * issuer.
- */
- iocb->ki_complete(iocb, 0);
} else {
struct inode *inode = file_inode(iocb->ki_filp);
dio->flags |= IOMAP_DIO_INLINE_COMP;
dio->flags |= IOMAP_DIO_NO_INVALIDATE;
}
- dio->flags &= ~IOMAP_DIO_CALLER_COMP;
iomap_dio_done(dio);
}
if (!(bio_opf & REQ_FUA))
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
-
- /*
- * We can only do deferred completion for pure overwrites that
- * don't require additional I/O at completion time.
- *
- * This rules out writes that need zeroing or extent conversion,
- * extend the file size, or issue metadata I/O or cache flushes
- * during completion processing.
- */
- if (need_zeroout || (pos >= i_size_read(inode)) ||
- ((dio->flags & IOMAP_DIO_NEED_SYNC) &&
- !(bio_opf & REQ_FUA)))
- dio->flags &= ~IOMAP_DIO_CALLER_COMP;
} else {
bio_opf |= REQ_OP_READ;
}
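- * ones we set for inline and deferred completions. If none of those
- * are available for this IO, clear the polled flag.
+ * ones we set for inline completion. If that is not available for
+ * this IO, clear the polled flag.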
*/
- if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP)))
+ if (!(dio->flags & IOMAP_DIO_INLINE_COMP))
dio->iocb->ki_flags &= ~IOCB_HIPRI;
if (need_zeroout) {
iomi.flags |= IOMAP_WRITE;
dio->flags |= IOMAP_DIO_WRITE;
- /*
- * Flag as supporting deferred completions, if the issuer
- * groks it. This can avoid a workqueue punt for writes.
- * We may later clear this flag if we need to do other IO
- * as part of this IO completion.
- */
- if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
- dio->flags |= IOMAP_DIO_CALLER_COMP;
-
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
ret = -EAGAIN;
if (iomi.pos >= dio->i_size ||
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
-/*
- * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
- * iocb completion can be passed back to the owner for execution from a safe
- * context rather than needing to be punted through a workqueue. If this
- * flag is set, the bio completion handling may set iocb->dio_complete to a
- * handler function and iocb->private to context information for that handler.
- * The issuer should call the handler with that context information from task
- * context to complete the processing of the iocb. Note that while this
- * provides a task context for the dio_complete() callback, it should only be
- * used on the completion side for non-IO generating completions. It's fine to
- * call blocking functions from this callback, but they should not wait for
- * unrelated IO (like cache flushing, new IO generation, etc).
- */
-#define IOCB_DIO_CALLER_COMP (1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
-#define IOCB_AIO_RW (1 << 23)
-#define IOCB_HAS_METADATA (1 << 24)
+#define IOCB_AIO_RW (1 << 22)
+#define IOCB_HAS_METADATA (1 << 23)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
{ IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
- { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \
{ IOCB_AIO_RW, "AIO_RW" }, \
{ IOCB_HAS_METADATA, "AIO_HAS_METADATA" }
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
u8 ki_write_stream;
- union {
- /*
- * Only used for async buffered reads, where it denotes the
- * page waitqueue associated with completing the read. Valid
- * IFF IOCB_WAITQ is set.
- */
- struct wait_page_queue *ki_waitq;
- /*
- * Can be used for O_DIRECT IO, where the completion handling
- * is punted back to the issuer of the IO. May only be set
- * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
- * must then check for presence of this handler when ki_complete
- * is invoked. The data passed in to this handler must be
- * assigned to ->private when dio_complete is assigned.
- */
- ssize_t (*dio_complete)(void *data);
- };
+
+ /*
+ * Only used for async buffered reads, where it denotes the page
+ * waitqueue associated with completing the read.
+ * Valid IFF IOCB_WAITQ is set.
+ */
+ struct wait_page_queue *ki_waitq;
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
} else {
rw->kiocb.ki_ioprio = get_current_ioprio();
}
- rw->kiocb.dio_complete = NULL;
rw->kiocb.ki_flags = 0;
rw->kiocb.ki_write_stream = READ_ONCE(sqe->write_stream);
void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw)
{
- struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
- struct kiocb *kiocb = &rw->kiocb;
-
- if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
- long res = kiocb->dio_complete(rw->kiocb.private);
-
- io_req_set_res(req, io_fixup_rw_res(req, res), 0);
- }
-
io_req_io_end(req);
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
struct io_kiocb *req = cmd_to_io_kiocb(rw);
- if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
- __io_complete_rw_common(req, res);
- io_req_set_res(req, io_fixup_rw_res(req, res), 0);
- }
+ __io_complete_rw_common(req, res);
+ io_req_set_res(req, io_fixup_rw_res(req, res), 0);
req->io_task_work.func = io_req_rw_complete;
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}