1 --- linux-2.6.27.21/fs/aio.c 2009-03-23 22:04:09.000000000 +0000
2 +++ linux-2.6.27.8/fs/aio.c 2009-03-29 15:53:57.000000000 +0000
4 #include <asm/uaccess.h>
5 #include <asm/mmu_context.h>
8 +#include <linux/poll.h>
9 +#include <linux/eventpoll.h>
13 #define dprintk printk
18 INIT_LIST_HEAD(&req->ki_run_list);
19 - req->ki_eventfd = NULL;
20 + req->ki_eventfd = ERR_PTR(-EINVAL);
22 /* Check if the completion queue has enough free space to
23 * accept an event from this io.
26 assert_spin_locked(&ctx->ctx_lock);
28 + if (!IS_ERR(req->ki_eventfd))
29 + fput(req->ki_eventfd);
32 if (req->ki_iovec != &req->ki_inline_vec)
34 list_del(&req->ki_list);
35 spin_unlock_irq(&fput_lock);
37 - /* Complete the fput(s) */
38 - if (req->ki_filp != NULL)
39 - __fput(req->ki_filp);
40 - if (req->ki_eventfd != NULL)
41 - __fput(req->ki_eventfd);
42 + /* Complete the fput */
43 + __fput(req->ki_filp);
45 /* Link the iocb into the context's free list */
46 spin_lock_irq(&ctx->ctx_lock);
49 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
51 - int schedule_putreq = 0;
53 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
54 req, atomic_long_read(&req->ki_filp->f_count));
56 assert_spin_locked(&ctx->ctx_lock);
60 BUG_ON(req->ki_users < 0);
61 if (likely(req->ki_users))
64 req->ki_cancel = NULL;
68 - * Try to optimize the aio and eventfd file* puts, by avoiding to
69 - * schedule work in case it is not __fput() time. In normal cases,
70 - * we would not be holding the last reference to the file*, so
71 - * this function will be executed w/out any aio kthread wakeup.
72 + /* Must be done under the lock to serialise against cancellation.
73 + * Call this aio_fput as it duplicates fput via the fput_work.
75 - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
78 - req->ki_filp = NULL;
79 - if (req->ki_eventfd != NULL) {
80 - if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
83 - req->ki_eventfd = NULL;
85 - if (unlikely(schedule_putreq)) {
86 + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
88 spin_lock(&fput_lock);
89 list_add(&req->ki_list, &fput_head);
91 * eventfd. The eventfd_signal() function is safe to be called
94 - if (iocb->ki_eventfd != NULL)
95 + if (!IS_ERR(iocb->ki_eventfd))
96 eventfd_signal(iocb->ki_eventfd, 1);
99 @@ -1026,6 +1015,11 @@
100 if (waitqueue_active(&ctx->wait))
104 + if (ctx->file && waitqueue_active(&ctx->poll_wait))
105 + wake_up(&ctx->poll_wait);
108 spin_unlock_irqrestore(&ctx->ctx_lock, flags);
111 @@ -1033,6 +1027,8 @@
113 * Pull an event off of the ioctx's event ring. Returns the number of
114 * events fetched (0 or 1 ;-)
115 + * If ent parameter is 0, just returns the number of events that would
117 * FIXME: make this use cmpxchg.
118 * TODO: make the ringbuffer user mmap()able (requires FIXME).
120 @@ -1055,13 +1051,18 @@
122 head = ring->head % info->nr;
123 if (head != ring->tail) {
124 - struct io_event *evp = aio_ring_event(info, head, KM_USER1);
126 - head = (head + 1) % info->nr;
127 - smp_mb(); /* finish reading the event before updatng the head */
130 - put_aio_ring_event(evp, KM_USER1);
131 + if (ent) { /* event requested */
132 + struct io_event *evp =
133 + aio_ring_event(info, head, KM_USER1);
135 + head = (head + 1) % info->nr;
136 + /* finish reading the event before updating the head */
140 + put_aio_ring_event(evp, KM_USER1);
141 + } else /* only need to know availability */
144 spin_unlock(&info->ring_lock);
146 @@ -1251,6 +1252,13 @@
148 aio_cancel_all(ioctx);
149 wait_for_all_aios(ioctx);
151 + /* forget the poll file, but it's up to the user to close it */
153 + ioctx->file->private_data = 0;
159 * Wake up any waiters. The setting of ctx->dead must be seen
160 @@ -1261,6 +1269,67 @@
161 put_ioctx(ioctx); /* once for the lookup */
166 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
168 + struct kioctx *ioctx = file->private_data;
170 + file->private_data = 0;
171 + spin_lock_irq(&ioctx->ctx_lock);
173 + spin_unlock_irq(&ioctx->ctx_lock);
178 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
179 +{ unsigned int pollflags = 0;
180 + struct kioctx *ioctx = file->private_data;
184 + spin_lock_irq(&ioctx->ctx_lock);
185 + /* Insert inside our poll wait queue */
186 + poll_wait(file, &ioctx->poll_wait, wait);
188 + /* Check our condition */
189 + if (aio_read_evt(ioctx, 0))
190 + pollflags = POLLIN | POLLRDNORM;
191 + spin_unlock_irq(&ioctx->ctx_lock);
197 +static const struct file_operations aioq_fops = {
198 + .release = aio_queue_fd_close,
199 + .poll = aio_queue_fd_poll
203 + * Create a file descriptor that can be used to poll the event queue.
204 + * Based and piggybacked on the excellent epoll code.
207 +static int make_aio_fd(struct kioctx *ioctx)
210 + struct inode *inode;
213 + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
217 + /* associate the file with the IO context */
218 + file->private_data = ioctx;
219 + ioctx->file = file;
220 + init_waitqueue_head(&ioctx->poll_wait);
226 * Create an aio_context capable of receiving at least nr_events.
227 * ctxp must not point to an aio_context that already exists, and
228 @@ -1273,18 +1342,30 @@
229 * resources are available. May fail with -EFAULT if an invalid
230 * pointer is passed for ctxp. Will fail with -ENOSYS if not
233 + * To request a selectable fd, the user context has to be initialized
234 + * to 1, instead of 0, and the return value is the fd.
235 + * This keeps the system call compatible, since a non-zero value
236 + * was not allowed so far.
238 -SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
239 +asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
241 struct kioctx *ioctx = NULL;
246 ret = get_user(ctx, ctxp);
257 if (unlikely(ctx || nr_events == 0)) {
258 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
260 @@ -1295,8 +1376,12 @@
261 ret = PTR_ERR(ioctx);
262 if (!IS_ERR(ioctx)) {
263 ret = put_user(ioctx->user_id, ctxp);
267 + if (make_fd && ret >= 0)
268 + ret = make_aio_fd(ioctx);
273 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
275 @@ -1312,7 +1397,7 @@
276 * implemented. May fail with -EFAULT if the context pointed to
279 -SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
280 +asmlinkage long sys_io_destroy(aio_context_t ctx)
282 struct kioctx *ioctx = lookup_ioctx(ctx);
283 if (likely(NULL != ioctx)) {
284 @@ -1612,7 +1697,6 @@
285 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
286 if (IS_ERR(req->ki_eventfd)) {
287 ret = PTR_ERR(req->ki_eventfd);
288 - req->ki_eventfd = NULL;
292 @@ -1667,8 +1751,8 @@
293 * are available to queue any iocbs. Will return 0 if nr is 0. Will
294 * fail with -ENOSYS if not implemented.
296 -SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
297 - struct iocb __user * __user *, iocbpp)
298 +asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
299 + struct iocb __user * __user *iocbpp)
303 @@ -1742,8 +1826,8 @@
304 * invalid. May fail with -EAGAIN if the iocb specified was not
305 * cancelled. Will fail with -ENOSYS if not implemented.
307 -SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
308 - struct io_event __user *, result)
309 +asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
310 + struct io_event __user *result)
312 int (*cancel)(struct kiocb *iocb, struct io_event *res);
314 @@ -1804,11 +1888,11 @@
315 * will be updated if not NULL and the operation blocks. Will fail
316 * with -ENOSYS if not implemented.
318 -SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
321 - struct io_event __user *, events,
322 - struct timespec __user *, timeout)
323 +asmlinkage long sys_io_getevents(aio_context_t ctx_id,
326 + struct io_event __user *events,
327 + struct timespec __user *timeout)
329 struct kioctx *ioctx = lookup_ioctx(ctx_id);