people/teissler/ipfire-2.x.git: src/patches/xen_aio.diff
Imported xen patches.
--- linux-2.6.27.21/fs/aio.c 2009-03-23 22:04:09.000000000 +0000
+++ linux-2.6.27.8/fs/aio.c 2009-03-29 15:53:57.000000000 +0000
@@ -36,6 +36,11 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>

+#ifdef CONFIG_EPOLL
+#include <linux/poll.h>
+#include <linux/eventpoll.h>
+#endif
+
 #if DEBUG > 1
 #define dprintk printk
 #else
@@ -428,7 +433,7 @@
 	req->private = NULL;
 	req->ki_iovec = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
-	req->ki_eventfd = NULL;
+	req->ki_eventfd = ERR_PTR(-EINVAL);

 	/* Check if the completion queue has enough free space to
 	 * accept an event from this io.
@@ -470,6 +475,8 @@
 {
 	assert_spin_locked(&ctx->ctx_lock);

+	if (!IS_ERR(req->ki_eventfd))
+		fput(req->ki_eventfd);
 	if (req->ki_dtor)
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
@@ -491,11 +498,8 @@
 		list_del(&req->ki_list);
 		spin_unlock_irq(&fput_lock);

-		/* Complete the fput(s) */
-		if (req->ki_filp != NULL)
-			__fput(req->ki_filp);
-		if (req->ki_eventfd != NULL)
-			__fput(req->ki_eventfd);
+		/* Complete the fput */
+		__fput(req->ki_filp);

 		/* Link the iocb into the context's free list */
 		spin_lock_irq(&ctx->ctx_lock);
@@ -513,14 +517,12 @@
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
-	int schedule_putreq = 0;
-
 	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
 		req, atomic_long_read(&req->ki_filp->f_count));

 	assert_spin_locked(&ctx->ctx_lock);

-	req->ki_users--;
+	req->ki_users --;
 	BUG_ON(req->ki_users < 0);
 	if (likely(req->ki_users))
 		return 0;
@@ -528,23 +530,10 @@
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;

-	/*
-	 * Try to optimize the aio and eventfd file* puts, by avoiding to
-	 * schedule work in case it is not __fput() time. In normal cases,
-	 * we would not be holding the last reference to the file*, so
-	 * this function will be executed w/out any aio kthread wakeup.
+	/* Must be done under the lock to serialise against cancellation.
+	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
-		schedule_putreq++;
-	else
-		req->ki_filp = NULL;
-	if (req->ki_eventfd != NULL) {
-		if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
-			schedule_putreq++;
-		else
-			req->ki_eventfd = NULL;
-	}
-	if (unlikely(schedule_putreq)) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
@@ -1008,7 +997,7 @@
 	 * eventfd. The eventfd_signal() function is safe to be called
 	 * from IRQ context.
 	 */
-	if (iocb->ki_eventfd != NULL)
+	if (!IS_ERR(iocb->ki_eventfd))
 		eventfd_signal(iocb->ki_eventfd, 1);

 put_rq:
@@ -1026,6 +1015,11 @@
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);

+#ifdef CONFIG_EPOLL
+	if (ctx->file && waitqueue_active(&ctx->poll_wait))
+		wake_up(&ctx->poll_wait);
+#endif
+
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
@@ -1033,6 +1027,8 @@
 /* aio_read_evt
  *	Pull an event off of the ioctx's event ring.  Returns the number of
  *	events fetched (0 or 1 ;-)
+ *	If ent parameter is 0, just returns the number of events that would
+ *	be fetched.
  *	FIXME: make this use cmpxchg.
  *	TODO: make the ringbuffer user mmap()able (requires FIXME).
  */
@@ -1055,13 +1051,18 @@

 	head = ring->head % info->nr;
 	if (head != ring->tail) {
-		struct io_event *evp = aio_ring_event(info, head, KM_USER1);
-		*ent = *evp;
-		head = (head + 1) % info->nr;
-		smp_mb(); /* finish reading the event before updatng the head */
-		ring->head = head;
-		ret = 1;
-		put_aio_ring_event(evp, KM_USER1);
+		if (ent) { /* event requested */
+			struct io_event *evp =
+				aio_ring_event(info, head, KM_USER1);
+			*ent = *evp;
+			head = (head + 1) % info->nr;
+			/* finish reading the event before updatng the head */
+			smp_mb();
+			ring->head = head;
+			ret = 1;
+			put_aio_ring_event(evp, KM_USER1);
+		} else /* only need to know availability */
+			ret = 1;
 	}
 	spin_unlock(&info->ring_lock);

@@ -1251,6 +1252,13 @@

 	aio_cancel_all(ioctx);
 	wait_for_all_aios(ioctx);
+#ifdef CONFIG_EPOLL
+	/* forget the poll file, but it's up to the user to close it */
+	if (ioctx->file) {
+		ioctx->file->private_data = 0;
+		ioctx->file = 0;
+	}
+#endif

 	/*
 	 * Wake up any waiters.  The setting of ctx->dead must be seen
@@ -1261,6 +1269,67 @@
 	put_ioctx(ioctx);	/* once for the lookup */
 }

+#ifdef CONFIG_EPOLL
+
+static int aio_queue_fd_close(struct inode *inode, struct file *file)
+{
+	struct kioctx *ioctx = file->private_data;
+	if (ioctx) {
+		file->private_data = 0;
+		spin_lock_irq(&ioctx->ctx_lock);
+		ioctx->file = 0;
+		spin_unlock_irq(&ioctx->ctx_lock);
+	}
+	return 0;
+}
+
+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
+{	unsigned int pollflags = 0;
+	struct kioctx *ioctx = file->private_data;
+
+	if (ioctx) {
+
+		spin_lock_irq(&ioctx->ctx_lock);
+		/* Insert inside our poll wait queue */
+		poll_wait(file, &ioctx->poll_wait, wait);
+
+		/* Check our condition */
+		if (aio_read_evt(ioctx, 0))
+			pollflags = POLLIN | POLLRDNORM;
+		spin_unlock_irq(&ioctx->ctx_lock);
+	}
+
+	return pollflags;
+}
+
+static const struct file_operations aioq_fops = {
+	.release	= aio_queue_fd_close,
+	.poll		= aio_queue_fd_poll
+};
+
+/* make_aio_fd:
+ *  Create a file descriptor that can be used to poll the event queue.
+ *  Based and piggybacked on the excellent epoll code.
+ */
+
+static int make_aio_fd(struct kioctx *ioctx)
+{
+	int error, fd;
+	struct inode *inode;
+	struct file *file;
+
+	error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
+	if (error)
+		return error;
+
+	/* associate the file with the IO context */
+	file->private_data = ioctx;
+	ioctx->file = file;
+	init_waitqueue_head(&ioctx->poll_wait);
+	return fd;
+}
+#endif
+
 /* sys_io_setup:
  *	Create an aio_context capable of receiving at least nr_events.
  *	ctxp must not point to an aio_context that already exists, and
@@ -1273,18 +1342,30 @@
  *	resources are available.  May fail with -EFAULT if an invalid
  *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
  *	implemented.
+ *
+ *	To request a selectable fd, the user context has to be initialized
+ *	to 1, instead of 0, and the return value is the fd.
+ *	This keeps the system call compatible, since a non-zero value
+ *	was not allowed so far.
  */
-SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
+asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
 {
 	struct kioctx *ioctx = NULL;
 	unsigned long ctx;
 	long ret;
+	int make_fd = 0;

 	ret = get_user(ctx, ctxp);
 	if (unlikely(ret))
 		goto out;

 	ret = -EINVAL;
+#ifdef CONFIG_EPOLL
+	if (ctx == 1) {
+		make_fd = 1;
+		ctx = 0;
+	}
+#endif
 	if (unlikely(ctx || nr_events == 0)) {
 		pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
 		         ctx, nr_events);
@@ -1295,8 +1376,12 @@
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret)
-			return 0;
+#ifdef CONFIG_EPOLL
+		if (make_fd && ret >= 0)
+			ret = make_aio_fd(ioctx);
+#endif
+		if (ret >= 0)
+			return ret;

 		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
 		io_destroy(ioctx);
@@ -1312,7 +1397,7 @@
  *	implemented.  May fail with -EFAULT if the context pointed to
  *	is invalid.
  */
-SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
+asmlinkage long sys_io_destroy(aio_context_t ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
@@ -1612,7 +1697,6 @@
 		req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
-			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
 	}
@@ -1667,8 +1751,8 @@
  *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
  *	fail with -ENOSYS if not implemented.
  */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
-		struct iocb __user * __user *, iocbpp)
+asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
+			      struct iocb __user * __user *iocbpp)
 {
 	struct kioctx *ctx;
 	long ret = 0;
@@ -1742,8 +1826,8 @@
  *	invalid.  May fail with -EAGAIN if the iocb specified was not
  *	cancelled.  Will fail with -ENOSYS if not implemented.
  */
-SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
-		struct io_event __user *, result)
+asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
+			      struct io_event __user *result)
 {
 	int (*cancel)(struct kiocb *iocb, struct io_event *res);
 	struct kioctx *ctx;
@@ -1804,11 +1888,11 @@
  *	will be updated if not NULL and the operation blocks. Will fail
  *	with -ENOSYS if not implemented.
  */
-SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
-		long, min_nr,
-		long, nr,
-		struct io_event __user *, events,
-		struct timespec __user *, timeout)
+asmlinkage long sys_io_getevents(aio_context_t ctx_id,
+				 long min_nr,
+				 long nr,
+				 struct io_event __user *events,
+				 struct timespec __user *timeout)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx_id);
 	long ret = -EINVAL;
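
Usage sketch: the comment the patch adds to sys_io_setup() describes the new calling convention on a CONFIG_EPOLL kernel carrying this change: initialise the aio_context_t to 1 instead of 0 and io_setup() returns a file descriptor whose POLLIN state follows the completion ring. The minimal userspace program below illustrates that flow under those assumptions; the nr_events value of 128 is arbitrary, the io_submit() step is elided, and on an unpatched kernel the io_setup() call simply fails with -EINVAL.

#include <linux/aio_abi.h>      /* aio_context_t, struct io_event */
#include <poll.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	aio_context_t ctx = 1;  /* 1 instead of 0 requests the pollable fd */
	struct io_event ev;
	struct pollfd pfd;
	long reaped;
	int fd;

	/* With this patch the return value is the fd rather than 0;
	 * *ctxp is still filled in with the real context id. */
	fd = syscall(SYS_io_setup, 128, &ctx);
	if (fd < 0) {
		perror("io_setup");
		return 1;
	}

	/* ... queue requests here with syscall(SYS_io_submit, ctx, ...) ... */

	/* Sleep until the completion ring holds at least one event. */
	pfd.fd = fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN)) {
		/* min_nr == 0: reap whatever is already available. */
		reaped = syscall(SYS_io_getevents, ctx, 0, 1, &ev, NULL);
		printf("reaped %ld event(s)\n", reaped);
	}

	syscall(SYS_io_destroy, ctx);
	close(fd);      /* io_destroy() forgets the poll file but does not close it */
	return 0;
}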