1 From: Greg Kroah-Hartman <gregkh@suse.de>
2 Subject: Upstream 2.6.27.24 release from kernel.org
4 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
6 diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
7 index 8362860..0a7c8a9 100644
8 --- a/Documentation/filesystems/Locking
9 +++ b/Documentation/filesystems/Locking
10 @@ -502,23 +502,31 @@ prototypes:
11 void (*open)(struct vm_area_struct*);
12 void (*close)(struct vm_area_struct*);
13 int (*fault)(struct vm_area_struct*, struct vm_fault *);
14 - int (*page_mkwrite)(struct vm_area_struct *, struct page *);
15 + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
16 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
19 BKL mmap_sem PageLocked(page)
23 -page_mkwrite: no yes no
24 +fault: no yes can return with page locked
25 +page_mkwrite: no yes can return with page locked
28 - ->page_mkwrite() is called when a previously read-only page is
29 -about to become writeable. The file system is responsible for
30 -protecting against truncate races. Once appropriate action has been
31 -taking to lock out truncate, the page range should be verified to be
32 -within i_size. The page mapping should also be checked that it is not
34 + ->fault() is called when a previously not present pte is about
35 +to be faulted in. The filesystem must find and return the page associated
36 +with the passed in "pgoff" in the vm_fault structure. If it is possible that
37 +the page may be truncated and/or invalidated, then the filesystem must lock
38 +the page, then ensure it is not already truncated (the page lock will block
39 +subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
40 +locked. The VM will unlock the page.
42 + ->page_mkwrite() is called when a previously read-only pte is
43 +about to become writeable. The filesystem again must ensure that there are
44 +no truncate/invalidate races, and then return with the page locked. If
45 +the page has been truncated, the filesystem should not look up a new page
46 +like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
47 +will cause the VM to retry the fault.
49 ->access() is called when get_user_pages() fails in
50 acces_process_vm(), typically used to debug a process through
51 diff --git a/Makefile b/Makefile
52 index a5c7ae5..2b8138a 100644
61 NAME = Trembling Tortoise
64 diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
65 index eb8f72c..0e034a4 100644
66 --- a/drivers/i2c/algos/i2c-algo-bit.c
67 +++ b/drivers/i2c/algos/i2c-algo-bit.c
68 @@ -104,7 +104,7 @@ static int sclhi(struct i2c_algo_bit_data *adap)
69 * chips may hold it low ("clock stretching") while they
70 * are processing data internally.
72 - if (time_after_eq(jiffies, start + adap->timeout))
73 + if (time_after(jiffies, start + adap->timeout))
77 diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c
78 index d50b329..2346a89 100644
79 --- a/drivers/i2c/algos/i2c-algo-pca.c
80 +++ b/drivers/i2c/algos/i2c-algo-pca.c
81 @@ -270,10 +270,21 @@ static int pca_xfer(struct i2c_adapter *i2c_adap,
83 case 0x30: /* Data byte in I2CDAT has been transmitted; NOT ACK has been received */
84 DEB2("NOT ACK received after data byte\n");
88 case 0x38: /* Arbitration lost during SLA+W, SLA+R or data bytes */
89 DEB2("Arbitration lost\n");
91 + * The PCA9564 data sheet (2006-09-01) says "A
92 + * START condition will be transmitted when the
93 + * bus becomes free (STOP or SCL and SDA high)"
94 + * when the STA bit is set (p. 11).
96 + * In case this won't work, try pca_reset()
102 case 0x58: /* Data byte has been received; NOT ACK has been returned */
103 diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
104 index 666b7ba..8c50857 100644
105 --- a/drivers/md/bitmap.c
106 +++ b/drivers/md/bitmap.c
107 @@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
111 + bitmap->filemap[bitmap->file_pages++] = page;
112 + bitmap->last_page_size = count;
116 * if bitmap is out of date, dirty the
117 @@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
118 write_page(bitmap, page, 1);
121 - if (bitmap->flags & BITMAP_WRITE_ERROR) {
122 - /* release, page not in filemap yet */
124 + if (bitmap->flags & BITMAP_WRITE_ERROR)
129 - bitmap->filemap[bitmap->file_pages++] = page;
130 - bitmap->last_page_size = count;
132 paddr = kmap_atomic(page, KM_USER0);
133 if (bitmap->flags & BITMAP_HOSTENDIAN)
134 @@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
135 kunmap_atomic(paddr, KM_USER0);
137 /* if the disk bit is set, set the memory bit */
138 - bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
139 - ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
141 + int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
143 + bitmap_set_memory_bits(bitmap,
144 + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
147 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
149 @@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
150 spin_lock_irqsave(&bitmap->lock, flags);
151 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
153 - bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
155 + bmc = bitmap_get_counter(bitmap,
156 + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
160 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
161 @@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
162 } else if (*bmc == 1) {
163 /* we can clear the bit */
165 - bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
166 + bitmap_count_page(bitmap,
167 + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
171 @@ -1485,7 +1486,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
174 for (chunk = s; chunk <= e; chunk++) {
175 - sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
176 + sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
177 bitmap_set_memory_bits(bitmap, sec, 1);
178 bitmap_file_set_bit(bitmap, sec);
180 diff --git a/drivers/md/md.c b/drivers/md/md.c
181 index 60f3e59..ebbc3bb 100644
182 --- a/drivers/md/md.c
183 +++ b/drivers/md/md.c
184 @@ -2772,11 +2772,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
187 spin_unlock_irq(&mddev->write_lock);
190 - mddev->recovery_cp = MaxSector;
191 - err = do_md_run(mddev);
198 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
199 index dc50f98..b08dd95 100644
200 --- a/drivers/md/raid10.c
201 +++ b/drivers/md/raid10.c
202 @@ -1805,17 +1805,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
203 r10_bio->sector = sect;
205 raid10_find_phys(conf, r10_bio);
206 - /* Need to check if this section will still be
208 + /* Need to check if the array will still be
211 - for (j=0; j<conf->copies;j++) {
212 - int d = r10_bio->devs[j].devnum;
213 - if (conf->mirrors[d].rdev == NULL ||
214 - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
215 + for (j=0; j<conf->raid_disks; j++)
216 + if (conf->mirrors[j].rdev == NULL ||
217 + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
223 must_sync = bitmap_start_sync(mddev->bitmap, sect,
224 &sync_blocks, still_degraded);
226 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
227 index b70c531..a6e730f 100644
228 --- a/drivers/net/ehea/ehea_main.c
229 +++ b/drivers/net/ehea/ehea_main.c
230 @@ -529,14 +529,17 @@ static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array,
235 - prefetchw(pref + EHEA_CACHE_LINE);
237 - pref = (skb_array[x]->data);
239 - prefetch(pref + EHEA_CACHE_LINE);
240 - prefetch(pref + EHEA_CACHE_LINE * 2);
241 - prefetch(pref + EHEA_CACHE_LINE * 3);
244 + prefetchw(pref + EHEA_CACHE_LINE);
246 + pref = (skb_array[x]->data);
248 + prefetch(pref + EHEA_CACHE_LINE);
249 + prefetch(pref + EHEA_CACHE_LINE * 2);
250 + prefetch(pref + EHEA_CACHE_LINE * 3);
253 skb = skb_array[skb_index];
254 skb_array[skb_index] = NULL;
256 @@ -553,12 +556,14 @@ static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array,
261 - prefetchw(pref + EHEA_CACHE_LINE);
264 + prefetchw(pref + EHEA_CACHE_LINE);
266 - pref = (skb_array[x]->data);
268 - prefetchw(pref + EHEA_CACHE_LINE);
269 + pref = (skb_array[x]->data);
271 + prefetchw(pref + EHEA_CACHE_LINE);
274 skb = skb_array[wqe_index];
275 skb_array[wqe_index] = NULL;
276 diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
277 index 3612607..32e7acb 100644
278 --- a/drivers/serial/mpc52xx_uart.c
279 +++ b/drivers/serial/mpc52xx_uart.c
280 @@ -515,7 +515,7 @@ mpc52xx_uart_startup(struct uart_port *port)
283 ret = request_irq(port->irq, mpc52xx_uart_int,
284 - IRQF_DISABLED | IRQF_SAMPLE_RANDOM | IRQF_SHARED,
285 + IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
286 "mpc52xx_psc_uart", port);
289 diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c
290 index 4154be3..58c4d37 100644
291 --- a/drivers/usb/gadget/usbstring.c
292 +++ b/drivers/usb/gadget/usbstring.c
293 @@ -38,7 +38,7 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
294 uchar = (c & 0x1f) << 6;
297 - if ((c & 0xc0) != 0xc0)
298 + if ((c & 0xc0) != 0x80)
302 @@ -49,13 +49,13 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
303 uchar = (c & 0x0f) << 12;
306 - if ((c & 0xc0) != 0xc0)
307 + if ((c & 0xc0) != 0x80)
313 - if ((c & 0xc0) != 0xc0)
314 + if ((c & 0xc0) != 0x80)
318 diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
319 index 4835bdc..d1c3cba 100644
320 --- a/drivers/video/fb_defio.c
321 +++ b/drivers/video/fb_defio.c
322 @@ -70,8 +70,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
324 /* vm_ops->page_mkwrite handler */
325 static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
327 + struct vm_fault *vmf)
329 + struct page *page = vmf->page;
330 struct fb_info *info = vma->vm_private_data;
331 struct fb_deferred_io *fbdefio = info->fbdefio;
333 diff --git a/fs/buffer.c b/fs/buffer.c
334 index a5d806d..abe9640 100644
337 @@ -2402,20 +2402,22 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
341 -block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
342 +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
343 get_block_t get_block)
345 + struct page *page = vmf->page;
346 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
350 + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
353 size = i_size_read(inode);
354 if ((page->mapping != inode->i_mapping) ||
355 (page_offset(page) > size)) {
356 /* page got truncated out from underneath us */
362 /* page is wholly or partially inside EOF */
363 @@ -2428,8 +2430,16 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
365 ret = block_commit_write(page, 0, end);
369 + if (unlikely(ret)) {
371 + if (ret == -ENOMEM)
372 + ret = VM_FAULT_OOM;
373 + else /* -ENOSPC, -EIO, etc */
374 + ret = VM_FAULT_SIGBUS;
376 + ret = VM_FAULT_LOCKED;
382 diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
383 index 14eb9a2..604ce8a 100644
384 --- a/fs/cifs/cifs_unicode.h
385 +++ b/fs/cifs/cifs_unicode.h
386 @@ -64,6 +64,13 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
390 + * To be safe - for UCS to UTF-8 with strings loaded with the rare long
391 + * characters alloc more to account for such multibyte target UTF-8
394 +#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2)
397 * UniStrcat: Concatenate the second string to the first
400 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
401 index 9231e0a..cff0c53 100644
402 --- a/fs/cifs/cifssmb.c
403 +++ b/fs/cifs/cifssmb.c
404 @@ -91,23 +91,22 @@ static int
405 cifs_strncpy_to_host(char **dst, const char *src, const int maxlen,
406 const bool is_unicode, const struct nls_table *nls_codepage)
409 + int src_len, dst_len;
412 - plen = UniStrnlen((wchar_t *)src, maxlen);
413 - *dst = kmalloc(plen + 2, GFP_KERNEL);
414 + src_len = UniStrnlen((wchar_t *)src, maxlen);
415 + *dst = kmalloc((4 * src_len) + 2, GFP_KERNEL);
417 goto cifs_strncpy_to_host_ErrExit;
418 - cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage);
419 + dst_len = cifs_strfromUCS_le(*dst, (__le16 *)src, src_len, nls_codepage);
420 + (*dst)[dst_len + 1] = 0;
422 - plen = strnlen(src, maxlen);
423 - *dst = kmalloc(plen + 2, GFP_KERNEL);
424 + src_len = strnlen(src, maxlen);
425 + *dst = kmalloc(src_len + 1, GFP_KERNEL);
427 goto cifs_strncpy_to_host_ErrExit;
428 - strncpy(*dst, src, plen);
429 + strlcpy(*dst, src, src_len + 1);
432 - (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */
435 cifs_strncpy_to_host_ErrExit:
436 diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
437 index 21a1abf..d059b3f 100644
438 --- a/fs/cifs/connect.c
439 +++ b/fs/cifs/connect.c
440 @@ -3549,16 +3549,12 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
441 BCC(smb_buffer_response)) {
442 kfree(tcon->nativeFileSystem);
443 tcon->nativeFileSystem =
444 - kzalloc(2*(length + 1), GFP_KERNEL);
445 + kzalloc((4 * length) + 2, GFP_KERNEL);
446 if (tcon->nativeFileSystem)
448 tcon->nativeFileSystem,
450 length, nls_codepage);
451 - bcc_ptr += 2 * length;
452 - bcc_ptr[0] = 0; /* null terminate the string */
456 /* else do not bother copying these information fields*/
458 diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
459 index b891553..6205593 100644
462 @@ -685,14 +685,15 @@ cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
463 NLS_MAX_CHARSET_SIZE);
473 /* make sure we do not overrun callers allocated temp buffer */
474 - if (j >= (2 * NAME_MAX))
476 + if (j >= UNICODE_NAME_MAX)
480 diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
481 index 58d5729..2878892 100644
482 --- a/fs/cifs/readdir.c
483 +++ b/fs/cifs/readdir.c
484 @@ -1075,7 +1075,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
485 with the rare long characters alloc more to account for
486 such multibyte target UTF-8 characters. cifs_unicode.c,
487 which actually does the conversion, has the same limit */
488 - tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL);
489 + tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
490 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
491 if (current_entry == NULL) {
492 /* evaluate whether this case is an error */
493 diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
494 index 89fac77..3890cc2 100644
497 @@ -202,27 +202,26 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
499 char *data = *pbcc_area;
503 cFYI(1, ("bleft %d", bleft));
506 - /* SMB header is unaligned, so cifs servers word align start of
509 - bleft--; /* Windows servers do not always double null terminate
510 - their final Unicode string - in which case we
511 - now will not attempt to decode the byte of junk
512 - which follows it */
514 + * Windows servers do not always double null terminate their final
515 + * Unicode string. Check to see if there is an odd number of bytes
516 + * left. If so, then add an extra NULL pad byte to the end of the
519 + * See section 2.7.2 in "Implementing CIFS" for details
526 words_left = bleft / 2;
528 /* save off server operating system */
529 len = UniStrnlen((wchar_t *) data, words_left);
531 -/* We look for obvious messed up bcc or strings in response so we do not go off
532 - the end since (at least) WIN2K and Windows XP have a major bug in not null
533 - terminating last Unicode string in response */
534 if (len >= words_left)
537 @@ -260,13 +259,10 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
540 kfree(ses->serverDomain);
541 - ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
542 - if (ses->serverDomain != NULL) {
543 + ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL);
544 + if (ses->serverDomain != NULL)
545 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
547 - ses->serverDomain[2*len] = 0;
548 - ses->serverDomain[(2*len) + 1] = 0;
550 data += 2 * (len + 1);
551 words_left -= len + 1;
553 @@ -616,12 +612,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
556 /* BB check if Unicode and decode strings */
557 - if (smb_buf->Flags2 & SMBFLG2_UNICODE)
558 + if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
559 + /* unicode string area must be word-aligned */
560 + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
564 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining,
569 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining,
575 diff --git a/fs/eventpoll.c b/fs/eventpoll.c
576 index 801de2c..fd5835b 100644
579 @@ -1132,7 +1132,7 @@ error_return:
581 SYSCALL_DEFINE1(epoll_create, int, size)
587 return sys_epoll_create1(0);
588 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
589 index f613d57..eadbee3 100644
592 @@ -1084,7 +1084,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
593 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
594 extern int ext4_block_truncate_page(handle_t *handle,
595 struct address_space *mapping, loff_t from);
596 -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
597 +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
600 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
601 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
602 index b233ade..63b911b 100644
603 --- a/fs/ext4/inode.c
604 +++ b/fs/ext4/inode.c
605 @@ -4861,8 +4861,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
606 return !buffer_mapped(bh);
609 -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
610 +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
612 + struct page *page = vmf->page;
616 @@ -4913,6 +4914,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
621 + ret = VM_FAULT_SIGBUS;
622 up_read(&inode->i_alloc_sem);
625 diff --git a/fs/fcntl.c b/fs/fcntl.c
626 index 08a109b..ac79b7e 100644
629 @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
631 if (unlikely(newfd == oldfd)) { /* corner case */
632 struct files_struct *files = current->files;
633 + int retval = oldfd;
636 if (!fcheck_files(files, oldfd))
643 return sys_dup3(oldfd, newfd, 0);
645 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
646 index 3ada9d7..0c92f15 100644
649 @@ -1219,8 +1219,9 @@ static void fuse_vma_close(struct vm_area_struct *vma)
651 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
653 -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
654 +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
656 + struct page *page = vmf->page;
658 * Don't use page->mapping as it may become NULL from a
659 * concurrent truncate.
660 diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
661 index e9a366d..641c43b 100644
662 --- a/fs/gfs2/ops_file.c
663 +++ b/fs/gfs2/ops_file.c
664 @@ -338,8 +338,9 @@ static int gfs2_allocate_page_backing(struct page *page)
665 * blocks allocated on disk to back that page.
668 -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
669 +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
671 + struct page *page = vmf->page;
672 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
673 struct gfs2_inode *ip = GFS2_I(inode);
674 struct gfs2_sbd *sdp = GFS2_SB(inode);
675 @@ -411,6 +412,10 @@ out_unlock:
678 gfs2_holder_uninit(&gh);
679 + if (ret == -ENOMEM)
680 + ret = VM_FAULT_OOM;
682 + ret = VM_FAULT_SIGBUS;
686 diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
687 index 74f92b7..bff8733 100644
690 @@ -1613,8 +1613,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
691 } else if (atomic_read(&new_dentry->d_count) > 1)
692 /* dentry still busy? */
695 - nfs_drop_nlink(new_inode);
700 @@ -1627,10 +1626,8 @@ go_ahead:
702 nfs_inode_return_delegation(old_inode);
704 - if (new_inode != NULL) {
705 + if (new_inode != NULL)
706 nfs_inode_return_delegation(new_inode);
707 - d_delete(new_dentry);
710 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
711 new_dir, &new_dentry->d_name);
712 @@ -1639,6 +1636,8 @@ out:
716 + if (new_inode != NULL)
717 + nfs_drop_nlink(new_inode);
718 d_move(old_dentry, new_dentry);
719 nfs_set_verifier(new_dentry,
720 nfs_save_change_attribute(new_dir));
721 diff --git a/fs/nfs/file.c b/fs/nfs/file.c
722 index 30541f0..4a57a0f 100644
725 @@ -448,8 +448,9 @@ const struct address_space_operations nfs_file_aops = {
726 .launder_page = nfs_launder_page,
729 -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
730 +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
732 + struct page *page = vmf->page;
733 struct file *filp = vma->vm_file;
734 struct dentry *dentry = filp->f_path.dentry;
736 @@ -476,11 +477,11 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
739 ret = nfs_updatepage(filp, page, 0, pagelen);
744 + return VM_FAULT_LOCKED;
747 + return VM_FAULT_SIGBUS;
750 static struct vm_operations_struct nfs_file_vm_ops = {
751 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
752 index b0b07df..abffc90 100644
753 --- a/fs/nfsd/nfs4xdr.c
754 +++ b/fs/nfsd/nfs4xdr.c
755 @@ -1833,6 +1833,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
756 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
758 return nfserrno(PTR_ERR(dentry));
759 + if (!dentry->d_inode) {
761 + * nfsd_buffered_readdir drops the i_mutex between
762 + * readdir and calling this callback, leaving a window
763 + * where this directory entry could have gone away.
766 + return nfserr_noent;
771 @@ -1895,6 +1904,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
772 struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
774 __be32 *p = cd->buffer;
776 __be32 nfserr = nfserr_toosmall;
778 /* In nfsv4, "." and ".." never make it onto the wire.. */
779 @@ -1911,7 +1921,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
782 *p++ = xdr_one; /* mark entry present */
783 - cd->offset = p; /* remember pointer */
785 p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
786 p = xdr_encode_array(p, name, namlen); /* name length & name */
788 @@ -1925,6 +1935,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
796 * If the client requested the RDATTR_ERROR attribute,
797 @@ -1943,6 +1955,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
799 cd->buflen -= (p - cd->buffer);
801 + cd->offset = cookiep;
803 cd->common.err = nfs_ok;
806 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
807 index 6f7ea0a..08af0ed 100644
808 --- a/fs/ocfs2/file.c
809 +++ b/fs/ocfs2/file.c
810 @@ -2075,6 +2075,22 @@ out_sems:
811 return written ? written : ret;
814 +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
816 + struct splice_desc *sd)
820 + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
821 + sd->total_len, 0, NULL);
827 + return splice_from_pipe_feed(pipe, sd, pipe_to_file);
830 static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
833 @@ -2082,38 +2098,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
837 - struct inode *inode = out->f_path.dentry->d_inode;
838 + struct address_space *mapping = out->f_mapping;
839 + struct inode *inode = mapping->host;
840 + struct splice_desc sd = {
847 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
849 out->f_path.dentry->d_name.len,
850 out->f_path.dentry->d_name.name);
852 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
854 + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
856 - ret = ocfs2_rw_lock(inode, 1);
861 + splice_from_pipe_begin(&sd);
863 + ret = splice_from_pipe_next(pipe, &sd);
867 - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
873 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
874 + ret = ocfs2_rw_lock(inode, 1);
878 + ret = ocfs2_splice_to_file(pipe, out, &sd);
879 + ocfs2_rw_unlock(inode, 1);
881 + mutex_unlock(&inode->i_mutex);
883 + splice_from_pipe_end(pipe, &sd);
886 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
887 - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
889 mutex_unlock(&pipe->inode->i_mutex);
892 - ocfs2_rw_unlock(inode, 1);
894 - mutex_unlock(&inode->i_mutex);
895 + if (sd.num_spliced)
896 + ret = sd.num_spliced;
899 + unsigned long nr_pages;
902 + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
905 + * If file or inode is SYNC and we actually wrote some data,
908 + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
911 + mutex_lock(&inode->i_mutex);
912 + err = ocfs2_rw_lock(inode, 1);
916 + err = generic_osync_inode(inode, mapping,
917 + OSYNC_METADATA|OSYNC_DATA);
918 + ocfs2_rw_unlock(inode, 1);
920 + mutex_unlock(&inode->i_mutex);
925 + balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
930 diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
931 index 3dc18d6..2383cbd 100644
932 --- a/fs/ocfs2/mmap.c
933 +++ b/fs/ocfs2/mmap.c
934 @@ -150,8 +150,9 @@ out:
938 -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
939 +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
941 + struct page *page = vmf->page;
942 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
943 struct buffer_head *di_bh = NULL;
944 sigset_t blocked, oldset;
945 @@ -192,7 +193,8 @@ out:
946 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
951 + ret = VM_FAULT_SIGBUS;
955 diff --git a/fs/splice.c b/fs/splice.c
956 index aea1eb4..2f2d8c1 100644
959 @@ -553,8 +553,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
960 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
961 * a new page in the output file page cache and fill/dirty that.
963 -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
964 - struct splice_desc *sd)
965 +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
966 + struct splice_desc *sd)
968 struct file *file = sd->u.file;
969 struct address_space *mapping = file->f_mapping;
970 @@ -598,108 +598,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
974 +EXPORT_SYMBOL(pipe_to_file);
976 +static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
979 + if (waitqueue_active(&pipe->wait))
980 + wake_up_interruptible(&pipe->wait);
981 + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
985 - * __splice_from_pipe - splice data from a pipe to given actor
986 + * splice_from_pipe_feed - feed available data from a pipe to a file
987 * @pipe: pipe to splice from
988 * @sd: information to @actor
989 * @actor: handler that splices the data
992 - * This function does little more than loop over the pipe and call
993 - * @actor to do the actual moving of a single struct pipe_buffer to
994 - * the desired destination. See pipe_to_file, pipe_to_sendpage, or
997 + * This function loops over the pipe and calls @actor to do the
998 + * actual moving of a single struct pipe_buffer to the desired
999 + * destination. It returns when there are no more buffers left in
1000 + * the pipe or if the requested number of bytes (@sd->total_len)
1001 + * have been copied. It returns a positive number (one) if the
1002 + * pipe needs to be filled with more data, zero if the required
1003 + * number of bytes have been copied and -errno on error.
1005 + * This, together with splice_from_pipe_{begin,end,next}, may be
1006 + * used to implement the functionality of __splice_from_pipe() when
1007 + * locking is required around copying the pipe buffers to the
1010 -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
1011 - splice_actor *actor)
1012 +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
1013 + splice_actor *actor)
1015 - int ret, do_wakeup, err;
1021 - if (pipe->nrbufs) {
1022 - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1023 - const struct pipe_buf_operations *ops = buf->ops;
1026 - sd->len = buf->len;
1027 - if (sd->len > sd->total_len)
1028 - sd->len = sd->total_len;
1029 + while (pipe->nrbufs) {
1030 + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1031 + const struct pipe_buf_operations *ops = buf->ops;
1033 - err = actor(pipe, buf, sd);
1035 - if (!ret && err != -ENODATA)
1037 + sd->len = buf->len;
1038 + if (sd->len > sd->total_len)
1039 + sd->len = sd->total_len;
1043 + ret = actor(pipe, buf, sd);
1045 + if (ret == -ENODATA)
1049 + buf->offset += ret;
1053 - buf->offset += err;
1055 + sd->num_spliced += ret;
1058 + sd->total_len -= ret;
1062 - sd->total_len -= err;
1067 + ops->release(pipe, buf);
1068 + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
1071 + sd->need_wakeup = true;
1076 - ops->release(pipe, buf);
1077 - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
1082 + if (!sd->total_len)
1086 - if (!sd->total_len)
1091 +EXPORT_SYMBOL(splice_from_pipe_feed);
1096 + * splice_from_pipe_next - wait for some data to splice from
1097 + * @pipe: pipe to splice from
1098 + * @sd: information about the splice operation
1101 + * This function will wait for some data and return a positive
1102 + * value (one) if pipe buffers are available. It will return zero
1103 + * or -errno if no more data needs to be spliced.
1105 +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
1107 + while (!pipe->nrbufs) {
1110 - if (!pipe->waiting_writers) {
1116 - if (sd->flags & SPLICE_F_NONBLOCK) {
1121 + if (!pipe->waiting_writers && sd->num_spliced)
1124 - if (signal_pending(current)) {
1126 - ret = -ERESTARTSYS;
1129 + if (sd->flags & SPLICE_F_NONBLOCK)
1134 - if (waitqueue_active(&pipe->wait))
1135 - wake_up_interruptible_sync(&pipe->wait);
1136 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1138 + if (signal_pending(current))
1139 + return -ERESTARTSYS;
1141 + if (sd->need_wakeup) {
1142 + wakeup_pipe_writers(pipe);
1143 + sd->need_wakeup = false;
1151 - if (waitqueue_active(&pipe->wait))
1152 - wake_up_interruptible(&pipe->wait);
1153 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1157 +EXPORT_SYMBOL(splice_from_pipe_next);
1161 + * splice_from_pipe_begin - start splicing from pipe
1162 + * @sd: information about the splice operation
1165 + * This function should be called before a loop containing
1166 + * splice_from_pipe_next() and splice_from_pipe_feed() to
1167 + * initialize the necessary fields of @sd.
1169 +void splice_from_pipe_begin(struct splice_desc *sd)
1171 + sd->num_spliced = 0;
1172 + sd->need_wakeup = false;
1174 +EXPORT_SYMBOL(splice_from_pipe_begin);
1177 + * splice_from_pipe_end - finish splicing from pipe
1178 + * @pipe: pipe to splice from
1179 + * @sd: information about the splice operation
1182 + * This function will wake up pipe writers if necessary. It should
1183 + * be called after a loop containing splice_from_pipe_next() and
1184 + * splice_from_pipe_feed().
1186 +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
1188 + if (sd->need_wakeup)
1189 + wakeup_pipe_writers(pipe);
1191 +EXPORT_SYMBOL(splice_from_pipe_end);
1194 + * __splice_from_pipe - splice data from a pipe to given actor
1195 + * @pipe: pipe to splice from
1196 + * @sd: information to @actor
1197 + * @actor: handler that splices the data
1200 + * This function does little more than loop over the pipe and call
1201 + * @actor to do the actual moving of a single struct pipe_buffer to
1202 + * the desired destination. See pipe_to_file, pipe_to_sendpage, or
1206 +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
1207 + splice_actor *actor)
1211 + splice_from_pipe_begin(sd);
1213 + ret = splice_from_pipe_next(pipe, sd);
1215 + ret = splice_from_pipe_feed(pipe, sd, actor);
1216 + } while (ret > 0);
1217 + splice_from_pipe_end(pipe, sd);
1219 + return sd->num_spliced ? sd->num_spliced : ret;
1221 EXPORT_SYMBOL(__splice_from_pipe);
1223 @@ -713,7 +783,7 @@ EXPORT_SYMBOL(__splice_from_pipe);
1224 * @actor: handler that splices the data
1227 - * See __splice_from_pipe. This function locks the input and output inodes,
1228 + * See __splice_from_pipe. This function locks the pipe inode,
1229 * otherwise it's identical to __splice_from_pipe().
1232 @@ -722,7 +792,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
1233 splice_actor *actor)
1236 - struct inode *inode = out->f_mapping->host;
1237 struct splice_desc sd = {
1240 @@ -730,24 +799,11 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
1245 - * The actor worker might be calling ->prepare_write and
1246 - * ->commit_write. Most of the time, these expect i_mutex to
1247 - * be held. Since this may result in an ABBA deadlock with
1248 - * pipe->inode, we have to order lock acquiry here.
1250 - * Outer lock must be inode->i_mutex, as pipe_wait() will
1251 - * release and reacquire pipe->inode->i_mutex, AND inode must
1252 - * never be a pipe.
1254 - WARN_ON(S_ISFIFO(inode->i_mode));
1255 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1257 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1258 + mutex_lock(&pipe->inode->i_mutex);
1259 ret = __splice_from_pipe(pipe, &sd, actor);
1261 mutex_unlock(&pipe->inode->i_mutex);
1262 - mutex_unlock(&inode->i_mutex);
1266 @@ -838,17 +894,29 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1270 - WARN_ON(S_ISFIFO(inode->i_mode));
1271 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1272 - ret = file_remove_suid(out);
1273 - if (likely(!ret)) {
1275 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1276 - ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
1278 - mutex_unlock(&pipe->inode->i_mutex);
1280 - mutex_unlock(&inode->i_mutex);
1282 + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
1284 + splice_from_pipe_begin(&sd);
1286 + ret = splice_from_pipe_next(pipe, &sd);
1290 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1291 + ret = file_remove_suid(out);
1293 + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
1294 + mutex_unlock(&inode->i_mutex);
1295 + } while (ret > 0);
1296 + splice_from_pipe_end(pipe, &sd);
1299 + mutex_unlock(&pipe->inode->i_mutex);
1301 + if (sd.num_spliced)
1302 + ret = sd.num_spliced;
1305 unsigned long nr_pages;
1307 diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
1308 index 40033dc..82b1c4a 100644
1309 --- a/fs/ubifs/file.c
1310 +++ b/fs/ubifs/file.c
1311 @@ -1140,8 +1140,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1312 * mmap()d file has taken write protection fault and is being made
1313 * writable. UBIFS must ensure page is budgeted for.
1315 -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1316 +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1318 + struct page *page = vmf->page;
1319 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1320 struct ubifs_info *c = inode->i_sb->s_fs_info;
1321 struct timespec now = ubifs_current_time(inode);
1322 @@ -1153,7 +1154,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1323 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
1325 if (unlikely(c->ro_media))
1327 + return VM_FAULT_SIGBUS; /* -EROFS */
1330 * We have not locked @page so far so we may budget for changing the
1331 @@ -1186,7 +1187,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1333 ubifs_warn("out of space for mmapped file "
1334 "(inode number %lu)", inode->i_ino);
1336 + return VM_FAULT_SIGBUS;
1340 @@ -1226,6 +1227,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1343 ubifs_release_budget(c, &req);
1345 + err = VM_FAULT_SIGBUS;
1349 diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
1350 index 5311c1a..469502c 100644
1351 --- a/fs/xfs/linux-2.6/xfs_file.c
1352 +++ b/fs/xfs/linux-2.6/xfs_file.c
1353 @@ -427,9 +427,9 @@ xfs_file_ioctl_invis(
1355 xfs_vm_page_mkwrite(
1356 struct vm_area_struct *vma,
1357 - struct page *page)
1358 + struct vm_fault *vmf)
1360 - return block_page_mkwrite(vma, page, xfs_get_blocks);
1361 + return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1364 const struct file_operations xfs_file_operations = {
1365 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
1366 index eadaab4..657c072 100644
1367 --- a/include/linux/buffer_head.h
1368 +++ b/include/linux/buffer_head.h
1369 @@ -222,7 +222,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t,
1370 get_block_t *, loff_t *);
1371 int generic_cont_expand_simple(struct inode *inode, loff_t size);
1372 int block_commit_write(struct page *page, unsigned from, unsigned to);
1373 -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
1374 +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
1375 get_block_t get_block);
1376 void block_sync_page(struct page *);
1377 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
1378 diff --git a/include/linux/mm.h b/include/linux/mm.h
1379 index 2a75579..ae9775d 100644
1380 --- a/include/linux/mm.h
1381 +++ b/include/linux/mm.h
1382 @@ -138,6 +138,7 @@ extern pgprot_t protection_map[16];
1384 #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
1385 #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
1386 +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
1390 @@ -173,7 +174,7 @@ struct vm_operations_struct {
1392 /* notification that a previously read-only page is about to become
1393 * writable, if an error is returned it will cause a SIGBUS */
1394 - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
1395 + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
1397 /* called by access_process_vm when get_user_pages() fails, typically
1398 * for use by special VMAs that can switch between memory and hardware
1399 diff --git a/include/linux/splice.h b/include/linux/splice.h
1400 index 528dcb9..5f3faa9 100644
1401 --- a/include/linux/splice.h
1402 +++ b/include/linux/splice.h
1403 @@ -36,6 +36,8 @@ struct splice_desc {
1404 void *data; /* cookie */
1406 loff_t pos; /* file position */
1407 + size_t num_spliced; /* number of bytes already spliced */
1408 + bool need_wakeup; /* need to wake up writer */
1411 struct partial_page {
1412 @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
1414 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
1415 struct splice_desc *, splice_actor *);
1416 +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
1418 +extern int splice_from_pipe_next(struct pipe_inode_info *,
1419 + struct splice_desc *);
1420 +extern void splice_from_pipe_begin(struct splice_desc *);
1421 +extern void splice_from_pipe_end(struct pipe_inode_info *,
1422 + struct splice_desc *);
1423 +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
1424 + struct splice_desc *);
1426 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
1427 struct splice_pipe_desc *);
1428 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
1429 diff --git a/mm/memory.c b/mm/memory.c
1430 index 1002f47..3856c36 100644
1433 @@ -1801,6 +1801,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1434 * get_user_pages(.write=1, .force=1).
1436 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
1437 + struct vm_fault vmf;
1440 + vmf.virtual_address = (void __user *)(address &
1442 + vmf.pgoff = old_page->index;
1443 + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1444 + vmf.page = old_page;
1447 * Notify the address space that the page is about to
1448 * become writable so that it can prohibit this or wait
1449 @@ -1812,8 +1821,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1450 page_cache_get(old_page);
1451 pte_unmap_unlock(page_table, ptl);
1453 - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
1454 + tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
1455 + if (unlikely(tmp &
1456 + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
1458 goto unwritable_page;
1460 + if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
1461 + lock_page(old_page);
1462 + if (!old_page->mapping) {
1463 + ret = 0; /* retry the fault */
1464 + unlock_page(old_page);
1465 + goto unwritable_page;
1468 + VM_BUG_ON(!PageLocked(old_page));
1471 * Since we dropped the lock we need to revalidate
1472 @@ -1823,9 +1845,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1474 page_table = pte_offset_map_lock(mm, pmd, address,
1476 - page_cache_release(old_page);
1477 - if (!pte_same(*page_table, orig_pte))
1478 + if (!pte_same(*page_table, orig_pte)) {
1479 + unlock_page(old_page);
1480 + page_cache_release(old_page);
1486 @@ -1930,9 +1954,6 @@ gotten:
1488 pte_unmap_unlock(page_table, ptl);
1491 - file_update_time(vma->vm_file);
1494 * Yes, Virginia, this is actually required to prevent a race
1495 * with clear_page_dirty_for_io() from clearing the page dirty
1496 @@ -1941,21 +1962,46 @@ unlock:
1498 * do_no_page is protected similarly.
1500 - wait_on_page_locked(dirty_page);
1501 - set_page_dirty_balance(dirty_page, page_mkwrite);
1502 + if (!page_mkwrite) {
1503 + wait_on_page_locked(dirty_page);
1504 + set_page_dirty_balance(dirty_page, page_mkwrite);
1506 put_page(dirty_page);
1507 + if (page_mkwrite) {
1508 + struct address_space *mapping = dirty_page->mapping;
1510 + set_page_dirty(dirty_page);
1511 + unlock_page(dirty_page);
1512 + page_cache_release(dirty_page);
1515 + * Some device drivers do not set page.mapping
1516 + * but still dirty their pages
1518 + balance_dirty_pages_ratelimited(mapping);
1522 + /* file_update_time outside page_lock */
1524 + file_update_time(vma->vm_file);
1528 page_cache_release(new_page);
1532 + if (page_mkwrite) {
1533 + unlock_page(old_page);
1534 + page_cache_release(old_page);
1536 page_cache_release(old_page);
1538 return VM_FAULT_OOM;
1541 page_cache_release(old_page);
1542 - return VM_FAULT_SIGBUS;
1547 @@ -2472,25 +2518,25 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1548 * to become writable
1550 if (vma->vm_ops->page_mkwrite) {
1554 - if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
1555 - ret = VM_FAULT_SIGBUS;
1556 - anon = 1; /* no anon but release vmf.page */
1557 - goto out_unlocked;
1561 - * XXX: this is not quite right (racy vs
1562 - * invalidate) to unlock and relock the page
1563 - * like this, however a better fix requires
1564 - * reworking page_mkwrite locking API, which
1565 - * is better done later.
1567 - if (!page->mapping) {
1569 - anon = 1; /* no anon but release vmf.page */
1571 + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1572 + tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
1573 + if (unlikely(tmp &
1574 + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
1576 + goto unwritable_page;
1578 + if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
1580 + if (!page->mapping) {
1581 + ret = 0; /* retry the fault */
1582 + unlock_page(page);
1583 + goto unwritable_page;
1586 + VM_BUG_ON(!PageLocked(page));
1590 @@ -2547,19 +2593,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1591 pte_unmap_unlock(page_table, ptl);
1594 - unlock_page(vmf.page);
1597 - page_cache_release(vmf.page);
1598 - else if (dirty_page) {
1600 - file_update_time(vma->vm_file);
1602 + struct address_space *mapping = page->mapping;
1604 - set_page_dirty_balance(dirty_page, page_mkwrite);
1605 + if (set_page_dirty(dirty_page))
1607 + unlock_page(dirty_page);
1608 put_page(dirty_page);
1609 + if (page_mkwrite && mapping) {
1611 + * Some device drivers do not set page.mapping but still
1612 + * dirty their pages
1614 + balance_dirty_pages_ratelimited(mapping);
1617 + /* file_update_time outside page_lock */
1619 + file_update_time(vma->vm_file);
1621 + unlock_page(vmf.page);
1623 + page_cache_release(vmf.page);
1629 + page_cache_release(page);
1633 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,