git.ipfire.org Git - ipfire-2.x.git/blob - src/patches/suse-2.6.27.39/patches.kernel.org/patch-2.6.27.23-24
Imported linux-2.6.27.39 suse/xen patches.
[ipfire-2.x.git] / src / patches / suse-2.6.27.39 / patches.kernel.org / patch-2.6.27.23-24
1 From: Greg Kroah-Hartman <gregkh@suse.de>
2 Subject: Upstream 2.6.27.24 release from kernel.org
3
4 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
5
6 diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
7 index 8362860..0a7c8a9 100644
8 --- a/Documentation/filesystems/Locking
9 +++ b/Documentation/filesystems/Locking
10 @@ -502,23 +502,31 @@ prototypes:
11 void (*open)(struct vm_area_struct*);
12 void (*close)(struct vm_area_struct*);
13 int (*fault)(struct vm_area_struct*, struct vm_fault *);
14 - int (*page_mkwrite)(struct vm_area_struct *, struct page *);
15 + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
16 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
17
18 locking rules:
19 BKL mmap_sem PageLocked(page)
20 open: no yes
21 close: no yes
22 -fault: no yes
23 -page_mkwrite: no yes no
24 +fault: no yes can return with page locked
25 +page_mkwrite: no yes can return with page locked
26 access: no yes
27
28 - ->page_mkwrite() is called when a previously read-only page is
29 -about to become writeable. The file system is responsible for
30 -protecting against truncate races. Once appropriate action has been
31 -taking to lock out truncate, the page range should be verified to be
32 -within i_size. The page mapping should also be checked that it is not
33 -NULL.
34 + ->fault() is called when a previously not present pte is about
35 +to be faulted in. The filesystem must find and return the page associated
36 +with the passed in "pgoff" in the vm_fault structure. If it is possible that
37 +the page may be truncated and/or invalidated, then the filesystem must lock
38 +the page, then ensure it is not already truncated (the page lock will block
39 +subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
40 +locked. The VM will unlock the page.
41 +
42 + ->page_mkwrite() is called when a previously read-only pte is
43 +about to become writeable. The filesystem again must ensure that there are
44 +no truncate/invalidate races, and then return with the page locked. If
45 +the page has been truncated, the filesystem should not look up a new page
46 +like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
47 +will cause the VM to retry the fault.
48
49 ->access() is called when get_user_pages() fails in
50 acces_process_vm(), typically used to debug a process through
51 diff --git a/Makefile b/Makefile
52 index a5c7ae5..2b8138a 100644
53 --- a/Makefile
54 +++ b/Makefile
55 @@ -1,7 +1,7 @@
56 VERSION = 2
57 PATCHLEVEL = 6
58 SUBLEVEL = 27
59 -EXTRAVERSION = .23
60 +EXTRAVERSION = .24
61 NAME = Trembling Tortoise
62
63 # *DOCUMENTATION*
64 diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
65 index eb8f72c..0e034a4 100644
66 --- a/drivers/i2c/algos/i2c-algo-bit.c
67 +++ b/drivers/i2c/algos/i2c-algo-bit.c
68 @@ -104,7 +104,7 @@ static int sclhi(struct i2c_algo_bit_data *adap)
69 * chips may hold it low ("clock stretching") while they
70 * are processing data internally.
71 */
72 - if (time_after_eq(jiffies, start + adap->timeout))
73 + if (time_after(jiffies, start + adap->timeout))
74 return -ETIMEDOUT;
75 cond_resched();
76 }
77 diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c
78 index d50b329..2346a89 100644
79 --- a/drivers/i2c/algos/i2c-algo-pca.c
80 +++ b/drivers/i2c/algos/i2c-algo-pca.c
81 @@ -270,10 +270,21 @@ static int pca_xfer(struct i2c_adapter *i2c_adap,
82
83 case 0x30: /* Data byte in I2CDAT has been transmitted; NOT ACK has been received */
84 DEB2("NOT ACK received after data byte\n");
85 + pca_stop(adap);
86 goto out;
87
88 case 0x38: /* Arbitration lost during SLA+W, SLA+R or data bytes */
89 DEB2("Arbitration lost\n");
90 + /*
91 + * The PCA9564 data sheet (2006-09-01) says "A
92 + * START condition will be transmitted when the
93 + * bus becomes free (STOP or SCL and SDA high)"
94 + * when the STA bit is set (p. 11).
95 + *
96 + * In case this won't work, try pca_reset()
97 + * instead.
98 + */
99 + pca_start(adap);
100 goto out;
101
102 case 0x58: /* Data byte has been received; NOT ACK has been returned */
103 diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
104 index 666b7ba..8c50857 100644
105 --- a/drivers/md/bitmap.c
106 +++ b/drivers/md/bitmap.c
107 @@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
108 oldindex = index;
109 oldpage = page;
110
111 + bitmap->filemap[bitmap->file_pages++] = page;
112 + bitmap->last_page_size = count;
113 +
114 if (outofdate) {
115 /*
116 * if bitmap is out of date, dirty the
117 @@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
118 write_page(bitmap, page, 1);
119
120 ret = -EIO;
121 - if (bitmap->flags & BITMAP_WRITE_ERROR) {
122 - /* release, page not in filemap yet */
123 - put_page(page);
124 + if (bitmap->flags & BITMAP_WRITE_ERROR)
125 goto err;
126 - }
127 }
128 -
129 - bitmap->filemap[bitmap->file_pages++] = page;
130 - bitmap->last_page_size = count;
131 }
132 paddr = kmap_atomic(page, KM_USER0);
133 if (bitmap->flags & BITMAP_HOSTENDIAN)
134 @@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
135 kunmap_atomic(paddr, KM_USER0);
136 if (b) {
137 /* if the disk bit is set, set the memory bit */
138 - bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
139 - ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
140 - );
141 + int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
142 + >= start);
143 + bitmap_set_memory_bits(bitmap,
144 + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
145 + needed);
146 bit_cnt++;
147 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
148 }
149 @@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
150 spin_lock_irqsave(&bitmap->lock, flags);
151 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
152 }
153 - bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
154 - &blocks, 0);
155 + bmc = bitmap_get_counter(bitmap,
156 + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
157 + &blocks, 0);
158 if (bmc) {
159 /*
160 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
161 @@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
162 } else if (*bmc == 1) {
163 /* we can clear the bit */
164 *bmc = 0;
165 - bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
166 + bitmap_count_page(bitmap,
167 + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
168 -1);
169
170 /* clear the bit */
171 @@ -1485,7 +1486,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
172 unsigned long chunk;
173
174 for (chunk = s; chunk <= e; chunk++) {
175 - sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
176 + sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
177 bitmap_set_memory_bits(bitmap, sec, 1);
178 bitmap_file_set_bit(bitmap, sec);
179 }
180 diff --git a/drivers/md/md.c b/drivers/md/md.c
181 index 60f3e59..ebbc3bb 100644
182 --- a/drivers/md/md.c
183 +++ b/drivers/md/md.c
184 @@ -2772,11 +2772,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
185 } else
186 err = -EBUSY;
187 spin_unlock_irq(&mddev->write_lock);
188 - } else {
189 - mddev->ro = 0;
190 - mddev->recovery_cp = MaxSector;
191 - err = do_md_run(mddev);
192 - }
193 + } else
194 + err = -EINVAL;
195 break;
196 case active:
197 if (mddev->pers) {
198 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
199 index dc50f98..b08dd95 100644
200 --- a/drivers/md/raid10.c
201 +++ b/drivers/md/raid10.c
202 @@ -1805,17 +1805,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
203 r10_bio->sector = sect;
204
205 raid10_find_phys(conf, r10_bio);
206 - /* Need to check if this section will still be
207 +
208 + /* Need to check if the array will still be
209 * degraded
210 */
211 - for (j=0; j<conf->copies;j++) {
212 - int d = r10_bio->devs[j].devnum;
213 - if (conf->mirrors[d].rdev == NULL ||
214 - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
215 + for (j=0; j<conf->raid_disks; j++)
216 + if (conf->mirrors[j].rdev == NULL ||
217 + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
218 still_degraded = 1;
219 break;
220 }
221 - }
222 +
223 must_sync = bitmap_start_sync(mddev->bitmap, sect,
224 &sync_blocks, still_degraded);
225
226 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
227 index b70c531..a6e730f 100644
228 --- a/drivers/net/ehea/ehea_main.c
229 +++ b/drivers/net/ehea/ehea_main.c
230 @@ -529,14 +529,17 @@ static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array,
231 x &= (arr_len - 1);
232
233 pref = skb_array[x];
234 - prefetchw(pref);
235 - prefetchw(pref + EHEA_CACHE_LINE);
236 -
237 - pref = (skb_array[x]->data);
238 - prefetch(pref);
239 - prefetch(pref + EHEA_CACHE_LINE);
240 - prefetch(pref + EHEA_CACHE_LINE * 2);
241 - prefetch(pref + EHEA_CACHE_LINE * 3);
242 + if (pref) {
243 + prefetchw(pref);
244 + prefetchw(pref + EHEA_CACHE_LINE);
245 +
246 + pref = (skb_array[x]->data);
247 + prefetch(pref);
248 + prefetch(pref + EHEA_CACHE_LINE);
249 + prefetch(pref + EHEA_CACHE_LINE * 2);
250 + prefetch(pref + EHEA_CACHE_LINE * 3);
251 + }
252 +
253 skb = skb_array[skb_index];
254 skb_array[skb_index] = NULL;
255 return skb;
256 @@ -553,12 +556,14 @@ static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array,
257 x &= (arr_len - 1);
258
259 pref = skb_array[x];
260 - prefetchw(pref);
261 - prefetchw(pref + EHEA_CACHE_LINE);
262 + if (pref) {
263 + prefetchw(pref);
264 + prefetchw(pref + EHEA_CACHE_LINE);
265
266 - pref = (skb_array[x]->data);
267 - prefetchw(pref);
268 - prefetchw(pref + EHEA_CACHE_LINE);
269 + pref = (skb_array[x]->data);
270 + prefetchw(pref);
271 + prefetchw(pref + EHEA_CACHE_LINE);
272 + }
273
274 skb = skb_array[wqe_index];
275 skb_array[wqe_index] = NULL;
276 diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
277 index 3612607..32e7acb 100644
278 --- a/drivers/serial/mpc52xx_uart.c
279 +++ b/drivers/serial/mpc52xx_uart.c
280 @@ -515,7 +515,7 @@ mpc52xx_uart_startup(struct uart_port *port)
281
282 /* Request IRQ */
283 ret = request_irq(port->irq, mpc52xx_uart_int,
284 - IRQF_DISABLED | IRQF_SAMPLE_RANDOM | IRQF_SHARED,
285 + IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
286 "mpc52xx_psc_uart", port);
287 if (ret)
288 return ret;
289 diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c
290 index 4154be3..58c4d37 100644
291 --- a/drivers/usb/gadget/usbstring.c
292 +++ b/drivers/usb/gadget/usbstring.c
293 @@ -38,7 +38,7 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
294 uchar = (c & 0x1f) << 6;
295
296 c = (u8) *s++;
297 - if ((c & 0xc0) != 0xc0)
298 + if ((c & 0xc0) != 0x80)
299 goto fail;
300 c &= 0x3f;
301 uchar |= c;
302 @@ -49,13 +49,13 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
303 uchar = (c & 0x0f) << 12;
304
305 c = (u8) *s++;
306 - if ((c & 0xc0) != 0xc0)
307 + if ((c & 0xc0) != 0x80)
308 goto fail;
309 c &= 0x3f;
310 uchar |= c << 6;
311
312 c = (u8) *s++;
313 - if ((c & 0xc0) != 0xc0)
314 + if ((c & 0xc0) != 0x80)
315 goto fail;
316 c &= 0x3f;
317 uchar |= c;
318 diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
319 index 4835bdc..d1c3cba 100644
320 --- a/drivers/video/fb_defio.c
321 +++ b/drivers/video/fb_defio.c
322 @@ -70,8 +70,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
323
324 /* vm_ops->page_mkwrite handler */
325 static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
326 - struct page *page)
327 + struct vm_fault *vmf)
328 {
329 + struct page *page = vmf->page;
330 struct fb_info *info = vma->vm_private_data;
331 struct fb_deferred_io *fbdefio = info->fbdefio;
332 struct page *cur;
333 diff --git a/fs/buffer.c b/fs/buffer.c
334 index a5d806d..abe9640 100644
335 --- a/fs/buffer.c
336 +++ b/fs/buffer.c
337 @@ -2402,20 +2402,22 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
338 * unlock the page.
339 */
340 int
341 -block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
342 +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
343 get_block_t get_block)
344 {
345 + struct page *page = vmf->page;
346 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
347 unsigned long end;
348 loff_t size;
349 - int ret = -EINVAL;
350 + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
351
352 lock_page(page);
353 size = i_size_read(inode);
354 if ((page->mapping != inode->i_mapping) ||
355 (page_offset(page) > size)) {
356 /* page got truncated out from underneath us */
357 - goto out_unlock;
358 + unlock_page(page);
359 + goto out;
360 }
361
362 /* page is wholly or partially inside EOF */
363 @@ -2428,8 +2430,16 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
364 if (!ret)
365 ret = block_commit_write(page, 0, end);
366
367 -out_unlock:
368 - unlock_page(page);
369 + if (unlikely(ret)) {
370 + unlock_page(page);
371 + if (ret == -ENOMEM)
372 + ret = VM_FAULT_OOM;
373 + else /* -ENOSPC, -EIO, etc */
374 + ret = VM_FAULT_SIGBUS;
375 + } else
376 + ret = VM_FAULT_LOCKED;
377 +
378 +out:
379 return ret;
380 }
381
382 diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
383 index 14eb9a2..604ce8a 100644
384 --- a/fs/cifs/cifs_unicode.h
385 +++ b/fs/cifs/cifs_unicode.h
386 @@ -64,6 +64,13 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
387 #endif
388
389 /*
390 + * To be safe - for UCS to UTF-8 with strings loaded with the rare long
391 + * characters alloc more to account for such multibyte target UTF-8
392 + * characters.
393 + */
394 +#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2)
395 +
396 +/*
397 * UniStrcat: Concatenate the second string to the first
398 *
399 * Returns:
400 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
401 index 9231e0a..cff0c53 100644
402 --- a/fs/cifs/cifssmb.c
403 +++ b/fs/cifs/cifssmb.c
404 @@ -91,23 +91,22 @@ static int
405 cifs_strncpy_to_host(char **dst, const char *src, const int maxlen,
406 const bool is_unicode, const struct nls_table *nls_codepage)
407 {
408 - int plen;
409 + int src_len, dst_len;
410
411 if (is_unicode) {
412 - plen = UniStrnlen((wchar_t *)src, maxlen);
413 - *dst = kmalloc(plen + 2, GFP_KERNEL);
414 + src_len = UniStrnlen((wchar_t *)src, maxlen);
415 + *dst = kmalloc((4 * src_len) + 2, GFP_KERNEL);
416 if (!*dst)
417 goto cifs_strncpy_to_host_ErrExit;
418 - cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage);
419 + dst_len = cifs_strfromUCS_le(*dst, (__le16 *)src, src_len, nls_codepage);
420 + (*dst)[dst_len + 1] = 0;
421 } else {
422 - plen = strnlen(src, maxlen);
423 - *dst = kmalloc(plen + 2, GFP_KERNEL);
424 + src_len = strnlen(src, maxlen);
425 + *dst = kmalloc(src_len + 1, GFP_KERNEL);
426 if (!*dst)
427 goto cifs_strncpy_to_host_ErrExit;
428 - strncpy(*dst, src, plen);
429 + strlcpy(*dst, src, src_len + 1);
430 }
431 - (*dst)[plen] = 0;
432 - (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */
433 return 0;
434
435 cifs_strncpy_to_host_ErrExit:
436 diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
437 index 21a1abf..d059b3f 100644
438 --- a/fs/cifs/connect.c
439 +++ b/fs/cifs/connect.c
440 @@ -3549,16 +3549,12 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
441 BCC(smb_buffer_response)) {
442 kfree(tcon->nativeFileSystem);
443 tcon->nativeFileSystem =
444 - kzalloc(2*(length + 1), GFP_KERNEL);
445 + kzalloc((4 * length) + 2, GFP_KERNEL);
446 if (tcon->nativeFileSystem)
447 cifs_strfromUCS_le(
448 tcon->nativeFileSystem,
449 (__le16 *) bcc_ptr,
450 length, nls_codepage);
451 - bcc_ptr += 2 * length;
452 - bcc_ptr[0] = 0; /* null terminate the string */
453 - bcc_ptr[1] = 0;
454 - bcc_ptr += 2;
455 }
456 /* else do not bother copying these information fields*/
457 } else {
458 diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
459 index b891553..6205593 100644
460 --- a/fs/cifs/misc.c
461 +++ b/fs/cifs/misc.c
462 @@ -685,14 +685,15 @@ cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
463 NLS_MAX_CHARSET_SIZE);
464 if (len > 0) {
465 j += len;
466 - continue;
467 + goto overrun_chk;
468 } else {
469 target[j] = '?';
470 }
471 }
472 j++;
473 /* make sure we do not overrun callers allocated temp buffer */
474 - if (j >= (2 * NAME_MAX))
475 +overrun_chk:
476 + if (j >= UNICODE_NAME_MAX)
477 break;
478 }
479 cUCS_out:
480 diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
481 index 58d5729..2878892 100644
482 --- a/fs/cifs/readdir.c
483 +++ b/fs/cifs/readdir.c
484 @@ -1075,7 +1075,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
485 with the rare long characters alloc more to account for
486 such multibyte target UTF-8 characters. cifs_unicode.c,
487 which actually does the conversion, has the same limit */
488 - tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL);
489 + tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
490 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
491 if (current_entry == NULL) {
492 /* evaluate whether this case is an error */
493 diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
494 index 89fac77..3890cc2 100644
495 --- a/fs/cifs/sess.c
496 +++ b/fs/cifs/sess.c
497 @@ -202,27 +202,26 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
498 int words_left, len;
499 char *data = *pbcc_area;
500
501 -
502 -
503 cFYI(1, ("bleft %d", bleft));
504
505 -
506 - /* SMB header is unaligned, so cifs servers word align start of
507 - Unicode strings */
508 - data++;
509 - bleft--; /* Windows servers do not always double null terminate
510 - their final Unicode string - in which case we
511 - now will not attempt to decode the byte of junk
512 - which follows it */
513 + /*
514 + * Windows servers do not always double null terminate their final
515 + * Unicode string. Check to see if there are an uneven number of bytes
516 + * left. If so, then add an extra NULL pad byte to the end of the
517 + * response.
518 + *
519 + * See section 2.7.2 in "Implementing CIFS" for details
520 + */
521 + if (bleft % 2) {
522 + data[bleft] = 0;
523 + ++bleft;
524 + }
525
526 words_left = bleft / 2;
527
528 /* save off server operating system */
529 len = UniStrnlen((wchar_t *) data, words_left);
530
531 -/* We look for obvious messed up bcc or strings in response so we do not go off
532 - the end since (at least) WIN2K and Windows XP have a major bug in not null
533 - terminating last Unicode string in response */
534 if (len >= words_left)
535 return rc;
536
537 @@ -260,13 +259,10 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
538 return rc;
539
540 kfree(ses->serverDomain);
541 - ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
542 - if (ses->serverDomain != NULL) {
543 + ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL);
544 + if (ses->serverDomain != NULL)
545 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
546 nls_cp);
547 - ses->serverDomain[2*len] = 0;
548 - ses->serverDomain[(2*len) + 1] = 0;
549 - }
550 data += 2 * (len + 1);
551 words_left -= len + 1;
552
553 @@ -616,12 +612,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
554 }
555
556 /* BB check if Unicode and decode strings */
557 - if (smb_buf->Flags2 & SMBFLG2_UNICODE)
558 + if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
559 + /* unicode string area must be word-aligned */
560 + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
561 + ++bcc_ptr;
562 + --bytes_remaining;
563 + }
564 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining,
565 - ses, nls_cp);
566 - else
567 + ses, nls_cp);
568 + } else {
569 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining,
570 ses, nls_cp);
571 + }
572
573 ssetup_exit:
574 if (spnego_key)
575 diff --git a/fs/eventpoll.c b/fs/eventpoll.c
576 index 801de2c..fd5835b 100644
577 --- a/fs/eventpoll.c
578 +++ b/fs/eventpoll.c
579 @@ -1132,7 +1132,7 @@ error_return:
580
581 SYSCALL_DEFINE1(epoll_create, int, size)
582 {
583 - if (size < 0)
584 + if (size <= 0)
585 return -EINVAL;
586
587 return sys_epoll_create1(0);
588 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
589 index f613d57..eadbee3 100644
590 --- a/fs/ext4/ext4.h
591 +++ b/fs/ext4/ext4.h
592 @@ -1084,7 +1084,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
593 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
594 extern int ext4_block_truncate_page(handle_t *handle,
595 struct address_space *mapping, loff_t from);
596 -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
597 +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
598
599 /* ioctl.c */
600 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
601 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
602 index b233ade..63b911b 100644
603 --- a/fs/ext4/inode.c
604 +++ b/fs/ext4/inode.c
605 @@ -4861,8 +4861,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
606 return !buffer_mapped(bh);
607 }
608
609 -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
610 +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
611 {
612 + struct page *page = vmf->page;
613 loff_t size;
614 unsigned long len;
615 int ret = -EINVAL;
616 @@ -4913,6 +4914,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
617 goto out_unlock;
618 ret = 0;
619 out_unlock:
620 + if (ret)
621 + ret = VM_FAULT_SIGBUS;
622 up_read(&inode->i_alloc_sem);
623 return ret;
624 }
625 diff --git a/fs/fcntl.c b/fs/fcntl.c
626 index 08a109b..ac79b7e 100644
627 --- a/fs/fcntl.c
628 +++ b/fs/fcntl.c
629 @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
630 {
631 if (unlikely(newfd == oldfd)) { /* corner case */
632 struct files_struct *files = current->files;
633 + int retval = oldfd;
634 +
635 rcu_read_lock();
636 if (!fcheck_files(files, oldfd))
637 - oldfd = -EBADF;
638 + retval = -EBADF;
639 rcu_read_unlock();
640 - return oldfd;
641 + return retval;
642 }
643 return sys_dup3(oldfd, newfd, 0);
644 }
645 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
646 index 3ada9d7..0c92f15 100644
647 --- a/fs/fuse/file.c
648 +++ b/fs/fuse/file.c
649 @@ -1219,8 +1219,9 @@ static void fuse_vma_close(struct vm_area_struct *vma)
650 * - sync(2)
651 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
652 */
653 -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
654 +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
655 {
656 + struct page *page = vmf->page;
657 /*
658 * Don't use page->mapping as it may become NULL from a
659 * concurrent truncate.
660 diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
661 index e9a366d..641c43b 100644
662 --- a/fs/gfs2/ops_file.c
663 +++ b/fs/gfs2/ops_file.c
664 @@ -338,8 +338,9 @@ static int gfs2_allocate_page_backing(struct page *page)
665 * blocks allocated on disk to back that page.
666 */
667
668 -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
669 +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
670 {
671 + struct page *page = vmf->page;
672 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
673 struct gfs2_inode *ip = GFS2_I(inode);
674 struct gfs2_sbd *sdp = GFS2_SB(inode);
675 @@ -411,6 +412,10 @@ out_unlock:
676 gfs2_glock_dq(&gh);
677 out:
678 gfs2_holder_uninit(&gh);
679 + if (ret == -ENOMEM)
680 + ret = VM_FAULT_OOM;
681 + else if (ret)
682 + ret = VM_FAULT_SIGBUS;
683 return ret;
684 }
685
686 diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
687 index 74f92b7..bff8733 100644
688 --- a/fs/nfs/dir.c
689 +++ b/fs/nfs/dir.c
690 @@ -1613,8 +1613,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
691 } else if (atomic_read(&new_dentry->d_count) > 1)
692 /* dentry still busy? */
693 goto out;
694 - } else
695 - nfs_drop_nlink(new_inode);
696 + }
697
698 go_ahead:
699 /*
700 @@ -1627,10 +1626,8 @@ go_ahead:
701 }
702 nfs_inode_return_delegation(old_inode);
703
704 - if (new_inode != NULL) {
705 + if (new_inode != NULL)
706 nfs_inode_return_delegation(new_inode);
707 - d_delete(new_dentry);
708 - }
709
710 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
711 new_dir, &new_dentry->d_name);
712 @@ -1639,6 +1636,8 @@ out:
713 if (rehash)
714 d_rehash(rehash);
715 if (!error) {
716 + if (new_inode != NULL)
717 + nfs_drop_nlink(new_inode);
718 d_move(old_dentry, new_dentry);
719 nfs_set_verifier(new_dentry,
720 nfs_save_change_attribute(new_dir));
721 diff --git a/fs/nfs/file.c b/fs/nfs/file.c
722 index 30541f0..4a57a0f 100644
723 --- a/fs/nfs/file.c
724 +++ b/fs/nfs/file.c
725 @@ -448,8 +448,9 @@ const struct address_space_operations nfs_file_aops = {
726 .launder_page = nfs_launder_page,
727 };
728
729 -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
730 +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
731 {
732 + struct page *page = vmf->page;
733 struct file *filp = vma->vm_file;
734 struct dentry *dentry = filp->f_path.dentry;
735 unsigned pagelen;
736 @@ -476,11 +477,11 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
737 goto out_unlock;
738
739 ret = nfs_updatepage(filp, page, 0, pagelen);
740 - if (ret == 0)
741 - ret = pagelen;
742 out_unlock:
743 + if (!ret)
744 + return VM_FAULT_LOCKED;
745 unlock_page(page);
746 - return ret;
747 + return VM_FAULT_SIGBUS;
748 }
749
750 static struct vm_operations_struct nfs_file_vm_ops = {
751 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
752 index b0b07df..abffc90 100644
753 --- a/fs/nfsd/nfs4xdr.c
754 +++ b/fs/nfsd/nfs4xdr.c
755 @@ -1833,6 +1833,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
756 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
757 if (IS_ERR(dentry))
758 return nfserrno(PTR_ERR(dentry));
759 + if (!dentry->d_inode) {
760 + /*
761 + * nfsd_buffered_readdir drops the i_mutex between
762 + * readdir and calling this callback, leaving a window
763 + * where this directory entry could have gone away.
764 + */
765 + dput(dentry);
766 + return nfserr_noent;
767 + }
768
769 exp_get(exp);
770 /*
771 @@ -1895,6 +1904,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
772 struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
773 int buflen;
774 __be32 *p = cd->buffer;
775 + __be32 *cookiep;
776 __be32 nfserr = nfserr_toosmall;
777
778 /* In nfsv4, "." and ".." never make it onto the wire.. */
779 @@ -1911,7 +1921,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
780 goto fail;
781
782 *p++ = xdr_one; /* mark entry present */
783 - cd->offset = p; /* remember pointer */
784 + cookiep = p;
785 p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
786 p = xdr_encode_array(p, name, namlen); /* name length & name */
787
788 @@ -1925,6 +1935,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
789 goto fail;
790 case nfserr_dropit:
791 goto fail;
792 + case nfserr_noent:
793 + goto skip_entry;
794 default:
795 /*
796 * If the client requested the RDATTR_ERROR attribute,
797 @@ -1943,6 +1955,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
798 }
799 cd->buflen -= (p - cd->buffer);
800 cd->buffer = p;
801 + cd->offset = cookiep;
802 +skip_entry:
803 cd->common.err = nfs_ok;
804 return 0;
805 fail:
806 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
807 index 6f7ea0a..08af0ed 100644
808 --- a/fs/ocfs2/file.c
809 +++ b/fs/ocfs2/file.c
810 @@ -2075,6 +2075,22 @@ out_sems:
811 return written ? written : ret;
812 }
813
814 +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
815 + struct file *out,
816 + struct splice_desc *sd)
817 +{
818 + int ret;
819 +
820 + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
821 + sd->total_len, 0, NULL);
822 + if (ret < 0) {
823 + mlog_errno(ret);
824 + return ret;
825 + }
826 +
827 + return splice_from_pipe_feed(pipe, sd, pipe_to_file);
828 +}
829 +
830 static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
831 struct file *out,
832 loff_t *ppos,
833 @@ -2082,38 +2098,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
834 unsigned int flags)
835 {
836 int ret;
837 - struct inode *inode = out->f_path.dentry->d_inode;
838 + struct address_space *mapping = out->f_mapping;
839 + struct inode *inode = mapping->host;
840 + struct splice_desc sd = {
841 + .total_len = len,
842 + .flags = flags,
843 + .pos = *ppos,
844 + .u.file = out,
845 + };
846
847 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
848 (unsigned int)len,
849 out->f_path.dentry->d_name.len,
850 out->f_path.dentry->d_name.name);
851
852 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
853 + if (pipe->inode)
854 + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
855
856 - ret = ocfs2_rw_lock(inode, 1);
857 - if (ret < 0) {
858 - mlog_errno(ret);
859 - goto out;
860 - }
861 + splice_from_pipe_begin(&sd);
862 + do {
863 + ret = splice_from_pipe_next(pipe, &sd);
864 + if (ret <= 0)
865 + break;
866
867 - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
868 - NULL);
869 - if (ret < 0) {
870 - mlog_errno(ret);
871 - goto out_unlock;
872 - }
873 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
874 + ret = ocfs2_rw_lock(inode, 1);
875 + if (ret < 0)
876 + mlog_errno(ret);
877 + else {
878 + ret = ocfs2_splice_to_file(pipe, out, &sd);
879 + ocfs2_rw_unlock(inode, 1);
880 + }
881 + mutex_unlock(&inode->i_mutex);
882 + } while (ret > 0);
883 + splice_from_pipe_end(pipe, &sd);
884
885 if (pipe->inode)
886 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
887 - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
888 - if (pipe->inode)
889 mutex_unlock(&pipe->inode->i_mutex);
890
891 -out_unlock:
892 - ocfs2_rw_unlock(inode, 1);
893 -out:
894 - mutex_unlock(&inode->i_mutex);
895 + if (sd.num_spliced)
896 + ret = sd.num_spliced;
897 +
898 + if (ret > 0) {
899 + unsigned long nr_pages;
900 +
901 + *ppos += ret;
902 + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
903 +
904 + /*
905 + * If file or inode is SYNC and we actually wrote some data,
906 + * sync it.
907 + */
908 + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
909 + int err;
910 +
911 + mutex_lock(&inode->i_mutex);
912 + err = ocfs2_rw_lock(inode, 1);
913 + if (err < 0) {
914 + mlog_errno(err);
915 + } else {
916 + err = generic_osync_inode(inode, mapping,
917 + OSYNC_METADATA|OSYNC_DATA);
918 + ocfs2_rw_unlock(inode, 1);
919 + }
920 + mutex_unlock(&inode->i_mutex);
921 +
922 + if (err)
923 + ret = err;
924 + }
925 + balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
926 + }
927
928 mlog_exit(ret);
929 return ret;
930 diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
931 index 3dc18d6..2383cbd 100644
932 --- a/fs/ocfs2/mmap.c
933 +++ b/fs/ocfs2/mmap.c
934 @@ -150,8 +150,9 @@ out:
935 return ret;
936 }
937
938 -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
939 +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
940 {
941 + struct page *page = vmf->page;
942 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
943 struct buffer_head *di_bh = NULL;
944 sigset_t blocked, oldset;
945 @@ -192,7 +193,8 @@ out:
946 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
947 if (ret2 < 0)
948 mlog_errno(ret2);
949 -
950 + if (ret)
951 + ret = VM_FAULT_SIGBUS;
952 return ret;
953 }
954
955 diff --git a/fs/splice.c b/fs/splice.c
956 index aea1eb4..2f2d8c1 100644
957 --- a/fs/splice.c
958 +++ b/fs/splice.c
959 @@ -553,8 +553,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
960 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
961 * a new page in the output file page cache and fill/dirty that.
962 */
963 -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
964 - struct splice_desc *sd)
965 +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
966 + struct splice_desc *sd)
967 {
968 struct file *file = sd->u.file;
969 struct address_space *mapping = file->f_mapping;
970 @@ -598,108 +598,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
971 out:
972 return ret;
973 }
974 +EXPORT_SYMBOL(pipe_to_file);
975 +
976 +static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
977 +{
978 + smp_mb();
979 + if (waitqueue_active(&pipe->wait))
980 + wake_up_interruptible(&pipe->wait);
981 + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
982 +}
983
984 /**
985 - * __splice_from_pipe - splice data from a pipe to given actor
986 + * splice_from_pipe_feed - feed available data from a pipe to a file
987 * @pipe: pipe to splice from
988 * @sd: information to @actor
989 * @actor: handler that splices the data
990 *
991 * Description:
992 - * This function does little more than loop over the pipe and call
993 - * @actor to do the actual moving of a single struct pipe_buffer to
994 - * the desired destination. See pipe_to_file, pipe_to_sendpage, or
995 - * pipe_to_user.
996 + *
997 + * This function loops over the pipe and calls @actor to do the
998 + * actual moving of a single struct pipe_buffer to the desired
999 + * destination. It returns when there are no more buffers left in
1000 + * the pipe or if the requested number of bytes (@sd->total_len)
1001 + * have been copied. It returns a positive number (one) if the
1002 + * pipe needs to be filled with more data, zero if the required
1003 + * number of bytes have been copied and -errno on error.
1004 *
1005 + * This, together with splice_from_pipe_{begin,end,next}, may be
1006 + * used to implement the functionality of __splice_from_pipe() when
1007 + * locking is required around copying the pipe buffers to the
1008 + * destination.
1009 */
1010 -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
1011 - splice_actor *actor)
1012 +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
1013 + splice_actor *actor)
1014 {
1015 - int ret, do_wakeup, err;
1016 -
1017 - ret = 0;
1018 - do_wakeup = 0;
1019 -
1020 - for (;;) {
1021 - if (pipe->nrbufs) {
1022 - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1023 - const struct pipe_buf_operations *ops = buf->ops;
1024 + int ret;
1025
1026 - sd->len = buf->len;
1027 - if (sd->len > sd->total_len)
1028 - sd->len = sd->total_len;
1029 + while (pipe->nrbufs) {
1030 + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1031 + const struct pipe_buf_operations *ops = buf->ops;
1032
1033 - err = actor(pipe, buf, sd);
1034 - if (err <= 0) {
1035 - if (!ret && err != -ENODATA)
1036 - ret = err;
1037 + sd->len = buf->len;
1038 + if (sd->len > sd->total_len)
1039 + sd->len = sd->total_len;
1040
1041 - break;
1042 - }
1043 + ret = actor(pipe, buf, sd);
1044 + if (ret <= 0) {
1045 + if (ret == -ENODATA)
1046 + ret = 0;
1047 + return ret;
1048 + }
1049 + buf->offset += ret;
1050 + buf->len -= ret;
1051
1052 - ret += err;
1053 - buf->offset += err;
1054 - buf->len -= err;
1055 + sd->num_spliced += ret;
1056 + sd->len -= ret;
1057 + sd->pos += ret;
1058 + sd->total_len -= ret;
1059
1060 - sd->len -= err;
1061 - sd->pos += err;
1062 - sd->total_len -= err;
1063 - if (sd->len)
1064 - continue;
1065 + if (!buf->len) {
1066 + buf->ops = NULL;
1067 + ops->release(pipe, buf);
1068 + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
1069 + pipe->nrbufs--;
1070 + if (pipe->inode)
1071 + sd->need_wakeup = true;
1072 + }
1073
1074 - if (!buf->len) {
1075 - buf->ops = NULL;
1076 - ops->release(pipe, buf);
1077 - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
1078 - pipe->nrbufs--;
1079 - if (pipe->inode)
1080 - do_wakeup = 1;
1081 - }
1082 + if (!sd->total_len)
1083 + return 0;
1084 + }
1085
1086 - if (!sd->total_len)
1087 - break;
1088 - }
1089 + return 1;
1090 +}
1091 +EXPORT_SYMBOL(splice_from_pipe_feed);
1092
1093 - if (pipe->nrbufs)
1094 - continue;
1095 +/**
1096 + * splice_from_pipe_next - wait for some data to splice from
1097 + * @pipe: pipe to splice from
1098 + * @sd: information about the splice operation
1099 + *
1100 + * Description:
1101 + * This function will wait for some data and return a positive
1102 + * value (one) if pipe buffers are available. It will return zero
1103 + * or -errno if no more data needs to be spliced.
1104 + */
1105 +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
1106 +{
1107 + while (!pipe->nrbufs) {
1108 if (!pipe->writers)
1109 - break;
1110 - if (!pipe->waiting_writers) {
1111 - if (ret)
1112 - break;
1113 - }
1114 + return 0;
1115
1116 - if (sd->flags & SPLICE_F_NONBLOCK) {
1117 - if (!ret)
1118 - ret = -EAGAIN;
1119 - break;
1120 - }
1121 + if (!pipe->waiting_writers && sd->num_spliced)
1122 + return 0;
1123
1124 - if (signal_pending(current)) {
1125 - if (!ret)
1126 - ret = -ERESTARTSYS;
1127 - break;
1128 - }
1129 + if (sd->flags & SPLICE_F_NONBLOCK)
1130 + return -EAGAIN;
1131
1132 - if (do_wakeup) {
1133 - smp_mb();
1134 - if (waitqueue_active(&pipe->wait))
1135 - wake_up_interruptible_sync(&pipe->wait);
1136 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1137 - do_wakeup = 0;
1138 + if (signal_pending(current))
1139 + return -ERESTARTSYS;
1140 +
1141 + if (sd->need_wakeup) {
1142 + wakeup_pipe_writers(pipe);
1143 + sd->need_wakeup = false;
1144 }
1145
1146 pipe_wait(pipe);
1147 }
1148
1149 - if (do_wakeup) {
1150 - smp_mb();
1151 - if (waitqueue_active(&pipe->wait))
1152 - wake_up_interruptible(&pipe->wait);
1153 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1154 - }
1155 + return 1;
1156 +}
1157 +EXPORT_SYMBOL(splice_from_pipe_next);
1158
1159 - return ret;
1160 +/**
1161 + * splice_from_pipe_begin - start splicing from pipe
1162 + * @pipe: pipe to splice from
1163 + *
1164 + * Description:
1165 + * This function should be called before a loop containing
1166 + * splice_from_pipe_next() and splice_from_pipe_feed() to
1167 + * initialize the necessary fields of @sd.
1168 + */
1169 +void splice_from_pipe_begin(struct splice_desc *sd)
1170 +{
1171 + sd->num_spliced = 0;
1172 + sd->need_wakeup = false;
1173 +}
1174 +EXPORT_SYMBOL(splice_from_pipe_begin);
1175 +
1176 +/**
1177 + * splice_from_pipe_end - finish splicing from pipe
1178 + * @pipe: pipe to splice from
1179 + * @sd: information about the splice operation
1180 + *
1181 + * Description:
1182 + * This function will wake up pipe writers if necessary. It should
1183 + * be called after a loop containing splice_from_pipe_next() and
1184 + * splice_from_pipe_feed().
1185 + */
1186 +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
1187 +{
1188 + if (sd->need_wakeup)
1189 + wakeup_pipe_writers(pipe);
1190 +}
1191 +EXPORT_SYMBOL(splice_from_pipe_end);
1192 +
1193 +/**
1194 + * __splice_from_pipe - splice data from a pipe to given actor
1195 + * @pipe: pipe to splice from
1196 + * @sd: information to @actor
1197 + * @actor: handler that splices the data
1198 + *
1199 + * Description:
1200 + * This function does little more than loop over the pipe and call
1201 + * @actor to do the actual moving of a single struct pipe_buffer to
1202 + * the desired destination. See pipe_to_file, pipe_to_sendpage, or
1203 + * pipe_to_user.
1204 + *
1205 + */
1206 +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
1207 + splice_actor *actor)
1208 +{
1209 + int ret;
1210 +
1211 + splice_from_pipe_begin(sd);
1212 + do {
1213 + ret = splice_from_pipe_next(pipe, sd);
1214 + if (ret > 0)
1215 + ret = splice_from_pipe_feed(pipe, sd, actor);
1216 + } while (ret > 0);
1217 + splice_from_pipe_end(pipe, sd);
1218 +
1219 + return sd->num_spliced ? sd->num_spliced : ret;
1220 }
1221 EXPORT_SYMBOL(__splice_from_pipe);
1222
1223 @@ -713,7 +783,7 @@ EXPORT_SYMBOL(__splice_from_pipe);
1224 * @actor: handler that splices the data
1225 *
1226 * Description:
1227 - * See __splice_from_pipe. This function locks the input and output inodes,
1228 + * See __splice_from_pipe. This function locks the pipe inode,
1229 * otherwise it's identical to __splice_from_pipe().
1230 *
1231 */
1232 @@ -722,7 +792,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
1233 splice_actor *actor)
1234 {
1235 ssize_t ret;
1236 - struct inode *inode = out->f_mapping->host;
1237 struct splice_desc sd = {
1238 .total_len = len,
1239 .flags = flags,
1240 @@ -730,24 +799,11 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
1241 .u.file = out,
1242 };
1243
1244 - /*
1245 - * The actor worker might be calling ->prepare_write and
1246 - * ->commit_write. Most of the time, these expect i_mutex to
1247 - * be held. Since this may result in an ABBA deadlock with
1248 - * pipe->inode, we have to order lock acquiry here.
1249 - *
1250 - * Outer lock must be inode->i_mutex, as pipe_wait() will
1251 - * release and reacquire pipe->inode->i_mutex, AND inode must
1252 - * never be a pipe.
1253 - */
1254 - WARN_ON(S_ISFIFO(inode->i_mode));
1255 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1256 if (pipe->inode)
1257 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1258 + mutex_lock(&pipe->inode->i_mutex);
1259 ret = __splice_from_pipe(pipe, &sd, actor);
1260 if (pipe->inode)
1261 mutex_unlock(&pipe->inode->i_mutex);
1262 - mutex_unlock(&inode->i_mutex);
1263
1264 return ret;
1265 }
1266 @@ -838,17 +894,29 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1267 };
1268 ssize_t ret;
1269
1270 - WARN_ON(S_ISFIFO(inode->i_mode));
1271 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1272 - ret = file_remove_suid(out);
1273 - if (likely(!ret)) {
1274 - if (pipe->inode)
1275 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1276 - ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
1277 - if (pipe->inode)
1278 - mutex_unlock(&pipe->inode->i_mutex);
1279 - }
1280 - mutex_unlock(&inode->i_mutex);
1281 + if (pipe->inode)
1282 + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
1283 +
1284 + splice_from_pipe_begin(&sd);
1285 + do {
1286 + ret = splice_from_pipe_next(pipe, &sd);
1287 + if (ret <= 0)
1288 + break;
1289 +
1290 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1291 + ret = file_remove_suid(out);
1292 + if (!ret)
1293 + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
1294 + mutex_unlock(&inode->i_mutex);
1295 + } while (ret > 0);
1296 + splice_from_pipe_end(pipe, &sd);
1297 +
1298 + if (pipe->inode)
1299 + mutex_unlock(&pipe->inode->i_mutex);
1300 +
1301 + if (sd.num_spliced)
1302 + ret = sd.num_spliced;
1303 +
1304 if (ret > 0) {
1305 unsigned long nr_pages;
1306
1307 diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
1308 index 40033dc..82b1c4a 100644
1309 --- a/fs/ubifs/file.c
1310 +++ b/fs/ubifs/file.c
1311 @@ -1140,8 +1140,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1312 * mmap()d file has taken write protection fault and is being made
1313 * writable. UBIFS must ensure page is budgeted for.
1314 */
1315 -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1316 +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1317 {
1318 + struct page *page = vmf->page;
1319 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1320 struct ubifs_info *c = inode->i_sb->s_fs_info;
1321 struct timespec now = ubifs_current_time(inode);
1322 @@ -1153,7 +1154,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1323 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
1324
1325 if (unlikely(c->ro_media))
1326 - return -EROFS;
1327 + return VM_FAULT_SIGBUS; /* -EROFS */
1328
1329 /*
1330 * We have not locked @page so far so we may budget for changing the
1331 @@ -1186,7 +1187,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1332 if (err == -ENOSPC)
1333 ubifs_warn("out of space for mmapped file "
1334 "(inode number %lu)", inode->i_ino);
1335 - return err;
1336 + return VM_FAULT_SIGBUS;
1337 }
1338
1339 lock_page(page);
1340 @@ -1226,6 +1227,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1341 out_unlock:
1342 unlock_page(page);
1343 ubifs_release_budget(c, &req);
1344 + if (err)
1345 + err = VM_FAULT_SIGBUS;
1346 return err;
1347 }
1348
1349 diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
1350 index 5311c1a..469502c 100644
1351 --- a/fs/xfs/linux-2.6/xfs_file.c
1352 +++ b/fs/xfs/linux-2.6/xfs_file.c
1353 @@ -427,9 +427,9 @@ xfs_file_ioctl_invis(
1354 STATIC int
1355 xfs_vm_page_mkwrite(
1356 struct vm_area_struct *vma,
1357 - struct page *page)
1358 + struct vm_fault *vmf)
1359 {
1360 - return block_page_mkwrite(vma, page, xfs_get_blocks);
1361 + return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1362 }
1363
1364 const struct file_operations xfs_file_operations = {
1365 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
1366 index eadaab4..657c072 100644
1367 --- a/include/linux/buffer_head.h
1368 +++ b/include/linux/buffer_head.h
1369 @@ -222,7 +222,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t,
1370 get_block_t *, loff_t *);
1371 int generic_cont_expand_simple(struct inode *inode, loff_t size);
1372 int block_commit_write(struct page *page, unsigned from, unsigned to);
1373 -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
1374 +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
1375 get_block_t get_block);
1376 void block_sync_page(struct page *);
1377 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
1378 diff --git a/include/linux/mm.h b/include/linux/mm.h
1379 index 2a75579..ae9775d 100644
1380 --- a/include/linux/mm.h
1381 +++ b/include/linux/mm.h
1382 @@ -138,6 +138,7 @@ extern pgprot_t protection_map[16];
1383
1384 #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
1385 #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
1386 +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
1387
1388
1389 /*
1390 @@ -173,7 +174,7 @@ struct vm_operations_struct {
1391
1392 /* notification that a previously read-only page is about to become
1393 * writable, if an error is returned it will cause a SIGBUS */
1394 - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
1395 + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
1396
1397 /* called by access_process_vm when get_user_pages() fails, typically
1398 * for use by special VMAs that can switch between memory and hardware
1399 diff --git a/include/linux/splice.h b/include/linux/splice.h
1400 index 528dcb9..5f3faa9 100644
1401 --- a/include/linux/splice.h
1402 +++ b/include/linux/splice.h
1403 @@ -36,6 +36,8 @@ struct splice_desc {
1404 void *data; /* cookie */
1405 } u;
1406 loff_t pos; /* file position */
1407 + size_t num_spliced; /* number of bytes already spliced */
1408 + bool need_wakeup; /* need to wake up writer */
1409 };
1410
1411 struct partial_page {
1412 @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
1413 splice_actor *);
1414 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
1415 struct splice_desc *, splice_actor *);
1416 +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
1417 + splice_actor *);
1418 +extern int splice_from_pipe_next(struct pipe_inode_info *,
1419 + struct splice_desc *);
1420 +extern void splice_from_pipe_begin(struct splice_desc *);
1421 +extern void splice_from_pipe_end(struct pipe_inode_info *,
1422 + struct splice_desc *);
1423 +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
1424 + struct splice_desc *);
1425 +
1426 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
1427 struct splice_pipe_desc *);
1428 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
1429 diff --git a/mm/memory.c b/mm/memory.c
1430 index 1002f47..3856c36 100644
1431 --- a/mm/memory.c
1432 +++ b/mm/memory.c
1433 @@ -1801,6 +1801,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1434 * get_user_pages(.write=1, .force=1).
1435 */
1436 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
1437 + struct vm_fault vmf;
1438 + int tmp;
1439 +
1440 + vmf.virtual_address = (void __user *)(address &
1441 + PAGE_MASK);
1442 + vmf.pgoff = old_page->index;
1443 + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1444 + vmf.page = old_page;
1445 +
1446 /*
1447 * Notify the address space that the page is about to
1448 * become writable so that it can prohibit this or wait
1449 @@ -1812,8 +1821,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1450 page_cache_get(old_page);
1451 pte_unmap_unlock(page_table, ptl);
1452
1453 - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
1454 + tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
1455 + if (unlikely(tmp &
1456 + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
1457 + ret = tmp;
1458 goto unwritable_page;
1459 + }
1460 + if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
1461 + lock_page(old_page);
1462 + if (!old_page->mapping) {
1463 + ret = 0; /* retry the fault */
1464 + unlock_page(old_page);
1465 + goto unwritable_page;
1466 + }
1467 + } else
1468 + VM_BUG_ON(!PageLocked(old_page));
1469
1470 /*
1471 * Since we dropped the lock we need to revalidate
1472 @@ -1823,9 +1845,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1473 */
1474 page_table = pte_offset_map_lock(mm, pmd, address,
1475 &ptl);
1476 - page_cache_release(old_page);
1477 - if (!pte_same(*page_table, orig_pte))
1478 + if (!pte_same(*page_table, orig_pte)) {
1479 + unlock_page(old_page);
1480 + page_cache_release(old_page);
1481 goto unlock;
1482 + }
1483
1484 page_mkwrite = 1;
1485 }
1486 @@ -1930,9 +1954,6 @@ gotten:
1487 unlock:
1488 pte_unmap_unlock(page_table, ptl);
1489 if (dirty_page) {
1490 - if (vma->vm_file)
1491 - file_update_time(vma->vm_file);
1492 -
1493 /*
1494 * Yes, Virginia, this is actually required to prevent a race
1495 * with clear_page_dirty_for_io() from clearing the page dirty
1496 @@ -1941,21 +1962,46 @@ unlock:
1497 *
1498 * do_no_page is protected similarly.
1499 */
1500 - wait_on_page_locked(dirty_page);
1501 - set_page_dirty_balance(dirty_page, page_mkwrite);
1502 + if (!page_mkwrite) {
1503 + wait_on_page_locked(dirty_page);
1504 + set_page_dirty_balance(dirty_page, page_mkwrite);
1505 + }
1506 put_page(dirty_page);
1507 + if (page_mkwrite) {
1508 + struct address_space *mapping = dirty_page->mapping;
1509 +
1510 + set_page_dirty(dirty_page);
1511 + unlock_page(dirty_page);
1512 + page_cache_release(dirty_page);
1513 + if (mapping) {
1514 + /*
1515 + * Some device drivers do not set page.mapping
1516 + * but still dirty their pages
1517 + */
1518 + balance_dirty_pages_ratelimited(mapping);
1519 + }
1520 + }
1521 +
1522 + /* file_update_time outside page_lock */
1523 + if (vma->vm_file)
1524 + file_update_time(vma->vm_file);
1525 }
1526 return ret;
1527 oom_free_new:
1528 page_cache_release(new_page);
1529 oom:
1530 - if (old_page)
1531 + if (old_page) {
1532 + if (page_mkwrite) {
1533 + unlock_page(old_page);
1534 + page_cache_release(old_page);
1535 + }
1536 page_cache_release(old_page);
1537 + }
1538 return VM_FAULT_OOM;
1539
1540 unwritable_page:
1541 page_cache_release(old_page);
1542 - return VM_FAULT_SIGBUS;
1543 + return ret;
1544 }
1545
1546 /*
1547 @@ -2472,25 +2518,25 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1548 * to become writable
1549 */
1550 if (vma->vm_ops->page_mkwrite) {
1551 + int tmp;
1552 +
1553 unlock_page(page);
1554 - if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
1555 - ret = VM_FAULT_SIGBUS;
1556 - anon = 1; /* no anon but release vmf.page */
1557 - goto out_unlocked;
1558 - }
1559 - lock_page(page);
1560 - /*
1561 - * XXX: this is not quite right (racy vs
1562 - * invalidate) to unlock and relock the page
1563 - * like this, however a better fix requires
1564 - * reworking page_mkwrite locking API, which
1565 - * is better done later.
1566 - */
1567 - if (!page->mapping) {
1568 - ret = 0;
1569 - anon = 1; /* no anon but release vmf.page */
1570 - goto out;
1571 + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1572 + tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
1573 + if (unlikely(tmp &
1574 + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
1575 + ret = tmp;
1576 + goto unwritable_page;
1577 }
1578 + if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
1579 + lock_page(page);
1580 + if (!page->mapping) {
1581 + ret = 0; /* retry the fault */
1582 + unlock_page(page);
1583 + goto unwritable_page;
1584 + }
1585 + } else
1586 + VM_BUG_ON(!PageLocked(page));
1587 page_mkwrite = 1;
1588 }
1589 }
1590 @@ -2547,19 +2593,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1591 pte_unmap_unlock(page_table, ptl);
1592
1593 out:
1594 - unlock_page(vmf.page);
1595 -out_unlocked:
1596 - if (anon)
1597 - page_cache_release(vmf.page);
1598 - else if (dirty_page) {
1599 - if (vma->vm_file)
1600 - file_update_time(vma->vm_file);
1601 + if (dirty_page) {
1602 + struct address_space *mapping = page->mapping;
1603
1604 - set_page_dirty_balance(dirty_page, page_mkwrite);
1605 + if (set_page_dirty(dirty_page))
1606 + page_mkwrite = 1;
1607 + unlock_page(dirty_page);
1608 put_page(dirty_page);
1609 + if (page_mkwrite && mapping) {
1610 + /*
1611 + * Some device drivers do not set page.mapping but still
1612 + * dirty their pages
1613 + */
1614 + balance_dirty_pages_ratelimited(mapping);
1615 + }
1616 +
1617 + /* file_update_time outside page_lock */
1618 + if (vma->vm_file)
1619 + file_update_time(vma->vm_file);
1620 + } else {
1621 + unlock_page(vmf.page);
1622 + if (anon)
1623 + page_cache_release(vmf.page);
1624 }
1625
1626 return ret;
1627 +
1628 +unwritable_page:
1629 + page_cache_release(page);
1630 + return ret;
1631 }
1632
1633 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,