Commit | Line | Data |
---|---|---|
82094b55 AF |
1 | From: Greg Kroah-Hartman <gregkh@suse.de> |
2 | Subject: Upstream 2.6.27.24 release from kernel.org | |
3 | ||
4 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
5 | ||
6 | diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking | |
7 | index 8362860..0a7c8a9 100644 | |
8 | --- a/Documentation/filesystems/Locking | |
9 | +++ b/Documentation/filesystems/Locking | |
10 | @@ -502,23 +502,31 @@ prototypes: | |
11 | void (*open)(struct vm_area_struct*); | |
12 | void (*close)(struct vm_area_struct*); | |
13 | int (*fault)(struct vm_area_struct*, struct vm_fault *); | |
14 | - int (*page_mkwrite)(struct vm_area_struct *, struct page *); | |
15 | + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); | |
16 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | |
17 | ||
18 | locking rules: | |
19 | BKL mmap_sem PageLocked(page) | |
20 | open: no yes | |
21 | close: no yes | |
22 | -fault: no yes | |
23 | -page_mkwrite: no yes no | |
24 | +fault: no yes can return with page locked | |
25 | +page_mkwrite: no yes can return with page locked | |
26 | access: no yes | |
27 | ||
28 | - ->page_mkwrite() is called when a previously read-only page is | |
29 | -about to become writeable. The file system is responsible for | |
30 | -protecting against truncate races. Once appropriate action has been | |
31 | -taking to lock out truncate, the page range should be verified to be | |
32 | -within i_size. The page mapping should also be checked that it is not | |
33 | -NULL. | |
34 | + ->fault() is called when a previously not present pte is about | |
35 | +to be faulted in. The filesystem must find and return the page associated | |
36 | +with the passed in "pgoff" in the vm_fault structure. If it is possible that | |
37 | +the page may be truncated and/or invalidated, then the filesystem must lock | |
38 | +the page, then ensure it is not already truncated (the page lock will block | |
39 | +subsequent truncate), and then return with VM_FAULT_LOCKED, and the page | |
40 | +locked. The VM will unlock the page. | |
41 | + | |
42 | + ->page_mkwrite() is called when a previously read-only pte is | |
43 | +about to become writeable. The filesystem again must ensure that there are | |
44 | +no truncate/invalidate races, and then return with the page locked. If | |
45 | +the page has been truncated, the filesystem should not look up a new page | |
46 | +like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which | |
47 | +will cause the VM to retry the fault. | |
48 | ||
49 | ->access() is called when get_user_pages() fails in | |
50 | acces_process_vm(), typically used to debug a process through | |
51 | diff --git a/Makefile b/Makefile | |
52 | index a5c7ae5..2b8138a 100644 | |
53 | --- a/Makefile | |
54 | +++ b/Makefile | |
55 | @@ -1,7 +1,7 @@ | |
56 | VERSION = 2 | |
57 | PATCHLEVEL = 6 | |
58 | SUBLEVEL = 27 | |
59 | -EXTRAVERSION = .23 | |
60 | +EXTRAVERSION = .24 | |
61 | NAME = Trembling Tortoise | |
62 | ||
63 | # *DOCUMENTATION* | |
64 | diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c | |
65 | index eb8f72c..0e034a4 100644 | |
66 | --- a/drivers/i2c/algos/i2c-algo-bit.c | |
67 | +++ b/drivers/i2c/algos/i2c-algo-bit.c | |
68 | @@ -104,7 +104,7 @@ static int sclhi(struct i2c_algo_bit_data *adap) | |
69 | * chips may hold it low ("clock stretching") while they | |
70 | * are processing data internally. | |
71 | */ | |
72 | - if (time_after_eq(jiffies, start + adap->timeout)) | |
73 | + if (time_after(jiffies, start + adap->timeout)) | |
74 | return -ETIMEDOUT; | |
75 | cond_resched(); | |
76 | } | |
77 | diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c | |
78 | index d50b329..2346a89 100644 | |
79 | --- a/drivers/i2c/algos/i2c-algo-pca.c | |
80 | +++ b/drivers/i2c/algos/i2c-algo-pca.c | |
81 | @@ -270,10 +270,21 @@ static int pca_xfer(struct i2c_adapter *i2c_adap, | |
82 | ||
83 | case 0x30: /* Data byte in I2CDAT has been transmitted; NOT ACK has been received */ | |
84 | DEB2("NOT ACK received after data byte\n"); | |
85 | + pca_stop(adap); | |
86 | goto out; | |
87 | ||
88 | case 0x38: /* Arbitration lost during SLA+W, SLA+R or data bytes */ | |
89 | DEB2("Arbitration lost\n"); | |
90 | + /* | |
91 | + * The PCA9564 data sheet (2006-09-01) says "A | |
92 | + * START condition will be transmitted when the | |
93 | + * bus becomes free (STOP or SCL and SDA high)" | |
94 | + * when the STA bit is set (p. 11). | |
95 | + * | |
96 | + * In case this won't work, try pca_reset() | |
97 | + * instead. | |
98 | + */ | |
99 | + pca_start(adap); | |
100 | goto out; | |
101 | ||
102 | case 0x58: /* Data byte has been received; NOT ACK has been returned */ | |
103 | diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c | |
104 | index 666b7ba..8c50857 100644 | |
105 | --- a/drivers/md/bitmap.c | |
106 | +++ b/drivers/md/bitmap.c | |
107 | @@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |
108 | oldindex = index; | |
109 | oldpage = page; | |
110 | ||
111 | + bitmap->filemap[bitmap->file_pages++] = page; | |
112 | + bitmap->last_page_size = count; | |
113 | + | |
114 | if (outofdate) { | |
115 | /* | |
116 | * if bitmap is out of date, dirty the | |
117 | @@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |
118 | write_page(bitmap, page, 1); | |
119 | ||
120 | ret = -EIO; | |
121 | - if (bitmap->flags & BITMAP_WRITE_ERROR) { | |
122 | - /* release, page not in filemap yet */ | |
123 | - put_page(page); | |
124 | + if (bitmap->flags & BITMAP_WRITE_ERROR) | |
125 | goto err; | |
126 | - } | |
127 | } | |
128 | - | |
129 | - bitmap->filemap[bitmap->file_pages++] = page; | |
130 | - bitmap->last_page_size = count; | |
131 | } | |
132 | paddr = kmap_atomic(page, KM_USER0); | |
133 | if (bitmap->flags & BITMAP_HOSTENDIAN) | |
134 | @@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |
135 | kunmap_atomic(paddr, KM_USER0); | |
136 | if (b) { | |
137 | /* if the disk bit is set, set the memory bit */ | |
138 | - bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), | |
139 | - ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) | |
140 | - ); | |
141 | + int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) | |
142 | + >= start); | |
143 | + bitmap_set_memory_bits(bitmap, | |
144 | + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), | |
145 | + needed); | |
146 | bit_cnt++; | |
147 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | |
148 | } | |
149 | @@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) | |
150 | spin_lock_irqsave(&bitmap->lock, flags); | |
151 | clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | |
152 | } | |
153 | - bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), | |
154 | - &blocks, 0); | |
155 | + bmc = bitmap_get_counter(bitmap, | |
156 | + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | |
157 | + &blocks, 0); | |
158 | if (bmc) { | |
159 | /* | |
160 | if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); | |
161 | @@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) | |
162 | } else if (*bmc == 1) { | |
163 | /* we can clear the bit */ | |
164 | *bmc = 0; | |
165 | - bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), | |
166 | + bitmap_count_page(bitmap, | |
167 | + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | |
168 | -1); | |
169 | ||
170 | /* clear the bit */ | |
171 | @@ -1485,7 +1486,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) | |
172 | unsigned long chunk; | |
173 | ||
174 | for (chunk = s; chunk <= e; chunk++) { | |
175 | - sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); | |
176 | + sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); | |
177 | bitmap_set_memory_bits(bitmap, sec, 1); | |
178 | bitmap_file_set_bit(bitmap, sec); | |
179 | } | |
180 | diff --git a/drivers/md/md.c b/drivers/md/md.c | |
181 | index 60f3e59..ebbc3bb 100644 | |
182 | --- a/drivers/md/md.c | |
183 | +++ b/drivers/md/md.c | |
184 | @@ -2772,11 +2772,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |
185 | } else | |
186 | err = -EBUSY; | |
187 | spin_unlock_irq(&mddev->write_lock); | |
188 | - } else { | |
189 | - mddev->ro = 0; | |
190 | - mddev->recovery_cp = MaxSector; | |
191 | - err = do_md_run(mddev); | |
192 | - } | |
193 | + } else | |
194 | + err = -EINVAL; | |
195 | break; | |
196 | case active: | |
197 | if (mddev->pers) { | |
198 | diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c | |
199 | index dc50f98..b08dd95 100644 | |
200 | --- a/drivers/md/raid10.c | |
201 | +++ b/drivers/md/raid10.c | |
202 | @@ -1805,17 +1805,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |
203 | r10_bio->sector = sect; | |
204 | ||
205 | raid10_find_phys(conf, r10_bio); | |
206 | - /* Need to check if this section will still be | |
207 | + | |
208 | + /* Need to check if the array will still be | |
209 | * degraded | |
210 | */ | |
211 | - for (j=0; j<conf->copies;j++) { | |
212 | - int d = r10_bio->devs[j].devnum; | |
213 | - if (conf->mirrors[d].rdev == NULL || | |
214 | - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { | |
215 | + for (j=0; j<conf->raid_disks; j++) | |
216 | + if (conf->mirrors[j].rdev == NULL || | |
217 | + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { | |
218 | still_degraded = 1; | |
219 | break; | |
220 | } | |
221 | - } | |
222 | + | |
223 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | |
224 | &sync_blocks, still_degraded); | |
225 | ||
226 | diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c | |
227 | index b70c531..a6e730f 100644 | |
228 | --- a/drivers/net/ehea/ehea_main.c | |
229 | +++ b/drivers/net/ehea/ehea_main.c | |
230 | @@ -529,14 +529,17 @@ static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, | |
231 | x &= (arr_len - 1); | |
232 | ||
233 | pref = skb_array[x]; | |
234 | - prefetchw(pref); | |
235 | - prefetchw(pref + EHEA_CACHE_LINE); | |
236 | - | |
237 | - pref = (skb_array[x]->data); | |
238 | - prefetch(pref); | |
239 | - prefetch(pref + EHEA_CACHE_LINE); | |
240 | - prefetch(pref + EHEA_CACHE_LINE * 2); | |
241 | - prefetch(pref + EHEA_CACHE_LINE * 3); | |
242 | + if (pref) { | |
243 | + prefetchw(pref); | |
244 | + prefetchw(pref + EHEA_CACHE_LINE); | |
245 | + | |
246 | + pref = (skb_array[x]->data); | |
247 | + prefetch(pref); | |
248 | + prefetch(pref + EHEA_CACHE_LINE); | |
249 | + prefetch(pref + EHEA_CACHE_LINE * 2); | |
250 | + prefetch(pref + EHEA_CACHE_LINE * 3); | |
251 | + } | |
252 | + | |
253 | skb = skb_array[skb_index]; | |
254 | skb_array[skb_index] = NULL; | |
255 | return skb; | |
256 | @@ -553,12 +556,14 @@ static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array, | |
257 | x &= (arr_len - 1); | |
258 | ||
259 | pref = skb_array[x]; | |
260 | - prefetchw(pref); | |
261 | - prefetchw(pref + EHEA_CACHE_LINE); | |
262 | + if (pref) { | |
263 | + prefetchw(pref); | |
264 | + prefetchw(pref + EHEA_CACHE_LINE); | |
265 | ||
266 | - pref = (skb_array[x]->data); | |
267 | - prefetchw(pref); | |
268 | - prefetchw(pref + EHEA_CACHE_LINE); | |
269 | + pref = (skb_array[x]->data); | |
270 | + prefetchw(pref); | |
271 | + prefetchw(pref + EHEA_CACHE_LINE); | |
272 | + } | |
273 | ||
274 | skb = skb_array[wqe_index]; | |
275 | skb_array[wqe_index] = NULL; | |
276 | diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c | |
277 | index 3612607..32e7acb 100644 | |
278 | --- a/drivers/serial/mpc52xx_uart.c | |
279 | +++ b/drivers/serial/mpc52xx_uart.c | |
280 | @@ -515,7 +515,7 @@ mpc52xx_uart_startup(struct uart_port *port) | |
281 | ||
282 | /* Request IRQ */ | |
283 | ret = request_irq(port->irq, mpc52xx_uart_int, | |
284 | - IRQF_DISABLED | IRQF_SAMPLE_RANDOM | IRQF_SHARED, | |
285 | + IRQF_DISABLED | IRQF_SAMPLE_RANDOM, | |
286 | "mpc52xx_psc_uart", port); | |
287 | if (ret) | |
288 | return ret; | |
289 | diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c | |
290 | index 4154be3..58c4d37 100644 | |
291 | --- a/drivers/usb/gadget/usbstring.c | |
292 | +++ b/drivers/usb/gadget/usbstring.c | |
293 | @@ -38,7 +38,7 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) | |
294 | uchar = (c & 0x1f) << 6; | |
295 | ||
296 | c = (u8) *s++; | |
297 | - if ((c & 0xc0) != 0xc0) | |
298 | + if ((c & 0xc0) != 0x80) | |
299 | goto fail; | |
300 | c &= 0x3f; | |
301 | uchar |= c; | |
302 | @@ -49,13 +49,13 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) | |
303 | uchar = (c & 0x0f) << 12; | |
304 | ||
305 | c = (u8) *s++; | |
306 | - if ((c & 0xc0) != 0xc0) | |
307 | + if ((c & 0xc0) != 0x80) | |
308 | goto fail; | |
309 | c &= 0x3f; | |
310 | uchar |= c << 6; | |
311 | ||
312 | c = (u8) *s++; | |
313 | - if ((c & 0xc0) != 0xc0) | |
314 | + if ((c & 0xc0) != 0x80) | |
315 | goto fail; | |
316 | c &= 0x3f; | |
317 | uchar |= c; | |
318 | diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c | |
319 | index 4835bdc..d1c3cba 100644 | |
320 | --- a/drivers/video/fb_defio.c | |
321 | +++ b/drivers/video/fb_defio.c | |
322 | @@ -70,8 +70,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); | |
323 | ||
324 | /* vm_ops->page_mkwrite handler */ | |
325 | static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, | |
326 | - struct page *page) | |
327 | + struct vm_fault *vmf) | |
328 | { | |
329 | + struct page *page = vmf->page; | |
330 | struct fb_info *info = vma->vm_private_data; | |
331 | struct fb_deferred_io *fbdefio = info->fbdefio; | |
332 | struct page *cur; | |
333 | diff --git a/fs/buffer.c b/fs/buffer.c | |
334 | index a5d806d..abe9640 100644 | |
335 | --- a/fs/buffer.c | |
336 | +++ b/fs/buffer.c | |
337 | @@ -2402,20 +2402,22 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) | |
338 | * unlock the page. | |
339 | */ | |
340 | int | |
341 | -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, | |
342 | +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |
343 | get_block_t get_block) | |
344 | { | |
345 | + struct page *page = vmf->page; | |
346 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
347 | unsigned long end; | |
348 | loff_t size; | |
349 | - int ret = -EINVAL; | |
350 | + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | |
351 | ||
352 | lock_page(page); | |
353 | size = i_size_read(inode); | |
354 | if ((page->mapping != inode->i_mapping) || | |
355 | (page_offset(page) > size)) { | |
356 | /* page got truncated out from underneath us */ | |
357 | - goto out_unlock; | |
358 | + unlock_page(page); | |
359 | + goto out; | |
360 | } | |
361 | ||
362 | /* page is wholly or partially inside EOF */ | |
363 | @@ -2428,8 +2430,16 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page, | |
364 | if (!ret) | |
365 | ret = block_commit_write(page, 0, end); | |
366 | ||
367 | -out_unlock: | |
368 | - unlock_page(page); | |
369 | + if (unlikely(ret)) { | |
370 | + unlock_page(page); | |
371 | + if (ret == -ENOMEM) | |
372 | + ret = VM_FAULT_OOM; | |
373 | + else /* -ENOSPC, -EIO, etc */ | |
374 | + ret = VM_FAULT_SIGBUS; | |
375 | + } else | |
376 | + ret = VM_FAULT_LOCKED; | |
377 | + | |
378 | +out: | |
379 | return ret; | |
380 | } | |
381 | ||
382 | diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h | |
383 | index 14eb9a2..604ce8a 100644 | |
384 | --- a/fs/cifs/cifs_unicode.h | |
385 | +++ b/fs/cifs/cifs_unicode.h | |
386 | @@ -64,6 +64,13 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); | |
387 | #endif | |
388 | ||
389 | /* | |
390 | + * To be safe when converting UCS to UTF-8: strings loaded with the rare | |
391 | + * long characters need a larger allocation to account for such multibyte | |
392 | + * target UTF-8 characters. | |
393 | + */ | |
394 | +#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2) | |
395 | + | |
396 | +/* | |
397 | * UniStrcat: Concatenate the second string to the first | |
398 | * | |
399 | * Returns: | |
400 | diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c | |
401 | index 9231e0a..cff0c53 100644 | |
402 | --- a/fs/cifs/cifssmb.c | |
403 | +++ b/fs/cifs/cifssmb.c | |
404 | @@ -91,23 +91,22 @@ static int | |
405 | cifs_strncpy_to_host(char **dst, const char *src, const int maxlen, | |
406 | const bool is_unicode, const struct nls_table *nls_codepage) | |
407 | { | |
408 | - int plen; | |
409 | + int src_len, dst_len; | |
410 | ||
411 | if (is_unicode) { | |
412 | - plen = UniStrnlen((wchar_t *)src, maxlen); | |
413 | - *dst = kmalloc(plen + 2, GFP_KERNEL); | |
414 | + src_len = UniStrnlen((wchar_t *)src, maxlen); | |
415 | + *dst = kmalloc((4 * src_len) + 2, GFP_KERNEL); | |
416 | if (!*dst) | |
417 | goto cifs_strncpy_to_host_ErrExit; | |
418 | - cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage); | |
419 | + dst_len = cifs_strfromUCS_le(*dst, (__le16 *)src, src_len, nls_codepage); | |
420 | + (*dst)[dst_len + 1] = 0; | |
421 | } else { | |
422 | - plen = strnlen(src, maxlen); | |
423 | - *dst = kmalloc(plen + 2, GFP_KERNEL); | |
424 | + src_len = strnlen(src, maxlen); | |
425 | + *dst = kmalloc(src_len + 1, GFP_KERNEL); | |
426 | if (!*dst) | |
427 | goto cifs_strncpy_to_host_ErrExit; | |
428 | - strncpy(*dst, src, plen); | |
429 | + strlcpy(*dst, src, src_len + 1); | |
430 | } | |
431 | - (*dst)[plen] = 0; | |
432 | - (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */ | |
433 | return 0; | |
434 | ||
435 | cifs_strncpy_to_host_ErrExit: | |
436 | diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c | |
437 | index 21a1abf..d059b3f 100644 | |
438 | --- a/fs/cifs/connect.c | |
439 | +++ b/fs/cifs/connect.c | |
440 | @@ -3549,16 +3549,12 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |
441 | BCC(smb_buffer_response)) { | |
442 | kfree(tcon->nativeFileSystem); | |
443 | tcon->nativeFileSystem = | |
444 | - kzalloc(2*(length + 1), GFP_KERNEL); | |
445 | + kzalloc((4 * length) + 2, GFP_KERNEL); | |
446 | if (tcon->nativeFileSystem) | |
447 | cifs_strfromUCS_le( | |
448 | tcon->nativeFileSystem, | |
449 | (__le16 *) bcc_ptr, | |
450 | length, nls_codepage); | |
451 | - bcc_ptr += 2 * length; | |
452 | - bcc_ptr[0] = 0; /* null terminate the string */ | |
453 | - bcc_ptr[1] = 0; | |
454 | - bcc_ptr += 2; | |
455 | } | |
456 | /* else do not bother copying these information fields*/ | |
457 | } else { | |
458 | diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c | |
459 | index b891553..6205593 100644 | |
460 | --- a/fs/cifs/misc.c | |
461 | +++ b/fs/cifs/misc.c | |
462 | @@ -685,14 +685,15 @@ cifs_convertUCSpath(char *target, const __le16 *source, int maxlen, | |
463 | NLS_MAX_CHARSET_SIZE); | |
464 | if (len > 0) { | |
465 | j += len; | |
466 | - continue; | |
467 | + goto overrun_chk; | |
468 | } else { | |
469 | target[j] = '?'; | |
470 | } | |
471 | } | |
472 | j++; | |
473 | /* make sure we do not overrun callers allocated temp buffer */ | |
474 | - if (j >= (2 * NAME_MAX)) | |
475 | +overrun_chk: | |
476 | + if (j >= UNICODE_NAME_MAX) | |
477 | break; | |
478 | } | |
479 | cUCS_out: | |
480 | diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c | |
481 | index 58d5729..2878892 100644 | |
482 | --- a/fs/cifs/readdir.c | |
483 | +++ b/fs/cifs/readdir.c | |
484 | @@ -1075,7 +1075,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |
485 | with the rare long characters alloc more to account for | |
486 | such multibyte target UTF-8 characters. cifs_unicode.c, | |
487 | which actually does the conversion, has the same limit */ | |
488 | - tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL); | |
489 | + tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); | |
490 | for (i = 0; (i < num_to_fill) && (rc == 0); i++) { | |
491 | if (current_entry == NULL) { | |
492 | /* evaluate whether this case is an error */ | |
493 | diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c | |
494 | index 89fac77..3890cc2 100644 | |
495 | --- a/fs/cifs/sess.c | |
496 | +++ b/fs/cifs/sess.c | |
497 | @@ -202,27 +202,26 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, | |
498 | int words_left, len; | |
499 | char *data = *pbcc_area; | |
500 | ||
501 | - | |
502 | - | |
503 | cFYI(1, ("bleft %d", bleft)); | |
504 | ||
505 | - | |
506 | - /* SMB header is unaligned, so cifs servers word align start of | |
507 | - Unicode strings */ | |
508 | - data++; | |
509 | - bleft--; /* Windows servers do not always double null terminate | |
510 | - their final Unicode string - in which case we | |
511 | - now will not attempt to decode the byte of junk | |
512 | - which follows it */ | |
513 | + /* | |
514 | + * Windows servers do not always double null terminate their final | |
515 | + * Unicode string. Check to see if there is an odd number of bytes | |
516 | + * left. If so, then add an extra NUL pad byte to the end of the | |
517 | + * response. | |
518 | + * | |
519 | + * See section 2.7.2 in "Implementing CIFS" for details | |
520 | + */ | |
521 | + if (bleft % 2) { | |
522 | + data[bleft] = 0; | |
523 | + ++bleft; | |
524 | + } | |
525 | ||
526 | words_left = bleft / 2; | |
527 | ||
528 | /* save off server operating system */ | |
529 | len = UniStrnlen((wchar_t *) data, words_left); | |
530 | ||
531 | -/* We look for obvious messed up bcc or strings in response so we do not go off | |
532 | - the end since (at least) WIN2K and Windows XP have a major bug in not null | |
533 | - terminating last Unicode string in response */ | |
534 | if (len >= words_left) | |
535 | return rc; | |
536 | ||
537 | @@ -260,13 +259,10 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, | |
538 | return rc; | |
539 | ||
540 | kfree(ses->serverDomain); | |
541 | - ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ | |
542 | - if (ses->serverDomain != NULL) { | |
543 | + ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL); | |
544 | + if (ses->serverDomain != NULL) | |
545 | cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, | |
546 | nls_cp); | |
547 | - ses->serverDomain[2*len] = 0; | |
548 | - ses->serverDomain[(2*len) + 1] = 0; | |
549 | - } | |
550 | data += 2 * (len + 1); | |
551 | words_left -= len + 1; | |
552 | ||
553 | @@ -616,12 +612,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |
554 | } | |
555 | ||
556 | /* BB check if Unicode and decode strings */ | |
557 | - if (smb_buf->Flags2 & SMBFLG2_UNICODE) | |
558 | + if (smb_buf->Flags2 & SMBFLG2_UNICODE) { | |
559 | + /* unicode string area must be word-aligned */ | |
560 | + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { | |
561 | + ++bcc_ptr; | |
562 | + --bytes_remaining; | |
563 | + } | |
564 | rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, | |
565 | - ses, nls_cp); | |
566 | - else | |
567 | + ses, nls_cp); | |
568 | + } else { | |
569 | rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, | |
570 | ses, nls_cp); | |
571 | + } | |
572 | ||
573 | ssetup_exit: | |
574 | if (spnego_key) | |
575 | diff --git a/fs/eventpoll.c b/fs/eventpoll.c | |
576 | index 801de2c..fd5835b 100644 | |
577 | --- a/fs/eventpoll.c | |
578 | +++ b/fs/eventpoll.c | |
579 | @@ -1132,7 +1132,7 @@ error_return: | |
580 | ||
581 | SYSCALL_DEFINE1(epoll_create, int, size) | |
582 | { | |
583 | - if (size < 0) | |
584 | + if (size <= 0) | |
585 | return -EINVAL; | |
586 | ||
587 | return sys_epoll_create1(0); | |
588 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h | |
589 | index f613d57..eadbee3 100644 | |
590 | --- a/fs/ext4/ext4.h | |
591 | +++ b/fs/ext4/ext4.h | |
592 | @@ -1084,7 +1084,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | |
593 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | |
594 | extern int ext4_block_truncate_page(handle_t *handle, | |
595 | struct address_space *mapping, loff_t from); | |
596 | -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | |
597 | +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | |
598 | ||
599 | /* ioctl.c */ | |
600 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | |
601 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c | |
602 | index b233ade..63b911b 100644 | |
603 | --- a/fs/ext4/inode.c | |
604 | +++ b/fs/ext4/inode.c | |
605 | @@ -4861,8 +4861,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | |
606 | return !buffer_mapped(bh); | |
607 | } | |
608 | ||
609 | -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
610 | +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |
611 | { | |
612 | + struct page *page = vmf->page; | |
613 | loff_t size; | |
614 | unsigned long len; | |
615 | int ret = -EINVAL; | |
616 | @@ -4913,6 +4914,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
617 | goto out_unlock; | |
618 | ret = 0; | |
619 | out_unlock: | |
620 | + if (ret) | |
621 | + ret = VM_FAULT_SIGBUS; | |
622 | up_read(&inode->i_alloc_sem); | |
623 | return ret; | |
624 | } | |
625 | diff --git a/fs/fcntl.c b/fs/fcntl.c | |
626 | index 08a109b..ac79b7e 100644 | |
627 | --- a/fs/fcntl.c | |
628 | +++ b/fs/fcntl.c | |
629 | @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | |
630 | { | |
631 | if (unlikely(newfd == oldfd)) { /* corner case */ | |
632 | struct files_struct *files = current->files; | |
633 | + int retval = oldfd; | |
634 | + | |
635 | rcu_read_lock(); | |
636 | if (!fcheck_files(files, oldfd)) | |
637 | - oldfd = -EBADF; | |
638 | + retval = -EBADF; | |
639 | rcu_read_unlock(); | |
640 | - return oldfd; | |
641 | + return retval; | |
642 | } | |
643 | return sys_dup3(oldfd, newfd, 0); | |
644 | } | |
645 | diff --git a/fs/fuse/file.c b/fs/fuse/file.c | |
646 | index 3ada9d7..0c92f15 100644 | |
647 | --- a/fs/fuse/file.c | |
648 | +++ b/fs/fuse/file.c | |
649 | @@ -1219,8 +1219,9 @@ static void fuse_vma_close(struct vm_area_struct *vma) | |
650 | * - sync(2) | |
651 | * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER | |
652 | */ | |
653 | -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
654 | +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |
655 | { | |
656 | + struct page *page = vmf->page; | |
657 | /* | |
658 | * Don't use page->mapping as it may become NULL from a | |
659 | * concurrent truncate. | |
660 | diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c | |
661 | index e9a366d..641c43b 100644 | |
662 | --- a/fs/gfs2/ops_file.c | |
663 | +++ b/fs/gfs2/ops_file.c | |
664 | @@ -338,8 +338,9 @@ static int gfs2_allocate_page_backing(struct page *page) | |
665 | * blocks allocated on disk to back that page. | |
666 | */ | |
667 | ||
668 | -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
669 | +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |
670 | { | |
671 | + struct page *page = vmf->page; | |
672 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
673 | struct gfs2_inode *ip = GFS2_I(inode); | |
674 | struct gfs2_sbd *sdp = GFS2_SB(inode); | |
675 | @@ -411,6 +412,10 @@ out_unlock: | |
676 | gfs2_glock_dq(&gh); | |
677 | out: | |
678 | gfs2_holder_uninit(&gh); | |
679 | + if (ret == -ENOMEM) | |
680 | + ret = VM_FAULT_OOM; | |
681 | + else if (ret) | |
682 | + ret = VM_FAULT_SIGBUS; | |
683 | return ret; | |
684 | } | |
685 | ||
686 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c | |
687 | index 74f92b7..bff8733 100644 | |
688 | --- a/fs/nfs/dir.c | |
689 | +++ b/fs/nfs/dir.c | |
690 | @@ -1613,8 +1613,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |
691 | } else if (atomic_read(&new_dentry->d_count) > 1) | |
692 | /* dentry still busy? */ | |
693 | goto out; | |
694 | - } else | |
695 | - nfs_drop_nlink(new_inode); | |
696 | + } | |
697 | ||
698 | go_ahead: | |
699 | /* | |
700 | @@ -1627,10 +1626,8 @@ go_ahead: | |
701 | } | |
702 | nfs_inode_return_delegation(old_inode); | |
703 | ||
704 | - if (new_inode != NULL) { | |
705 | + if (new_inode != NULL) | |
706 | nfs_inode_return_delegation(new_inode); | |
707 | - d_delete(new_dentry); | |
708 | - } | |
709 | ||
710 | error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, | |
711 | new_dir, &new_dentry->d_name); | |
712 | @@ -1639,6 +1636,8 @@ out: | |
713 | if (rehash) | |
714 | d_rehash(rehash); | |
715 | if (!error) { | |
716 | + if (new_inode != NULL) | |
717 | + nfs_drop_nlink(new_inode); | |
718 | d_move(old_dentry, new_dentry); | |
719 | nfs_set_verifier(new_dentry, | |
720 | nfs_save_change_attribute(new_dir)); | |
721 | diff --git a/fs/nfs/file.c b/fs/nfs/file.c | |
722 | index 30541f0..4a57a0f 100644 | |
723 | --- a/fs/nfs/file.c | |
724 | +++ b/fs/nfs/file.c | |
725 | @@ -448,8 +448,9 @@ const struct address_space_operations nfs_file_aops = { | |
726 | .launder_page = nfs_launder_page, | |
727 | }; | |
728 | ||
729 | -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
730 | +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |
731 | { | |
732 | + struct page *page = vmf->page; | |
733 | struct file *filp = vma->vm_file; | |
734 | struct dentry *dentry = filp->f_path.dentry; | |
735 | unsigned pagelen; | |
736 | @@ -476,11 +477,11 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
737 | goto out_unlock; | |
738 | ||
739 | ret = nfs_updatepage(filp, page, 0, pagelen); | |
740 | - if (ret == 0) | |
741 | - ret = pagelen; | |
742 | out_unlock: | |
743 | + if (!ret) | |
744 | + return VM_FAULT_LOCKED; | |
745 | unlock_page(page); | |
746 | - return ret; | |
747 | + return VM_FAULT_SIGBUS; | |
748 | } | |
749 | ||
750 | static struct vm_operations_struct nfs_file_vm_ops = { | |
751 | diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c | |
752 | index b0b07df..abffc90 100644 | |
753 | --- a/fs/nfsd/nfs4xdr.c | |
754 | +++ b/fs/nfsd/nfs4xdr.c | |
755 | @@ -1833,6 +1833,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |
756 | dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); | |
757 | if (IS_ERR(dentry)) | |
758 | return nfserrno(PTR_ERR(dentry)); | |
759 | + if (!dentry->d_inode) { | |
760 | + /* | |
761 | + * nfsd_buffered_readdir drops the i_mutex between | |
762 | + * readdir and calling this callback, leaving a window | |
763 | + * where this directory entry could have gone away. | |
764 | + */ | |
765 | + dput(dentry); | |
766 | + return nfserr_noent; | |
767 | + } | |
768 | ||
769 | exp_get(exp); | |
770 | /* | |
771 | @@ -1895,6 +1904,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |
772 | struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); | |
773 | int buflen; | |
774 | __be32 *p = cd->buffer; | |
775 | + __be32 *cookiep; | |
776 | __be32 nfserr = nfserr_toosmall; | |
777 | ||
778 | /* In nfsv4, "." and ".." never make it onto the wire.. */ | |
779 | @@ -1911,7 +1921,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |
780 | goto fail; | |
781 | ||
782 | *p++ = xdr_one; /* mark entry present */ | |
783 | - cd->offset = p; /* remember pointer */ | |
784 | + cookiep = p; | |
785 | p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ | |
786 | p = xdr_encode_array(p, name, namlen); /* name length & name */ | |
787 | ||
788 | @@ -1925,6 +1935,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |
789 | goto fail; | |
790 | case nfserr_dropit: | |
791 | goto fail; | |
792 | + case nfserr_noent: | |
793 | + goto skip_entry; | |
794 | default: | |
795 | /* | |
796 | * If the client requested the RDATTR_ERROR attribute, | |
797 | @@ -1943,6 +1955,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |
798 | } | |
799 | cd->buflen -= (p - cd->buffer); | |
800 | cd->buffer = p; | |
801 | + cd->offset = cookiep; | |
802 | +skip_entry: | |
803 | cd->common.err = nfs_ok; | |
804 | return 0; | |
805 | fail: | |
806 | diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c | |
807 | index 6f7ea0a..08af0ed 100644 | |
808 | --- a/fs/ocfs2/file.c | |
809 | +++ b/fs/ocfs2/file.c | |
810 | @@ -2075,6 +2075,22 @@ out_sems: | |
811 | return written ? written : ret; | |
812 | } | |
813 | ||
814 | +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | |
815 | + struct file *out, | |
816 | + struct splice_desc *sd) | |
817 | +{ | |
818 | + int ret; | |
819 | + | |
820 | + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | |
821 | + sd->total_len, 0, NULL); | |
822 | + if (ret < 0) { | |
823 | + mlog_errno(ret); | |
824 | + return ret; | |
825 | + } | |
826 | + | |
827 | + return splice_from_pipe_feed(pipe, sd, pipe_to_file); | |
828 | +} | |
829 | + | |
830 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |
831 | struct file *out, | |
832 | loff_t *ppos, | |
833 | @@ -2082,38 +2098,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |
834 | unsigned int flags) | |
835 | { | |
836 | int ret; | |
837 | - struct inode *inode = out->f_path.dentry->d_inode; | |
838 | + struct address_space *mapping = out->f_mapping; | |
839 | + struct inode *inode = mapping->host; | |
840 | + struct splice_desc sd = { | |
841 | + .total_len = len, | |
842 | + .flags = flags, | |
843 | + .pos = *ppos, | |
844 | + .u.file = out, | |
845 | + }; | |
846 | ||
847 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, | |
848 | (unsigned int)len, | |
849 | out->f_path.dentry->d_name.len, | |
850 | out->f_path.dentry->d_name.name); | |
851 | ||
852 | - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | |
853 | + if (pipe->inode) | |
854 | + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | |
855 | ||
856 | - ret = ocfs2_rw_lock(inode, 1); | |
857 | - if (ret < 0) { | |
858 | - mlog_errno(ret); | |
859 | - goto out; | |
860 | - } | |
861 | + splice_from_pipe_begin(&sd); | |
862 | + do { | |
863 | + ret = splice_from_pipe_next(pipe, &sd); | |
864 | + if (ret <= 0) | |
865 | + break; | |
866 | ||
867 | - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, | |
868 | - NULL); | |
869 | - if (ret < 0) { | |
870 | - mlog_errno(ret); | |
871 | - goto out_unlock; | |
872 | - } | |
873 | + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | |
874 | + ret = ocfs2_rw_lock(inode, 1); | |
875 | + if (ret < 0) | |
876 | + mlog_errno(ret); | |
877 | + else { | |
878 | + ret = ocfs2_splice_to_file(pipe, out, &sd); | |
879 | + ocfs2_rw_unlock(inode, 1); | |
880 | + } | |
881 | + mutex_unlock(&inode->i_mutex); | |
882 | + } while (ret > 0); | |
883 | + splice_from_pipe_end(pipe, &sd); | |
884 | ||
885 | if (pipe->inode) | |
886 | - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | |
887 | - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); | |
888 | - if (pipe->inode) | |
889 | mutex_unlock(&pipe->inode->i_mutex); | |
890 | ||
891 | -out_unlock: | |
892 | - ocfs2_rw_unlock(inode, 1); | |
893 | -out: | |
894 | - mutex_unlock(&inode->i_mutex); | |
895 | + if (sd.num_spliced) | |
896 | + ret = sd.num_spliced; | |
897 | + | |
898 | + if (ret > 0) { | |
899 | + unsigned long nr_pages; | |
900 | + | |
901 | + *ppos += ret; | |
902 | + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | |
903 | + | |
904 | + /* | |
905 | + * If file or inode is SYNC and we actually wrote some data, | |
906 | + * sync it. | |
907 | + */ | |
908 | + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | |
909 | + int err; | |
910 | + | |
911 | + mutex_lock(&inode->i_mutex); | |
912 | + err = ocfs2_rw_lock(inode, 1); | |
913 | + if (err < 0) { | |
914 | + mlog_errno(err); | |
915 | + } else { | |
916 | + err = generic_osync_inode(inode, mapping, | |
917 | + OSYNC_METADATA|OSYNC_DATA); | |
918 | + ocfs2_rw_unlock(inode, 1); | |
919 | + } | |
920 | + mutex_unlock(&inode->i_mutex); | |
921 | + | |
922 | + if (err) | |
923 | + ret = err; | |
924 | + } | |
925 | + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | |
926 | + } | |
927 | ||
928 | mlog_exit(ret); | |
929 | return ret; | |
930 | diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c | |
931 | index 3dc18d6..2383cbd 100644 | |
932 | --- a/fs/ocfs2/mmap.c | |
933 | +++ b/fs/ocfs2/mmap.c | |
934 | @@ -150,8 +150,9 @@ out: | |
935 | return ret; | |
936 | } | |
937 | ||
938 | -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
939 | +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |
940 | { | |
941 | + struct page *page = vmf->page; | |
942 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
943 | struct buffer_head *di_bh = NULL; | |
944 | sigset_t blocked, oldset; | |
945 | @@ -192,7 +193,8 @@ out: | |
946 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | |
947 | if (ret2 < 0) | |
948 | mlog_errno(ret2); | |
949 | - | |
950 | + if (ret) | |
951 | + ret = VM_FAULT_SIGBUS; | |
952 | return ret; | |
953 | } | |
954 | ||
955 | diff --git a/fs/splice.c b/fs/splice.c | |
956 | index aea1eb4..2f2d8c1 100644 | |
957 | --- a/fs/splice.c | |
958 | +++ b/fs/splice.c | |
959 | @@ -553,8 +553,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, | |
960 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | |
961 | * a new page in the output file page cache and fill/dirty that. | |
962 | */ | |
963 | -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |
964 | - struct splice_desc *sd) | |
965 | +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |
966 | + struct splice_desc *sd) | |
967 | { | |
968 | struct file *file = sd->u.file; | |
969 | struct address_space *mapping = file->f_mapping; | |
970 | @@ -598,108 +598,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |
971 | out: | |
972 | return ret; | |
973 | } | |
974 | +EXPORT_SYMBOL(pipe_to_file); | |
975 | + | |
976 | +static void wakeup_pipe_writers(struct pipe_inode_info *pipe) | |
977 | +{ | |
978 | + smp_mb(); | |
979 | + if (waitqueue_active(&pipe->wait)) | |
980 | + wake_up_interruptible(&pipe->wait); | |
981 | + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | |
982 | +} | |
983 | ||
984 | /** | |
985 | - * __splice_from_pipe - splice data from a pipe to given actor | |
986 | + * splice_from_pipe_feed - feed available data from a pipe to a file | |
987 | * @pipe: pipe to splice from | |
988 | * @sd: information to @actor | |
989 | * @actor: handler that splices the data | |
990 | * | |
991 | * Description: | |
992 | - * This function does little more than loop over the pipe and call | |
993 | - * @actor to do the actual moving of a single struct pipe_buffer to | |
994 | - * the desired destination. See pipe_to_file, pipe_to_sendpage, or | |
995 | - * pipe_to_user. | |
996 | + * | |
997 | + * This function loops over the pipe and calls @actor to do the | |
998 | + * actual moving of a single struct pipe_buffer to the desired | |
999 | + * destination. It returns when there's no more buffers left in | |
1000 | + * the pipe or if the requested number of bytes (@sd->total_len) | |
1001 | + * have been copied. It returns a positive number (one) if the | |
1002 | + * pipe needs to be filled with more data, zero if the required | |
1003 | + * number of bytes have been copied and -errno on error. | |
1004 | * | |
1005 | + * This, together with splice_from_pipe_{begin,end,next}, may be | |
1006 | + * used to implement the functionality of __splice_from_pipe() when | |
1007 | + * locking is required around copying the pipe buffers to the | |
1008 | + * destination. | |
1009 | */ | |
1010 | -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, | |
1011 | - splice_actor *actor) | |
1012 | +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |
1013 | + splice_actor *actor) | |
1014 | { | |
1015 | - int ret, do_wakeup, err; | |
1016 | - | |
1017 | - ret = 0; | |
1018 | - do_wakeup = 0; | |
1019 | - | |
1020 | - for (;;) { | |
1021 | - if (pipe->nrbufs) { | |
1022 | - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | |
1023 | - const struct pipe_buf_operations *ops = buf->ops; | |
1024 | + int ret; | |
1025 | ||
1026 | - sd->len = buf->len; | |
1027 | - if (sd->len > sd->total_len) | |
1028 | - sd->len = sd->total_len; | |
1029 | + while (pipe->nrbufs) { | |
1030 | + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | |
1031 | + const struct pipe_buf_operations *ops = buf->ops; | |
1032 | ||
1033 | - err = actor(pipe, buf, sd); | |
1034 | - if (err <= 0) { | |
1035 | - if (!ret && err != -ENODATA) | |
1036 | - ret = err; | |
1037 | + sd->len = buf->len; | |
1038 | + if (sd->len > sd->total_len) | |
1039 | + sd->len = sd->total_len; | |
1040 | ||
1041 | - break; | |
1042 | - } | |
1043 | + ret = actor(pipe, buf, sd); | |
1044 | + if (ret <= 0) { | |
1045 | + if (ret == -ENODATA) | |
1046 | + ret = 0; | |
1047 | + return ret; | |
1048 | + } | |
1049 | + buf->offset += ret; | |
1050 | + buf->len -= ret; | |
1051 | ||
1052 | - ret += err; | |
1053 | - buf->offset += err; | |
1054 | - buf->len -= err; | |
1055 | + sd->num_spliced += ret; | |
1056 | + sd->len -= ret; | |
1057 | + sd->pos += ret; | |
1058 | + sd->total_len -= ret; | |
1059 | ||
1060 | - sd->len -= err; | |
1061 | - sd->pos += err; | |
1062 | - sd->total_len -= err; | |
1063 | - if (sd->len) | |
1064 | - continue; | |
1065 | + if (!buf->len) { | |
1066 | + buf->ops = NULL; | |
1067 | + ops->release(pipe, buf); | |
1068 | + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | |
1069 | + pipe->nrbufs--; | |
1070 | + if (pipe->inode) | |
1071 | + sd->need_wakeup = true; | |
1072 | + } | |
1073 | ||
1074 | - if (!buf->len) { | |
1075 | - buf->ops = NULL; | |
1076 | - ops->release(pipe, buf); | |
1077 | - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | |
1078 | - pipe->nrbufs--; | |
1079 | - if (pipe->inode) | |
1080 | - do_wakeup = 1; | |
1081 | - } | |
1082 | + if (!sd->total_len) | |
1083 | + return 0; | |
1084 | + } | |
1085 | ||
1086 | - if (!sd->total_len) | |
1087 | - break; | |
1088 | - } | |
1089 | + return 1; | |
1090 | +} | |
1091 | +EXPORT_SYMBOL(splice_from_pipe_feed); | |
1092 | ||
1093 | - if (pipe->nrbufs) | |
1094 | - continue; | |
1095 | +/** | |
1096 | + * splice_from_pipe_next - wait for some data to splice from | |
1097 | + * @pipe: pipe to splice from | |
1098 | + * @sd: information about the splice operation | |
1099 | + * | |
1100 | + * Description: | |
1101 | + * This function will wait for some data and return a positive | |
1102 | + * value (one) if pipe buffers are available. It will return zero | |
1103 | + * or -errno if no more data needs to be spliced. | |
1104 | + */ | |
1105 | +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) | |
1106 | +{ | |
1107 | + while (!pipe->nrbufs) { | |
1108 | if (!pipe->writers) | |
1109 | - break; | |
1110 | - if (!pipe->waiting_writers) { | |
1111 | - if (ret) | |
1112 | - break; | |
1113 | - } | |
1114 | + return 0; | |
1115 | ||
1116 | - if (sd->flags & SPLICE_F_NONBLOCK) { | |
1117 | - if (!ret) | |
1118 | - ret = -EAGAIN; | |
1119 | - break; | |
1120 | - } | |
1121 | + if (!pipe->waiting_writers && sd->num_spliced) | |
1122 | + return 0; | |
1123 | ||
1124 | - if (signal_pending(current)) { | |
1125 | - if (!ret) | |
1126 | - ret = -ERESTARTSYS; | |
1127 | - break; | |
1128 | - } | |
1129 | + if (sd->flags & SPLICE_F_NONBLOCK) | |
1130 | + return -EAGAIN; | |
1131 | ||
1132 | - if (do_wakeup) { | |
1133 | - smp_mb(); | |
1134 | - if (waitqueue_active(&pipe->wait)) | |
1135 | - wake_up_interruptible_sync(&pipe->wait); | |
1136 | - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | |
1137 | - do_wakeup = 0; | |
1138 | + if (signal_pending(current)) | |
1139 | + return -ERESTARTSYS; | |
1140 | + | |
1141 | + if (sd->need_wakeup) { | |
1142 | + wakeup_pipe_writers(pipe); | |
1143 | + sd->need_wakeup = false; | |
1144 | } | |
1145 | ||
1146 | pipe_wait(pipe); | |
1147 | } | |
1148 | ||
1149 | - if (do_wakeup) { | |
1150 | - smp_mb(); | |
1151 | - if (waitqueue_active(&pipe->wait)) | |
1152 | - wake_up_interruptible(&pipe->wait); | |
1153 | - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | |
1154 | - } | |
1155 | + return 1; | |
1156 | +} | |
1157 | +EXPORT_SYMBOL(splice_from_pipe_next); | |
1158 | ||
1159 | - return ret; | |
1160 | +/** | |
1161 | + * splice_from_pipe_begin - start splicing from pipe | |
1162 | + * @sd: information about the splice operation | |
1163 | + * | |
1164 | + * Description: | |
1165 | + * This function should be called before a loop containing | |
1166 | + * splice_from_pipe_next() and splice_from_pipe_feed() to | |
1167 | + * initialize the necessary fields of @sd. | |
1168 | + */ | |
1169 | +void splice_from_pipe_begin(struct splice_desc *sd) | |
1170 | +{ | |
1171 | + sd->num_spliced = 0; | |
1172 | + sd->need_wakeup = false; | |
1173 | +} | |
1174 | +EXPORT_SYMBOL(splice_from_pipe_begin); | |
1175 | + | |
1176 | +/** | |
1177 | + * splice_from_pipe_end - finish splicing from pipe | |
1178 | + * @pipe: pipe to splice from | |
1179 | + * @sd: information about the splice operation | |
1180 | + * | |
1181 | + * Description: | |
1182 | + * This function will wake up pipe writers if necessary. It should | |
1183 | + * be called after a loop containing splice_from_pipe_next() and | |
1184 | + * splice_from_pipe_feed(). | |
1185 | + */ | |
1186 | +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) | |
1187 | +{ | |
1188 | + if (sd->need_wakeup) | |
1189 | + wakeup_pipe_writers(pipe); | |
1190 | +} | |
1191 | +EXPORT_SYMBOL(splice_from_pipe_end); | |
1192 | + | |
1193 | +/** | |
1194 | + * __splice_from_pipe - splice data from a pipe to given actor | |
1195 | + * @pipe: pipe to splice from | |
1196 | + * @sd: information to @actor | |
1197 | + * @actor: handler that splices the data | |
1198 | + * | |
1199 | + * Description: | |
1200 | + * This function does little more than loop over the pipe and call | |
1201 | + * @actor to do the actual moving of a single struct pipe_buffer to | |
1202 | + * the desired destination. See pipe_to_file, pipe_to_sendpage, or | |
1203 | + * pipe_to_user. | |
1204 | + * | |
1205 | + */ | |
1206 | +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, | |
1207 | + splice_actor *actor) | |
1208 | +{ | |
1209 | + int ret; | |
1210 | + | |
1211 | + splice_from_pipe_begin(sd); | |
1212 | + do { | |
1213 | + ret = splice_from_pipe_next(pipe, sd); | |
1214 | + if (ret > 0) | |
1215 | + ret = splice_from_pipe_feed(pipe, sd, actor); | |
1216 | + } while (ret > 0); | |
1217 | + splice_from_pipe_end(pipe, sd); | |
1218 | + | |
1219 | + return sd->num_spliced ? sd->num_spliced : ret; | |
1220 | } | |
1221 | EXPORT_SYMBOL(__splice_from_pipe); | |
1222 | ||
1223 | @@ -713,7 +783,7 @@ EXPORT_SYMBOL(__splice_from_pipe); | |
1224 | * @actor: handler that splices the data | |
1225 | * | |
1226 | * Description: | |
1227 | - * See __splice_from_pipe. This function locks the input and output inodes, | |
1228 | + * See __splice_from_pipe. This function locks the pipe inode, | |
1229 | * otherwise it's identical to __splice_from_pipe(). | |
1230 | * | |
1231 | */ | |
1232 | @@ -722,7 +792,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |
1233 | splice_actor *actor) | |
1234 | { | |
1235 | ssize_t ret; | |
1236 | - struct inode *inode = out->f_mapping->host; | |
1237 | struct splice_desc sd = { | |
1238 | .total_len = len, | |
1239 | .flags = flags, | |
1240 | @@ -730,24 +799,11 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |
1241 | .u.file = out, | |
1242 | }; | |
1243 | ||
1244 | - /* | |
1245 | - * The actor worker might be calling ->prepare_write and | |
1246 | - * ->commit_write. Most of the time, these expect i_mutex to | |
1247 | - * be held. Since this may result in an ABBA deadlock with | |
1248 | - * pipe->inode, we have to order lock acquiry here. | |
1249 | - * | |
1250 | - * Outer lock must be inode->i_mutex, as pipe_wait() will | |
1251 | - * release and reacquire pipe->inode->i_mutex, AND inode must | |
1252 | - * never be a pipe. | |
1253 | - */ | |
1254 | - WARN_ON(S_ISFIFO(inode->i_mode)); | |
1255 | - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | |
1256 | if (pipe->inode) | |
1257 | - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | |
1258 | + mutex_lock(&pipe->inode->i_mutex); | |
1259 | ret = __splice_from_pipe(pipe, &sd, actor); | |
1260 | if (pipe->inode) | |
1261 | mutex_unlock(&pipe->inode->i_mutex); | |
1262 | - mutex_unlock(&inode->i_mutex); | |
1263 | ||
1264 | return ret; | |
1265 | } | |
1266 | @@ -838,17 +894,29 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |
1267 | }; | |
1268 | ssize_t ret; | |
1269 | ||
1270 | - WARN_ON(S_ISFIFO(inode->i_mode)); | |
1271 | - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | |
1272 | - ret = file_remove_suid(out); | |
1273 | - if (likely(!ret)) { | |
1274 | - if (pipe->inode) | |
1275 | - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | |
1276 | - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | |
1277 | - if (pipe->inode) | |
1278 | - mutex_unlock(&pipe->inode->i_mutex); | |
1279 | - } | |
1280 | - mutex_unlock(&inode->i_mutex); | |
1281 | + if (pipe->inode) | |
1282 | + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | |
1283 | + | |
1284 | + splice_from_pipe_begin(&sd); | |
1285 | + do { | |
1286 | + ret = splice_from_pipe_next(pipe, &sd); | |
1287 | + if (ret <= 0) | |
1288 | + break; | |
1289 | + | |
1290 | + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | |
1291 | + ret = file_remove_suid(out); | |
1292 | + if (!ret) | |
1293 | + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); | |
1294 | + mutex_unlock(&inode->i_mutex); | |
1295 | + } while (ret > 0); | |
1296 | + splice_from_pipe_end(pipe, &sd); | |
1297 | + | |
1298 | + if (pipe->inode) | |
1299 | + mutex_unlock(&pipe->inode->i_mutex); | |
1300 | + | |
1301 | + if (sd.num_spliced) | |
1302 | + ret = sd.num_spliced; | |
1303 | + | |
1304 | if (ret > 0) { | |
1305 | unsigned long nr_pages; | |
1306 | ||
1307 | diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c | |
1308 | index 40033dc..82b1c4a 100644 | |
1309 | --- a/fs/ubifs/file.c | |
1310 | +++ b/fs/ubifs/file.c | |
1311 | @@ -1140,8 +1140,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) | |
1312 | * mmap()d file has taken write protection fault and is being made | |
1313 | * writable. UBIFS must ensure page is budgeted for. | |
1314 | */ | |
1315 | -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
1316 | +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |
1317 | { | |
1318 | + struct page *page = vmf->page; | |
1319 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
1320 | struct ubifs_info *c = inode->i_sb->s_fs_info; | |
1321 | struct timespec now = ubifs_current_time(inode); | |
1322 | @@ -1153,7 +1154,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
1323 | ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); | |
1324 | ||
1325 | if (unlikely(c->ro_media)) | |
1326 | - return -EROFS; | |
1327 | + return VM_FAULT_SIGBUS; /* -EROFS */ | |
1328 | ||
1329 | /* | |
1330 | * We have not locked @page so far so we may budget for changing the | |
1331 | @@ -1186,7 +1187,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
1332 | if (err == -ENOSPC) | |
1333 | ubifs_warn("out of space for mmapped file " | |
1334 | "(inode number %lu)", inode->i_ino); | |
1335 | - return err; | |
1336 | + return VM_FAULT_SIGBUS; | |
1337 | } | |
1338 | ||
1339 | lock_page(page); | |
1340 | @@ -1226,6 +1227,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |
1341 | out_unlock: | |
1342 | unlock_page(page); | |
1343 | ubifs_release_budget(c, &req); | |
1344 | + if (err) | |
1345 | + err = VM_FAULT_SIGBUS; | |
1346 | return err; | |
1347 | } | |
1348 | ||
1349 | diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c | |
1350 | index 5311c1a..469502c 100644 | |
1351 | --- a/fs/xfs/linux-2.6/xfs_file.c | |
1352 | +++ b/fs/xfs/linux-2.6/xfs_file.c | |
1353 | @@ -427,9 +427,9 @@ xfs_file_ioctl_invis( | |
1354 | STATIC int | |
1355 | xfs_vm_page_mkwrite( | |
1356 | struct vm_area_struct *vma, | |
1357 | - struct page *page) | |
1358 | + struct vm_fault *vmf) | |
1359 | { | |
1360 | - return block_page_mkwrite(vma, page, xfs_get_blocks); | |
1361 | + return block_page_mkwrite(vma, vmf, xfs_get_blocks); | |
1362 | } | |
1363 | ||
1364 | const struct file_operations xfs_file_operations = { | |
1365 | diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h | |
1366 | index eadaab4..657c072 100644 | |
1367 | --- a/include/linux/buffer_head.h | |
1368 | +++ b/include/linux/buffer_head.h | |
1369 | @@ -222,7 +222,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, | |
1370 | get_block_t *, loff_t *); | |
1371 | int generic_cont_expand_simple(struct inode *inode, loff_t size); | |
1372 | int block_commit_write(struct page *page, unsigned from, unsigned to); | |
1373 | -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, | |
1374 | +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |
1375 | get_block_t get_block); | |
1376 | void block_sync_page(struct page *); | |
1377 | sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); | |
1378 | diff --git a/include/linux/mm.h b/include/linux/mm.h | |
1379 | index 2a75579..ae9775d 100644 | |
1380 | --- a/include/linux/mm.h | |
1381 | +++ b/include/linux/mm.h | |
1382 | @@ -138,6 +138,7 @@ extern pgprot_t protection_map[16]; | |
1383 | ||
1384 | #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ | |
1385 | #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ | |
1386 | +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ | |
1387 | ||
1388 | ||
1389 | /* | |
1390 | @@ -173,7 +174,7 @@ struct vm_operations_struct { | |
1391 | ||
1392 | /* notification that a previously read-only page is about to become | |
1393 | * writable, if an error is returned it will cause a SIGBUS */ | |
1394 | - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); | |
1395 | + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); | |
1396 | ||
1397 | /* called by access_process_vm when get_user_pages() fails, typically | |
1398 | * for use by special VMAs that can switch between memory and hardware | |
1399 | diff --git a/include/linux/splice.h b/include/linux/splice.h | |
1400 | index 528dcb9..5f3faa9 100644 | |
1401 | --- a/include/linux/splice.h | |
1402 | +++ b/include/linux/splice.h | |
1403 | @@ -36,6 +36,8 @@ struct splice_desc { | |
1404 | void *data; /* cookie */ | |
1405 | } u; | |
1406 | loff_t pos; /* file position */ | |
1407 | + size_t num_spliced; /* number of bytes already spliced */ | |
1408 | + bool need_wakeup; /* need to wake up writer */ | |
1409 | }; | |
1410 | ||
1411 | struct partial_page { | |
1412 | @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, | |
1413 | splice_actor *); | |
1414 | extern ssize_t __splice_from_pipe(struct pipe_inode_info *, | |
1415 | struct splice_desc *, splice_actor *); | |
1416 | +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, | |
1417 | + splice_actor *); | |
1418 | +extern int splice_from_pipe_next(struct pipe_inode_info *, | |
1419 | + struct splice_desc *); | |
1420 | +extern void splice_from_pipe_begin(struct splice_desc *); | |
1421 | +extern void splice_from_pipe_end(struct pipe_inode_info *, | |
1422 | + struct splice_desc *); | |
1423 | +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, | |
1424 | + struct splice_desc *); | |
1425 | + | |
1426 | extern ssize_t splice_to_pipe(struct pipe_inode_info *, | |
1427 | struct splice_pipe_desc *); | |
1428 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, | |
1429 | diff --git a/mm/memory.c b/mm/memory.c | |
1430 | index 1002f47..3856c36 100644 | |
1431 | --- a/mm/memory.c | |
1432 | +++ b/mm/memory.c | |
1433 | @@ -1801,6 +1801,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |
1434 | * get_user_pages(.write=1, .force=1). | |
1435 | */ | |
1436 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { | |
1437 | + struct vm_fault vmf; | |
1438 | + int tmp; | |
1439 | + | |
1440 | + vmf.virtual_address = (void __user *)(address & | |
1441 | + PAGE_MASK); | |
1442 | + vmf.pgoff = old_page->index; | |
1443 | + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; | |
1444 | + vmf.page = old_page; | |
1445 | + | |
1446 | /* | |
1447 | * Notify the address space that the page is about to | |
1448 | * become writable so that it can prohibit this or wait | |
1449 | @@ -1812,8 +1821,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |
1450 | page_cache_get(old_page); | |
1451 | pte_unmap_unlock(page_table, ptl); | |
1452 | ||
1453 | - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) | |
1454 | + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); | |
1455 | + if (unlikely(tmp & | |
1456 | + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { | |
1457 | + ret = tmp; | |
1458 | goto unwritable_page; | |
1459 | + } | |
1460 | + if (unlikely(!(tmp & VM_FAULT_LOCKED))) { | |
1461 | + lock_page(old_page); | |
1462 | + if (!old_page->mapping) { | |
1463 | + ret = 0; /* retry the fault */ | |
1464 | + unlock_page(old_page); | |
1465 | + goto unwritable_page; | |
1466 | + } | |
1467 | + } else | |
1468 | + VM_BUG_ON(!PageLocked(old_page)); | |
1469 | ||
1470 | /* | |
1471 | * Since we dropped the lock we need to revalidate | |
1472 | @@ -1823,9 +1845,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |
1473 | */ | |
1474 | page_table = pte_offset_map_lock(mm, pmd, address, | |
1475 | &ptl); | |
1476 | - page_cache_release(old_page); | |
1477 | - if (!pte_same(*page_table, orig_pte)) | |
1478 | + if (!pte_same(*page_table, orig_pte)) { | |
1479 | + unlock_page(old_page); | |
1480 | + page_cache_release(old_page); | |
1481 | goto unlock; | |
1482 | + } | |
1483 | ||
1484 | page_mkwrite = 1; | |
1485 | } | |
1486 | @@ -1930,9 +1954,6 @@ gotten: | |
1487 | unlock: | |
1488 | pte_unmap_unlock(page_table, ptl); | |
1489 | if (dirty_page) { | |
1490 | - if (vma->vm_file) | |
1491 | - file_update_time(vma->vm_file); | |
1492 | - | |
1493 | /* | |
1494 | * Yes, Virginia, this is actually required to prevent a race | |
1495 | * with clear_page_dirty_for_io() from clearing the page dirty | |
1496 | @@ -1941,21 +1962,46 @@ unlock: | |
1497 | * | |
1498 | * do_no_page is protected similarly. | |
1499 | */ | |
1500 | - wait_on_page_locked(dirty_page); | |
1501 | - set_page_dirty_balance(dirty_page, page_mkwrite); | |
1502 | + if (!page_mkwrite) { | |
1503 | + wait_on_page_locked(dirty_page); | |
1504 | + set_page_dirty_balance(dirty_page, page_mkwrite); | |
1505 | + } | |
1506 | put_page(dirty_page); | |
1507 | + if (page_mkwrite) { | |
1508 | + struct address_space *mapping = dirty_page->mapping; | |
1509 | + | |
1510 | + set_page_dirty(dirty_page); | |
1511 | + unlock_page(dirty_page); | |
1512 | + page_cache_release(dirty_page); | |
1513 | + if (mapping) { | |
1514 | + /* | |
1515 | + * Some device drivers do not set page.mapping | |
1516 | + * but still dirty their pages | |
1517 | + */ | |
1518 | + balance_dirty_pages_ratelimited(mapping); | |
1519 | + } | |
1520 | + } | |
1521 | + | |
1522 | + /* file_update_time outside page_lock */ | |
1523 | + if (vma->vm_file) | |
1524 | + file_update_time(vma->vm_file); | |
1525 | } | |
1526 | return ret; | |
1527 | oom_free_new: | |
1528 | page_cache_release(new_page); | |
1529 | oom: | |
1530 | - if (old_page) | |
1531 | + if (old_page) { | |
1532 | + if (page_mkwrite) { | |
1533 | + unlock_page(old_page); | |
1534 | + page_cache_release(old_page); | |
1535 | + } | |
1536 | page_cache_release(old_page); | |
1537 | + } | |
1538 | return VM_FAULT_OOM; | |
1539 | ||
1540 | unwritable_page: | |
1541 | page_cache_release(old_page); | |
1542 | - return VM_FAULT_SIGBUS; | |
1543 | + return ret; | |
1544 | } | |
1545 | ||
1546 | /* | |
1547 | @@ -2472,25 +2518,25 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |
1548 | * to become writable | |
1549 | */ | |
1550 | if (vma->vm_ops->page_mkwrite) { | |
1551 | + int tmp; | |
1552 | + | |
1553 | unlock_page(page); | |
1554 | - if (vma->vm_ops->page_mkwrite(vma, page) < 0) { | |
1555 | - ret = VM_FAULT_SIGBUS; | |
1556 | - anon = 1; /* no anon but release vmf.page */ | |
1557 | - goto out_unlocked; | |
1558 | - } | |
1559 | - lock_page(page); | |
1560 | - /* | |
1561 | - * XXX: this is not quite right (racy vs | |
1562 | - * invalidate) to unlock and relock the page | |
1563 | - * like this, however a better fix requires | |
1564 | - * reworking page_mkwrite locking API, which | |
1565 | - * is better done later. | |
1566 | - */ | |
1567 | - if (!page->mapping) { | |
1568 | - ret = 0; | |
1569 | - anon = 1; /* no anon but release vmf.page */ | |
1570 | - goto out; | |
1571 | + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; | |
1572 | + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); | |
1573 | + if (unlikely(tmp & | |
1574 | + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { | |
1575 | + ret = tmp; | |
1576 | + goto unwritable_page; | |
1577 | } | |
1578 | + if (unlikely(!(tmp & VM_FAULT_LOCKED))) { | |
1579 | + lock_page(page); | |
1580 | + if (!page->mapping) { | |
1581 | + ret = 0; /* retry the fault */ | |
1582 | + unlock_page(page); | |
1583 | + goto unwritable_page; | |
1584 | + } | |
1585 | + } else | |
1586 | + VM_BUG_ON(!PageLocked(page)); | |
1587 | page_mkwrite = 1; | |
1588 | } | |
1589 | } | |
1590 | @@ -2547,19 +2593,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |
1591 | pte_unmap_unlock(page_table, ptl); | |
1592 | ||
1593 | out: | |
1594 | - unlock_page(vmf.page); | |
1595 | -out_unlocked: | |
1596 | - if (anon) | |
1597 | - page_cache_release(vmf.page); | |
1598 | - else if (dirty_page) { | |
1599 | - if (vma->vm_file) | |
1600 | - file_update_time(vma->vm_file); | |
1601 | + if (dirty_page) { | |
1602 | + struct address_space *mapping = page->mapping; | |
1603 | ||
1604 | - set_page_dirty_balance(dirty_page, page_mkwrite); | |
1605 | + if (set_page_dirty(dirty_page)) | |
1606 | + page_mkwrite = 1; | |
1607 | + unlock_page(dirty_page); | |
1608 | put_page(dirty_page); | |
1609 | + if (page_mkwrite && mapping) { | |
1610 | + /* | |
1611 | + * Some device drivers do not set page.mapping but still | |
1612 | + * dirty their pages | |
1613 | + */ | |
1614 | + balance_dirty_pages_ratelimited(mapping); | |
1615 | + } | |
1616 | + | |
1617 | + /* file_update_time outside page_lock */ | |
1618 | + if (vma->vm_file) | |
1619 | + file_update_time(vma->vm_file); | |
1620 | + } else { | |
1621 | + unlock_page(vmf.page); | |
1622 | + if (anon) | |
1623 | + page_cache_release(vmf.page); | |
1624 | } | |
1625 | ||
1626 | return ret; | |
1627 | + | |
1628 | +unwritable_page: | |
1629 | + page_cache_release(page); | |
1630 | + return ret; | |
1631 | } | |
1632 | ||
1633 | static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, |