/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

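/*
 * Map the POSIX access mode bits of the open flags to the generic
 * SMB access rights requested from the server on open.
 */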
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause an unnecessary access-denied error on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct match for the FILE_SUPERSEDE
 *	disposition (i.e. create whether or not the file exists);
 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 *	and the read/write flags match reasonably. O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}

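/*
 * Return true if any file handle open on this inode holds cached
 * byte-range (mandatory-style) locks; scans each handle's lock list
 * while holding lock_sem for reading.
 */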
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

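/*
 * Allocate and initialize the per-handle cifsFileInfo, link it into the
 * tcon and inode open-file lists, and record the oplock/lease level the
 * server granted (preferring a pending lease break that arrived while
 * the open was in flight).
 */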
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);

	/* if it is a readable file instance, put it first in the list */
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);

	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

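/*
 * ->open() for regular files: prefer a POSIX open when the server
 * supports the Unix extensions, otherwise fall back to an NT-style
 * open, then attach the resulting handle via cifs_new_fileinfo().
 */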
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

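/*
 * Reopen a file handle that was invalidated, e.g. by a reconnect. When
 * can_flush is set, dirty pages are written out and the inode metadata
 * refreshed before the handle is reused; byte-range locks lost with the
 * session are reacquired via cifs_relock_file().
 */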
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab the rename sem here because various ops, including
	 * those that already have the rename sem, can end up causing
	 * writepage to get called, and if the server was down that means
	 * we end up here; we can never tell if the caller already has the
	 * rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

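/*
 * Walk the list of files open on this tree connection and reopen any
 * handles invalidated by a reconnect, so persistent handles are
 * re-established on the new session.
 */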
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen persistent handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		cifs_reopen_file(open_file, false /* do not flush */);
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}

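/*
 * Send all cached byte-range locks of this file handle to the server,
 * packing as many LOCKING_ANDX ranges per request as maxBuf permits;
 * exclusive and shared lock types are pushed in separate passes.
 */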
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		free_xid(xid);
		return -EINVAL;
	}

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		/* advance to the next preallocated entry */
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reads.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
		    CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX && !rc)
		rc = locks_lock_file_wait(file, flock);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * Update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held.
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

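/*
 * Synchronously write write_size bytes at *offset through this open
 * handle, retrying on -EAGAIN (reopening an invalidated handle first)
 * and advancing *offset and the cached server EOF as data is written.
 */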
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here, so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
	/* we could simply get the first list entry since write-only entries
	   are always at the end of the list, but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&tcon->open_file_lock);
	return NULL;
}

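/*
 * Find a writable handle for this inode, preferring one opened by the
 * calling process; as a last resort try to reopen an invalidated handle,
 * retrying up to MAX_REOPEN_ATT times before giving up. The returned
 * cifsFileInfo carries a reference the caller must put.
 */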
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen but we had reports of an oops (due
	   to it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&tcon->open_file_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&tcon->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&tcon->open_file_lock);
			list_move_tail(&inv_file->flist,
				       &cifs_inode->openFileList);
			spin_unlock(&tcon->open_file_lock);
			cifsFileInfo_put(inv_file);
			++refind;
			inv_file = NULL;
			spin_lock(&tcon->open_file_lock);
			goto refind_writable;
		}
	}

	return NULL;
}

static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}

1935 static struct cifs_writedata *
1936 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1937 pgoff_t end, pgoff_t *index,
1938 unsigned int *found_pages)
1939 {
1940 unsigned int nr_pages;
1941 struct page **pages;
1942 struct cifs_writedata *wdata;
1943
1944 wdata = cifs_writedata_alloc((unsigned int)tofind,
1945 cifs_writev_complete);
1946 if (!wdata)
1947 return NULL;
1948
1949 /*
1950 * find_get_pages_tag seems to return a max of 256 on each
1951 * iteration, so we must call it several times in order to
1952 * fill the array or the wsize is effectively limited to
1953 * 256 * PAGE_SIZE.
1954 */
1955 *found_pages = 0;
1956 pages = wdata->pages;
1957 do {
1958 nr_pages = find_get_pages_tag(mapping, index,
1959 PAGECACHE_TAG_DIRTY, tofind,
1960 pages);
1961 *found_pages += nr_pages;
1962 tofind -= nr_pages;
1963 pages += nr_pages;
1964 } while (nr_pages && tofind && *index <= end);
1965
1966 return wdata;
1967 }
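/*
 * Worked example (assuming 4KB pages): with a 1MB wsize the caller asks
 * for tofind = 256 pages. Since find_get_pages_tag() appears to return
 * at most 256 pages per call, a larger wsize would silently cap each
 * write at 256 * PAGE_SIZE without the accumulation loop above.
 */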
1968
1969 static unsigned int
1970 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1971 struct address_space *mapping,
1972 struct writeback_control *wbc,
1973 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1974 {
1975 unsigned int nr_pages = 0, i;
1976 struct page *page;
1977
1978 for (i = 0; i < found_pages; i++) {
1979 page = wdata->pages[i];
1980 /*
1981 * At this point we hold neither mapping->tree_lock nor
1982 * lock on the page itself: the page may be truncated or
1983 * invalidated (changing page->mapping to NULL), or even
1984 * swizzled back from swapper_space to tmpfs file
1985 * mapping
1986 */
1987
1988 if (nr_pages == 0)
1989 lock_page(page);
1990 else if (!trylock_page(page))
1991 break;
1992
1993 if (unlikely(page->mapping != mapping)) {
1994 unlock_page(page);
1995 break;
1996 }
1997
1998 if (!wbc->range_cyclic && page->index > end) {
1999 *done = true;
2000 unlock_page(page);
2001 break;
2002 }
2003
2004 if (*next && (page->index != *next)) {
2005 /* Not next consecutive page */
2006 unlock_page(page);
2007 break;
2008 }
2009
2010 if (wbc->sync_mode != WB_SYNC_NONE)
2011 wait_on_page_writeback(page);
2012
2013 if (PageWriteback(page) ||
2014 !clear_page_dirty_for_io(page)) {
2015 unlock_page(page);
2016 break;
2017 }
2018
2019 /*
2020 * This actually clears the dirty bit in the radix tree.
2021 * See cifs_writepage() for more commentary.
2022 */
2023 set_page_writeback(page);
2024 if (page_offset(page) >= i_size_read(mapping->host)) {
2025 *done = true;
2026 unlock_page(page);
2027 end_page_writeback(page);
2028 break;
2029 }
2030
2031 wdata->pages[i] = page;
2032 *next = page->index + 1;
2033 ++nr_pages;
2034 }
2035
2036 /* reset index to refind any pages skipped */
2037 if (nr_pages == 0)
2038 *index = wdata->pages[0]->index + 1;
2039
2040 /* put any pages we aren't going to use */
2041 for (i = nr_pages; i < found_pages; i++) {
2042 put_page(wdata->pages[i]);
2043 wdata->pages[i] = NULL;
2044 }
2045
2046 return nr_pages;
2047 }
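/*
 * Note on the locking pattern above: the first page is taken with the
 * blocking lock_page() so forward progress is guaranteed, while later
 * pages use trylock_page() and simply end the batch on contention
 * rather than stalling the whole writeback pass.
 */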
2048
2049 static int
2050 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2051 struct address_space *mapping, struct writeback_control *wbc)
2052 {
2053 int rc = 0;
2054 struct TCP_Server_Info *server;
2055 unsigned int i;
2056
2057 wdata->sync_mode = wbc->sync_mode;
2058 wdata->nr_pages = nr_pages;
2059 wdata->offset = page_offset(wdata->pages[0]);
2060 wdata->pagesz = PAGE_SIZE;
2061 wdata->tailsz = min(i_size_read(mapping->host) -
2062 page_offset(wdata->pages[nr_pages - 1]),
2063 (loff_t)PAGE_SIZE);
2064 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2065
2066 if (wdata->cfile != NULL)
2067 cifsFileInfo_put(wdata->cfile);
2068 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2069 if (!wdata->cfile) {
2070 cifs_dbg(VFS, "No writable handles for inode\n");
2071 rc = -EBADF;
2072 } else {
2073 wdata->pid = wdata->cfile->pid;
2074 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2075 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2076 }
2077
2078 for (i = 0; i < nr_pages; ++i)
2079 unlock_page(wdata->pages[i]);
2080
2081 return rc;
2082 }
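/*
 * Worked example of the size math above (assuming 4KB pages): writing
 * 5 pages where i_size ends 100 bytes into the last page gives
 * tailsz = 100 and bytes = 4 * 4096 + 100 = 16484, so only the valid
 * portion of the tail page goes over the wire.
 */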
2083
2084 static int cifs_writepages(struct address_space *mapping,
2085 struct writeback_control *wbc)
2086 {
2087 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2088 struct TCP_Server_Info *server;
2089 bool done = false, scanned = false, range_whole = false;
2090 pgoff_t end, index;
2091 struct cifs_writedata *wdata;
2092 int rc = 0;
2093
2094 /*
2095 * If wsize is smaller than the page cache size, default to writing
2096 * one page at a time via cifs_writepage
2097 */
2098 if (cifs_sb->wsize < PAGE_SIZE)
2099 return generic_writepages(mapping, wbc);
2100
2101 if (wbc->range_cyclic) {
2102 index = mapping->writeback_index; /* Start from prev offset */
2103 end = -1;
2104 } else {
2105 index = wbc->range_start >> PAGE_SHIFT;
2106 end = wbc->range_end >> PAGE_SHIFT;
2107 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2108 range_whole = true;
2109 scanned = true;
2110 }
2111 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2112 retry:
2113 while (!done && index <= end) {
2114 unsigned int i, nr_pages, found_pages, wsize, credits;
2115 pgoff_t next = 0, tofind, saved_index = index;
2116
2117 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2118 &wsize, &credits);
2119 if (rc)
2120 break;
2121
2122 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2123
2124 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2125 &found_pages);
2126 if (!wdata) {
2127 rc = -ENOMEM;
2128 add_credits_and_wake_if(server, credits, 0);
2129 break;
2130 }
2131
2132 if (found_pages == 0) {
2133 kref_put(&wdata->refcount, cifs_writedata_release);
2134 add_credits_and_wake_if(server, credits, 0);
2135 break;
2136 }
2137
2138 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2139 end, &index, &next, &done);
2140
2141 /* nothing to write? */
2142 if (nr_pages == 0) {
2143 kref_put(&wdata->refcount, cifs_writedata_release);
2144 add_credits_and_wake_if(server, credits, 0);
2145 continue;
2146 }
2147
2148 wdata->credits = credits;
2149
2150 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2151
2152 /* send failure -- clean up the mess */
2153 if (rc != 0) {
2154 add_credits_and_wake_if(server, wdata->credits, 0);
2155 for (i = 0; i < nr_pages; ++i) {
2156 if (rc == -EAGAIN)
2157 redirty_page_for_writepage(wbc,
2158 wdata->pages[i]);
2159 else
2160 SetPageError(wdata->pages[i]);
2161 end_page_writeback(wdata->pages[i]);
2162 put_page(wdata->pages[i]);
2163 }
2164 if (rc != -EAGAIN)
2165 mapping_set_error(mapping, rc);
2166 }
2167 kref_put(&wdata->refcount, cifs_writedata_release);
2168
2169 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2170 index = saved_index;
2171 continue;
2172 }
2173
2174 wbc->nr_to_write -= nr_pages;
2175 if (wbc->nr_to_write <= 0)
2176 done = true;
2177
2178 index = next;
2179 }
2180
2181 if (!scanned && !done) {
2182 /*
2183 * We hit the last page and there is more work to be done: wrap
2184 * back to the start of the file
2185 */
2186 scanned = true;
2187 index = 0;
2188 goto retry;
2189 }
2190
2191 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2192 mapping->writeback_index = index;
2193
2194 return rc;
2195 }
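/*
 * Credit accounting in the loop above follows one invariant:
 * wait_mtu_credits() reserves credits before each batch, and every
 * early-exit path hands them back via add_credits_and_wake_if() so
 * other waiters are not starved when a batch is abandoned.
 */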
2196
2197 static int
2198 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2199 {
2200 int rc;
2201 unsigned int xid;
2202
2203 xid = get_xid();
2204 /* BB add check for wbc flags */
2205 get_page(page);
2206 if (!PageUptodate(page))
2207 cifs_dbg(FYI, "ppw - page not up to date\n");
2208
2209 /*
2210 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2211 *
2212 * A writepage() implementation always needs to do either this,
2213 * or re-dirty the page with "redirty_page_for_writepage()" in
2214 * the case of a failure.
2215 *
2216 * Just unlocking the page will cause the radix tree tag-bits
2217 * to fail to update with the state of the page correctly.
2218 */
2219 set_page_writeback(page);
2220 retry_write:
2221 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2222 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2223 goto retry_write;
2224 else if (rc == -EAGAIN)
2225 redirty_page_for_writepage(wbc, page);
2226 else if (rc != 0)
2227 SetPageError(page);
2228 else
2229 SetPageUptodate(page);
2230 end_page_writeback(page);
2231 put_page(page);
2232 free_xid(xid);
2233 return rc;
2234 }
2235
2236 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2237 {
2238 int rc = cifs_writepage_locked(page, wbc);
2239 unlock_page(page);
2240 return rc;
2241 }
2242
2243 static int cifs_write_end(struct file *file, struct address_space *mapping,
2244 loff_t pos, unsigned len, unsigned copied,
2245 struct page *page, void *fsdata)
2246 {
2247 int rc;
2248 struct inode *inode = mapping->host;
2249 struct cifsFileInfo *cfile = file->private_data;
2250 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2251 __u32 pid;
2252
2253 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2254 pid = cfile->pid;
2255 else
2256 pid = current->tgid;
2257
2258 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2259 page, pos, copied);
2260
2261 if (PageChecked(page)) {
2262 if (copied == len)
2263 SetPageUptodate(page);
2264 ClearPageChecked(page);
2265 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2266 SetPageUptodate(page);
2267
2268 if (!PageUptodate(page)) {
2269 char *page_data;
2270 unsigned offset = pos & (PAGE_SIZE - 1);
2271 unsigned int xid;
2272
2273 xid = get_xid();
2274 /* this is probably better than directly calling
2275 cifs_partialpagewrite since in this function the file handle is
2276 known, which we might as well leverage */
2277 /* BB check if anything else missing out of ppw
2278 such as updating last write time */
2279 page_data = kmap(page);
2280 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2281 /* if (rc < 0) should we set writebehind rc? */
2282 kunmap(page);
2283
2284 free_xid(xid);
2285 } else {
2286 rc = copied;
2287 pos += copied;
2288 set_page_dirty(page);
2289 }
2290
2291 if (rc > 0) {
2292 spin_lock(&inode->i_lock);
2293 if (pos > inode->i_size)
2294 i_size_write(inode, pos);
2295 spin_unlock(&inode->i_lock);
2296 }
2297
2298 unlock_page(page);
2299 put_page(page);
2300
2301 return rc;
2302 }
2303
2304 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2305 int datasync)
2306 {
2307 unsigned int xid;
2308 int rc = 0;
2309 struct cifs_tcon *tcon;
2310 struct TCP_Server_Info *server;
2311 struct cifsFileInfo *smbfile = file->private_data;
2312 struct inode *inode = file_inode(file);
2313 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2314
2315 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2316 if (rc)
2317 return rc;
2318 inode_lock(inode);
2319
2320 xid = get_xid();
2321
2322 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2323 file, datasync);
2324
2325 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2326 rc = cifs_zap_mapping(inode);
2327 if (rc) {
2328 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2329 rc = 0; /* don't care about it in fsync */
2330 }
2331 }
2332
2333 tcon = tlink_tcon(smbfile->tlink);
2334 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2335 server = tcon->ses->server;
2336 if (server->ops->flush)
2337 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2338 else
2339 rc = -ENOSYS;
2340 }
2341
2342 free_xid(xid);
2343 inode_unlock(inode);
2344 return rc;
2345 }
2346
2347 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2348 {
2349 unsigned int xid;
2350 int rc = 0;
2351 struct cifs_tcon *tcon;
2352 struct TCP_Server_Info *server;
2353 struct cifsFileInfo *smbfile = file->private_data;
2354 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2355 struct inode *inode = file->f_mapping->host;
2356
2357 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2358 if (rc)
2359 return rc;
2360 inode_lock(inode);
2361
2362 xid = get_xid();
2363
2364 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2365 file, datasync);
2366
2367 tcon = tlink_tcon(smbfile->tlink);
2368 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2369 server = tcon->ses->server;
2370 if (server->ops->flush)
2371 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2372 else
2373 rc = -ENOSYS;
2374 }
2375
2376 free_xid(xid);
2377 inode_unlock(inode);
2378 return rc;
2379 }
2380
2381 /*
2382 * As file closes, flush all cached write data for this inode checking
2383 * for write behind errors.
2384 */
2385 int cifs_flush(struct file *file, fl_owner_t id)
2386 {
2387 struct inode *inode = file_inode(file);
2388 int rc = 0;
2389
2390 if (file->f_mode & FMODE_WRITE)
2391 rc = filemap_write_and_wait(inode->i_mapping);
2392
2393 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2394
2395 return rc;
2396 }
2397
2398 static int
2399 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2400 {
2401 int rc = 0;
2402 unsigned long i;
2403
2404 for (i = 0; i < num_pages; i++) {
2405 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2406 if (!pages[i]) {
2407 /*
2408 * save number of pages we have already allocated and
2409 * return with ENOMEM error
2410 */
2411 num_pages = i;
2412 rc = -ENOMEM;
2413 break;
2414 }
2415 }
2416
2417 if (rc) {
2418 for (i = 0; i < num_pages; i++)
2419 put_page(pages[i]);
2420 }
2421 return rc;
2422 }
2423
2424 static inline
2425 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2426 {
2427 size_t num_pages;
2428 size_t clen;
2429
2430 clen = min_t(const size_t, len, wsize);
2431 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2432
2433 if (cur_len)
2434 *cur_len = clen;
2435
2436 return num_pages;
2437 }
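/*
 * Worked example (assuming 4KB pages): wsize = 65536 and len = 100000
 * yield clen = 65536 and num_pages = 16; a short final chunk such as
 * len = 5000 yields clen = 5000 and num_pages = 2.
 */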
2438
2439 static void
2440 cifs_uncached_writedata_release(struct kref *refcount)
2441 {
2442 int i;
2443 struct cifs_writedata *wdata = container_of(refcount,
2444 struct cifs_writedata, refcount);
2445
2446 for (i = 0; i < wdata->nr_pages; i++)
2447 put_page(wdata->pages[i]);
2448 cifs_writedata_release(refcount);
2449 }
2450
2451 static void
2452 cifs_uncached_writev_complete(struct work_struct *work)
2453 {
2454 struct cifs_writedata *wdata = container_of(work,
2455 struct cifs_writedata, work);
2456 struct inode *inode = d_inode(wdata->cfile->dentry);
2457 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2458
2459 spin_lock(&inode->i_lock);
2460 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2461 if (cifsi->server_eof > inode->i_size)
2462 i_size_write(inode, cifsi->server_eof);
2463 spin_unlock(&inode->i_lock);
2464
2465 complete(&wdata->done);
2466
2467 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2468 }
2469
2470 static int
2471 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2472 size_t *len, unsigned long *num_pages)
2473 {
2474 size_t save_len, copied, bytes, cur_len = *len;
2475 unsigned long i, nr_pages = *num_pages;
2476
2477 save_len = cur_len;
2478 for (i = 0; i < nr_pages; i++) {
2479 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2480 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2481 cur_len -= copied;
2482 /*
2483 * If we didn't copy as much as we expected, then that
2484 * may mean we trod into an unmapped area. Stop copying
2485 * at that point. On the next pass through the big
2486 * loop, we'll likely end up getting a zero-length
2487 * write and bailing out of it.
2488 */
2489 if (copied < bytes)
2490 break;
2491 }
2492 cur_len = save_len - cur_len;
2493 *len = cur_len;
2494
2495 /*
2496 * If we have no data to send, then that probably means that
2497 * the copy above failed altogether. That's most likely because
2498 * the address in the iovec was bogus. Return -EFAULT and let
2499 * the caller free anything we allocated and bail out.
2500 */
2501 if (!cur_len)
2502 return -EFAULT;
2503
2504 /*
2505 * i + 1 now represents the number of pages we actually used in
2506 * the copy phase above.
2507 */
2508 *num_pages = i + 1;
2509 return 0;
2510 }
2511
2512 static int
2513 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2514 struct cifsFileInfo *open_file,
2515 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2516 {
2517 int rc = 0;
2518 size_t cur_len;
2519 unsigned long nr_pages, num_pages, i;
2520 struct cifs_writedata *wdata;
2521 struct iov_iter saved_from = *from;
2522 loff_t saved_offset = offset;
2523 pid_t pid;
2524 struct TCP_Server_Info *server;
2525
2526 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2527 pid = open_file->pid;
2528 else
2529 pid = current->tgid;
2530
2531 server = tlink_tcon(open_file->tlink)->ses->server;
2532
2533 do {
2534 unsigned int wsize, credits;
2535
2536 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2537 &wsize, &credits);
2538 if (rc)
2539 break;
2540
2541 nr_pages = get_numpages(wsize, len, &cur_len);
2542 wdata = cifs_writedata_alloc(nr_pages,
2543 cifs_uncached_writev_complete);
2544 if (!wdata) {
2545 rc = -ENOMEM;
2546 add_credits_and_wake_if(server, credits, 0);
2547 break;
2548 }
2549
2550 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2551 if (rc) {
2552 kfree(wdata);
2553 add_credits_and_wake_if(server, credits, 0);
2554 break;
2555 }
2556
2557 num_pages = nr_pages;
2558 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2559 if (rc) {
2560 for (i = 0; i < nr_pages; i++)
2561 put_page(wdata->pages[i]);
2562 kfree(wdata);
2563 add_credits_and_wake_if(server, credits, 0);
2564 break;
2565 }
2566
2567 /*
2568 * Bring nr_pages down to the number of pages we actually used,
2569 * and free any pages that we didn't use.
2570 */
2571 for ( ; nr_pages > num_pages; nr_pages--)
2572 put_page(wdata->pages[nr_pages - 1]);
2573
2574 wdata->sync_mode = WB_SYNC_ALL;
2575 wdata->nr_pages = nr_pages;
2576 wdata->offset = (__u64)offset;
2577 wdata->cfile = cifsFileInfo_get(open_file);
2578 wdata->pid = pid;
2579 wdata->bytes = cur_len;
2580 wdata->pagesz = PAGE_SIZE;
2581 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2582 wdata->credits = credits;
2583
2584 if (!wdata->cfile->invalidHandle ||
2585 !cifs_reopen_file(wdata->cfile, false))
2586 rc = server->ops->async_writev(wdata,
2587 cifs_uncached_writedata_release);
2588 if (rc) {
2589 add_credits_and_wake_if(server, wdata->credits, 0);
2590 kref_put(&wdata->refcount,
2591 cifs_uncached_writedata_release);
2592 if (rc == -EAGAIN) {
2593 *from = saved_from;
2594 iov_iter_advance(from, offset - saved_offset);
2595 continue;
2596 }
2597 break;
2598 }
2599
2600 list_add_tail(&wdata->list, wdata_list);
2601 offset += cur_len;
2602 len -= cur_len;
2603 } while (len > 0);
2604
2605 return rc;
2606 }
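/*
 * On -EAGAIN the loop above rewinds by restoring the iterator saved on
 * entry (*from = saved_from) and re-advancing it to the current offset,
 * so a reconnect mid-stream resends exactly the bytes that were not
 * yet accepted by the server.
 */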
2607
2608 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2609 {
2610 struct file *file = iocb->ki_filp;
2611 ssize_t total_written = 0;
2612 struct cifsFileInfo *open_file;
2613 struct cifs_tcon *tcon;
2614 struct cifs_sb_info *cifs_sb;
2615 struct cifs_writedata *wdata, *tmp;
2616 struct list_head wdata_list;
2617 struct iov_iter saved_from = *from;
2618 int rc;
2619
2620 /*
2621 * BB - optimize the way when signing is disabled. We can drop this
2622 * extra memory-to-memory copying and use iovec buffers for constructing
2623 * write request.
2624 */
2625
2626 rc = generic_write_checks(iocb, from);
2627 if (rc <= 0)
2628 return rc;
2629
2630 INIT_LIST_HEAD(&wdata_list);
2631 cifs_sb = CIFS_FILE_SB(file);
2632 open_file = file->private_data;
2633 tcon = tlink_tcon(open_file->tlink);
2634
2635 if (!tcon->ses->server->ops->async_writev)
2636 return -ENOSYS;
2637
2638 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2639 open_file, cifs_sb, &wdata_list);
2640
2641 /*
2642 * If at least one write was successfully sent, then discard any rc
2643 * value from the later writes. If those writes succeed, we'll end up
2644 * returning whatever was written. If one of them fails, we'll pick up
2645 * a new rc value from it.
2646 */
2647 if (!list_empty(&wdata_list))
2648 rc = 0;
2649
2650 /*
2651 * Wait for and collect replies for any successful sends in order of
2652 * increasing offset. Once an error is hit or we get a fatal signal
2653 * while waiting, then return without waiting for any more replies.
2654 */
2655 restart_loop:
2656 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2657 if (!rc) {
2658 /* FIXME: freezable too? */
2659 rc = wait_for_completion_killable(&wdata->done);
2660 if (rc)
2661 rc = -EINTR;
2662 else if (wdata->result)
2663 rc = wdata->result;
2664 else
2665 total_written += wdata->bytes;
2666
2667 /* resend call if it's a retryable error */
2668 if (rc == -EAGAIN) {
2669 struct list_head tmp_list;
2670 struct iov_iter tmp_from = saved_from;
2671
2672 INIT_LIST_HEAD(&tmp_list);
2673 list_del_init(&wdata->list);
2674
2675 iov_iter_advance(&tmp_from,
2676 wdata->offset - iocb->ki_pos);
2677
2678 rc = cifs_write_from_iter(wdata->offset,
2679 wdata->bytes, &tmp_from,
2680 open_file, cifs_sb, &tmp_list);
2681
2682 list_splice(&tmp_list, &wdata_list);
2683
2684 kref_put(&wdata->refcount,
2685 cifs_uncached_writedata_release);
2686 goto restart_loop;
2687 }
2688 }
2689 list_del_init(&wdata->list);
2690 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2691 }
2692
2693 if (unlikely(!total_written))
2694 return rc;
2695
2696 iocb->ki_pos += total_written;
2697 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2698 cifs_stats_bytes_written(tcon, total_written);
2699 return total_written;
2700 }
2701
2702 static ssize_t
2703 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2704 {
2705 struct file *file = iocb->ki_filp;
2706 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2707 struct inode *inode = file->f_mapping->host;
2708 struct cifsInodeInfo *cinode = CIFS_I(inode);
2709 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2710 ssize_t rc;
2711
2712 /*
2713 * We need to hold the sem to be sure nobody modifies lock list
2714 * with a brlock that prevents writing.
2715 */
2716 down_read(&cinode->lock_sem);
2717 inode_lock(inode);
2718
2719 rc = generic_write_checks(iocb, from);
2720 if (rc <= 0)
2721 goto out;
2722
2723 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2724 server->vals->exclusive_lock_type, NULL,
2725 CIFS_WRITE_OP))
2726 rc = __generic_file_write_iter(iocb, from);
2727 else
2728 rc = -EACCES;
2729 out:
2730 inode_unlock(inode);
2731
2732 if (rc > 0)
2733 rc = generic_write_sync(iocb, rc);
2734 up_read(&cinode->lock_sem);
2735 return rc;
2736 }
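/*
 * Lock ordering sketch for cifs_writev(): lock_sem is taken (shared)
 * before the inode lock and released only after generic_write_sync(),
 * so no brlock can be inserted into the lock list between the conflict
 * check and the moment the written data reaches the server.
 */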
2737
2738 ssize_t
2739 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2740 {
2741 struct inode *inode = file_inode(iocb->ki_filp);
2742 struct cifsInodeInfo *cinode = CIFS_I(inode);
2743 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2744 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2745 iocb->ki_filp->private_data;
2746 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2747 ssize_t written;
2748
2749 written = cifs_get_writer(cinode);
2750 if (written)
2751 return written;
2752
2753 if (CIFS_CACHE_WRITE(cinode)) {
2754 if (cap_unix(tcon->ses) &&
2755 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2756 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2757 written = generic_file_write_iter(iocb, from);
2758 goto out;
2759 }
2760 written = cifs_writev(iocb, from);
2761 goto out;
2762 }
2763 /*
2764 * For non-oplocked files in strict cache mode we need to write the data
2765 * to the server exactly from the pos to pos+len-1 rather than flush all
2766 * affected pages because it may cause an error with mandatory locks on
2767 * these pages but not on the region from pos to pos+len-1.
2768 */
2769 written = cifs_user_writev(iocb, from);
2770 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2771 /*
2772 * A Windows 7 server can delay breaking a level2 oplock when a write
2773 * request comes in - break it on the client to prevent reading
2774 * stale data.
2775 */
2776 cifs_zap_mapping(inode);
2777 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2778 inode);
2779 cinode->oplock = 0;
2780 }
2781 out:
2782 cifs_put_writer(cinode);
2783 return written;
2784 }
2785
2786 static struct cifs_readdata *
2787 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2788 {
2789 struct cifs_readdata *rdata;
2790
2791 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2792 GFP_KERNEL);
2793 if (rdata != NULL) {
2794 kref_init(&rdata->refcount);
2795 INIT_LIST_HEAD(&rdata->list);
2796 init_completion(&rdata->done);
2797 INIT_WORK(&rdata->work, complete);
2798 }
2799
2800 return rdata;
2801 }
2802
2803 void
2804 cifs_readdata_release(struct kref *refcount)
2805 {
2806 struct cifs_readdata *rdata = container_of(refcount,
2807 struct cifs_readdata, refcount);
2808
2809 if (rdata->cfile)
2810 cifsFileInfo_put(rdata->cfile);
2811
2812 kfree(rdata);
2813 }
2814
2815 static int
2816 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2817 {
2818 int rc = 0;
2819 struct page *page;
2820 unsigned int i;
2821
2822 for (i = 0; i < nr_pages; i++) {
2823 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2824 if (!page) {
2825 rc = -ENOMEM;
2826 break;
2827 }
2828 rdata->pages[i] = page;
2829 }
2830
2831 if (rc) {
2832 for (i = 0; i < nr_pages; i++) {
2833 put_page(rdata->pages[i]);
2834 rdata->pages[i] = NULL;
2835 }
2836 }
2837 return rc;
2838 }
2839
2840 static void
2841 cifs_uncached_readdata_release(struct kref *refcount)
2842 {
2843 struct cifs_readdata *rdata = container_of(refcount,
2844 struct cifs_readdata, refcount);
2845 unsigned int i;
2846
2847 for (i = 0; i < rdata->nr_pages; i++) {
2848 put_page(rdata->pages[i]);
2849 rdata->pages[i] = NULL;
2850 }
2851 cifs_readdata_release(refcount);
2852 }
2853
2854 /**
2855 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2856 * @rdata: the readdata response with list of pages holding data
2857 * @iter: destination for our data
2858 *
2859 * This function copies data from a list of pages in a readdata response into
2860 * an array of iovecs. It will first calculate where the data should go
2861 * based on the info in the readdata and then copy the data into that spot.
2862 */
2863 static int
2864 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2865 {
2866 size_t remaining = rdata->got_bytes;
2867 unsigned int i;
2868
2869 for (i = 0; i < rdata->nr_pages; i++) {
2870 struct page *page = rdata->pages[i];
2871 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2872 size_t written = copy_page_to_iter(page, 0, copy, iter);
2873 remaining -= written;
2874 if (written < copy && iov_iter_count(iter) > 0)
2875 break;
2876 }
2877 return remaining ? -EFAULT : 0;
2878 }
2879
2880 static void
2881 cifs_uncached_readv_complete(struct work_struct *work)
2882 {
2883 struct cifs_readdata *rdata = container_of(work,
2884 struct cifs_readdata, work);
2885
2886 complete(&rdata->done);
2887 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2888 }
2889
2890 static int
2891 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2892 struct cifs_readdata *rdata, unsigned int len)
2893 {
2894 int result = 0;
2895 unsigned int i;
2896 unsigned int nr_pages = rdata->nr_pages;
2897
2898 rdata->got_bytes = 0;
2899 rdata->tailsz = PAGE_SIZE;
2900 for (i = 0; i < nr_pages; i++) {
2901 struct page *page = rdata->pages[i];
2902 size_t n;
2903
2904 if (len <= 0) {
2905 /* no need to hold page hostage */
2906 rdata->pages[i] = NULL;
2907 rdata->nr_pages--;
2908 put_page(page);
2909 continue;
2910 }
2911 n = len;
2912 if (len >= PAGE_SIZE) {
2913 /* enough data to fill the page */
2914 n = PAGE_SIZE;
2915 len -= n;
2916 } else {
2917 zero_user(page, len, PAGE_SIZE - len);
2918 rdata->tailsz = len;
2919 len = 0;
2920 }
2921 result = cifs_read_page_from_socket(server, page, n);
2922 if (result < 0)
2923 break;
2924
2925 rdata->got_bytes += result;
2926 }
2927
2928 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2929 rdata->got_bytes : result;
2930 }
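/*
 * Tail handling example (assuming 4KB pages): a 10000-byte read into
 * three pages fills pages 0 and 1 completely, then copies the
 * remaining 1808 bytes into page 2, zeroes the rest of it, and records
 * tailsz = 1808 so the caller knows how much of the last page is valid.
 */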
2931
2932 static int
2933 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2934 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2935 {
2936 struct cifs_readdata *rdata;
2937 unsigned int npages, rsize, credits;
2938 size_t cur_len;
2939 int rc;
2940 pid_t pid;
2941 struct TCP_Server_Info *server;
2942
2943 server = tlink_tcon(open_file->tlink)->ses->server;
2944
2945 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2946 pid = open_file->pid;
2947 else
2948 pid = current->tgid;
2949
2950 do {
2951 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2952 &rsize, &credits);
2953 if (rc)
2954 break;
2955
2956 cur_len = min_t(const size_t, len, rsize);
2957 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2958
2959 /* allocate a readdata struct */
2960 rdata = cifs_readdata_alloc(npages,
2961 cifs_uncached_readv_complete);
2962 if (!rdata) {
2963 add_credits_and_wake_if(server, credits, 0);
2964 rc = -ENOMEM;
2965 break;
2966 }
2967
2968 rc = cifs_read_allocate_pages(rdata, npages);
2969 if (rc)
2970 goto error;
2971
2972 rdata->cfile = cifsFileInfo_get(open_file);
2973 rdata->nr_pages = npages;
2974 rdata->offset = offset;
2975 rdata->bytes = cur_len;
2976 rdata->pid = pid;
2977 rdata->pagesz = PAGE_SIZE;
2978 rdata->read_into_pages = cifs_uncached_read_into_pages;
2979 rdata->credits = credits;
2980
2981 if (!rdata->cfile->invalidHandle ||
2982 !cifs_reopen_file(rdata->cfile, true))
2983 rc = server->ops->async_readv(rdata);
2984 error:
2985 if (rc) {
2986 add_credits_and_wake_if(server, rdata->credits, 0);
2987 kref_put(&rdata->refcount,
2988 cifs_uncached_readdata_release);
2989 if (rc == -EAGAIN)
2990 continue;
2991 break;
2992 }
2993
2994 list_add_tail(&rdata->list, rdata_list);
2995 offset += cur_len;
2996 len -= cur_len;
2997 } while (len > 0);
2998
2999 return rc;
3000 }
3001
3002 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3003 {
3004 struct file *file = iocb->ki_filp;
3005 ssize_t rc;
3006 size_t len;
3007 ssize_t total_read = 0;
3008 loff_t offset = iocb->ki_pos;
3009 struct cifs_sb_info *cifs_sb;
3010 struct cifs_tcon *tcon;
3011 struct cifsFileInfo *open_file;
3012 struct cifs_readdata *rdata, *tmp;
3013 struct list_head rdata_list;
3014
3015 len = iov_iter_count(to);
3016 if (!len)
3017 return 0;
3018
3019 INIT_LIST_HEAD(&rdata_list);
3020 cifs_sb = CIFS_FILE_SB(file);
3021 open_file = file->private_data;
3022 tcon = tlink_tcon(open_file->tlink);
3023
3024 if (!tcon->ses->server->ops->async_readv)
3025 return -ENOSYS;
3026
3027 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3028 cifs_dbg(FYI, "attempting read on write only file instance\n");
3029
3030 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3031
3032 /* if at least one read request send succeeded, then reset rc */
3033 if (!list_empty(&rdata_list))
3034 rc = 0;
3035
3036 len = iov_iter_count(to);
3037 /* the loop below should proceed in the order of increasing offsets */
3038 again:
3039 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3040 if (!rc) {
3041 /* FIXME: freezable sleep too? */
3042 rc = wait_for_completion_killable(&rdata->done);
3043 if (rc)
3044 rc = -EINTR;
3045 else if (rdata->result == -EAGAIN) {
3046 /* resend call if it's a retryable error */
3047 struct list_head tmp_list;
3048 unsigned int got_bytes = rdata->got_bytes;
3049
3050 list_del_init(&rdata->list);
3051 INIT_LIST_HEAD(&tmp_list);
3052
3053 /*
3054 * Got part of the data and then a reconnect
3055 * happened -- fill the buffer and continue
3056 * reading.
3057 */
3058 if (got_bytes && got_bytes < rdata->bytes) {
3059 rc = cifs_readdata_to_iov(rdata, to);
3060 if (rc) {
3061 kref_put(&rdata->refcount,
3062 cifs_uncached_readdata_release);
3063 continue;
3064 }
3065 }
3066
3067 rc = cifs_send_async_read(
3068 rdata->offset + got_bytes,
3069 rdata->bytes - got_bytes,
3070 rdata->cfile, cifs_sb,
3071 &tmp_list);
3072
3073 list_splice(&tmp_list, &rdata_list);
3074
3075 kref_put(&rdata->refcount,
3076 cifs_uncached_readdata_release);
3077 goto again;
3078 } else if (rdata->result)
3079 rc = rdata->result;
3080 else
3081 rc = cifs_readdata_to_iov(rdata, to);
3082
3083 /* if there was a short read -- discard anything left */
3084 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3085 rc = -ENODATA;
3086 }
3087 list_del_init(&rdata->list);
3088 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3089 }
3090
3091 total_read = len - iov_iter_count(to);
3092
3093 cifs_stats_bytes_read(tcon, total_read);
3094
3095 /* mask nodata case */
3096 if (rc == -ENODATA)
3097 rc = 0;
3098
3099 if (total_read) {
3100 iocb->ki_pos += total_read;
3101 return total_read;
3102 }
3103 return rc;
3104 }
3105
3106 ssize_t
3107 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3108 {
3109 struct inode *inode = file_inode(iocb->ki_filp);
3110 struct cifsInodeInfo *cinode = CIFS_I(inode);
3111 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3112 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3113 iocb->ki_filp->private_data;
3114 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3115 int rc = -EACCES;
3116
3117 /*
3118 * In strict cache mode we need to read from the server all the time
3119 * if we don't have a level II oplock, because the server can delay the
3120 * mtime change - so we can't decide whether to invalidate the inode.
3121 * Page reads can also fail if there are mandatory locks
3122 * on pages affected by this read but not on the region from pos to
3123 * pos+len-1.
3124 */
3125 if (!CIFS_CACHE_READ(cinode))
3126 return cifs_user_readv(iocb, to);
3127
3128 if (cap_unix(tcon->ses) &&
3129 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3130 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3131 return generic_file_read_iter(iocb, to);
3132
3133 /*
3134 * We need to hold the sem to be sure nobody modifies lock list
3135 * with a brlock that prevents reading.
3136 */
3137 down_read(&cinode->lock_sem);
3138 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3139 tcon->ses->server->vals->shared_lock_type,
3140 NULL, CIFS_READ_OP))
3141 rc = generic_file_read_iter(iocb, to);
3142 up_read(&cinode->lock_sem);
3143 return rc;
3144 }
3145
3146 static ssize_t
3147 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3148 {
3149 int rc = -EACCES;
3150 unsigned int bytes_read = 0;
3151 unsigned int total_read;
3152 unsigned int current_read_size;
3153 unsigned int rsize;
3154 struct cifs_sb_info *cifs_sb;
3155 struct cifs_tcon *tcon;
3156 struct TCP_Server_Info *server;
3157 unsigned int xid;
3158 char *cur_offset;
3159 struct cifsFileInfo *open_file;
3160 struct cifs_io_parms io_parms;
3161 int buf_type = CIFS_NO_BUFFER;
3162 __u32 pid;
3163
3164 xid = get_xid();
3165 cifs_sb = CIFS_FILE_SB(file);
3166
3167 /* FIXME: set up handlers for larger reads and/or convert to async */
3168 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3169
3170 if (file->private_data == NULL) {
3171 rc = -EBADF;
3172 free_xid(xid);
3173 return rc;
3174 }
3175 open_file = file->private_data;
3176 tcon = tlink_tcon(open_file->tlink);
3177 server = tcon->ses->server;
3178
3179 if (!server->ops->sync_read) {
3180 free_xid(xid);
3181 return -ENOSYS;
3182 }
3183
3184 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3185 pid = open_file->pid;
3186 else
3187 pid = current->tgid;
3188
3189 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3190 cifs_dbg(FYI, "attempting read on write only file instance\n");
3191
3192 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3193 total_read += bytes_read, cur_offset += bytes_read) {
3194 do {
3195 current_read_size = min_t(uint, read_size - total_read,
3196 rsize);
3197 /*
3198 * For Windows ME and 9x we do not want to request more
3199 * than was negotiated, since the server will refuse
3200 * the read otherwise.
3201 */
3202 if ((tcon->ses) && !(tcon->ses->capabilities &
3203 tcon->ses->server->vals->cap_large_files)) {
3204 current_read_size = min_t(uint,
3205 current_read_size, CIFSMaxBufSize);
3206 }
3207 if (open_file->invalidHandle) {
3208 rc = cifs_reopen_file(open_file, true);
3209 if (rc != 0)
3210 break;
3211 }
3212 io_parms.pid = pid;
3213 io_parms.tcon = tcon;
3214 io_parms.offset = *offset;
3215 io_parms.length = current_read_size;
3216 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3217 &bytes_read, &cur_offset,
3218 &buf_type);
3219 } while (rc == -EAGAIN);
3220
3221 if (rc || (bytes_read == 0)) {
3222 if (total_read) {
3223 break;
3224 } else {
3225 free_xid(xid);
3226 return rc;
3227 }
3228 } else {
3229 cifs_stats_bytes_read(tcon, total_read);
3230 *offset += bytes_read;
3231 }
3232 }
3233 free_xid(xid);
3234 return total_read;
3235 }
3236
3237 /*
3238 * If the page is mmap'ed into a process' page tables, then we need to make
3239 * sure that it doesn't change while being written back.
3240 */
3241 static int
3242 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3243 {
3244 struct page *page = vmf->page;
3245
3246 lock_page(page);
3247 return VM_FAULT_LOCKED;
3248 }
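/*
 * Minimal page_mkwrite: returning VM_FAULT_LOCKED hands the still
 * locked page back to the fault handler, which marks it dirty before
 * unlocking, satisfying the "don't change while being written back"
 * rule in the comment above.
 */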
3249
3250 static const struct vm_operations_struct cifs_file_vm_ops = {
3251 .fault = filemap_fault,
3252 .map_pages = filemap_map_pages,
3253 .page_mkwrite = cifs_page_mkwrite,
3254 };
3255
3256 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3257 {
3258 int rc = 0, xid;
3259 struct inode *inode = file_inode(file);
3260
3261 xid = get_xid();
3262
3263 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3264 rc = cifs_zap_mapping(inode);
3265 if (!rc)
3266 rc = generic_file_mmap(file, vma);
3267 if (!rc)
3268 vma->vm_ops = &cifs_file_vm_ops;
3272 free_xid(xid);
3273 return rc;
3274 }
3275
3276 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3277 {
3278 int rc, xid;
3279
3280 xid = get_xid();
3281 rc = cifs_revalidate_file(file);
3282 if (rc) {
3283 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3284 rc);
3285 free_xid(xid);
3286 return rc;
3287 }
3288 rc = generic_file_mmap(file, vma);
3289 if (rc == 0)
3290 vma->vm_ops = &cifs_file_vm_ops;
3291 free_xid(xid);
3292 return rc;
3293 }
3294
3295 static void
3296 cifs_readv_complete(struct work_struct *work)
3297 {
3298 unsigned int i, got_bytes;
3299 struct cifs_readdata *rdata = container_of(work,
3300 struct cifs_readdata, work);
3301
3302 got_bytes = rdata->got_bytes;
3303 for (i = 0; i < rdata->nr_pages; i++) {
3304 struct page *page = rdata->pages[i];
3305
3306 lru_cache_add_file(page);
3307
3308 if (rdata->result == 0 ||
3309 (rdata->result == -EAGAIN && got_bytes)) {
3310 flush_dcache_page(page);
3311 SetPageUptodate(page);
3312 }
3313
3314 unlock_page(page);
3315
3316 if (rdata->result == 0 ||
3317 (rdata->result == -EAGAIN && got_bytes))
3318 cifs_readpage_to_fscache(rdata->mapping->host, page);
3319
3320 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3321
3322 put_page(page);
3323 rdata->pages[i] = NULL;
3324 }
3325 kref_put(&rdata->refcount, cifs_readdata_release);
3326 }
3327
3328 static int
3329 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3330 struct cifs_readdata *rdata, unsigned int len)
3331 {
3332 int result = 0;
3333 unsigned int i;
3334 u64 eof;
3335 pgoff_t eof_index;
3336 unsigned int nr_pages = rdata->nr_pages;
3337
3338 /* determine the eof that the server (probably) has */
3339 eof = CIFS_I(rdata->mapping->host)->server_eof;
3340 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3341 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3342
3343 rdata->got_bytes = 0;
3344 rdata->tailsz = PAGE_SIZE;
3345 for (i = 0; i < nr_pages; i++) {
3346 struct page *page = rdata->pages[i];
3347 size_t n = PAGE_SIZE;
3348
3349 if (len >= PAGE_SIZE) {
3350 len -= PAGE_SIZE;
3351 } else if (len > 0) {
3352 /* enough for partial page, fill and zero the rest */
3353 zero_user(page, len, PAGE_SIZE - len);
3354 n = rdata->tailsz = len;
3355 len = 0;
3356 } else if (page->index > eof_index) {
3357 /*
3358 * The VFS will not try to do readahead past the
3359 * i_size, but it's possible that we have outstanding
3360 * writes with gaps in the middle and the i_size hasn't
3361 * caught up yet. Populate those with zeroed out pages
3362 * to prevent the VFS from repeatedly attempting to
3363 * fill them until the writes are flushed.
3364 */
3365 zero_user(page, 0, PAGE_SIZE);
3366 lru_cache_add_file(page);
3367 flush_dcache_page(page);
3368 SetPageUptodate(page);
3369 unlock_page(page);
3370 put_page(page);
3371 rdata->pages[i] = NULL;
3372 rdata->nr_pages--;
3373 continue;
3374 } else {
3375 /* no need to hold page hostage */
3376 lru_cache_add_file(page);
3377 unlock_page(page);
3378 put_page(page);
3379 rdata->pages[i] = NULL;
3380 rdata->nr_pages--;
3381 continue;
3382 }
3383
3384 result = cifs_read_page_from_socket(server, page, n);
3385 if (result < 0)
3386 break;
3387
3388 rdata->got_bytes += result;
3389 }
3390
3391 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3392 rdata->got_bytes : result;
3393 }
3394
3395 static int
3396 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3397 unsigned int rsize, struct list_head *tmplist,
3398 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3399 {
3400 struct page *page, *tpage;
3401 unsigned int expected_index;
3402 int rc;
3403 gfp_t gfp = readahead_gfp_mask(mapping);
3404
3405 INIT_LIST_HEAD(tmplist);
3406
3407 page = list_entry(page_list->prev, struct page, lru);
3408
3409 /*
3410 * Lock the page and put it in the cache. Since no one else
3411 * should have access to this page, we're safe to simply set
3412 * PG_locked without checking it first.
3413 */
3414 __SetPageLocked(page);
3415 rc = add_to_page_cache_locked(page, mapping,
3416 page->index, gfp);
3417
3418 /* give up if we can't stick it in the cache */
3419 if (rc) {
3420 __ClearPageLocked(page);
3421 return rc;
3422 }
3423
3424 /* move first page to the tmplist */
3425 *offset = (loff_t)page->index << PAGE_SHIFT;
3426 *bytes = PAGE_SIZE;
3427 *nr_pages = 1;
3428 list_move_tail(&page->lru, tmplist);
3429
3430 /* now try and add more pages onto the request */
3431 expected_index = page->index + 1;
3432 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3433 /* discontinuity ? */
3434 if (page->index != expected_index)
3435 break;
3436
3437 /* would this page push the read over the rsize? */
3438 if (*bytes + PAGE_SIZE > rsize)
3439 break;
3440
3441 __SetPageLocked(page);
3442 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3443 __ClearPageLocked(page);
3444 break;
3445 }
3446 list_move_tail(&page->lru, tmplist);
3447 (*bytes) += PAGE_SIZE;
3448 expected_index++;
3449 (*nr_pages)++;
3450 }
3451 return rc;
3452 }
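/*
 * Batching example (assuming 4KB pages): with rsize = 16384 at most
 * four consecutively indexed pages are moved to tmplist per request; a
 * gap in page->index or the rsize ceiling ends the batch early and the
 * remaining pages are picked up by the caller's next iteration.
 */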
3453
3454 static int cifs_readpages(struct file *file, struct address_space *mapping,
3455 struct list_head *page_list, unsigned num_pages)
3456 {
3457 int rc;
3458 struct list_head tmplist;
3459 struct cifsFileInfo *open_file = file->private_data;
3460 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3461 struct TCP_Server_Info *server;
3462 pid_t pid;
3463
3464 /*
3465 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3466 * immediately if the cookie is negative
3467 *
3468 * After this point, every page in the list might have PG_fscache set,
3469 * so we will need to clean that up off of every page we don't use.
3470 */
3471 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3472 &num_pages);
3473 if (rc == 0)
3474 return rc;
3475
3476 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3477 pid = open_file->pid;
3478 else
3479 pid = current->tgid;
3480
3481 rc = 0;
3482 server = tlink_tcon(open_file->tlink)->ses->server;
3483
3484 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3485 __func__, file, mapping, num_pages);
3486
3487 /*
3488 * Start with the page at end of list and move it to private
3489 * list. Do the same with any following pages until we hit
3490 * the rsize limit, hit an index discontinuity, or run out of
3491 * pages. Issue the async read and then start the loop again
3492 * until the list is empty.
3493 *
3494 * Note that list order is important. The page_list is in
3495 * the order of declining indexes. When we put the pages in
3496 * the rdata->pages, then we want them in increasing order.
3497 */
3498 while (!list_empty(page_list)) {
3499 unsigned int i, nr_pages, bytes, rsize;
3500 loff_t offset;
3501 struct page *page, *tpage;
3502 struct cifs_readdata *rdata;
3503 unsigned credits;
3504
3505 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3506 &rsize, &credits);
3507 if (rc)
3508 break;
3509
3510 /*
3511 * Give up immediately if rsize is too small to read an entire
3512 * page. The VFS will fall back to readpage. We should never
3513 * reach this point however since we set ra_pages to 0 when the
3514 * rsize is smaller than a cache page.
3515 */
3516 if (unlikely(rsize < PAGE_SIZE)) {
3517 add_credits_and_wake_if(server, credits, 0);
3518 return 0;
3519 }
3520
3521 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3522 &nr_pages, &offset, &bytes);
3523 if (rc) {
3524 add_credits_and_wake_if(server, credits, 0);
3525 break;
3526 }
3527
3528 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3529 if (!rdata) {
3530 /* best to give up if we're out of mem */
3531 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3532 list_del(&page->lru);
3533 lru_cache_add_file(page);
3534 unlock_page(page);
3535 put_page(page);
3536 }
3537 rc = -ENOMEM;
3538 add_credits_and_wake_if(server, credits, 0);
3539 break;
3540 }
3541
3542 rdata->cfile = cifsFileInfo_get(open_file);
3543 rdata->mapping = mapping;
3544 rdata->offset = offset;
3545 rdata->bytes = bytes;
3546 rdata->pid = pid;
3547 rdata->pagesz = PAGE_SIZE;
3548 rdata->read_into_pages = cifs_readpages_read_into_pages;
3549 rdata->credits = credits;
3550
3551 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3552 list_del(&page->lru);
3553 rdata->pages[rdata->nr_pages++] = page;
3554 }
3555
3556 if (!rdata->cfile->invalidHandle ||
3557 !cifs_reopen_file(rdata->cfile, true))
3558 rc = server->ops->async_readv(rdata);
3559 if (rc) {
3560 add_credits_and_wake_if(server, rdata->credits, 0);
3561 for (i = 0; i < rdata->nr_pages; i++) {
3562 page = rdata->pages[i];
3563 lru_cache_add_file(page);
3564 unlock_page(page);
3565 put_page(page);
3566 }
3567 /* Fall back to readpage in error/reconnect cases */
3568 kref_put(&rdata->refcount, cifs_readdata_release);
3569 break;
3570 }
3571
3572 kref_put(&rdata->refcount, cifs_readdata_release);
3573 }
3574
3575 /* Any pages that have been shown to fscache but didn't get added to
3576 * the pagecache must be uncached before they get returned to the
3577 * allocator.
3578 */
3579 cifs_fscache_readpages_cancel(mapping->host, page_list);
3580 return rc;
3581 }
3582
3583 /*
3584 * cifs_readpage_worker must be called with the page pinned
3585 */
3586 static int cifs_readpage_worker(struct file *file, struct page *page,
3587 loff_t *poffset)
3588 {
3589 char *read_data;
3590 int rc;
3591
3592 /* Is the page cached? */
3593 rc = cifs_readpage_from_fscache(file_inode(file), page);
3594 if (rc == 0)
3595 goto read_complete;
3596
3597 read_data = kmap(page);
3598 /* for reads over a certain size we could initiate async read ahead */
3599
3600 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3601
3602 if (rc < 0)
3603 goto io_error;
3604 else
3605 cifs_dbg(FYI, "Bytes read %d\n", rc);
3606
3607 file_inode(file)->i_atime =
3608 current_time(file_inode(file));
3609
3610 if (PAGE_SIZE > rc)
3611 memset(read_data + rc, 0, PAGE_SIZE - rc);
3612
3613 flush_dcache_page(page);
3614 SetPageUptodate(page);
3615
3616 /* send this page to the cache */
3617 cifs_readpage_to_fscache(file_inode(file), page);
3618
3619 rc = 0;
3620
3621 io_error:
3622 kunmap(page);
3623 unlock_page(page);
3624
3625 read_complete:
3626 return rc;
3627 }
3628
3629 static int cifs_readpage(struct file *file, struct page *page)
3630 {
3631 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3632 int rc = -EACCES;
3633 unsigned int xid;
3634
3635 xid = get_xid();
3636
3637 if (file->private_data == NULL) {
3638 rc = -EBADF;
3639 free_xid(xid);
3640 return rc;
3641 }
3642
3643 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3644 page, (int)offset, (int)offset);
3645
3646 rc = cifs_readpage_worker(file, page, &offset);
3647
3648 free_xid(xid);
3649 return rc;
3650 }
3651
3652 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3653 {
3654 struct cifsFileInfo *open_file;
3655 struct cifs_tcon *tcon =
3656 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3657
3658 spin_lock(&tcon->open_file_lock);
3659 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3660 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3661 spin_unlock(&tcon->open_file_lock);
3662 return 1;
3663 }
3664 }
3665 spin_unlock(&tcon->open_file_lock);
3666 return 0;
3667 }
3668
3669 /* We do not want to update the file size from the server for inodes
3670 open for write - to avoid races with writepage extending
3671 the file. In the future we could consider allowing a refresh
3672 of the inode only on increases in the file size, but this is
3673 tricky to do without racing with writebehind page caching in
3674 the current Linux kernel design */
3675 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3676 {
3677 if (!cifsInode)
3678 return true;
3679
3680 if (is_inode_writable(cifsInode)) {
3681 /* This inode is open for write at least once */
3682 struct cifs_sb_info *cifs_sb;
3683
3684 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3685 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3686 /* since there is no page cache to corrupt on
3687 direct I/O we can change the size safely */
3688 return true;
3689 }
3690
3691 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3692 return true;
3693
3694 return false;
3695 } else
3696 return true;
3697 }
3698
3699 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3700 loff_t pos, unsigned len, unsigned flags,
3701 struct page **pagep, void **fsdata)
3702 {
3703 int oncethru = 0;
3704 pgoff_t index = pos >> PAGE_SHIFT;
3705 loff_t offset = pos & (PAGE_SIZE - 1);
3706 loff_t page_start = pos & PAGE_MASK;
3707 loff_t i_size;
3708 struct page *page;
3709 int rc = 0;
3710
3711 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3712
3713 start:
3714 page = grab_cache_page_write_begin(mapping, index, flags);
3715 if (!page) {
3716 rc = -ENOMEM;
3717 goto out;
3718 }
3719
3720 if (PageUptodate(page))
3721 goto out;
3722
3723 /*
3724 * If we write a full page it will be up to date, no need to read from
3725 * the server. If the write is short, we'll end up doing a sync write
3726 * instead.
3727 */
3728 if (len == PAGE_SIZE)
3729 goto out;
3730
3731 /*
3732 * optimize away the read when we have an oplock, and we're not
3733 * expecting to use any of the data we'd be reading in. That
3734 * is, when the page lies beyond the EOF, or straddles the EOF
3735 * and the write will cover all of the existing data.
3736 */
3737 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3738 i_size = i_size_read(mapping->host);
3739 if (page_start >= i_size ||
3740 (offset == 0 && (pos + len) >= i_size)) {
3741 zero_user_segments(page, 0, offset,
3742 offset + len,
3743 PAGE_SIZE);
3744 /*
3745 * PageChecked means that the parts of the page
3746 * to which we're not writing are considered up
3747 * to date. Once the data is copied to the
3748 * page, it can be set uptodate.
3749 */
3750 SetPageChecked(page);
3751 goto out;
3752 }
3753 }
3754
3755 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3756 /*
3757 * might as well read a page, it is fast enough. If we get
3758 * an error, we don't need to return it. cifs_write_end will
3759 * do a sync write instead since PG_uptodate isn't set.
3760 */
3761 cifs_readpage_worker(file, page, &page_start);
3762 put_page(page);
3763 oncethru = 1;
3764 goto start;
3765 } else {
3766 /* we could try using another file handle if there is one -
3767 but how would we lock it to prevent close of that handle
3768 racing with this read? In any case this
3769 will be written out by write_end so it is fine */
3770 }
3771 out:
3772 *pagep = page;
3773 return rc;
3774 }
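/*
 * Offset arithmetic example (assuming 4KB pages): pos = 5000 and
 * len = 100 give index = 1, offset = 904 and page_start = 4096, so the
 * write lands 904 bytes into the second page of the file.
 */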
3775
3776 static int cifs_release_page(struct page *page, gfp_t gfp)
3777 {
3778 if (PagePrivate(page))
3779 return 0;
3780
3781 return cifs_fscache_release_page(page, gfp);
3782 }
3783
3784 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3785 unsigned int length)
3786 {
3787 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3788
3789 if (offset == 0 && length == PAGE_SIZE)
3790 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3791 }
3792
3793 static int cifs_launder_page(struct page *page)
3794 {
3795 int rc = 0;
3796 loff_t range_start = page_offset(page);
3797 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3798 struct writeback_control wbc = {
3799 .sync_mode = WB_SYNC_ALL,
3800 .nr_to_write = 0,
3801 .range_start = range_start,
3802 .range_end = range_end,
3803 };
3804
3805 cifs_dbg(FYI, "Launder page: %p\n", page);
3806
3807 if (clear_page_dirty_for_io(page))
3808 rc = cifs_writepage_locked(page, &wbc);
3809
3810 cifs_fscache_invalidate_page(page, page->mapping->host);
3811 return rc;
3812 }
3813
3814 void cifs_oplock_break(struct work_struct *work)
3815 {
3816 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3817 oplock_break);
3818 struct inode *inode = d_inode(cfile->dentry);
3819 struct cifsInodeInfo *cinode = CIFS_I(inode);
3820 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3821 struct TCP_Server_Info *server = tcon->ses->server;
3822 int rc = 0;
3823
3824 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3825 TASK_UNINTERRUPTIBLE);
3826
3827 server->ops->downgrade_oplock(server, cinode,
3828 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3829
3830 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3831 cifs_has_mand_locks(cinode)) {
3832 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3833 inode);
3834 cinode->oplock = 0;
3835 }
3836
3837 if (inode && S_ISREG(inode->i_mode)) {
3838 if (CIFS_CACHE_READ(cinode))
3839 break_lease(inode, O_RDONLY);
3840 else
3841 break_lease(inode, O_WRONLY);
3842 rc = filemap_fdatawrite(inode->i_mapping);
3843 if (!CIFS_CACHE_READ(cinode)) {
3844 rc = filemap_fdatawait(inode->i_mapping);
3845 mapping_set_error(inode->i_mapping, rc);
3846 cifs_zap_mapping(inode);
3847 }
3848 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3849 }
3850
3851 rc = cifs_push_locks(cfile);
3852 if (rc)
3853 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3854
3855 /*
3856 * Releasing a stale oplock after a recent reconnect of the smb session
3857 * using a now incorrect file handle is not a data integrity issue, but
3858 * do not bother sending an oplock release if the session is still
3859 * disconnected, since the server has already released the oplock.
3860 */
3861 if (!cfile->oplock_break_cancelled) {
3862 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3863 cinode);
3864 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3865 }
3866 cifs_done_oplock_break(cinode);
3867 }
3868
3869 /*
3870 * The presence of cifs_direct_io() in the address space ops vector
3871 * allows open() O_DIRECT flags which would have failed otherwise.
3872 *
3873 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3874 * so this method should never be called.
3875 *
3876 * Direct IO is not yet supported in the cached mode.
3877 */
3878 static ssize_t
3879 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3880 {
3881 /*
3882 * FIXME
3883 * Eventually need to support direct IO for non forcedirectio mounts
3884 */
3885 return -EINVAL;
3886 }
3887
3888
3889 const struct address_space_operations cifs_addr_ops = {
3890 .readpage = cifs_readpage,
3891 .readpages = cifs_readpages,
3892 .writepage = cifs_writepage,
3893 .writepages = cifs_writepages,
3894 .write_begin = cifs_write_begin,
3895 .write_end = cifs_write_end,
3896 .set_page_dirty = __set_page_dirty_nobuffers,
3897 .releasepage = cifs_release_page,
3898 .direct_IO = cifs_direct_io,
3899 .invalidatepage = cifs_invalidate_page,
3900 .launder_page = cifs_launder_page,
3901 };
3902
3903 /*
3904 * cifs_readpages requires the server to support a buffer large enough to
3905 * contain the header plus one complete page of data. Otherwise, we need
3906 * to leave cifs_readpages out of the address space operations.
3907 */
3908 const struct address_space_operations cifs_addr_ops_smallbuf = {
3909 .readpage = cifs_readpage,
3910 .writepage = cifs_writepage,
3911 .writepages = cifs_writepages,
3912 .write_begin = cifs_write_begin,
3913 .write_end = cifs_write_end,
3914 .set_page_dirty = __set_page_dirty_nobuffers,
3915 .releasepage = cifs_release_page,
3916 .invalidatepage = cifs_invalidate_page,
3917 .launder_page = cifs_launder_page,
3918 };