]> git.ipfire.org Git - people/ms/linux.git/blame - fs/notify/fanotify/fanotify_user.c
selinux: always return a secid from the network caches if we find one
[people/ms/linux.git] / fs / notify / fanotify / fanotify_user.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
33d3dfff 2#include <linux/fanotify.h>
11637e4b 3#include <linux/fcntl.h>
2a3edf86 4#include <linux/file.h>
11637e4b 5#include <linux/fs.h>
52c923dd 6#include <linux/anon_inodes.h>
11637e4b 7#include <linux/fsnotify_backend.h>
2a3edf86 8#include <linux/init.h>
a1014f10 9#include <linux/mount.h>
2a3edf86 10#include <linux/namei.h>
a1014f10 11#include <linux/poll.h>
11637e4b
EP
12#include <linux/security.h>
13#include <linux/syscalls.h>
e4e047a2 14#include <linux/slab.h>
2a3edf86 15#include <linux/types.h>
a1014f10 16#include <linux/uaccess.h>
91c2e0bc 17#include <linux/compat.h>
174cd4b1 18#include <linux/sched/signal.h>
d46eb14b 19#include <linux/memcontrol.h>
a8b13aa2
AG
20#include <linux/statfs.h>
21#include <linux/exportfs.h>
a1014f10
EP
22
23#include <asm/ioctls.h>
11637e4b 24
c63181e6 25#include "../../mount.h"
be77196b 26#include "../fdinfo.h"
7053aee2 27#include "fanotify.h"
c63181e6 28
2529a0df 29#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
e7099d8a 30#define FANOTIFY_DEFAULT_MAX_MARKS 8192
4afeff85 31#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
2529a0df 32
48149e9d
HS
33/*
34 * All flags that may be specified in parameter event_f_flags of fanotify_init.
35 *
36 * Internal and external open flags are stored together in field f_flags of
37 * struct file. Only external open flags shall be allowed in event_f_flags.
38 * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
39 * excluded.
40 */
41#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
42 O_ACCMODE | O_APPEND | O_NONBLOCK | \
43 __O_SYNC | O_DSYNC | O_CLOEXEC | \
44 O_LARGEFILE | O_NOATIME )
45
33d3dfff 46extern const struct fsnotify_ops fanotify_fsnotify_ops;
11637e4b 47
054c636e 48struct kmem_cache *fanotify_mark_cache __read_mostly;
7053aee2 49struct kmem_cache *fanotify_event_cachep __read_mostly;
f083441b 50struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
2a3edf86 51
5e469c83
AG
52#define FANOTIFY_EVENT_ALIGN 4
53
54static int fanotify_event_info_len(struct fanotify_event *event)
55{
56 if (!fanotify_event_has_fid(event))
57 return 0;
58
59 return roundup(sizeof(struct fanotify_event_info_fid) +
60 sizeof(struct file_handle) + event->fh_len,
61 FANOTIFY_EVENT_ALIGN);
62}
63
a1014f10
EP
64/*
65 * Get an fsnotify notification event if one exists and is small
66 * enough to fit in "count". Return an error pointer if the count
40873284
JK
67 * is not large enough. When permission event is dequeued, its state is
68 * updated accordingly.
a1014f10
EP
69 */
70static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
71 size_t count)
72{
5e469c83 73 size_t event_size = FAN_EVENT_METADATA_LEN;
8c554466 74 struct fsnotify_event *fsn_event = NULL;
a1014f10
EP
75
76 pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
77
8c554466 78 spin_lock(&group->notification_lock);
a1014f10 79 if (fsnotify_notify_queue_is_empty(group))
8c554466 80 goto out;
a1014f10 81
5e469c83 82 if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
8c554466
JK
83 event_size += fanotify_event_info_len(
84 FANOTIFY_E(fsnotify_peek_first_event(group)));
5e469c83
AG
85 }
86
8c554466
JK
87 if (event_size > count) {
88 fsn_event = ERR_PTR(-EINVAL);
89 goto out;
90 }
91 fsn_event = fsnotify_remove_first_event(group);
40873284
JK
92 if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
93 FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
8c554466
JK
94out:
95 spin_unlock(&group->notification_lock);
96 return fsn_event;
a1014f10
EP
97}
98
352e3b24 99static int create_fd(struct fsnotify_group *group,
33913997 100 struct fanotify_event *event,
7053aee2 101 struct file **file)
a1014f10
EP
102{
103 int client_fd;
a1014f10
EP
104 struct file *new_file;
105
22aa425d 106 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
a1014f10 107
0b37e097 108 client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
a1014f10
EP
109 if (client_fd < 0)
110 return client_fd;
111
a1014f10
EP
112 /*
113 * we need a new file handle for the userspace program so it can read even if it was
114 * originally opened O_WRONLY.
115 */
a1014f10
EP
116 /* it's possible this event was an overflow event. in that case dentry and mnt
117 * are NULL; That's fine, just don't call dentry open */
765927b2
AV
118 if (event->path.dentry && event->path.mnt)
119 new_file = dentry_open(&event->path,
80af2588 120 group->fanotify_data.f_flags | FMODE_NONOTIFY,
a1014f10
EP
121 current_cred());
122 else
123 new_file = ERR_PTR(-EOVERFLOW);
124 if (IS_ERR(new_file)) {
125 /*
126 * we still send an event even if we can't open the file. this
127 * can happen when say tasks are gone and we try to open their
128 * /proc files or we try to open a WRONLY file like in sysfs
129 * we just send the errno to userspace since there isn't much
130 * else we can do.
131 */
132 put_unused_fd(client_fd);
133 client_fd = PTR_ERR(new_file);
134 } else {
352e3b24 135 *file = new_file;
a1014f10
EP
136 }
137
22aa425d 138 return client_fd;
a1014f10
EP
139}
140
40873284
JK
141/*
142 * Finish processing of permission event by setting it to ANSWERED state and
143 * drop group->notification_lock.
144 */
145static void finish_permission_event(struct fsnotify_group *group,
146 struct fanotify_perm_event *event,
147 unsigned int response)
148 __releases(&group->notification_lock)
149{
fabf7f29
JK
150 bool destroy = false;
151
40873284
JK
152 assert_spin_locked(&group->notification_lock);
153 event->response = response;
fabf7f29
JK
154 if (event->state == FAN_EVENT_CANCELED)
155 destroy = true;
156 else
157 event->state = FAN_EVENT_ANSWERED;
40873284 158 spin_unlock(&group->notification_lock);
fabf7f29
JK
159 if (destroy)
160 fsnotify_destroy_event(group, &event->fae.fse);
40873284
JK
161}
162
b2d87909
EP
163static int process_access_response(struct fsnotify_group *group,
164 struct fanotify_response *response_struct)
165{
33913997 166 struct fanotify_perm_event *event;
f083441b
JK
167 int fd = response_struct->fd;
168 int response = response_struct->response;
b2d87909
EP
169
170 pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
171 fd, response);
172 /*
173 * make sure the response is valid, if invalid we do nothing and either
25985edc 174 * userspace can send a valid response or we will clean it up after the
b2d87909
EP
175 * timeout
176 */
de8cd83e 177 switch (response & ~FAN_AUDIT) {
b2d87909
EP
178 case FAN_ALLOW:
179 case FAN_DENY:
180 break;
181 default:
182 return -EINVAL;
183 }
184
185 if (fd < 0)
186 return -EINVAL;
187
96a71f21 188 if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
de8cd83e
SG
189 return -EINVAL;
190
af6a5113
JK
191 spin_lock(&group->notification_lock);
192 list_for_each_entry(event, &group->fanotify_data.access_list,
193 fae.fse.list) {
194 if (event->fd != fd)
195 continue;
b2d87909 196
af6a5113 197 list_del_init(&event->fae.fse.list);
40873284 198 finish_permission_event(group, event, response);
af6a5113
JK
199 wake_up(&group->fanotify_data.access_waitq);
200 return 0;
201 }
202 spin_unlock(&group->notification_lock);
b2d87909 203
af6a5113 204 return -ENOENT;
b2d87909 205}
b2d87909 206
5e469c83
AG
207static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
208{
209 struct fanotify_event_info_fid info = { };
210 struct file_handle handle = { };
b2d22b6b 211 unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh;
5e469c83
AG
212 size_t fh_len = event->fh_len;
213 size_t len = fanotify_event_info_len(event);
214
215 if (!len)
216 return 0;
217
218 if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
219 return -EFAULT;
220
221 /* Copy event info fid header followed by vaiable sized file handle */
222 info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
223 info.hdr.len = len;
224 info.fsid = event->fid.fsid;
225 if (copy_to_user(buf, &info, sizeof(info)))
226 return -EFAULT;
227
228 buf += sizeof(info);
229 len -= sizeof(info);
230 handle.handle_type = event->fh_type;
231 handle.handle_bytes = fh_len;
232 if (copy_to_user(buf, &handle, sizeof(handle)))
233 return -EFAULT;
234
235 buf += sizeof(handle);
236 len -= sizeof(handle);
b2d22b6b
JK
237 /*
238 * For an inline fh, copy through stack to exclude the copy from
239 * usercopy hardening protections.
240 */
241 fh = fanotify_event_fh(event);
242 if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
243 memcpy(bounce, fh, fh_len);
244 fh = bounce;
245 }
246 if (copy_to_user(buf, fh, fh_len))
5e469c83
AG
247 return -EFAULT;
248
249 /* Pad with 0's */
250 buf += fh_len;
251 len -= fh_len;
252 WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
253 if (len > 0 && clear_user(buf, len))
254 return -EFAULT;
255
256 return 0;
257}
258
a1014f10 259static ssize_t copy_event_to_user(struct fsnotify_group *group,
bb2f7b45 260 struct fsnotify_event *fsn_event,
5b03a472 261 char __user *buf, size_t count)
a1014f10 262{
bb2f7b45
AG
263 struct fanotify_event_metadata metadata;
264 struct fanotify_event *event;
265 struct file *f = NULL;
e9e0c890 266 int ret, fd = FAN_NOFD;
a1014f10 267
bb2f7b45 268 pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
a1014f10 269
bb2f7b45
AG
270 event = container_of(fsn_event, struct fanotify_event, fse);
271 metadata.event_len = FAN_EVENT_METADATA_LEN;
272 metadata.metadata_len = FAN_EVENT_METADATA_LEN;
273 metadata.vers = FANOTIFY_METADATA_VERSION;
274 metadata.reserved = 0;
275 metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
276 metadata.pid = pid_vnr(event->pid);
277
e9e0c890 278 if (fanotify_event_has_path(event)) {
bb2f7b45
AG
279 fd = create_fd(group, event, &f);
280 if (fd < 0)
281 return fd;
5e469c83
AG
282 } else if (fanotify_event_has_fid(event)) {
283 metadata.event_len += fanotify_event_info_len(event);
bb2f7b45
AG
284 }
285 metadata.fd = fd;
b2d87909 286
b2d87909 287 ret = -EFAULT;
5b03a472
KC
288 /*
289 * Sanity check copy size in case get_one_event() and
290 * fill_event_metadata() event_len sizes ever get out of sync.
291 */
bb2f7b45 292 if (WARN_ON_ONCE(metadata.event_len > count))
5b03a472 293 goto out_close_fd;
bb2f7b45 294
5e469c83 295 if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
352e3b24
AV
296 goto out_close_fd;
297
bb2f7b45
AG
298 if (fanotify_is_perm_event(event->mask))
299 FANOTIFY_PE(fsn_event)->fd = fd;
a1014f10 300
5e469c83 301 if (fanotify_event_has_path(event)) {
3587b1b0 302 fd_install(fd, f);
5e469c83
AG
303 } else if (fanotify_event_has_fid(event)) {
304 ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
305 if (ret < 0)
306 return ret;
307 }
308
bb2f7b45 309 return metadata.event_len;
b2d87909 310
b2d87909 311out_close_fd:
352e3b24
AV
312 if (fd != FAN_NOFD) {
313 put_unused_fd(fd);
314 fput(f);
315 }
b2d87909 316 return ret;
a1014f10
EP
317}
318
319/* intofiy userspace file descriptor functions */
076ccb76 320static __poll_t fanotify_poll(struct file *file, poll_table *wait)
a1014f10
EP
321{
322 struct fsnotify_group *group = file->private_data;
076ccb76 323 __poll_t ret = 0;
a1014f10
EP
324
325 poll_wait(file, &group->notification_waitq, wait);
c21dbe20 326 spin_lock(&group->notification_lock);
a1014f10 327 if (!fsnotify_notify_queue_is_empty(group))
a9a08845 328 ret = EPOLLIN | EPOLLRDNORM;
c21dbe20 329 spin_unlock(&group->notification_lock);
a1014f10
EP
330
331 return ret;
332}
333
334static ssize_t fanotify_read(struct file *file, char __user *buf,
335 size_t count, loff_t *pos)
336{
337 struct fsnotify_group *group;
338 struct fsnotify_event *kevent;
339 char __user *start;
340 int ret;
536ebe9c 341 DEFINE_WAIT_FUNC(wait, woken_wake_function);
a1014f10
EP
342
343 start = buf;
344 group = file->private_data;
345
346 pr_debug("%s: group=%p\n", __func__, group);
347
536ebe9c 348 add_wait_queue(&group->notification_waitq, &wait);
a1014f10 349 while (1) {
a1014f10 350 kevent = get_one_event(group, count);
d8aaab4f 351 if (IS_ERR(kevent)) {
a1014f10 352 ret = PTR_ERR(kevent);
d8aaab4f
JK
353 break;
354 }
355
356 if (!kevent) {
357 ret = -EAGAIN;
358 if (file->f_flags & O_NONBLOCK)
a1014f10 359 break;
d8aaab4f
JK
360
361 ret = -ERESTARTSYS;
362 if (signal_pending(current))
363 break;
364
365 if (start != buf)
a1014f10 366 break;
536ebe9c
PZ
367
368 wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
a1014f10
EP
369 continue;
370 }
371
5b03a472 372 ret = copy_event_to_user(group, kevent, buf, count);
4ff33aaf
AG
373 if (unlikely(ret == -EOPENSTALE)) {
374 /*
375 * We cannot report events with stale fd so drop it.
376 * Setting ret to 0 will continue the event loop and
377 * do the right thing if there are no more events to
378 * read (i.e. return bytes read, -EAGAIN or wait).
379 */
380 ret = 0;
381 }
382
d8aaab4f
JK
383 /*
384 * Permission events get queued to wait for response. Other
385 * events can be destroyed now.
386 */
a0a92d26 387 if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
d8aaab4f 388 fsnotify_destroy_event(group, kevent);
d507816b 389 } else {
4ff33aaf 390 if (ret <= 0) {
40873284
JK
391 spin_lock(&group->notification_lock);
392 finish_permission_event(group,
393 FANOTIFY_PE(kevent), FAN_DENY);
d507816b 394 wake_up(&group->fanotify_data.access_waitq);
4ff33aaf
AG
395 } else {
396 spin_lock(&group->notification_lock);
397 list_add_tail(&kevent->list,
398 &group->fanotify_data.access_list);
399 spin_unlock(&group->notification_lock);
d507816b 400 }
d507816b 401 }
4ff33aaf
AG
402 if (ret < 0)
403 break;
d8aaab4f
JK
404 buf += ret;
405 count -= ret;
a1014f10 406 }
536ebe9c 407 remove_wait_queue(&group->notification_waitq, &wait);
a1014f10 408
a1014f10
EP
409 if (start != buf && ret != -EFAULT)
410 ret = buf - start;
411 return ret;
412}
413
b2d87909
EP
414static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
415{
b2d87909
EP
416 struct fanotify_response response = { .fd = -1, .response = -1 };
417 struct fsnotify_group *group;
418 int ret;
419
6685df31
MS
420 if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
421 return -EINVAL;
422
b2d87909
EP
423 group = file->private_data;
424
425 if (count > sizeof(response))
426 count = sizeof(response);
427
428 pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
429
430 if (copy_from_user(&response, buf, count))
431 return -EFAULT;
432
433 ret = process_access_response(group, &response);
434 if (ret < 0)
435 count = ret;
436
437 return count;
b2d87909
EP
438}
439
52c923dd
EP
440static int fanotify_release(struct inode *ignored, struct file *file)
441{
442 struct fsnotify_group *group = file->private_data;
ca6f8699 443 struct fanotify_perm_event *event;
96d41019 444 struct fsnotify_event *fsn_event;
19ba54f4 445
5838d444 446 /*
96d41019
JK
447 * Stop new events from arriving in the notification queue. since
448 * userspace cannot use fanotify fd anymore, no event can enter or
449 * leave access_list by now either.
5838d444 450 */
96d41019 451 fsnotify_group_stop_queueing(group);
2eebf582 452
96d41019
JK
453 /*
454 * Process all permission events on access_list and notification queue
455 * and simulate reply from userspace.
456 */
073f6552 457 spin_lock(&group->notification_lock);
ca6f8699
JK
458 while (!list_empty(&group->fanotify_data.access_list)) {
459 event = list_first_entry(&group->fanotify_data.access_list,
460 struct fanotify_perm_event, fae.fse.list);
f083441b 461 list_del_init(&event->fae.fse.list);
40873284
JK
462 finish_permission_event(group, event, FAN_ALLOW);
463 spin_lock(&group->notification_lock);
2eebf582 464 }
2eebf582 465
5838d444 466 /*
96d41019
JK
467 * Destroy all non-permission events. For permission events just
468 * dequeue them and set the response. They will be freed once the
469 * response is consumed and fanotify_get_response() returns.
5838d444 470 */
96d41019
JK
471 while (!fsnotify_notify_queue_is_empty(group)) {
472 fsn_event = fsnotify_remove_first_event(group);
a0a92d26 473 if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
c21dbe20 474 spin_unlock(&group->notification_lock);
96d41019 475 fsnotify_destroy_event(group, fsn_event);
6685df31 476 } else {
40873284
JK
477 finish_permission_event(group, FANOTIFY_PE(fsn_event),
478 FAN_ALLOW);
6685df31 479 }
40873284 480 spin_lock(&group->notification_lock);
96d41019 481 }
c21dbe20 482 spin_unlock(&group->notification_lock);
96d41019
JK
483
484 /* Response for all permission events it set, wakeup waiters */
2eebf582 485 wake_up(&group->fanotify_data.access_waitq);
0a6b6bd5 486
52c923dd 487 /* matches the fanotify_init->fsnotify_alloc_group */
d8153d4d 488 fsnotify_destroy_group(group);
52c923dd
EP
489
490 return 0;
491}
492
a1014f10
EP
493static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
494{
495 struct fsnotify_group *group;
7053aee2 496 struct fsnotify_event *fsn_event;
a1014f10
EP
497 void __user *p;
498 int ret = -ENOTTY;
499 size_t send_len = 0;
500
501 group = file->private_data;
502
503 p = (void __user *) arg;
504
505 switch (cmd) {
506 case FIONREAD:
c21dbe20 507 spin_lock(&group->notification_lock);
7053aee2 508 list_for_each_entry(fsn_event, &group->notification_list, list)
a1014f10 509 send_len += FAN_EVENT_METADATA_LEN;
c21dbe20 510 spin_unlock(&group->notification_lock);
a1014f10
EP
511 ret = put_user(send_len, (int __user *) p);
512 break;
513 }
514
515 return ret;
516}
517
52c923dd 518static const struct file_operations fanotify_fops = {
be77196b 519 .show_fdinfo = fanotify_show_fdinfo,
a1014f10
EP
520 .poll = fanotify_poll,
521 .read = fanotify_read,
b2d87909 522 .write = fanotify_write,
52c923dd
EP
523 .fasync = NULL,
524 .release = fanotify_release,
a1014f10
EP
525 .unlocked_ioctl = fanotify_ioctl,
526 .compat_ioctl = fanotify_ioctl,
6038f373 527 .llseek = noop_llseek,
52c923dd
EP
528};
529
2a3edf86
EP
530static int fanotify_find_path(int dfd, const char __user *filename,
531 struct path *path, unsigned int flags)
532{
533 int ret;
534
535 pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
536 dfd, filename, flags);
537
538 if (filename == NULL) {
2903ff01 539 struct fd f = fdget(dfd);
2a3edf86
EP
540
541 ret = -EBADF;
2903ff01 542 if (!f.file)
2a3edf86
EP
543 goto out;
544
545 ret = -ENOTDIR;
546 if ((flags & FAN_MARK_ONLYDIR) &&
496ad9aa 547 !(S_ISDIR(file_inode(f.file)->i_mode))) {
2903ff01 548 fdput(f);
2a3edf86
EP
549 goto out;
550 }
551
2903ff01 552 *path = f.file->f_path;
2a3edf86 553 path_get(path);
2903ff01 554 fdput(f);
2a3edf86
EP
555 } else {
556 unsigned int lookup_flags = 0;
557
558 if (!(flags & FAN_MARK_DONT_FOLLOW))
559 lookup_flags |= LOOKUP_FOLLOW;
560 if (flags & FAN_MARK_ONLYDIR)
561 lookup_flags |= LOOKUP_DIRECTORY;
562
563 ret = user_path_at(dfd, filename, lookup_flags, path);
564 if (ret)
565 goto out;
566 }
567
568 /* you can only watch an inode if you have read permissions on it */
569 ret = inode_permission(path->dentry->d_inode, MAY_READ);
570 if (ret)
571 path_put(path);
572out:
573 return ret;
574}
575
b9e4e3bd
EP
576static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
577 __u32 mask,
6dfbd149
LS
578 unsigned int flags,
579 int *destroy)
088b09b0 580{
d2c1874c 581 __u32 oldmask = 0;
088b09b0
AG
582
583 spin_lock(&fsn_mark->lock);
b9e4e3bd
EP
584 if (!(flags & FAN_MARK_IGNORED_MASK)) {
585 oldmask = fsn_mark->mask;
a72fd224 586 fsn_mark->mask &= ~mask;
b9e4e3bd 587 } else {
a72fd224 588 fsn_mark->ignored_mask &= ~mask;
b9e4e3bd 589 }
a118449a 590 *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
088b09b0
AG
591 spin_unlock(&fsn_mark->lock);
592
088b09b0
AG
593 return mask & oldmask;
594}
595
eaa2c6b0
AG
596static int fanotify_remove_mark(struct fsnotify_group *group,
597 fsnotify_connp_t *connp, __u32 mask,
598 unsigned int flags)
88826276
EP
599{
600 struct fsnotify_mark *fsn_mark = NULL;
088b09b0 601 __u32 removed;
6dfbd149 602 int destroy_mark;
88826276 603
7b18527c 604 mutex_lock(&group->mark_mutex);
eaa2c6b0 605 fsn_mark = fsnotify_find_mark(connp, group);
7b18527c
LS
606 if (!fsn_mark) {
607 mutex_unlock(&group->mark_mutex);
f3640192 608 return -ENOENT;
7b18527c 609 }
88826276 610
6dfbd149
LS
611 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
612 &destroy_mark);
3ac70bfc
AG
613 if (removed & fsnotify_conn_mask(fsn_mark->connector))
614 fsnotify_recalc_mask(fsn_mark->connector);
6dfbd149 615 if (destroy_mark)
4712e722 616 fsnotify_detach_mark(fsn_mark);
7b18527c 617 mutex_unlock(&group->mark_mutex);
4712e722
JK
618 if (destroy_mark)
619 fsnotify_free_mark(fsn_mark);
6dfbd149 620
eaa2c6b0 621 /* matches the fsnotify_find_mark() */
f3640192 622 fsnotify_put_mark(fsn_mark);
f3640192
AG
623 return 0;
624}
2a3edf86 625
eaa2c6b0
AG
626static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
627 struct vfsmount *mnt, __u32 mask,
628 unsigned int flags)
629{
630 return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
631 mask, flags);
632}
633
d54f4fba
AG
634static int fanotify_remove_sb_mark(struct fsnotify_group *group,
635 struct super_block *sb, __u32 mask,
636 unsigned int flags)
637{
638 return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
639}
640
f3640192 641static int fanotify_remove_inode_mark(struct fsnotify_group *group,
b9e4e3bd
EP
642 struct inode *inode, __u32 mask,
643 unsigned int flags)
f3640192 644{
eaa2c6b0
AG
645 return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
646 flags);
2a3edf86
EP
647}
648
b9e4e3bd
EP
649static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
650 __u32 mask,
651 unsigned int flags)
912ee394 652{
192ca4d1 653 __u32 oldmask = -1;
912ee394
AG
654
655 spin_lock(&fsn_mark->lock);
b9e4e3bd
EP
656 if (!(flags & FAN_MARK_IGNORED_MASK)) {
657 oldmask = fsn_mark->mask;
a72fd224 658 fsn_mark->mask |= mask;
b9e4e3bd 659 } else {
a72fd224 660 fsn_mark->ignored_mask |= mask;
c9778a98
EP
661 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
662 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
b9e4e3bd 663 }
912ee394
AG
664 spin_unlock(&fsn_mark->lock);
665
666 return mask & ~oldmask;
667}
668
5e9c070c 669static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
b812a9f5 670 fsnotify_connp_t *connp,
77115225
AG
671 unsigned int type,
672 __kernel_fsid_t *fsid)
5e9c070c
LS
673{
674 struct fsnotify_mark *mark;
675 int ret;
676
677 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
678 return ERR_PTR(-ENOSPC);
679
680 mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
681 if (!mark)
682 return ERR_PTR(-ENOMEM);
683
054c636e 684 fsnotify_init_mark(mark, group);
77115225 685 ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
5e9c070c
LS
686 if (ret) {
687 fsnotify_put_mark(mark);
688 return ERR_PTR(ret);
689 }
690
691 return mark;
692}
693
694
eaa2c6b0
AG
695static int fanotify_add_mark(struct fsnotify_group *group,
696 fsnotify_connp_t *connp, unsigned int type,
77115225
AG
697 __u32 mask, unsigned int flags,
698 __kernel_fsid_t *fsid)
2a3edf86
EP
699{
700 struct fsnotify_mark *fsn_mark;
912ee394 701 __u32 added;
2a3edf86 702
7b18527c 703 mutex_lock(&group->mark_mutex);
b812a9f5 704 fsn_mark = fsnotify_find_mark(connp, group);
88826276 705 if (!fsn_mark) {
77115225 706 fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
5e9c070c 707 if (IS_ERR(fsn_mark)) {
7b18527c 708 mutex_unlock(&group->mark_mutex);
5e9c070c 709 return PTR_ERR(fsn_mark);
7b18527c 710 }
88826276 711 }
b9e4e3bd 712 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
3ac70bfc
AG
713 if (added & ~fsnotify_conn_mask(fsn_mark->connector))
714 fsnotify_recalc_mask(fsn_mark->connector);
c9747640 715 mutex_unlock(&group->mark_mutex);
5e9c070c 716
fa218ab9 717 fsnotify_put_mark(fsn_mark);
5e9c070c 718 return 0;
88826276
EP
719}
720
eaa2c6b0
AG
721static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
722 struct vfsmount *mnt, __u32 mask,
77115225 723 unsigned int flags, __kernel_fsid_t *fsid)
eaa2c6b0
AG
724{
725 return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
77115225 726 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
eaa2c6b0
AG
727}
728
d54f4fba 729static int fanotify_add_sb_mark(struct fsnotify_group *group,
77115225
AG
730 struct super_block *sb, __u32 mask,
731 unsigned int flags, __kernel_fsid_t *fsid)
d54f4fba
AG
732{
733 return fanotify_add_mark(group, &sb->s_fsnotify_marks,
77115225 734 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
d54f4fba
AG
735}
736
52202dfb 737static int fanotify_add_inode_mark(struct fsnotify_group *group,
b9e4e3bd 738 struct inode *inode, __u32 mask,
77115225 739 unsigned int flags, __kernel_fsid_t *fsid)
88826276 740{
88826276 741 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
2a3edf86 742
5322a59f
EP
743 /*
744 * If some other task has this inode open for write we should not add
745 * an ignored mark, unless that ignored mark is supposed to survive
746 * modification changes anyway.
747 */
748 if ((flags & FAN_MARK_IGNORED_MASK) &&
749 !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
ac9498d6 750 inode_is_open_for_write(inode))
5322a59f
EP
751 return 0;
752
eaa2c6b0 753 return fanotify_add_mark(group, &inode->i_fsnotify_marks,
77115225 754 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
88826276 755}
2a3edf86 756
52c923dd 757/* fanotify syscalls */
08ae8938 758SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
11637e4b 759{
52c923dd
EP
760 struct fsnotify_group *group;
761 int f_flags, fd;
4afeff85 762 struct user_struct *user;
33913997 763 struct fanotify_event *oevent;
52c923dd 764
96a71f21
AG
765 pr_debug("%s: flags=%x event_f_flags=%x\n",
766 __func__, flags, event_f_flags);
52c923dd 767
52c923dd 768 if (!capable(CAP_SYS_ADMIN))
a2f13ad0 769 return -EPERM;
52c923dd 770
de8cd83e 771#ifdef CONFIG_AUDITSYSCALL
23c9deeb 772 if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
de8cd83e 773#else
23c9deeb 774 if (flags & ~FANOTIFY_INIT_FLAGS)
de8cd83e 775#endif
52c923dd
EP
776 return -EINVAL;
777
48149e9d
HS
778 if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
779 return -EINVAL;
780
781 switch (event_f_flags & O_ACCMODE) {
782 case O_RDONLY:
783 case O_RDWR:
784 case O_WRONLY:
785 break;
786 default:
787 return -EINVAL;
788 }
789
a8b13aa2
AG
790 if ((flags & FAN_REPORT_FID) &&
791 (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
792 return -EINVAL;
793
4afeff85
EP
794 user = get_current_user();
795 if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
796 free_uid(user);
797 return -EMFILE;
798 }
799
b2d87909 800 f_flags = O_RDWR | FMODE_NONOTIFY;
52c923dd
EP
801 if (flags & FAN_CLOEXEC)
802 f_flags |= O_CLOEXEC;
803 if (flags & FAN_NONBLOCK)
804 f_flags |= O_NONBLOCK;
805
806 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
807 group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
26379198
EP
808 if (IS_ERR(group)) {
809 free_uid(user);
52c923dd 810 return PTR_ERR(group);
26379198 811 }
52c923dd 812
4afeff85 813 group->fanotify_data.user = user;
96a71f21 814 group->fanotify_data.flags = flags;
4afeff85 815 atomic_inc(&user->fanotify_listeners);
d46eb14b 816 group->memcg = get_mem_cgroup_from_mm(current->mm);
4afeff85 817
83b535d2
AG
818 oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
819 FSNOTIFY_EVENT_NONE, NULL);
ff57cd58
JK
820 if (unlikely(!oevent)) {
821 fd = -ENOMEM;
822 goto out_destroy_group;
823 }
824 group->overflow_event = &oevent->fse;
ff57cd58 825
1e2ee49f
WW
826 if (force_o_largefile())
827 event_f_flags |= O_LARGEFILE;
80af2588 828 group->fanotify_data.f_flags = event_f_flags;
9e66e423
EP
829 init_waitqueue_head(&group->fanotify_data.access_waitq);
830 INIT_LIST_HEAD(&group->fanotify_data.access_list);
23c9deeb 831 switch (flags & FANOTIFY_CLASS_BITS) {
4231a235
EP
832 case FAN_CLASS_NOTIF:
833 group->priority = FS_PRIO_0;
834 break;
835 case FAN_CLASS_CONTENT:
836 group->priority = FS_PRIO_1;
837 break;
838 case FAN_CLASS_PRE_CONTENT:
839 group->priority = FS_PRIO_2;
840 break;
841 default:
842 fd = -EINVAL;
d8153d4d 843 goto out_destroy_group;
4231a235 844 }
cb2d429f 845
5dd03f55
EP
846 if (flags & FAN_UNLIMITED_QUEUE) {
847 fd = -EPERM;
848 if (!capable(CAP_SYS_ADMIN))
d8153d4d 849 goto out_destroy_group;
5dd03f55
EP
850 group->max_events = UINT_MAX;
851 } else {
852 group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
853 }
2529a0df 854
ac7e22dc
EP
855 if (flags & FAN_UNLIMITED_MARKS) {
856 fd = -EPERM;
857 if (!capable(CAP_SYS_ADMIN))
d8153d4d 858 goto out_destroy_group;
ac7e22dc
EP
859 group->fanotify_data.max_marks = UINT_MAX;
860 } else {
861 group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
862 }
e7099d8a 863
de8cd83e
SG
864 if (flags & FAN_ENABLE_AUDIT) {
865 fd = -EPERM;
866 if (!capable(CAP_AUDIT_WRITE))
867 goto out_destroy_group;
de8cd83e
SG
868 }
869
52c923dd
EP
870 fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
871 if (fd < 0)
d8153d4d 872 goto out_destroy_group;
52c923dd
EP
873
874 return fd;
875
d8153d4d
LS
876out_destroy_group:
877 fsnotify_destroy_group(group);
52c923dd 878 return fd;
11637e4b 879}
bbaa4168 880
a8b13aa2 881/* Check if filesystem can encode a unique fid */
73072283 882static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
a8b13aa2 883{
73072283 884 __kernel_fsid_t root_fsid;
a8b13aa2
AG
885 int err;
886
887 /*
888 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
889 */
73072283 890 err = vfs_get_fsid(path->dentry, fsid);
a8b13aa2
AG
891 if (err)
892 return err;
893
73072283 894 if (!fsid->val[0] && !fsid->val[1])
a8b13aa2
AG
895 return -ENODEV;
896
897 /*
898 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
899 * which uses a different fsid than sb root.
900 */
73072283 901 err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
a8b13aa2
AG
902 if (err)
903 return err;
904
73072283
AG
905 if (root_fsid.val[0] != fsid->val[0] ||
906 root_fsid.val[1] != fsid->val[1])
a8b13aa2
AG
907 return -EXDEV;
908
909 /*
910 * We need to make sure that the file system supports at least
911 * encoding a file handle so user can use name_to_handle_at() to
912 * compare fid returned with event to the file handle of watched
913 * objects. However, name_to_handle_at() requires that the
914 * filesystem also supports decoding file handles.
915 */
916 if (!path->dentry->d_sb->s_export_op ||
917 !path->dentry->d_sb->s_export_op->fh_to_dentry)
918 return -EOPNOTSUPP;
919
920 return 0;
921}
922
183caa3c
DB
923static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
924 int dfd, const char __user *pathname)
bbaa4168 925{
0ff21db9
EP
926 struct inode *inode = NULL;
927 struct vfsmount *mnt = NULL;
2a3edf86 928 struct fsnotify_group *group;
2903ff01 929 struct fd f;
2a3edf86 930 struct path path;
73072283 931 __kernel_fsid_t __fsid, *fsid = NULL;
bdd5a46f 932 u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
23c9deeb 933 unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
2903ff01 934 int ret;
2a3edf86
EP
935
936 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
937 __func__, fanotify_fd, flags, dfd, pathname, mask);
938
939 /* we only use the lower 32 bits as of right now. */
940 if (mask & ((__u64)0xffffffff << 32))
941 return -EINVAL;
942
23c9deeb 943 if (flags & ~FANOTIFY_MARK_FLAGS)
88380fe6 944 return -EINVAL;
d54f4fba
AG
945
946 switch (mark_type) {
947 case FAN_MARK_INODE:
948 case FAN_MARK_MOUNT:
949 case FAN_MARK_FILESYSTEM:
950 break;
951 default:
952 return -EINVAL;
953 }
954
4d92604c 955 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
1734dee4 956 case FAN_MARK_ADD: /* fallthrough */
88380fe6 957 case FAN_MARK_REMOVE:
1734dee4
LS
958 if (!mask)
959 return -EINVAL;
cc299a98 960 break;
4d92604c 961 case FAN_MARK_FLUSH:
23c9deeb 962 if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
cc299a98 963 return -EINVAL;
88380fe6
AG
964 break;
965 default:
966 return -EINVAL;
967 }
8fcd6528 968
6685df31 969 if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
23c9deeb 970 valid_mask |= FANOTIFY_PERM_EVENTS;
6685df31
MS
971
972 if (mask & ~valid_mask)
2a3edf86
EP
973 return -EINVAL;
974
2903ff01
AV
975 f = fdget(fanotify_fd);
976 if (unlikely(!f.file))
2a3edf86
EP
977 return -EBADF;
978
979 /* verify that this is indeed an fanotify instance */
980 ret = -EINVAL;
2903ff01 981 if (unlikely(f.file->f_op != &fanotify_fops))
2a3edf86 982 goto fput_and_out;
2903ff01 983 group = f.file->private_data;
4231a235
EP
984
985 /*
986 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
987 * allowed to set permissions events.
988 */
989 ret = -EINVAL;
23c9deeb 990 if (mask & FANOTIFY_PERM_EVENTS &&
4231a235
EP
991 group->priority == FS_PRIO_0)
992 goto fput_and_out;
2a3edf86 993
235328d1
AG
994 /*
995 * Events with data type inode do not carry enough information to report
996 * event->fd, so we do not allow setting a mask for inode events unless
997 * group supports reporting fid.
998 * inode events are not supported on a mount mark, because they do not
999 * carry enough information (i.e. path) to be filtered by mount point.
1000 */
1001 if (mask & FANOTIFY_INODE_EVENTS &&
1002 (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
1003 mark_type == FAN_MARK_MOUNT))
1004 goto fput_and_out;
1005
0a8dd2db
HS
1006 if (flags & FAN_MARK_FLUSH) {
1007 ret = 0;
d54f4fba 1008 if (mark_type == FAN_MARK_MOUNT)
0a8dd2db 1009 fsnotify_clear_vfsmount_marks_by_group(group);
d54f4fba
AG
1010 else if (mark_type == FAN_MARK_FILESYSTEM)
1011 fsnotify_clear_sb_marks_by_group(group);
0a8dd2db
HS
1012 else
1013 fsnotify_clear_inode_marks_by_group(group);
1014 goto fput_and_out;
1015 }
1016
2a3edf86
EP
1017 ret = fanotify_find_path(dfd, pathname, &path, flags);
1018 if (ret)
1019 goto fput_and_out;
1020
a8b13aa2 1021 if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
73072283 1022 ret = fanotify_test_fid(&path, &__fsid);
a8b13aa2
AG
1023 if (ret)
1024 goto path_put_and_out;
77115225 1025
73072283 1026 fsid = &__fsid;
a8b13aa2
AG
1027 }
1028
2a3edf86 1029 /* inode held in place by reference to path; group by fget on fd */
d54f4fba 1030 if (mark_type == FAN_MARK_INODE)
0ff21db9
EP
1031 inode = path.dentry->d_inode;
1032 else
1033 mnt = path.mnt;
2a3edf86
EP
1034
1035 /* create/update an inode mark */
0a8dd2db 1036 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
c6223f46 1037 case FAN_MARK_ADD:
d54f4fba 1038 if (mark_type == FAN_MARK_MOUNT)
77115225
AG
1039 ret = fanotify_add_vfsmount_mark(group, mnt, mask,
1040 flags, fsid);
d54f4fba 1041 else if (mark_type == FAN_MARK_FILESYSTEM)
77115225
AG
1042 ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
1043 flags, fsid);
0ff21db9 1044 else
77115225
AG
1045 ret = fanotify_add_inode_mark(group, inode, mask,
1046 flags, fsid);
c6223f46
AG
1047 break;
1048 case FAN_MARK_REMOVE:
d54f4fba 1049 if (mark_type == FAN_MARK_MOUNT)
77115225
AG
1050 ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
1051 flags);
d54f4fba 1052 else if (mark_type == FAN_MARK_FILESYSTEM)
77115225
AG
1053 ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
1054 flags);
f3640192 1055 else
77115225
AG
1056 ret = fanotify_remove_inode_mark(group, inode, mask,
1057 flags);
c6223f46
AG
1058 break;
1059 default:
1060 ret = -EINVAL;
1061 }
2a3edf86 1062
a8b13aa2 1063path_put_and_out:
2a3edf86
EP
1064 path_put(&path);
1065fput_and_out:
2903ff01 1066 fdput(f);
2a3edf86
EP
1067 return ret;
1068}
1069
183caa3c
DB
1070SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
1071 __u64, mask, int, dfd,
1072 const char __user *, pathname)
1073{
1074 return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
1075}
1076
91c2e0bc
AV
#ifdef CONFIG_COMPAT
/*
 * Compat fanotify_mark() entry point. The 64-bit event mask arrives as two
 * 32-bit halves (mask0, mask1); which half is the upper one depends on the
 * byte order, so the mask is reassembled before calling do_fanotify_mark().
 */
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
		       int, fanotify_fd, unsigned int, flags,
		       __u32, mask0, __u32, mask1, int, dfd,
		       const char __user *, pathname)
{
	__u64 mask;

#ifdef __BIG_ENDIAN
	mask = ((__u64)mask0 << 32) | mask1;
#else
	mask = ((__u64)mask1 << 32) | mask0;
#endif

	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}
#endif
1092
2a3edf86 1093/*
ae0e47f0 1094 * fanotify_user_setup - Our initialization function. Note that we cannot return
2a3edf86
EP
1095 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
1096 * must result in panic().
1097 */
1098static int __init fanotify_user_setup(void)
1099{
a8b13aa2 1100 BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
bdd5a46f
AG
1101 BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
1102
d46eb14b
SB
1103 fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
1104 SLAB_PANIC|SLAB_ACCOUNT);
33913997 1105 fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
6685df31
MS
1106 if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
1107 fanotify_perm_event_cachep =
33913997 1108 KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
6685df31 1109 }
2a3edf86
EP
1110
1111 return 0;
bbaa4168 1112}
2a3edf86 1113device_initcall(fanotify_user_setup);