]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
33d3dfff | 2 | #include <linux/fanotify.h> |
11637e4b | 3 | #include <linux/fcntl.h> |
2a3edf86 | 4 | #include <linux/file.h> |
11637e4b | 5 | #include <linux/fs.h> |
52c923dd | 6 | #include <linux/anon_inodes.h> |
11637e4b | 7 | #include <linux/fsnotify_backend.h> |
2a3edf86 | 8 | #include <linux/init.h> |
a1014f10 | 9 | #include <linux/mount.h> |
2a3edf86 | 10 | #include <linux/namei.h> |
a1014f10 | 11 | #include <linux/poll.h> |
11637e4b EP |
12 | #include <linux/security.h> |
13 | #include <linux/syscalls.h> | |
e4e047a2 | 14 | #include <linux/slab.h> |
2a3edf86 | 15 | #include <linux/types.h> |
a1014f10 | 16 | #include <linux/uaccess.h> |
91c2e0bc | 17 | #include <linux/compat.h> |
174cd4b1 | 18 | #include <linux/sched/signal.h> |
d46eb14b | 19 | #include <linux/memcontrol.h> |
a8b13aa2 AG |
20 | #include <linux/statfs.h> |
21 | #include <linux/exportfs.h> | |
a1014f10 EP |
22 | |
23 | #include <asm/ioctls.h> | |
11637e4b | 24 | |
c63181e6 | 25 | #include "../../mount.h" |
be77196b | 26 | #include "../fdinfo.h" |
7053aee2 | 27 | #include "fanotify.h" |
c63181e6 | 28 | |
2529a0df | 29 | #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 |
5b8fea65 AG |
30 | #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192 |
31 | #define FANOTIFY_DEFAULT_MAX_GROUPS 128 | |
32 | ||
33 | /* | |
34 | * The legacy fanotify marks limit (8192) is per group and we introduced a tunable | |
35 | * limit of marks per user, similar to inotify. Effectively, the legacy limit | |
36 | * of fanotify marks per user is <max marks per group> * <max groups per user>. | |
37 | * This default limit (1M) also happens to match the increased limit of inotify | |
38 | * max_user_watches since v5.10. | |
39 | */ | |
40 | #define FANOTIFY_DEFAULT_MAX_USER_MARKS \ | |
41 | (FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS) | |
42 | ||
43 | /* | |
44 | * Most of the memory cost of adding an inode mark is pinning the marked inode. | |
45 | * The size of the filesystem inode struct is not uniform across filesystems, | |
46 | * so double the size of a VFS inode is used as a conservative approximation. | |
47 | */ | |
48 | #define INODE_MARK_COST (2 * sizeof(struct inode)) | |
49 | ||
50 | /* configurable via /proc/sys/fs/fanotify/ */ | |
51 | static int fanotify_max_queued_events __read_mostly; | |
52 | ||
53 | #ifdef CONFIG_SYSCTL | |
54 | ||
55 | #include <linux/sysctl.h> | |
56 | ||
57 | struct ctl_table fanotify_table[] = { | |
58 | { | |
59 | .procname = "max_user_groups", | |
60 | .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], | |
61 | .maxlen = sizeof(int), | |
62 | .mode = 0644, | |
63 | .proc_handler = proc_dointvec_minmax, | |
64 | .extra1 = SYSCTL_ZERO, | |
65 | }, | |
66 | { | |
67 | .procname = "max_user_marks", | |
68 | .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS], | |
69 | .maxlen = sizeof(int), | |
70 | .mode = 0644, | |
71 | .proc_handler = proc_dointvec_minmax, | |
72 | .extra1 = SYSCTL_ZERO, | |
73 | }, | |
74 | { | |
75 | .procname = "max_queued_events", | |
76 | .data = &fanotify_max_queued_events, | |
77 | .maxlen = sizeof(int), | |
78 | .mode = 0644, | |
79 | .proc_handler = proc_dointvec_minmax, | |
80 | .extra1 = SYSCTL_ZERO | |
81 | }, | |
82 | { } | |
83 | }; | |
84 | #endif /* CONFIG_SYSCTL */ | |
2529a0df | 85 | |
48149e9d HS |
86 | /* |
87 | * All flags that may be specified in parameter event_f_flags of fanotify_init. | |
88 | * | |
89 | * Internal and external open flags are stored together in field f_flags of | |
90 | * struct file. Only external open flags shall be allowed in event_f_flags. | |
91 | * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be | |
92 | * excluded. | |
93 | */ | |
94 | #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ | |
95 | O_ACCMODE | O_APPEND | O_NONBLOCK | \ | |
96 | __O_SYNC | O_DSYNC | O_CLOEXEC | \ | |
97 | O_LARGEFILE | O_NOATIME ) | |
98 | ||
33d3dfff | 99 | extern const struct fsnotify_ops fanotify_fsnotify_ops; |
11637e4b | 100 | |
054c636e | 101 | struct kmem_cache *fanotify_mark_cache __read_mostly; |
7088f357 JK |
102 | struct kmem_cache *fanotify_fid_event_cachep __read_mostly; |
103 | struct kmem_cache *fanotify_path_event_cachep __read_mostly; | |
f083441b | 104 | struct kmem_cache *fanotify_perm_event_cachep __read_mostly; |
2a3edf86 | 105 | |
5e469c83 | 106 | #define FANOTIFY_EVENT_ALIGN 4 |
d3424c9b | 107 | #define FANOTIFY_FID_INFO_HDR_LEN \ |
44d705b0 | 108 | (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) |
5e469c83 | 109 | |
44d705b0 | 110 | static int fanotify_fid_info_len(int fh_len, int name_len) |
d766b553 | 111 | { |
44d705b0 AG |
112 | int info_len = fh_len; |
113 | ||
114 | if (name_len) | |
115 | info_len += name_len + 1; | |
116 | ||
d3424c9b MB |
117 | return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len, |
118 | FANOTIFY_EVENT_ALIGN); | |
d766b553 AG |
119 | } |
120 | ||
d3424c9b | 121 | static int fanotify_event_info_len(unsigned int info_mode, |
929943b3 | 122 | struct fanotify_event *event) |
5e469c83 | 123 | { |
f454fa61 AG |
124 | struct fanotify_info *info = fanotify_event_info(event); |
125 | int dir_fh_len = fanotify_event_dir_fh_len(event); | |
afc894c7 | 126 | int fh_len = fanotify_event_object_fh_len(event); |
f454fa61 | 127 | int info_len = 0; |
929943b3 | 128 | int dot_len = 0; |
f454fa61 | 129 | |
929943b3 | 130 | if (dir_fh_len) { |
f454fa61 | 131 | info_len += fanotify_fid_info_len(dir_fh_len, info->name_len); |
d3424c9b MB |
132 | } else if ((info_mode & FAN_REPORT_NAME) && |
133 | (event->mask & FAN_ONDIR)) { | |
929943b3 AG |
134 | /* |
135 | * With group flag FAN_REPORT_NAME, if name was not recorded in | |
136 | * event on a directory, we will report the name ".". | |
137 | */ | |
138 | dot_len = 1; | |
139 | } | |
afc894c7 | 140 | |
44d705b0 | 141 | if (fh_len) |
929943b3 | 142 | info_len += fanotify_fid_info_len(fh_len, dot_len); |
44d705b0 | 143 | |
44d705b0 | 144 | return info_len; |
5e469c83 AG |
145 | } |
146 | ||
94e00d28 AG |
147 | /* |
148 | * Remove a hashed event from the merge hash table. | |
149 | */ | |
150 | static void fanotify_unhash_event(struct fsnotify_group *group, | |
151 | struct fanotify_event *event) | |
152 | { | |
153 | assert_spin_locked(&group->notification_lock); | |
154 | ||
155 | pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, | |
156 | group, event, fanotify_event_hash_bucket(group, event)); | |
157 | ||
158 | if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list))) | |
159 | return; | |
160 | ||
161 | hlist_del_init(&event->merge_list); | |
162 | } | |
163 | ||
a1014f10 | 164 | /* |
7088f357 | 165 | * Get an fanotify notification event if one exists and is small |
a1014f10 | 166 | * enough to fit in "count". Return an error pointer if the count |
40873284 JK |
167 | * is not large enough. When permission event is dequeued, its state is |
168 | * updated accordingly. | |
a1014f10 | 169 | */ |
7088f357 | 170 | static struct fanotify_event *get_one_event(struct fsnotify_group *group, |
a1014f10 EP |
171 | size_t count) |
172 | { | |
5e469c83 | 173 | size_t event_size = FAN_EVENT_METADATA_LEN; |
7088f357 | 174 | struct fanotify_event *event = NULL; |
6f73171e | 175 | struct fsnotify_event *fsn_event; |
0aca67bb | 176 | unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); |
a1014f10 EP |
177 | |
178 | pr_debug("%s: group=%p count=%zd\n", __func__, group, count); | |
179 | ||
8c554466 | 180 | spin_lock(&group->notification_lock); |
6f73171e AG |
181 | fsn_event = fsnotify_peek_first_event(group); |
182 | if (!fsn_event) | |
8c554466 | 183 | goto out; |
a1014f10 | 184 | |
6f73171e | 185 | event = FANOTIFY_E(fsn_event); |
0aca67bb MB |
186 | if (info_mode) |
187 | event_size += fanotify_event_info_len(info_mode, event); | |
5e469c83 | 188 | |
8c554466 | 189 | if (event_size > count) { |
7088f357 | 190 | event = ERR_PTR(-EINVAL); |
8c554466 JK |
191 | goto out; |
192 | } | |
6f73171e AG |
193 | |
194 | /* | |
195 | * Held the notification_lock the whole time, so this is the | |
196 | * same event we peeked above. | |
197 | */ | |
198 | fsnotify_remove_first_event(group); | |
7088f357 JK |
199 | if (fanotify_is_perm_event(event->mask)) |
200 | FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; | |
94e00d28 AG |
201 | if (fanotify_is_hashed_event(event->mask)) |
202 | fanotify_unhash_event(group, event); | |
8c554466 JK |
203 | out: |
204 | spin_unlock(&group->notification_lock); | |
7088f357 | 205 | return event; |
a1014f10 EP |
206 | } |
207 | ||
a741c2fe | 208 | static int create_fd(struct fsnotify_group *group, struct path *path, |
7053aee2 | 209 | struct file **file) |
a1014f10 EP |
210 | { |
211 | int client_fd; | |
a1014f10 EP |
212 | struct file *new_file; |
213 | ||
0b37e097 | 214 | client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); |
a1014f10 EP |
215 | if (client_fd < 0) |
216 | return client_fd; | |
217 | ||
a1014f10 EP |
218 | /* |
219 | * we need a new file handle for the userspace program so it can read even if it was | |
220 | * originally opened O_WRONLY. | |
221 | */ | |
a741c2fe JK |
222 | new_file = dentry_open(path, |
223 | group->fanotify_data.f_flags | FMODE_NONOTIFY, | |
224 | current_cred()); | |
a1014f10 EP |
225 | if (IS_ERR(new_file)) { |
226 | /* | |
227 | * We still send an event even if we can't open the file. This | |
228 | * can happen when, say, tasks are gone and we try to open their | |
229 | * /proc files, or when we try to open a WRONLY file like in sysfs. | |
230 | * We just send the errno to userspace since there isn't much | |
231 | * else we can do. | |
232 | */ | |
233 | put_unused_fd(client_fd); | |
234 | client_fd = PTR_ERR(new_file); | |
235 | } else { | |
352e3b24 | 236 | *file = new_file; |
a1014f10 EP |
237 | } |
238 | ||
22aa425d | 239 | return client_fd; |
a1014f10 EP |
240 | } |
241 | ||
40873284 JK |
242 | /* |
243 | * Finish processing of permission event by setting it to ANSWERED state and | |
244 | * drop group->notification_lock. | |
245 | */ | |
246 | static void finish_permission_event(struct fsnotify_group *group, | |
247 | struct fanotify_perm_event *event, | |
248 | unsigned int response) | |
249 | __releases(&group->notification_lock) | |
250 | { | |
fabf7f29 JK |
251 | bool destroy = false; |
252 | ||
40873284 JK |
253 | assert_spin_locked(&group->notification_lock); |
254 | event->response = response; | |
fabf7f29 JK |
255 | if (event->state == FAN_EVENT_CANCELED) |
256 | destroy = true; | |
257 | else | |
258 | event->state = FAN_EVENT_ANSWERED; | |
40873284 | 259 | spin_unlock(&group->notification_lock); |
fabf7f29 JK |
260 | if (destroy) |
261 | fsnotify_destroy_event(group, &event->fae.fse); | |
40873284 JK |
262 | } |
263 | ||
b2d87909 EP |
264 | static int process_access_response(struct fsnotify_group *group, |
265 | struct fanotify_response *response_struct) | |
266 | { | |
33913997 | 267 | struct fanotify_perm_event *event; |
f083441b JK |
268 | int fd = response_struct->fd; |
269 | int response = response_struct->response; | |
b2d87909 EP |
270 | |
271 | pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, | |
272 | fd, response); | |
273 | /* | |
274 | * make sure the response is valid, if invalid we do nothing and either | |
25985edc | 275 | * userspace can send a valid response or we will clean it up after the |
b2d87909 EP |
276 | * timeout |
277 | */ | |
de8cd83e | 278 | switch (response & ~FAN_AUDIT) { |
b2d87909 EP |
279 | case FAN_ALLOW: |
280 | case FAN_DENY: | |
281 | break; | |
282 | default: | |
283 | return -EINVAL; | |
284 | } | |
285 | ||
286 | if (fd < 0) | |
287 | return -EINVAL; | |
288 | ||
96a71f21 | 289 | if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) |
de8cd83e SG |
290 | return -EINVAL; |
291 | ||
af6a5113 JK |
292 | spin_lock(&group->notification_lock); |
293 | list_for_each_entry(event, &group->fanotify_data.access_list, | |
294 | fae.fse.list) { | |
295 | if (event->fd != fd) | |
296 | continue; | |
b2d87909 | 297 | |
af6a5113 | 298 | list_del_init(&event->fae.fse.list); |
40873284 | 299 | finish_permission_event(group, event, response); |
af6a5113 JK |
300 | wake_up(&group->fanotify_data.access_waitq); |
301 | return 0; | |
302 | } | |
303 | spin_unlock(&group->notification_lock); | |
b2d87909 | 304 | |
af6a5113 | 305 | return -ENOENT; |
b2d87909 | 306 | } |
b2d87909 | 307 | |
d3424c9b MB |
308 | static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, |
309 | int info_type, const char *name, | |
310 | size_t name_len, | |
311 | char __user *buf, size_t count) | |
5e469c83 AG |
312 | { |
313 | struct fanotify_event_info_fid info = { }; | |
314 | struct file_handle handle = { }; | |
afc894c7 | 315 | unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; |
cacfb956 | 316 | size_t fh_len = fh ? fh->len : 0; |
44d705b0 AG |
317 | size_t info_len = fanotify_fid_info_len(fh_len, name_len); |
318 | size_t len = info_len; | |
5e469c83 | 319 | |
44d705b0 AG |
320 | pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", |
321 | __func__, fh_len, name_len, info_len, count); | |
322 | ||
83b7a598 | 323 | if (!fh_len) |
5e469c83 AG |
324 | return 0; |
325 | ||
44d705b0 | 326 | if (WARN_ON_ONCE(len < sizeof(info) || len > count)) |
5e469c83 AG |
327 | return -EFAULT; |
328 | ||
44d705b0 AG |
329 | /* |
330 | * Copy event info fid header followed by variable sized file handle | |
331 | * and optionally followed by variable sized filename. | |
332 | */ | |
83b7a598 AG |
333 | switch (info_type) { |
334 | case FAN_EVENT_INFO_TYPE_FID: | |
335 | case FAN_EVENT_INFO_TYPE_DFID: | |
336 | if (WARN_ON_ONCE(name_len)) | |
337 | return -EFAULT; | |
338 | break; | |
339 | case FAN_EVENT_INFO_TYPE_DFID_NAME: | |
340 | if (WARN_ON_ONCE(!name || !name_len)) | |
341 | return -EFAULT; | |
342 | break; | |
343 | default: | |
344 | return -EFAULT; | |
345 | } | |
346 | ||
347 | info.hdr.info_type = info_type; | |
5e469c83 | 348 | info.hdr.len = len; |
d766b553 | 349 | info.fsid = *fsid; |
5e469c83 AG |
350 | if (copy_to_user(buf, &info, sizeof(info))) |
351 | return -EFAULT; | |
352 | ||
353 | buf += sizeof(info); | |
354 | len -= sizeof(info); | |
44d705b0 AG |
355 | if (WARN_ON_ONCE(len < sizeof(handle))) |
356 | return -EFAULT; | |
357 | ||
afc894c7 | 358 | handle.handle_type = fh->type; |
5e469c83 AG |
359 | handle.handle_bytes = fh_len; |
360 | if (copy_to_user(buf, &handle, sizeof(handle))) | |
361 | return -EFAULT; | |
362 | ||
363 | buf += sizeof(handle); | |
364 | len -= sizeof(handle); | |
44d705b0 AG |
365 | if (WARN_ON_ONCE(len < fh_len)) |
366 | return -EFAULT; | |
367 | ||
b2d22b6b | 368 | /* |
44d705b0 AG |
369 | * For an inline fh and inline file name, copy through stack to exclude |
370 | * the copy from usercopy hardening protections. | |
b2d22b6b | 371 | */ |
afc894c7 | 372 | fh_buf = fanotify_fh_buf(fh); |
b2d22b6b | 373 | if (fh_len <= FANOTIFY_INLINE_FH_LEN) { |
afc894c7 JK |
374 | memcpy(bounce, fh_buf, fh_len); |
375 | fh_buf = bounce; | |
b2d22b6b | 376 | } |
afc894c7 | 377 | if (copy_to_user(buf, fh_buf, fh_len)) |
5e469c83 AG |
378 | return -EFAULT; |
379 | ||
5e469c83 AG |
380 | buf += fh_len; |
381 | len -= fh_len; | |
44d705b0 AG |
382 | |
383 | if (name_len) { | |
384 | /* Copy the filename with terminating null */ | |
385 | name_len++; | |
386 | if (WARN_ON_ONCE(len < name_len)) | |
387 | return -EFAULT; | |
388 | ||
389 | if (copy_to_user(buf, name, name_len)) | |
390 | return -EFAULT; | |
391 | ||
392 | buf += name_len; | |
393 | len -= name_len; | |
394 | } | |
395 | ||
396 | /* Pad with 0's */ | |
5e469c83 AG |
397 | WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); |
398 | if (len > 0 && clear_user(buf, len)) | |
399 | return -EFAULT; | |
400 | ||
44d705b0 | 401 | return info_len; |
5e469c83 AG |
402 | } |
403 | ||
0aca67bb MB |
404 | static int copy_info_records_to_user(struct fanotify_event *event, |
405 | struct fanotify_info *info, | |
406 | unsigned int info_mode, | |
407 | char __user *buf, size_t count) | |
408 | { | |
409 | int ret, total_bytes = 0, info_type = 0; | |
410 | unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS; | |
411 | ||
412 | /* | |
413 | * Event info records order is as follows: dir fid + name, child fid. | |
414 | */ | |
415 | if (fanotify_event_dir_fh_len(event)) { | |
416 | info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : | |
417 | FAN_EVENT_INFO_TYPE_DFID; | |
418 | ret = copy_fid_info_to_user(fanotify_event_fsid(event), | |
419 | fanotify_info_dir_fh(info), | |
420 | info_type, | |
421 | fanotify_info_name(info), | |
422 | info->name_len, buf, count); | |
423 | if (ret < 0) | |
424 | return ret; | |
425 | ||
426 | buf += ret; | |
427 | count -= ret; | |
428 | total_bytes += ret; | |
429 | } | |
430 | ||
431 | if (fanotify_event_object_fh_len(event)) { | |
432 | const char *dot = NULL; | |
433 | int dot_len = 0; | |
434 | ||
435 | if (fid_mode == FAN_REPORT_FID || info_type) { | |
436 | /* | |
437 | * With only group flag FAN_REPORT_FID only type FID is | |
438 | * reported. Second info record type is always FID. | |
439 | */ | |
440 | info_type = FAN_EVENT_INFO_TYPE_FID; | |
441 | } else if ((fid_mode & FAN_REPORT_NAME) && | |
442 | (event->mask & FAN_ONDIR)) { | |
443 | /* | |
444 | * With group flag FAN_REPORT_NAME, if name was not | |
445 | * recorded in an event on a directory, report the name | |
446 | * "." with info type DFID_NAME. | |
447 | */ | |
448 | info_type = FAN_EVENT_INFO_TYPE_DFID_NAME; | |
449 | dot = "."; | |
450 | dot_len = 1; | |
451 | } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) || | |
452 | (event->mask & FAN_ONDIR)) { | |
453 | /* | |
454 | * With group flag FAN_REPORT_DIR_FID, a single info | |
455 | * record has type DFID for directory entry modification | |
456 | * event and for event on a directory. | |
457 | */ | |
458 | info_type = FAN_EVENT_INFO_TYPE_DFID; | |
459 | } else { | |
460 | /* | |
461 | * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID, | |
462 | * a single info record has type FID for event on a | |
463 | * non-directory, when there is no directory to report. | |
464 | * For example, on FAN_DELETE_SELF event. | |
465 | */ | |
466 | info_type = FAN_EVENT_INFO_TYPE_FID; | |
467 | } | |
468 | ||
469 | ret = copy_fid_info_to_user(fanotify_event_fsid(event), | |
470 | fanotify_event_object_fh(event), | |
471 | info_type, dot, dot_len, | |
472 | buf, count); | |
473 | if (ret < 0) | |
474 | return ret; | |
475 | ||
476 | buf += ret; | |
477 | count -= ret; | |
478 | total_bytes += ret; | |
479 | } | |
480 | ||
481 | return total_bytes; | |
482 | } | |
483 | ||
a1014f10 | 484 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
7088f357 | 485 | struct fanotify_event *event, |
5b03a472 | 486 | char __user *buf, size_t count) |
a1014f10 | 487 | { |
bb2f7b45 | 488 | struct fanotify_event_metadata metadata; |
7088f357 | 489 | struct path *path = fanotify_event_path(event); |
f454fa61 | 490 | struct fanotify_info *info = fanotify_event_info(event); |
0aca67bb | 491 | unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); |
bb2f7b45 | 492 | struct file *f = NULL; |
e9e0c890 | 493 | int ret, fd = FAN_NOFD; |
a1014f10 | 494 | |
7088f357 | 495 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
a1014f10 | 496 | |
44d705b0 | 497 | metadata.event_len = FAN_EVENT_METADATA_LEN + |
0aca67bb | 498 | fanotify_event_info_len(info_mode, event); |
bb2f7b45 AG |
499 | metadata.metadata_len = FAN_EVENT_METADATA_LEN; |
500 | metadata.vers = FANOTIFY_METADATA_VERSION; | |
501 | metadata.reserved = 0; | |
502 | metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; | |
503 | metadata.pid = pid_vnr(event->pid); | |
7cea2a3c AG |
504 | /* |
505 | * For an unprivileged listener, event->pid can be used to identify the | |
506 | * events generated by the listener process itself, without disclosing | |
507 | * the pids of other processes. | |
508 | */ | |
a8b98c80 | 509 | if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && |
7cea2a3c AG |
510 | task_tgid(current) != event->pid) |
511 | metadata.pid = 0; | |
bb2f7b45 | 512 | |
a8b98c80 AG |
513 | /* |
514 | * For now, fid mode is required for an unprivileged listener and | |
515 | * fid mode does not report fd in events. Keep this check anyway | |
516 | * for safety in case fid mode requirement is relaxed in the future | |
517 | * to allow unprivileged listener to get events with no fd and no fid. | |
518 | */ | |
519 | if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && | |
520 | path && path->mnt && path->dentry) { | |
afc894c7 JK |
521 | fd = create_fd(group, path, &f); |
522 | if (fd < 0) | |
523 | return fd; | |
bb2f7b45 AG |
524 | } |
525 | metadata.fd = fd; | |
b2d87909 | 526 | |
b2d87909 | 527 | ret = -EFAULT; |
5b03a472 KC |
528 | /* |
529 | * Sanity check copy size in case get_one_event() and | |
c5e443cb | 530 | * event_len sizes ever get out of sync. |
5b03a472 | 531 | */ |
bb2f7b45 | 532 | if (WARN_ON_ONCE(metadata.event_len > count)) |
5b03a472 | 533 | goto out_close_fd; |
bb2f7b45 | 534 | |
5e469c83 | 535 | if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) |
352e3b24 AV |
536 | goto out_close_fd; |
537 | ||
44d705b0 AG |
538 | buf += FAN_EVENT_METADATA_LEN; |
539 | count -= FAN_EVENT_METADATA_LEN; | |
540 | ||
bb2f7b45 | 541 | if (fanotify_is_perm_event(event->mask)) |
7088f357 | 542 | FANOTIFY_PERM(event)->fd = fd; |
a1014f10 | 543 | |
44d705b0 | 544 | if (f) |
3587b1b0 | 545 | fd_install(fd, f); |
44d705b0 | 546 | |
0aca67bb MB |
547 | if (info_mode) { |
548 | ret = copy_info_records_to_user(event, info, info_mode, | |
549 | buf, count); | |
44d705b0 | 550 | if (ret < 0) |
f644bc44 | 551 | goto out_close_fd; |
5e469c83 AG |
552 | } |
553 | ||
bb2f7b45 | 554 | return metadata.event_len; |
b2d87909 | 555 | |
b2d87909 | 556 | out_close_fd: |
352e3b24 AV |
557 | if (fd != FAN_NOFD) { |
558 | put_unused_fd(fd); | |
559 | fput(f); | |
560 | } | |
b2d87909 | 561 | return ret; |
a1014f10 EP |
562 | } |
563 | ||
564 | /* fanotify userspace file descriptor functions */ | |
076ccb76 | 565 | static __poll_t fanotify_poll(struct file *file, poll_table *wait) |
a1014f10 EP |
566 | { |
567 | struct fsnotify_group *group = file->private_data; | |
076ccb76 | 568 | __poll_t ret = 0; |
a1014f10 EP |
569 | |
570 | poll_wait(file, &group->notification_waitq, wait); | |
c21dbe20 | 571 | spin_lock(&group->notification_lock); |
a1014f10 | 572 | if (!fsnotify_notify_queue_is_empty(group)) |
a9a08845 | 573 | ret = EPOLLIN | EPOLLRDNORM; |
c21dbe20 | 574 | spin_unlock(&group->notification_lock); |
a1014f10 EP |
575 | |
576 | return ret; | |
577 | } | |
578 | ||
579 | static ssize_t fanotify_read(struct file *file, char __user *buf, | |
580 | size_t count, loff_t *pos) | |
581 | { | |
582 | struct fsnotify_group *group; | |
7088f357 | 583 | struct fanotify_event *event; |
a1014f10 EP |
584 | char __user *start; |
585 | int ret; | |
536ebe9c | 586 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
a1014f10 EP |
587 | |
588 | start = buf; | |
589 | group = file->private_data; | |
590 | ||
591 | pr_debug("%s: group=%p\n", __func__, group); | |
592 | ||
536ebe9c | 593 | add_wait_queue(&group->notification_waitq, &wait); |
a1014f10 | 594 | while (1) { |
47aaabde JK |
595 | /* |
596 | * User can supply arbitrarily large buffer. Avoid softlockups | |
597 | * in case there are lots of available events. | |
598 | */ | |
599 | cond_resched(); | |
7088f357 JK |
600 | event = get_one_event(group, count); |
601 | if (IS_ERR(event)) { | |
602 | ret = PTR_ERR(event); | |
d8aaab4f JK |
603 | break; |
604 | } | |
605 | ||
7088f357 | 606 | if (!event) { |
d8aaab4f JK |
607 | ret = -EAGAIN; |
608 | if (file->f_flags & O_NONBLOCK) | |
a1014f10 | 609 | break; |
d8aaab4f JK |
610 | |
611 | ret = -ERESTARTSYS; | |
612 | if (signal_pending(current)) | |
613 | break; | |
614 | ||
615 | if (start != buf) | |
a1014f10 | 616 | break; |
536ebe9c PZ |
617 | |
618 | wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | |
a1014f10 EP |
619 | continue; |
620 | } | |
621 | ||
7088f357 | 622 | ret = copy_event_to_user(group, event, buf, count); |
4ff33aaf AG |
623 | if (unlikely(ret == -EOPENSTALE)) { |
624 | /* | |
625 | * We cannot report events with stale fd so drop it. | |
626 | * Setting ret to 0 will continue the event loop and | |
627 | * do the right thing if there are no more events to | |
628 | * read (i.e. return bytes read, -EAGAIN or wait). | |
629 | */ | |
630 | ret = 0; | |
631 | } | |
632 | ||
d8aaab4f JK |
633 | /* |
634 | * Permission events get queued to wait for response. Other | |
635 | * events can be destroyed now. | |
636 | */ | |
7088f357 JK |
637 | if (!fanotify_is_perm_event(event->mask)) { |
638 | fsnotify_destroy_event(group, &event->fse); | |
d507816b | 639 | } else { |
4ff33aaf | 640 | if (ret <= 0) { |
40873284 JK |
641 | spin_lock(&group->notification_lock); |
642 | finish_permission_event(group, | |
7088f357 | 643 | FANOTIFY_PERM(event), FAN_DENY); |
d507816b | 644 | wake_up(&group->fanotify_data.access_waitq); |
4ff33aaf AG |
645 | } else { |
646 | spin_lock(&group->notification_lock); | |
7088f357 | 647 | list_add_tail(&event->fse.list, |
4ff33aaf AG |
648 | &group->fanotify_data.access_list); |
649 | spin_unlock(&group->notification_lock); | |
d507816b | 650 | } |
d507816b | 651 | } |
4ff33aaf AG |
652 | if (ret < 0) |
653 | break; | |
d8aaab4f JK |
654 | buf += ret; |
655 | count -= ret; | |
a1014f10 | 656 | } |
536ebe9c | 657 | remove_wait_queue(&group->notification_waitq, &wait); |
a1014f10 | 658 | |
a1014f10 EP |
659 | if (start != buf && ret != -EFAULT) |
660 | ret = buf - start; | |
661 | return ret; | |
662 | } | |
663 | ||
b2d87909 EP |
664 | static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) |
665 | { | |
b2d87909 EP |
666 | struct fanotify_response response = { .fd = -1, .response = -1 }; |
667 | struct fsnotify_group *group; | |
668 | int ret; | |
669 | ||
6685df31 MS |
670 | if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) |
671 | return -EINVAL; | |
672 | ||
b2d87909 EP |
673 | group = file->private_data; |
674 | ||
5e23663b FF |
675 | if (count < sizeof(response)) |
676 | return -EINVAL; | |
677 | ||
678 | count = sizeof(response); | |
b2d87909 EP |
679 | |
680 | pr_debug("%s: group=%p count=%zu\n", __func__, group, count); | |
681 | ||
682 | if (copy_from_user(&response, buf, count)) | |
683 | return -EFAULT; | |
684 | ||
685 | ret = process_access_response(group, &response); | |
686 | if (ret < 0) | |
687 | count = ret; | |
688 | ||
689 | return count; | |
b2d87909 EP |
690 | } |
691 | ||
52c923dd EP |
692 | static int fanotify_release(struct inode *ignored, struct file *file) |
693 | { | |
694 | struct fsnotify_group *group = file->private_data; | |
6f73171e | 695 | struct fsnotify_event *fsn_event; |
19ba54f4 | 696 | |
5838d444 | 697 | /* |
96d41019 JK |
698 | * Stop new events from arriving in the notification queue. Since |
699 | * userspace cannot use fanotify fd anymore, no event can enter or | |
700 | * leave access_list by now either. | |
5838d444 | 701 | */ |
96d41019 | 702 | fsnotify_group_stop_queueing(group); |
2eebf582 | 703 | |
96d41019 JK |
704 | /* |
705 | * Process all permission events on access_list and notification queue | |
706 | * and simulate reply from userspace. | |
707 | */ | |
073f6552 | 708 | spin_lock(&group->notification_lock); |
ca6f8699 | 709 | while (!list_empty(&group->fanotify_data.access_list)) { |
7088f357 JK |
710 | struct fanotify_perm_event *event; |
711 | ||
ca6f8699 JK |
712 | event = list_first_entry(&group->fanotify_data.access_list, |
713 | struct fanotify_perm_event, fae.fse.list); | |
f083441b | 714 | list_del_init(&event->fae.fse.list); |
40873284 JK |
715 | finish_permission_event(group, event, FAN_ALLOW); |
716 | spin_lock(&group->notification_lock); | |
2eebf582 | 717 | } |
2eebf582 | 718 | |
5838d444 | 719 | /* |
96d41019 JK |
720 | * Destroy all non-permission events. For permission events just |
721 | * dequeue them and set the response. They will be freed once the | |
722 | * response is consumed and fanotify_get_response() returns. | |
5838d444 | 723 | */ |
6f73171e AG |
724 | while ((fsn_event = fsnotify_remove_first_event(group))) { |
725 | struct fanotify_event *event = FANOTIFY_E(fsn_event); | |
7088f357 | 726 | |
7088f357 | 727 | if (!(event->mask & FANOTIFY_PERM_EVENTS)) { |
c21dbe20 | 728 | spin_unlock(&group->notification_lock); |
6f73171e | 729 | fsnotify_destroy_event(group, fsn_event); |
6685df31 | 730 | } else { |
7088f357 | 731 | finish_permission_event(group, FANOTIFY_PERM(event), |
40873284 | 732 | FAN_ALLOW); |
6685df31 | 733 | } |
40873284 | 734 | spin_lock(&group->notification_lock); |
96d41019 | 735 | } |
c21dbe20 | 736 | spin_unlock(&group->notification_lock); |
96d41019 JK |
737 | |
738 | /* Response for all permission events is set, wake up waiters */ | |
2eebf582 | 739 | wake_up(&group->fanotify_data.access_waitq); |
0a6b6bd5 | 740 | |
52c923dd | 741 | /* matches the fanotify_init->fsnotify_alloc_group */ |
d8153d4d | 742 | fsnotify_destroy_group(group); |
52c923dd EP |
743 | |
744 | return 0; | |
745 | } | |
746 | ||
a1014f10 EP |
747 | static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
748 | { | |
749 | struct fsnotify_group *group; | |
7053aee2 | 750 | struct fsnotify_event *fsn_event; |
a1014f10 EP |
751 | void __user *p; |
752 | int ret = -ENOTTY; | |
753 | size_t send_len = 0; | |
754 | ||
755 | group = file->private_data; | |
756 | ||
757 | p = (void __user *) arg; | |
758 | ||
759 | switch (cmd) { | |
760 | case FIONREAD: | |
c21dbe20 | 761 | spin_lock(&group->notification_lock); |
7053aee2 | 762 | list_for_each_entry(fsn_event, &group->notification_list, list) |
a1014f10 | 763 | send_len += FAN_EVENT_METADATA_LEN; |
c21dbe20 | 764 | spin_unlock(&group->notification_lock); |
a1014f10 EP |
765 | ret = put_user(send_len, (int __user *) p); |
766 | break; | |
767 | } | |
768 | ||
769 | return ret; | |
770 | } | |
771 | ||
52c923dd | 772 | static const struct file_operations fanotify_fops = { |
be77196b | 773 | .show_fdinfo = fanotify_show_fdinfo, |
a1014f10 EP |
774 | .poll = fanotify_poll, |
775 | .read = fanotify_read, | |
b2d87909 | 776 | .write = fanotify_write, |
52c923dd EP |
777 | .fasync = NULL, |
778 | .release = fanotify_release, | |
a1014f10 | 779 | .unlocked_ioctl = fanotify_ioctl, |
1832f2d8 | 780 | .compat_ioctl = compat_ptr_ioctl, |
6038f373 | 781 | .llseek = noop_llseek, |
52c923dd EP |
782 | }; |
783 | ||
2a3edf86 | 784 | static int fanotify_find_path(int dfd, const char __user *filename, |
ac5656d8 AG |
785 | struct path *path, unsigned int flags, __u64 mask, |
786 | unsigned int obj_type) | |
2a3edf86 EP |
787 | { |
788 | int ret; | |
789 | ||
790 | pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__, | |
791 | dfd, filename, flags); | |
792 | ||
793 | if (filename == NULL) { | |
2903ff01 | 794 | struct fd f = fdget(dfd); |
2a3edf86 EP |
795 | |
796 | ret = -EBADF; | |
2903ff01 | 797 | if (!f.file) |
2a3edf86 EP |
798 | goto out; |
799 | ||
800 | ret = -ENOTDIR; | |
801 | if ((flags & FAN_MARK_ONLYDIR) && | |
496ad9aa | 802 | !(S_ISDIR(file_inode(f.file)->i_mode))) { |
2903ff01 | 803 | fdput(f); |
2a3edf86 EP |
804 | goto out; |
805 | } | |
806 | ||
2903ff01 | 807 | *path = f.file->f_path; |
2a3edf86 | 808 | path_get(path); |
2903ff01 | 809 | fdput(f); |
2a3edf86 EP |
810 | } else { |
811 | unsigned int lookup_flags = 0; | |
812 | ||
813 | if (!(flags & FAN_MARK_DONT_FOLLOW)) | |
814 | lookup_flags |= LOOKUP_FOLLOW; | |
815 | if (flags & FAN_MARK_ONLYDIR) | |
816 | lookup_flags |= LOOKUP_DIRECTORY; | |
817 | ||
818 | ret = user_path_at(dfd, filename, lookup_flags, path); | |
819 | if (ret) | |
820 | goto out; | |
821 | } | |
822 | ||
823 | /* you can only watch an inode if you have read permissions on it */ | |
02f92b38 | 824 | ret = path_permission(path, MAY_READ); |
ac5656d8 AG |
825 | if (ret) { |
826 | path_put(path); | |
827 | goto out; | |
828 | } | |
829 | ||
830 | ret = security_path_notify(path, mask, obj_type); | |
2a3edf86 EP |
831 | if (ret) |
832 | path_put(path); | |
ac5656d8 | 833 | |
2a3edf86 EP |
834 | out: |
835 | return ret; | |
836 | } | |
837 | ||
b9e4e3bd | 838 | static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, |
4ed6814a AG |
839 | __u32 mask, unsigned int flags, |
840 | __u32 umask, int *destroy) | |
088b09b0 | 841 | { |
d2c1874c | 842 | __u32 oldmask = 0; |
088b09b0 | 843 | |
4ed6814a AG |
844 | /* umask bits cannot be removed by user */ |
845 | mask &= ~umask; | |
088b09b0 | 846 | spin_lock(&fsn_mark->lock); |
b9e4e3bd EP |
847 | if (!(flags & FAN_MARK_IGNORED_MASK)) { |
848 | oldmask = fsn_mark->mask; | |
a72fd224 | 849 | fsn_mark->mask &= ~mask; |
b9e4e3bd | 850 | } else { |
a72fd224 | 851 | fsn_mark->ignored_mask &= ~mask; |
b9e4e3bd | 852 | } |
4ed6814a AG |
853 | /* |
854 | * We need to keep the mark around even if remaining mask cannot | |
855 | * result in any events (e.g. mask == FAN_ONDIR) to support incremental |
856 | * changes to the mask. | |
857 | * Destroy mark when only umask bits remain. | |
858 | */ | |
859 | *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask); | |
088b09b0 AG |
860 | spin_unlock(&fsn_mark->lock); |
861 | ||
088b09b0 AG |
862 | return mask & oldmask; |
863 | } | |
864 | ||
eaa2c6b0 AG |
865 | static int fanotify_remove_mark(struct fsnotify_group *group, |
866 | fsnotify_connp_t *connp, __u32 mask, | |
4ed6814a | 867 | unsigned int flags, __u32 umask) |
88826276 EP |
868 | { |
869 | struct fsnotify_mark *fsn_mark = NULL; | |
088b09b0 | 870 | __u32 removed; |
6dfbd149 | 871 | int destroy_mark; |
88826276 | 872 | |
7b18527c | 873 | mutex_lock(&group->mark_mutex); |
eaa2c6b0 | 874 | fsn_mark = fsnotify_find_mark(connp, group); |
7b18527c LS |
875 | if (!fsn_mark) { |
876 | mutex_unlock(&group->mark_mutex); | |
f3640192 | 877 | return -ENOENT; |
7b18527c | 878 | } |
88826276 | 879 | |
6dfbd149 | 880 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
4ed6814a | 881 | umask, &destroy_mark); |
3ac70bfc AG |
882 | if (removed & fsnotify_conn_mask(fsn_mark->connector)) |
883 | fsnotify_recalc_mask(fsn_mark->connector); | |
6dfbd149 | 884 | if (destroy_mark) |
4712e722 | 885 | fsnotify_detach_mark(fsn_mark); |
7b18527c | 886 | mutex_unlock(&group->mark_mutex); |
4712e722 JK |
887 | if (destroy_mark) |
888 | fsnotify_free_mark(fsn_mark); | |
6dfbd149 | 889 | |
eaa2c6b0 | 890 | /* matches the fsnotify_find_mark() */ |
f3640192 | 891 | fsnotify_put_mark(fsn_mark); |
f3640192 AG |
892 | return 0; |
893 | } | |
2a3edf86 | 894 | |
eaa2c6b0 AG |
895 | static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, |
896 | struct vfsmount *mnt, __u32 mask, | |
4ed6814a | 897 | unsigned int flags, __u32 umask) |
eaa2c6b0 AG |
898 | { |
899 | return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, | |
4ed6814a | 900 | mask, flags, umask); |
eaa2c6b0 AG |
901 | } |
902 | ||
d54f4fba | 903 | static int fanotify_remove_sb_mark(struct fsnotify_group *group, |
4ed6814a AG |
904 | struct super_block *sb, __u32 mask, |
905 | unsigned int flags, __u32 umask) | |
d54f4fba | 906 | { |
4ed6814a AG |
907 | return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, |
908 | flags, umask); | |
d54f4fba AG |
909 | } |
910 | ||
f3640192 | 911 | static int fanotify_remove_inode_mark(struct fsnotify_group *group, |
b9e4e3bd | 912 | struct inode *inode, __u32 mask, |
4ed6814a | 913 | unsigned int flags, __u32 umask) |
f3640192 | 914 | { |
eaa2c6b0 | 915 | return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, |
4ed6814a | 916 | flags, umask); |
2a3edf86 EP |
917 | } |
918 | ||
b9e4e3bd EP |
919 | static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, |
920 | __u32 mask, | |
921 | unsigned int flags) | |
912ee394 | 922 | { |
192ca4d1 | 923 | __u32 oldmask = -1; |
912ee394 AG |
924 | |
925 | spin_lock(&fsn_mark->lock); | |
b9e4e3bd EP |
926 | if (!(flags & FAN_MARK_IGNORED_MASK)) { |
927 | oldmask = fsn_mark->mask; | |
a72fd224 | 928 | fsn_mark->mask |= mask; |
b9e4e3bd | 929 | } else { |
a72fd224 | 930 | fsn_mark->ignored_mask |= mask; |
c9778a98 EP |
931 | if (flags & FAN_MARK_IGNORED_SURV_MODIFY) |
932 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; | |
b9e4e3bd | 933 | } |
912ee394 AG |
934 | spin_unlock(&fsn_mark->lock); |
935 | ||
936 | return mask & ~oldmask; | |
937 | } | |
938 | ||
5e9c070c | 939 | static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, |
b812a9f5 | 940 | fsnotify_connp_t *connp, |
77115225 AG |
941 | unsigned int type, |
942 | __kernel_fsid_t *fsid) | |
5e9c070c | 943 | { |
5b8fea65 | 944 | struct ucounts *ucounts = group->fanotify_data.ucounts; |
5e9c070c LS |
945 | struct fsnotify_mark *mark; |
946 | int ret; | |
947 | ||
5b8fea65 AG |
948 | /* |
949 | * Enforce per user marks limits in all containing user ns. |
950 | * A group with FAN_UNLIMITED_MARKS does not contribute to mark count | |
951 | * in the limited groups account. | |
952 | */ | |
953 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && | |
954 | !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) | |
5e9c070c LS |
955 | return ERR_PTR(-ENOSPC); |
956 | ||
957 | mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | |
5b8fea65 AG |
958 | if (!mark) { |
959 | ret = -ENOMEM; | |
960 | goto out_dec_ucounts; | |
961 | } | |
5e9c070c | 962 | |
054c636e | 963 | fsnotify_init_mark(mark, group); |
77115225 | 964 | ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); |
5e9c070c LS |
965 | if (ret) { |
966 | fsnotify_put_mark(mark); | |
5b8fea65 | 967 | goto out_dec_ucounts; |
5e9c070c LS |
968 | } |
969 | ||
970 | return mark; | |
5b8fea65 AG |
971 | |
972 | out_dec_ucounts: | |
973 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) | |
974 | dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS); | |
975 | return ERR_PTR(ret); | |
5e9c070c LS |
976 | } |
977 | ||
978 | ||
eaa2c6b0 AG |
979 | static int fanotify_add_mark(struct fsnotify_group *group, |
980 | fsnotify_connp_t *connp, unsigned int type, | |
77115225 AG |
981 | __u32 mask, unsigned int flags, |
982 | __kernel_fsid_t *fsid) | |
2a3edf86 EP |
983 | { |
984 | struct fsnotify_mark *fsn_mark; | |
912ee394 | 985 | __u32 added; |
2a3edf86 | 986 | |
7b18527c | 987 | mutex_lock(&group->mark_mutex); |
b812a9f5 | 988 | fsn_mark = fsnotify_find_mark(connp, group); |
88826276 | 989 | if (!fsn_mark) { |
77115225 | 990 | fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); |
5e9c070c | 991 | if (IS_ERR(fsn_mark)) { |
7b18527c | 992 | mutex_unlock(&group->mark_mutex); |
5e9c070c | 993 | return PTR_ERR(fsn_mark); |
7b18527c | 994 | } |
88826276 | 995 | } |
b9e4e3bd | 996 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
3ac70bfc AG |
997 | if (added & ~fsnotify_conn_mask(fsn_mark->connector)) |
998 | fsnotify_recalc_mask(fsn_mark->connector); | |
c9747640 | 999 | mutex_unlock(&group->mark_mutex); |
5e9c070c | 1000 | |
fa218ab9 | 1001 | fsnotify_put_mark(fsn_mark); |
5e9c070c | 1002 | return 0; |
88826276 EP |
1003 | } |
1004 | ||
eaa2c6b0 AG |
1005 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, |
1006 | struct vfsmount *mnt, __u32 mask, | |
77115225 | 1007 | unsigned int flags, __kernel_fsid_t *fsid) |
eaa2c6b0 AG |
1008 | { |
1009 | return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, | |
77115225 | 1010 | FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); |
eaa2c6b0 AG |
1011 | } |
1012 | ||
d54f4fba | 1013 | static int fanotify_add_sb_mark(struct fsnotify_group *group, |
77115225 AG |
1014 | struct super_block *sb, __u32 mask, |
1015 | unsigned int flags, __kernel_fsid_t *fsid) | |
d54f4fba AG |
1016 | { |
1017 | return fanotify_add_mark(group, &sb->s_fsnotify_marks, | |
77115225 | 1018 | FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); |
d54f4fba AG |
1019 | } |
1020 | ||
52202dfb | 1021 | static int fanotify_add_inode_mark(struct fsnotify_group *group, |
b9e4e3bd | 1022 | struct inode *inode, __u32 mask, |
77115225 | 1023 | unsigned int flags, __kernel_fsid_t *fsid) |
88826276 | 1024 | { |
88826276 | 1025 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); |
2a3edf86 | 1026 | |
5322a59f EP |
1027 | /* |
1028 | * If some other task has this inode open for write we should not add | |
1029 | * an ignored mark, unless that ignored mark is supposed to survive | |
1030 | * modification changes anyway. | |
1031 | */ | |
1032 | if ((flags & FAN_MARK_IGNORED_MASK) && | |
1033 | !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && | |
ac9498d6 | 1034 | inode_is_open_for_write(inode)) |
5322a59f EP |
1035 | return 0; |
1036 | ||
eaa2c6b0 | 1037 | return fanotify_add_mark(group, &inode->i_fsnotify_marks, |
77115225 | 1038 | FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); |
88826276 | 1039 | } |
2a3edf86 | 1040 | |
b8a6c3a2 AG |
1041 | static struct fsnotify_event *fanotify_alloc_overflow_event(void) |
1042 | { | |
1043 | struct fanotify_event *oevent; | |
1044 | ||
1045 | oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT); | |
1046 | if (!oevent) | |
1047 | return NULL; | |
1048 | ||
1049 | fanotify_init_event(oevent, 0, FS_Q_OVERFLOW); | |
1050 | oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW; | |
1051 | ||
1052 | return &oevent->fse; | |
1053 | } | |
1054 | ||
94e00d28 AG |
1055 | static struct hlist_head *fanotify_alloc_merge_hash(void) |
1056 | { | |
1057 | struct hlist_head *hash; | |
1058 | ||
1059 | hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS, | |
1060 | GFP_KERNEL_ACCOUNT); | |
1061 | if (!hash) | |
1062 | return NULL; | |
1063 | ||
1064 | __hash_init(hash, FANOTIFY_HTABLE_SIZE); | |
1065 | ||
1066 | return hash; | |
1067 | } | |
1068 | ||
52c923dd | 1069 | /* fanotify syscalls */ |
08ae8938 | 1070 | SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) |
11637e4b | 1071 | { |
52c923dd EP |
1072 | struct fsnotify_group *group; |
1073 | int f_flags, fd; | |
83b7a598 AG |
1074 | unsigned int fid_mode = flags & FANOTIFY_FID_BITS; |
1075 | unsigned int class = flags & FANOTIFY_CLASS_BITS; | |
a8b98c80 | 1076 | unsigned int internal_flags = 0; |
52c923dd | 1077 | |
96a71f21 AG |
1078 | pr_debug("%s: flags=%x event_f_flags=%x\n", |
1079 | __func__, flags, event_f_flags); | |
52c923dd | 1080 | |
7cea2a3c AG |
1081 | if (!capable(CAP_SYS_ADMIN)) { |
1082 | /* | |
1083 | * An unprivileged user can set up an fanotify group with |
1084 | * limited functionality - an unprivileged group is limited to | |
1085 | * notification events with file handles and it cannot use | |
1086 | * unlimited queue/marks. | |
1087 | */ | |
1088 | if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) | |
1089 | return -EPERM; | |
a8b98c80 AG |
1090 | |
1091 | /* | |
1092 | * Setting the internal flag FANOTIFY_UNPRIV on the group | |
1093 | * prevents setting mount/filesystem marks on this group and | |
1094 | * prevents reporting pid and open fd in events. | |
1095 | */ | |
1096 | internal_flags |= FANOTIFY_UNPRIV; | |
7cea2a3c | 1097 | } |
52c923dd | 1098 | |
de8cd83e | 1099 | #ifdef CONFIG_AUDITSYSCALL |
23c9deeb | 1100 | if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) |
de8cd83e | 1101 | #else |
23c9deeb | 1102 | if (flags & ~FANOTIFY_INIT_FLAGS) |
de8cd83e | 1103 | #endif |
52c923dd EP |
1104 | return -EINVAL; |
1105 | ||
48149e9d HS |
1106 | if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) |
1107 | return -EINVAL; | |
1108 | ||
1109 | switch (event_f_flags & O_ACCMODE) { | |
1110 | case O_RDONLY: | |
1111 | case O_RDWR: | |
1112 | case O_WRONLY: | |
1113 | break; | |
1114 | default: | |
1115 | return -EINVAL; | |
1116 | } | |
1117 | ||
83b7a598 | 1118 | if (fid_mode && class != FAN_CLASS_NOTIF) |
a8b13aa2 AG |
1119 | return -EINVAL; |
1120 | ||
929943b3 | 1121 | /* |
929943b3 | 1122 | * Child name is reported with parent fid so requires dir fid. |
691d9763 | 1123 | * We can report both child fid and dir fid with or without name. |
929943b3 | 1124 | */ |
691d9763 | 1125 | if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) |
83b7a598 | 1126 | return -EINVAL; |
83b7a598 | 1127 | |
b2d87909 | 1128 | f_flags = O_RDWR | FMODE_NONOTIFY; |
52c923dd EP |
1129 | if (flags & FAN_CLOEXEC) |
1130 | f_flags |= O_CLOEXEC; | |
1131 | if (flags & FAN_NONBLOCK) | |
1132 | f_flags |= O_NONBLOCK; | |
1133 | ||
1134 | /* fsnotify_alloc_user_group takes a ref. Dropped in fanotify_release */ |
ac7b79fd | 1135 | group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops); |
26379198 | 1136 | if (IS_ERR(group)) { |
52c923dd | 1137 | return PTR_ERR(group); |
26379198 | 1138 | } |
52c923dd | 1139 | |
5b8fea65 AG |
1140 | /* Enforce groups limits per user in all containing user ns */ |
1141 | group->fanotify_data.ucounts = inc_ucount(current_user_ns(), | |
1142 | current_euid(), | |
1143 | UCOUNT_FANOTIFY_GROUPS); | |
1144 | if (!group->fanotify_data.ucounts) { | |
1145 | fd = -EMFILE; | |
1146 | goto out_destroy_group; | |
1147 | } | |
1148 | ||
a8b98c80 | 1149 | group->fanotify_data.flags = flags | internal_flags; |
d46eb14b | 1150 | group->memcg = get_mem_cgroup_from_mm(current->mm); |
4afeff85 | 1151 | |
94e00d28 AG |
1152 | group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); |
1153 | if (!group->fanotify_data.merge_hash) { | |
1154 | fd = -ENOMEM; | |
1155 | goto out_destroy_group; | |
1156 | } | |
1157 | ||
b8a6c3a2 AG |
1158 | group->overflow_event = fanotify_alloc_overflow_event(); |
1159 | if (unlikely(!group->overflow_event)) { | |
ff57cd58 JK |
1160 | fd = -ENOMEM; |
1161 | goto out_destroy_group; | |
1162 | } | |
ff57cd58 | 1163 | |
1e2ee49f WW |
1164 | if (force_o_largefile()) |
1165 | event_f_flags |= O_LARGEFILE; | |
80af2588 | 1166 | group->fanotify_data.f_flags = event_f_flags; |
9e66e423 EP |
1167 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
1168 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | |
83b7a598 | 1169 | switch (class) { |
4231a235 EP |
1170 | case FAN_CLASS_NOTIF: |
1171 | group->priority = FS_PRIO_0; | |
1172 | break; | |
1173 | case FAN_CLASS_CONTENT: | |
1174 | group->priority = FS_PRIO_1; | |
1175 | break; | |
1176 | case FAN_CLASS_PRE_CONTENT: | |
1177 | group->priority = FS_PRIO_2; | |
1178 | break; | |
1179 | default: | |
1180 | fd = -EINVAL; | |
d8153d4d | 1181 | goto out_destroy_group; |
4231a235 | 1182 | } |
cb2d429f | 1183 | |
5dd03f55 EP |
1184 | if (flags & FAN_UNLIMITED_QUEUE) { |
1185 | fd = -EPERM; | |
1186 | if (!capable(CAP_SYS_ADMIN)) | |
d8153d4d | 1187 | goto out_destroy_group; |
5dd03f55 EP |
1188 | group->max_events = UINT_MAX; |
1189 | } else { | |
5b8fea65 | 1190 | group->max_events = fanotify_max_queued_events; |
5dd03f55 | 1191 | } |
2529a0df | 1192 | |
ac7e22dc EP |
1193 | if (flags & FAN_UNLIMITED_MARKS) { |
1194 | fd = -EPERM; | |
1195 | if (!capable(CAP_SYS_ADMIN)) | |
d8153d4d | 1196 | goto out_destroy_group; |
ac7e22dc | 1197 | } |
e7099d8a | 1198 | |
de8cd83e SG |
1199 | if (flags & FAN_ENABLE_AUDIT) { |
1200 | fd = -EPERM; | |
1201 | if (!capable(CAP_AUDIT_WRITE)) | |
1202 | goto out_destroy_group; | |
de8cd83e SG |
1203 | } |
1204 | ||
52c923dd EP |
1205 | fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); |
1206 | if (fd < 0) | |
d8153d4d | 1207 | goto out_destroy_group; |
52c923dd EP |
1208 | |
1209 | return fd; | |
1210 | ||
d8153d4d LS |
1211 | out_destroy_group: |
1212 | fsnotify_destroy_group(group); | |
52c923dd | 1213 | return fd; |
11637e4b | 1214 | } |
bbaa4168 | 1215 | |
a8b13aa2 | 1216 | /* Check if filesystem can encode a unique fid */ |
73072283 | 1217 | static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) |
a8b13aa2 | 1218 | { |
73072283 | 1219 | __kernel_fsid_t root_fsid; |
a8b13aa2 AG |
1220 | int err; |
1221 | ||
1222 | /* | |
1223 | * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). | |
1224 | */ | |
73072283 | 1225 | err = vfs_get_fsid(path->dentry, fsid); |
a8b13aa2 AG |
1226 | if (err) |
1227 | return err; | |
1228 | ||
73072283 | 1229 | if (!fsid->val[0] && !fsid->val[1]) |
a8b13aa2 AG |
1230 | return -ENODEV; |
1231 | ||
1232 | /* | |
1233 | * Make sure path is not inside a filesystem subvolume (e.g. btrfs) | |
1234 | * which uses a different fsid than sb root. | |
1235 | */ | |
73072283 | 1236 | err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); |
a8b13aa2 AG |
1237 | if (err) |
1238 | return err; | |
1239 | ||
73072283 AG |
1240 | if (root_fsid.val[0] != fsid->val[0] || |
1241 | root_fsid.val[1] != fsid->val[1]) | |
a8b13aa2 AG |
1242 | return -EXDEV; |
1243 | ||
1244 | /* | |
1245 | * We need to make sure that the file system supports at least | |
1246 | * encoding a file handle so user can use name_to_handle_at() to | |
1247 | * compare fid returned with event to the file handle of watched | |
1248 | * objects. However, name_to_handle_at() requires that the | |
1249 | * filesystem also supports decoding file handles. | |
1250 | */ | |
1251 | if (!path->dentry->d_sb->s_export_op || | |
1252 | !path->dentry->d_sb->s_export_op->fh_to_dentry) | |
1253 | return -EOPNOTSUPP; | |
1254 | ||
1255 | return 0; | |
1256 | } | |
1257 | ||
0b3b094a JK |
1258 | static int fanotify_events_supported(struct path *path, __u64 mask) |
1259 | { | |
1260 | /* | |
1261 | * Some filesystems such as 'proc' acquire unusual locks when opening | |
1262 | * files. For them fanotify permission events have high chances of | |
1263 | * deadlocking the system - open done when reporting fanotify event | |
1264 | * blocks on this "unusual" lock while another process holding the lock | |
1265 | * waits for fanotify permission event to be answered. Just disallow | |
1266 | * permission events for such filesystems. | |
1267 | */ | |
1268 | if (mask & FANOTIFY_PERM_EVENTS && | |
1269 | path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) | |
1270 | return -EINVAL; | |
1271 | return 0; | |
1272 | } | |
1273 | ||
183caa3c DB |
1274 | static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, |
1275 | int dfd, const char __user *pathname) | |
bbaa4168 | 1276 | { |
0ff21db9 EP |
1277 | struct inode *inode = NULL; |
1278 | struct vfsmount *mnt = NULL; | |
2a3edf86 | 1279 | struct fsnotify_group *group; |
2903ff01 | 1280 | struct fd f; |
2a3edf86 | 1281 | struct path path; |
73072283 | 1282 | __kernel_fsid_t __fsid, *fsid = NULL; |
bdd5a46f | 1283 | u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; |
23c9deeb | 1284 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
3ef86653 | 1285 | bool ignored = flags & FAN_MARK_IGNORED_MASK; |
d809daf1 | 1286 | unsigned int obj_type, fid_mode; |
85af5d92 | 1287 | u32 umask = 0; |
2903ff01 | 1288 | int ret; |
2a3edf86 EP |
1289 | |
1290 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", | |
1291 | __func__, fanotify_fd, flags, dfd, pathname, mask); | |
1292 | ||
1293 | /* we only use the lower 32 bits as of right now. */ | |
22d483b9 | 1294 | if (upper_32_bits(mask)) |
2a3edf86 EP |
1295 | return -EINVAL; |
1296 | ||
23c9deeb | 1297 | if (flags & ~FANOTIFY_MARK_FLAGS) |
88380fe6 | 1298 | return -EINVAL; |
d54f4fba AG |
1299 | |
1300 | switch (mark_type) { | |
1301 | case FAN_MARK_INODE: | |
ac5656d8 AG |
1302 | obj_type = FSNOTIFY_OBJ_TYPE_INODE; |
1303 | break; | |
d54f4fba | 1304 | case FAN_MARK_MOUNT: |
ac5656d8 AG |
1305 | obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; |
1306 | break; | |
d54f4fba | 1307 | case FAN_MARK_FILESYSTEM: |
ac5656d8 | 1308 | obj_type = FSNOTIFY_OBJ_TYPE_SB; |
d54f4fba AG |
1309 | break; |
1310 | default: | |
1311 | return -EINVAL; | |
1312 | } | |
1313 | ||
4d92604c | 1314 | switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { |
df561f66 | 1315 | case FAN_MARK_ADD: |
88380fe6 | 1316 | case FAN_MARK_REMOVE: |
1734dee4 LS |
1317 | if (!mask) |
1318 | return -EINVAL; | |
cc299a98 | 1319 | break; |
4d92604c | 1320 | case FAN_MARK_FLUSH: |
23c9deeb | 1321 | if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) |
cc299a98 | 1322 | return -EINVAL; |
88380fe6 AG |
1323 | break; |
1324 | default: | |
1325 | return -EINVAL; | |
1326 | } | |
8fcd6528 | 1327 | |
6685df31 | 1328 | if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) |
23c9deeb | 1329 | valid_mask |= FANOTIFY_PERM_EVENTS; |
6685df31 MS |
1330 | |
1331 | if (mask & ~valid_mask) | |
2a3edf86 EP |
1332 | return -EINVAL; |
1333 | ||
3ef86653 AG |
1334 | /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */ |
1335 | if (ignored) | |
1336 | mask &= ~FANOTIFY_EVENT_FLAGS; | |
1337 | ||
2903ff01 AV |
1338 | f = fdget(fanotify_fd); |
1339 | if (unlikely(!f.file)) | |
2a3edf86 EP |
1340 | return -EBADF; |
1341 | ||
1342 | /* verify that this is indeed an fanotify instance */ | |
1343 | ret = -EINVAL; | |
2903ff01 | 1344 | if (unlikely(f.file->f_op != &fanotify_fops)) |
2a3edf86 | 1345 | goto fput_and_out; |
2903ff01 | 1346 | group = f.file->private_data; |
4231a235 | 1347 | |
7cea2a3c | 1348 | /* |
a8b98c80 AG |
1349 | * An unprivileged user is not allowed to set up mount or filesystem |
1350 | * marks. This also includes setting up such marks by a group that | |
1351 | * was initialized by an unprivileged user. | |
7cea2a3c AG |
1352 | */ |
1353 | ret = -EPERM; | |
a8b98c80 AG |
1354 | if ((!capable(CAP_SYS_ADMIN) || |
1355 | FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && | |
7cea2a3c AG |
1356 | mark_type != FAN_MARK_INODE) |
1357 | goto fput_and_out; | |
1358 | ||
4231a235 EP |
1359 | /* |
1360 | * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not | |
1361 | * allowed to set permission events. |
1362 | */ | |
1363 | ret = -EINVAL; | |
23c9deeb | 1364 | if (mask & FANOTIFY_PERM_EVENTS && |
4231a235 EP |
1365 | group->priority == FS_PRIO_0) |
1366 | goto fput_and_out; | |
2a3edf86 | 1367 | |
235328d1 AG |
1368 | /* |
1369 | * Events with data type inode do not carry enough information to report | |
1370 | * event->fd, so we do not allow setting a mask for inode events unless | |
1371 | * group supports reporting fid. | |
1372 | * inode events are not supported on a mount mark, because they do not | |
1373 | * carry enough information (i.e. path) to be filtered by mount point. | |
1374 | */ | |
d809daf1 | 1375 | fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
235328d1 | 1376 | if (mask & FANOTIFY_INODE_EVENTS && |
d809daf1 | 1377 | (!fid_mode || mark_type == FAN_MARK_MOUNT)) |
235328d1 AG |
1378 | goto fput_and_out; |
1379 | ||
0a8dd2db HS |
1380 | if (flags & FAN_MARK_FLUSH) { |
1381 | ret = 0; | |
d54f4fba | 1382 | if (mark_type == FAN_MARK_MOUNT) |
0a8dd2db | 1383 | fsnotify_clear_vfsmount_marks_by_group(group); |
d54f4fba AG |
1384 | else if (mark_type == FAN_MARK_FILESYSTEM) |
1385 | fsnotify_clear_sb_marks_by_group(group); | |
0a8dd2db HS |
1386 | else |
1387 | fsnotify_clear_inode_marks_by_group(group); | |
1388 | goto fput_and_out; | |
1389 | } | |
1390 | ||
ac5656d8 AG |
1391 | ret = fanotify_find_path(dfd, pathname, &path, flags, |
1392 | (mask & ALL_FSNOTIFY_EVENTS), obj_type); | |
2a3edf86 EP |
1393 | if (ret) |
1394 | goto fput_and_out; | |
1395 | ||
0b3b094a JK |
1396 | if (flags & FAN_MARK_ADD) { |
1397 | ret = fanotify_events_supported(&path, mask); | |
1398 | if (ret) | |
1399 | goto path_put_and_out; | |
1400 | } | |
1401 | ||
d809daf1 | 1402 | if (fid_mode) { |
73072283 | 1403 | ret = fanotify_test_fid(&path, &__fsid); |
a8b13aa2 AG |
1404 | if (ret) |
1405 | goto path_put_and_out; | |
77115225 | 1406 | |
73072283 | 1407 | fsid = &__fsid; |
a8b13aa2 AG |
1408 | } |
1409 | ||
2a3edf86 | 1410 | /* inode held in place by reference to path; group by fget on fd */ |
d54f4fba | 1411 | if (mark_type == FAN_MARK_INODE) |
0ff21db9 EP |
1412 | inode = path.dentry->d_inode; |
1413 | else | |
1414 | mnt = path.mnt; | |
2a3edf86 | 1415 | |
85af5d92 AG |
1416 | /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ |
1417 | if (mnt || !S_ISDIR(inode->i_mode)) { | |
1418 | mask &= ~FAN_EVENT_ON_CHILD; | |
1419 | umask = FAN_EVENT_ON_CHILD; | |
51280637 AG |
1420 | /* |
1421 | * If group needs to report parent fid, register for getting | |
1422 | * events with parent/name info for non-directory. | |
1423 | */ | |
1424 | if ((fid_mode & FAN_REPORT_DIR_FID) && | |
1425 | (flags & FAN_MARK_ADD) && !ignored) | |
1426 | mask |= FAN_EVENT_ON_CHILD; | |
85af5d92 AG |
1427 | } |
1428 | ||
2a3edf86 | 1429 | /* add or remove mask bits on an inode, mount or filesystem mark */ |
0a8dd2db | 1430 | switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { |
c6223f46 | 1431 | case FAN_MARK_ADD: |
d54f4fba | 1432 | if (mark_type == FAN_MARK_MOUNT) |
77115225 AG |
1433 | ret = fanotify_add_vfsmount_mark(group, mnt, mask, |
1434 | flags, fsid); | |
d54f4fba | 1435 | else if (mark_type == FAN_MARK_FILESYSTEM) |
77115225 AG |
1436 | ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, |
1437 | flags, fsid); | |
0ff21db9 | 1438 | else |
77115225 AG |
1439 | ret = fanotify_add_inode_mark(group, inode, mask, |
1440 | flags, fsid); | |
c6223f46 AG |
1441 | break; |
1442 | case FAN_MARK_REMOVE: | |
d54f4fba | 1443 | if (mark_type == FAN_MARK_MOUNT) |
77115225 | 1444 | ret = fanotify_remove_vfsmount_mark(group, mnt, mask, |
85af5d92 | 1445 | flags, umask); |
d54f4fba | 1446 | else if (mark_type == FAN_MARK_FILESYSTEM) |
77115225 | 1447 | ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, |
85af5d92 | 1448 | flags, umask); |
f3640192 | 1449 | else |
77115225 | 1450 | ret = fanotify_remove_inode_mark(group, inode, mask, |
85af5d92 | 1451 | flags, umask); |
c6223f46 AG |
1452 | break; |
1453 | default: | |
1454 | ret = -EINVAL; | |
1455 | } | |
2a3edf86 | 1456 | |
a8b13aa2 | 1457 | path_put_and_out: |
2a3edf86 EP |
1458 | path_put(&path); |
1459 | fput_and_out: | |
2903ff01 | 1460 | fdput(f); |
2a3edf86 EP |
1461 | return ret; |
1462 | } | |
1463 | ||
2ca408d9 | 1464 | #ifndef CONFIG_ARCH_SPLIT_ARG64 |
183caa3c DB |
1465 | SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, |
1466 | __u64, mask, int, dfd, | |
1467 | const char __user *, pathname) | |
1468 | { | |
1469 | return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); | |
1470 | } | |
2ca408d9 | 1471 | #endif |
183caa3c | 1472 | |
2ca408d9 BG |
1473 | #if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT) |
1474 | SYSCALL32_DEFINE6(fanotify_mark, | |
91c2e0bc | 1475 | int, fanotify_fd, unsigned int, flags, |
2ca408d9 | 1476 | SC_ARG64(mask), int, dfd, |
91c2e0bc AV |
1477 | const char __user *, pathname) |
1478 | { | |
2ca408d9 BG |
1479 | return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask), |
1480 | dfd, pathname); | |
91c2e0bc AV |
1481 | } |
1482 | #endif | |
1483 | ||
2a3edf86 | 1484 | /* |
ae0e47f0 | 1485 | * fanotify_user_setup - Our initialization function. Note that we cannot return |
2a3edf86 EP |
1486 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here |
1487 | * must result in panic(). | |
1488 | */ | |
1489 | static int __init fanotify_user_setup(void) | |
1490 | { | |
5b8fea65 AG |
1491 | struct sysinfo si; |
1492 | int max_marks; | |
1493 | ||
1494 | si_meminfo(&si); | |
1495 | /* | |
1496 | * Allow up to 1% of addressable memory to be accounted for per user | |
1497 | * marks limited to the range [8192, 1048576]. mount and sb marks are | |
1498 | * a lot cheaper than inode marks, but there is no reason for a user | |
1499 | * to have many of those, so calculate by the cost of inode marks. | |
1500 | */ | |
1501 | max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / | |
1502 | INODE_MARK_COST; | |
1503 | max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS, | |
1504 | FANOTIFY_DEFAULT_MAX_USER_MARKS); | |
1505 | ||
a8b98c80 | 1506 | BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); |
929943b3 | 1507 | BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10); |
bdd5a46f AG |
1508 | BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); |
1509 | ||
d46eb14b SB |
1510 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, |
1511 | SLAB_PANIC|SLAB_ACCOUNT); | |
7088f357 JK |
1512 | fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event, |
1513 | SLAB_PANIC); | |
1514 | fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event, | |
1515 | SLAB_PANIC); | |
6685df31 MS |
1516 | if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { |
1517 | fanotify_perm_event_cachep = | |
33913997 | 1518 | KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); |
6685df31 | 1519 | } |
2a3edf86 | 1520 | |
5b8fea65 AG |
1521 | fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; |
1522 | init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = | |
1523 | FANOTIFY_DEFAULT_MAX_GROUPS; | |
1524 | init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks; | |
1525 | ||
2a3edf86 | 1526 | return 0; |
bbaa4168 | 1527 | } |
2a3edf86 | 1528 | device_initcall(fanotify_user_setup); |