1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/fanotify.h>
3 #include <linux/fdtable.h>
4 #include <linux/fsnotify_backend.h>
5 #include <linux/init.h>
6 #include <linux/jiffies.h>
7 #include <linux/kernel.h> /* UINT_MAX */
8 #include <linux/mount.h>
9 #include <linux/sched.h>
10 #include <linux/sched/user.h>
11 #include <linux/sched/signal.h>
12 #include <linux/types.h>
13 #include <linux/wait.h>
14 #include <linux/audit.h>
15 #include <linux/sched/mm.h>
16 #include <linux/statfs.h>
17 #include <linux/stringhash.h>
21 static bool fanotify_path_equal(const struct path
*p1
, const struct path
*p2
)
23 return p1
->mnt
== p2
->mnt
&& p1
->dentry
== p2
->dentry
;
26 static unsigned int fanotify_hash_path(const struct path
*path
)
28 return hash_ptr(path
->dentry
, FANOTIFY_EVENT_HASH_BITS
) ^
29 hash_ptr(path
->mnt
, FANOTIFY_EVENT_HASH_BITS
);
32 static inline bool fanotify_fsid_equal(__kernel_fsid_t
*fsid1
,
33 __kernel_fsid_t
*fsid2
)
35 return fsid1
->val
[0] == fsid2
->val
[0] && fsid1
->val
[1] == fsid2
->val
[1];
38 static unsigned int fanotify_hash_fsid(__kernel_fsid_t
*fsid
)
40 return hash_32(fsid
->val
[0], FANOTIFY_EVENT_HASH_BITS
) ^
41 hash_32(fsid
->val
[1], FANOTIFY_EVENT_HASH_BITS
);
44 static bool fanotify_fh_equal(struct fanotify_fh
*fh1
,
45 struct fanotify_fh
*fh2
)
47 if (fh1
->type
!= fh2
->type
|| fh1
->len
!= fh2
->len
)
51 !memcmp(fanotify_fh_buf(fh1
), fanotify_fh_buf(fh2
), fh1
->len
);
54 static unsigned int fanotify_hash_fh(struct fanotify_fh
*fh
)
56 long salt
= (long)fh
->type
| (long)fh
->len
<< 8;
59 * full_name_hash() works long by long, so it handles fh buf optimally.
61 return full_name_hash((void *)salt
, fanotify_fh_buf(fh
), fh
->len
);
64 static bool fanotify_fid_event_equal(struct fanotify_fid_event
*ffe1
,
65 struct fanotify_fid_event
*ffe2
)
67 /* Do not merge fid events without object fh */
68 if (!ffe1
->object_fh
.len
)
71 return fanotify_fsid_equal(&ffe1
->fsid
, &ffe2
->fsid
) &&
72 fanotify_fh_equal(&ffe1
->object_fh
, &ffe2
->object_fh
);
75 static bool fanotify_info_equal(struct fanotify_info
*info1
,
76 struct fanotify_info
*info2
)
78 if (info1
->dir_fh_totlen
!= info2
->dir_fh_totlen
||
79 info1
->dir2_fh_totlen
!= info2
->dir2_fh_totlen
||
80 info1
->file_fh_totlen
!= info2
->file_fh_totlen
||
81 info1
->name_len
!= info2
->name_len
||
82 info1
->name2_len
!= info2
->name2_len
)
85 if (info1
->dir_fh_totlen
&&
86 !fanotify_fh_equal(fanotify_info_dir_fh(info1
),
87 fanotify_info_dir_fh(info2
)))
90 if (info1
->dir2_fh_totlen
&&
91 !fanotify_fh_equal(fanotify_info_dir2_fh(info1
),
92 fanotify_info_dir2_fh(info2
)))
95 if (info1
->file_fh_totlen
&&
96 !fanotify_fh_equal(fanotify_info_file_fh(info1
),
97 fanotify_info_file_fh(info2
)))
100 if (info1
->name_len
&&
101 memcmp(fanotify_info_name(info1
), fanotify_info_name(info2
),
105 return !info1
->name2_len
||
106 !memcmp(fanotify_info_name2(info1
), fanotify_info_name2(info2
),
110 static bool fanotify_name_event_equal(struct fanotify_name_event
*fne1
,
111 struct fanotify_name_event
*fne2
)
113 struct fanotify_info
*info1
= &fne1
->info
;
114 struct fanotify_info
*info2
= &fne2
->info
;
116 /* Do not merge name events without dir fh */
117 if (!info1
->dir_fh_totlen
)
120 if (!fanotify_fsid_equal(&fne1
->fsid
, &fne2
->fsid
))
123 return fanotify_info_equal(info1
, info2
);
126 static bool fanotify_error_event_equal(struct fanotify_error_event
*fee1
,
127 struct fanotify_error_event
*fee2
)
129 /* Error events against the same file system are always merged. */
130 if (!fanotify_fsid_equal(&fee1
->fsid
, &fee2
->fsid
))
136 static bool fanotify_should_merge(struct fanotify_event
*old
,
137 struct fanotify_event
*new)
139 pr_debug("%s: old=%p new=%p\n", __func__
, old
, new);
141 if (old
->hash
!= new->hash
||
142 old
->type
!= new->type
|| old
->pid
!= new->pid
)
146 * We want to merge many dirent events in the same dir (i.e.
147 * creates/unlinks/renames), but we do not want to merge dirent
148 * events referring to subdirs with dirent events referring to
149 * non subdirs, otherwise, user won't be able to tell from a
150 * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+
151 * unlink pair or rmdir+create pair of events.
153 if ((old
->mask
& FS_ISDIR
) != (new->mask
& FS_ISDIR
))
157 * FAN_RENAME event is reported with special info record types,
158 * so we cannot merge it with other events.
160 if ((old
->mask
& FAN_RENAME
) != (new->mask
& FAN_RENAME
))
164 case FANOTIFY_EVENT_TYPE_PATH
:
165 return fanotify_path_equal(fanotify_event_path(old
),
166 fanotify_event_path(new));
167 case FANOTIFY_EVENT_TYPE_FID
:
168 return fanotify_fid_event_equal(FANOTIFY_FE(old
),
170 case FANOTIFY_EVENT_TYPE_FID_NAME
:
171 return fanotify_name_event_equal(FANOTIFY_NE(old
),
173 case FANOTIFY_EVENT_TYPE_FS_ERROR
:
174 return fanotify_error_event_equal(FANOTIFY_EE(old
),
183 /* Limit event merges to limit CPU overhead per event */
184 #define FANOTIFY_MAX_MERGE_EVENTS 128
186 /* and the list better be locked by something too! */
187 static int fanotify_merge(struct fsnotify_group
*group
,
188 struct fsnotify_event
*event
)
190 struct fanotify_event
*old
, *new = FANOTIFY_E(event
);
191 unsigned int bucket
= fanotify_event_hash_bucket(group
, new);
192 struct hlist_head
*hlist
= &group
->fanotify_data
.merge_hash
[bucket
];
195 pr_debug("%s: group=%p event=%p bucket=%u\n", __func__
,
196 group
, event
, bucket
);
199 * Don't merge a permission event with any other event so that we know
200 * the event structure we have created in fanotify_handle_event() is the
201 * one we should check for permission response.
203 if (fanotify_is_perm_event(new->mask
))
206 hlist_for_each_entry(old
, hlist
, merge_list
) {
207 if (++i
> FANOTIFY_MAX_MERGE_EVENTS
)
209 if (fanotify_should_merge(old
, new)) {
210 old
->mask
|= new->mask
;
212 if (fanotify_is_error_event(old
->mask
))
213 FANOTIFY_EE(old
)->err_count
++;
223 * Wait for response to permission event. The function also takes care of
224 * freeing the permission event (or offloads that in case the wait is canceled
225 * by a signal). The function returns 0 in case access got allowed by userspace,
226 * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
227 * the wait got interrupted by a signal.
229 static int fanotify_get_response(struct fsnotify_group
*group
,
230 struct fanotify_perm_event
*event
,
231 struct fsnotify_iter_info
*iter_info
)
235 pr_debug("%s: group=%p event=%p\n", __func__
, group
, event
);
237 ret
= wait_event_killable(group
->fanotify_data
.access_waitq
,
238 event
->state
== FAN_EVENT_ANSWERED
);
239 /* Signal pending? */
241 spin_lock(&group
->notification_lock
);
242 /* Event reported to userspace and no answer yet? */
243 if (event
->state
== FAN_EVENT_REPORTED
) {
244 /* Event will get freed once userspace answers to it */
245 event
->state
= FAN_EVENT_CANCELED
;
246 spin_unlock(&group
->notification_lock
);
249 /* Event not yet reported? Just remove it. */
250 if (event
->state
== FAN_EVENT_INIT
) {
251 fsnotify_remove_queued_event(group
, &event
->fae
.fse
);
252 /* Permission events are not supposed to be hashed */
253 WARN_ON_ONCE(!hlist_unhashed(&event
->fae
.merge_list
));
256 * Event may be also answered in case signal delivery raced
257 * with wakeup. In that case we have nothing to do besides
258 * freeing the event and reporting error.
260 spin_unlock(&group
->notification_lock
);
264 /* userspace responded, convert to something usable */
265 switch (event
->response
& FANOTIFY_RESPONSE_ACCESS
) {
274 /* Check if the response should be audited */
275 if (event
->response
& FAN_AUDIT
)
276 audit_fanotify(event
->response
& ~FAN_AUDIT
,
279 pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__
,
282 fsnotify_destroy_event(group
, &event
->fae
.fse
);
288 * This function returns a mask for an event that only contains the flags
289 * that have been specifically requested by the user. Flags that may have
290 * been included within the event mask, but have not been explicitly
291 * requested by the user, will not be present in the returned mask.
293 static u32
fanotify_group_event_mask(struct fsnotify_group
*group
,
294 struct fsnotify_iter_info
*iter_info
,
295 u32
*match_mask
, u32 event_mask
,
296 const void *data
, int data_type
,
299 __u32 marks_mask
= 0, marks_ignore_mask
= 0;
300 __u32 test_mask
, user_mask
= FANOTIFY_OUTGOING_EVENTS
|
301 FANOTIFY_EVENT_FLAGS
;
302 const struct path
*path
= fsnotify_data_path(data
, data_type
);
303 unsigned int fid_mode
= FAN_GROUP_FLAG(group
, FANOTIFY_FID_BITS
);
304 struct fsnotify_mark
*mark
;
305 bool ondir
= event_mask
& FAN_ONDIR
;
308 pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
309 __func__
, iter_info
->report_mask
, event_mask
, data
, data_type
);
312 /* Do we have path to open a file descriptor? */
315 /* Path type events are only relevant for files and dirs */
316 if (!d_is_reg(path
->dentry
) && !d_can_lookup(path
->dentry
))
318 } else if (!(fid_mode
& FAN_REPORT_FID
)) {
319 /* Do we have a directory inode to report? */
324 fsnotify_foreach_iter_mark_type(iter_info
, mark
, type
) {
326 * Apply ignore mask depending on event flags in ignore mask.
329 fsnotify_effective_ignore_mask(mark
, ondir
, type
);
332 * Send the event depending on event flags in mark mask.
334 if (!fsnotify_mask_applicable(mark
->mask
, ondir
, type
))
337 marks_mask
|= mark
->mask
;
339 /* Record the mark types of this group that matched the event */
340 *match_mask
|= 1U << type
;
343 test_mask
= event_mask
& marks_mask
& ~marks_ignore_mask
;
346 * For dirent modification events (create/delete/move) that do not carry
347 * the child entry name information, we report FAN_ONDIR for mkdir/rmdir
348 * so user can differentiate them from creat/unlink.
350 * For backward compatibility and consistency, do not report FAN_ONDIR
351 * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
352 * to user in fid mode for all event types.
354 * We never report FAN_EVENT_ON_CHILD to user, but we do pass it in to
355 * fanotify_alloc_event() when group is reporting fid as indication
356 * that event happened on child.
359 /* Do not report event flags without any event */
360 if (!(test_mask
& ~FANOTIFY_EVENT_FLAGS
))
363 user_mask
&= ~FANOTIFY_EVENT_FLAGS
;
366 return test_mask
& user_mask
;
370 * Check size needed to encode fanotify_fh.
372 * Return size of encoded fh without fanotify_fh header.
373 * Return 0 on failure to encode.
375 static int fanotify_encode_fh_len(struct inode
*inode
)
383 exportfs_encode_fid(inode
, NULL
, &dwords
);
384 fh_len
= dwords
<< 2;
387 * struct fanotify_error_event might be preallocated and is
388 * limited to MAX_HANDLE_SZ. This should never happen, but
389 * safeguard by forcing an invalid file handle.
391 if (WARN_ON_ONCE(fh_len
> MAX_HANDLE_SZ
))
398 * Encode fanotify_fh.
400 * Return total size of encoded fh including fanotify_fh header.
401 * Return 0 on failure to encode.
403 static int fanotify_encode_fh(struct fanotify_fh
*fh
, struct inode
*inode
,
404 unsigned int fh_len
, unsigned int *hash
,
407 int dwords
, type
= 0;
408 char *ext_buf
= NULL
;
412 fh
->type
= FILEID_ROOT
;
417 * Invalid FHs are used by FAN_FS_ERROR for errors not
418 * linked to any inode. The f_handle won't be reported
425 * !gpf means preallocated variable size fh, but fh_len could
426 * be zero in that case if encoding fh len failed.
429 if (fh_len
< 4 || WARN_ON_ONCE(fh_len
% 4) || fh_len
> MAX_HANDLE_SZ
)
432 /* No external buffer in a variable size allocated fh */
433 if (gfp
&& fh_len
> FANOTIFY_INLINE_FH_LEN
) {
434 /* Treat failure to allocate fh as failure to encode fh */
436 ext_buf
= kmalloc(fh_len
, gfp
);
440 *fanotify_fh_ext_buf_ptr(fh
) = ext_buf
;
442 fh
->flags
|= FANOTIFY_FH_FLAG_EXT_BUF
;
445 dwords
= fh_len
>> 2;
446 type
= exportfs_encode_fid(inode
, buf
, &dwords
);
448 if (type
<= 0 || type
== FILEID_INVALID
|| fh_len
!= dwords
<< 2)
456 * Mix fh into event merge key. Hash might be NULL in case of
457 * unhashed FID events (i.e. FAN_FS_ERROR).
460 *hash
^= fanotify_hash_fh(fh
);
462 return FANOTIFY_FH_HDR_LEN
+ fh_len
;
465 pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n",
468 *fanotify_fh_ext_buf_ptr(fh
) = NULL
;
469 /* Report the event without a file identifier on encode error */
470 fh
->type
= FILEID_INVALID
;
476 * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for
477 * some events and the fid of the parent for create/delete/move events.
479 * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported
480 * also in create/delete/move events in addition to the fid of the parent
481 * and the name of the child.
483 static inline bool fanotify_report_child_fid(unsigned int fid_mode
, u32 mask
)
485 if (mask
& ALL_FSNOTIFY_DIRENT_EVENTS
)
486 return (fid_mode
& FAN_REPORT_TARGET_FID
);
488 return (fid_mode
& FAN_REPORT_FID
) && !(mask
& FAN_ONDIR
);
492 * The inode to use as identifier when reporting fid depends on the event
493 * and the group flags.
495 * With the group flag FAN_REPORT_TARGET_FID, always report the child fid.
497 * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory
498 * fid on dirent events and the child fid otherwise.
501 * FS_ATTRIB reports the child fid even if reported on a watched parent.
502 * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID.
503 * and reports the created child fid with FAN_REPORT_TARGET_FID.
505 static struct inode
*fanotify_fid_inode(u32 event_mask
, const void *data
,
506 int data_type
, struct inode
*dir
,
507 unsigned int fid_mode
)
509 if ((event_mask
& ALL_FSNOTIFY_DIRENT_EVENTS
) &&
510 !(fid_mode
& FAN_REPORT_TARGET_FID
))
513 return fsnotify_data_inode(data
, data_type
);
517 * The inode to use as identifier when reporting dir fid depends on the event.
518 * Report the modified directory inode on dirent modification events.
519 * Report the "victim" inode if "victim" is a directory.
520 * Report the parent inode if "victim" is not a directory and event is
521 * reported to parent.
522 * Otherwise, do not report dir fid.
524 static struct inode
*fanotify_dfid_inode(u32 event_mask
, const void *data
,
525 int data_type
, struct inode
*dir
)
527 struct inode
*inode
= fsnotify_data_inode(data
, data_type
);
529 if (event_mask
& ALL_FSNOTIFY_DIRENT_EVENTS
)
532 if (inode
&& S_ISDIR(inode
->i_mode
))
538 static struct fanotify_event
*fanotify_alloc_path_event(const struct path
*path
,
542 struct fanotify_path_event
*pevent
;
544 pevent
= kmem_cache_alloc(fanotify_path_event_cachep
, gfp
);
548 pevent
->fae
.type
= FANOTIFY_EVENT_TYPE_PATH
;
549 pevent
->path
= *path
;
550 *hash
^= fanotify_hash_path(path
);
556 static struct fanotify_event
*fanotify_alloc_perm_event(const struct path
*path
,
559 struct fanotify_perm_event
*pevent
;
561 pevent
= kmem_cache_alloc(fanotify_perm_event_cachep
, gfp
);
565 pevent
->fae
.type
= FANOTIFY_EVENT_TYPE_PATH_PERM
;
566 pevent
->response
= 0;
567 pevent
->hdr
.type
= FAN_RESPONSE_INFO_NONE
;
570 pevent
->state
= FAN_EVENT_INIT
;
571 pevent
->path
= *path
;
577 static struct fanotify_event
*fanotify_alloc_fid_event(struct inode
*id
,
578 __kernel_fsid_t
*fsid
,
582 struct fanotify_fid_event
*ffe
;
584 ffe
= kmem_cache_alloc(fanotify_fid_event_cachep
, gfp
);
588 ffe
->fae
.type
= FANOTIFY_EVENT_TYPE_FID
;
590 *hash
^= fanotify_hash_fsid(fsid
);
591 fanotify_encode_fh(&ffe
->object_fh
, id
, fanotify_encode_fh_len(id
),
597 static struct fanotify_event
*fanotify_alloc_name_event(struct inode
*dir
,
598 __kernel_fsid_t
*fsid
,
599 const struct qstr
*name
,
601 struct dentry
*moved
,
605 struct fanotify_name_event
*fne
;
606 struct fanotify_info
*info
;
607 struct fanotify_fh
*dfh
, *ffh
;
608 struct inode
*dir2
= moved
? d_inode(moved
->d_parent
) : NULL
;
609 const struct qstr
*name2
= moved
? &moved
->d_name
: NULL
;
610 unsigned int dir_fh_len
= fanotify_encode_fh_len(dir
);
611 unsigned int dir2_fh_len
= fanotify_encode_fh_len(dir2
);
612 unsigned int child_fh_len
= fanotify_encode_fh_len(child
);
613 unsigned long name_len
= name
? name
->len
: 0;
614 unsigned long name2_len
= name2
? name2
->len
: 0;
615 unsigned int len
, size
;
617 /* Reserve terminating null byte even for empty name */
618 size
= sizeof(*fne
) + name_len
+ name2_len
+ 2;
620 size
+= FANOTIFY_FH_HDR_LEN
+ dir_fh_len
;
622 size
+= FANOTIFY_FH_HDR_LEN
+ dir2_fh_len
;
624 size
+= FANOTIFY_FH_HDR_LEN
+ child_fh_len
;
625 fne
= kmalloc(size
, gfp
);
629 fne
->fae
.type
= FANOTIFY_EVENT_TYPE_FID_NAME
;
631 *hash
^= fanotify_hash_fsid(fsid
);
633 fanotify_info_init(info
);
635 dfh
= fanotify_info_dir_fh(info
);
636 len
= fanotify_encode_fh(dfh
, dir
, dir_fh_len
, hash
, 0);
637 fanotify_info_set_dir_fh(info
, len
);
640 dfh
= fanotify_info_dir2_fh(info
);
641 len
= fanotify_encode_fh(dfh
, dir2
, dir2_fh_len
, hash
, 0);
642 fanotify_info_set_dir2_fh(info
, len
);
645 ffh
= fanotify_info_file_fh(info
);
646 len
= fanotify_encode_fh(ffh
, child
, child_fh_len
, hash
, 0);
647 fanotify_info_set_file_fh(info
, len
);
650 fanotify_info_copy_name(info
, name
);
651 *hash
^= full_name_hash((void *)name_len
, name
->name
, name_len
);
654 fanotify_info_copy_name2(info
, name2
);
655 *hash
^= full_name_hash((void *)name2_len
, name2
->name
,
659 pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
660 __func__
, size
, dir_fh_len
, child_fh_len
,
661 info
->name_len
, info
->name_len
, fanotify_info_name(info
));
664 pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n",
665 __func__
, dir2_fh_len
, info
->name2_len
,
666 info
->name2_len
, fanotify_info_name2(info
));
672 static struct fanotify_event
*fanotify_alloc_error_event(
673 struct fsnotify_group
*group
,
674 __kernel_fsid_t
*fsid
,
675 const void *data
, int data_type
,
678 struct fs_error_report
*report
=
679 fsnotify_data_error_report(data
, data_type
);
681 struct fanotify_error_event
*fee
;
684 if (WARN_ON_ONCE(!report
))
687 fee
= mempool_alloc(&group
->fanotify_data
.error_events_pool
, GFP_NOFS
);
691 fee
->fae
.type
= FANOTIFY_EVENT_TYPE_FS_ERROR
;
692 fee
->error
= report
->error
;
696 inode
= report
->inode
;
697 fh_len
= fanotify_encode_fh_len(inode
);
699 /* Bad fh_len. Fallback to using an invalid fh. Should never happen. */
700 if (!fh_len
&& inode
)
703 fanotify_encode_fh(&fee
->object_fh
, inode
, fh_len
, NULL
, 0);
705 *hash
^= fanotify_hash_fsid(fsid
);
710 static struct fanotify_event
*fanotify_alloc_event(
711 struct fsnotify_group
*group
,
712 u32 mask
, const void *data
, int data_type
,
713 struct inode
*dir
, const struct qstr
*file_name
,
714 __kernel_fsid_t
*fsid
, u32 match_mask
)
716 struct fanotify_event
*event
= NULL
;
717 gfp_t gfp
= GFP_KERNEL_ACCOUNT
;
718 unsigned int fid_mode
= FAN_GROUP_FLAG(group
, FANOTIFY_FID_BITS
);
719 struct inode
*id
= fanotify_fid_inode(mask
, data
, data_type
, dir
,
721 struct inode
*dirid
= fanotify_dfid_inode(mask
, data
, data_type
, dir
);
722 const struct path
*path
= fsnotify_data_path(data
, data_type
);
723 struct mem_cgroup
*old_memcg
;
724 struct dentry
*moved
= NULL
;
725 struct inode
*child
= NULL
;
726 bool name_event
= false;
727 unsigned int hash
= 0;
728 bool ondir
= mask
& FAN_ONDIR
;
731 if ((fid_mode
& FAN_REPORT_DIR_FID
) && dirid
) {
733 * For certain events and group flags, report the child fid
734 * in addition to reporting the parent fid and maybe child name.
736 if (fanotify_report_child_fid(fid_mode
, mask
) && id
!= dirid
)
742 * We record file name only in a group with FAN_REPORT_NAME
743 * and when we have a directory inode to report.
745 * For directory entry modification event, we record the fid of
746 * the directory and the name of the modified entry.
748 * For event on non-directory that is reported to parent, we
749 * record the fid of the parent and the name of the child.
751 * Even if not reporting name, we need a variable length
752 * fanotify_name_event if reporting both parent and child fids.
754 if (!(fid_mode
& FAN_REPORT_NAME
)) {
755 name_event
= !!child
;
757 } else if ((mask
& ALL_FSNOTIFY_DIRENT_EVENTS
) || !ondir
) {
762 * In the special case of FAN_RENAME event, use the match_mask
763 * to determine if we need to report only the old parent+name,
764 * only the new parent+name or both.
765 * 'dirid' and 'file_name' are the old parent+name and
766 * 'moved' has the new parent+name.
768 if (mask
& FAN_RENAME
) {
769 bool report_old
, report_new
;
771 if (WARN_ON_ONCE(!match_mask
))
774 /* Report both old and new parent+name if sb watching */
775 report_old
= report_new
=
776 match_mask
& (1U << FSNOTIFY_ITER_TYPE_SB
);
778 match_mask
& (1U << FSNOTIFY_ITER_TYPE_INODE
);
780 match_mask
& (1U << FSNOTIFY_ITER_TYPE_INODE2
);
783 /* Do not report old parent+name */
788 /* Report new parent+name */
789 moved
= fsnotify_data_dentry(data
, data_type
);
795 * For queues with unlimited length lost events are not expected and
796 * can possibly have security implications. Avoid losing events when
797 * memory is short. For the limited size queues, avoid OOM killer in the
798 * target monitoring memcg as it may have security repercussion.
800 if (group
->max_events
== UINT_MAX
)
803 gfp
|= __GFP_RETRY_MAYFAIL
;
805 /* Whoever is interested in the event, pays for the allocation. */
806 old_memcg
= set_active_memcg(group
->memcg
);
808 if (fanotify_is_perm_event(mask
)) {
809 event
= fanotify_alloc_perm_event(path
, gfp
);
810 } else if (fanotify_is_error_event(mask
)) {
811 event
= fanotify_alloc_error_event(group
, fsid
, data
,
813 } else if (name_event
&& (file_name
|| moved
|| child
)) {
814 event
= fanotify_alloc_name_event(dirid
, fsid
, file_name
, child
,
816 } else if (fid_mode
) {
817 event
= fanotify_alloc_fid_event(id
, fsid
, &hash
, gfp
);
819 event
= fanotify_alloc_path_event(path
, &hash
, gfp
);
825 if (FAN_GROUP_FLAG(group
, FAN_REPORT_TID
))
826 pid
= get_pid(task_pid(current
));
828 pid
= get_pid(task_tgid(current
));
830 /* Mix event info, FAN_ONDIR flag and pid into event merge key */
831 hash
^= hash_long((unsigned long)pid
| ondir
, FANOTIFY_EVENT_HASH_BITS
);
832 fanotify_init_event(event
, hash
, mask
);
836 set_active_memcg(old_memcg
);
841 * Get cached fsid of the filesystem containing the object from any connector.
842 * All connectors are supposed to have the same fsid, but we do not verify that
845 static __kernel_fsid_t
fanotify_get_fsid(struct fsnotify_iter_info
*iter_info
)
847 struct fsnotify_mark
*mark
;
849 __kernel_fsid_t fsid
= {};
851 fsnotify_foreach_iter_mark_type(iter_info
, mark
, type
) {
852 struct fsnotify_mark_connector
*conn
;
854 conn
= READ_ONCE(mark
->connector
);
855 /* Mark is just getting destroyed or created? */
858 if (!(conn
->flags
& FSNOTIFY_CONN_FLAG_HAS_FSID
))
860 /* Pairs with smp_wmb() in fsnotify_add_mark_list() */
863 if (WARN_ON_ONCE(!fsid
.val
[0] && !fsid
.val
[1]))
872 * Add an event to hash table for faster merge.
874 static void fanotify_insert_event(struct fsnotify_group
*group
,
875 struct fsnotify_event
*fsn_event
)
877 struct fanotify_event
*event
= FANOTIFY_E(fsn_event
);
878 unsigned int bucket
= fanotify_event_hash_bucket(group
, event
);
879 struct hlist_head
*hlist
= &group
->fanotify_data
.merge_hash
[bucket
];
881 assert_spin_locked(&group
->notification_lock
);
883 if (!fanotify_is_hashed_event(event
->mask
))
886 pr_debug("%s: group=%p event=%p bucket=%u\n", __func__
,
887 group
, event
, bucket
);
889 hlist_add_head(&event
->merge_list
, hlist
);
892 static int fanotify_handle_event(struct fsnotify_group
*group
, u32 mask
,
893 const void *data
, int data_type
,
895 const struct qstr
*file_name
, u32 cookie
,
896 struct fsnotify_iter_info
*iter_info
)
899 struct fanotify_event
*event
;
900 struct fsnotify_event
*fsn_event
;
901 __kernel_fsid_t fsid
= {};
904 BUILD_BUG_ON(FAN_ACCESS
!= FS_ACCESS
);
905 BUILD_BUG_ON(FAN_MODIFY
!= FS_MODIFY
);
906 BUILD_BUG_ON(FAN_ATTRIB
!= FS_ATTRIB
);
907 BUILD_BUG_ON(FAN_CLOSE_NOWRITE
!= FS_CLOSE_NOWRITE
);
908 BUILD_BUG_ON(FAN_CLOSE_WRITE
!= FS_CLOSE_WRITE
);
909 BUILD_BUG_ON(FAN_OPEN
!= FS_OPEN
);
910 BUILD_BUG_ON(FAN_MOVED_TO
!= FS_MOVED_TO
);
911 BUILD_BUG_ON(FAN_MOVED_FROM
!= FS_MOVED_FROM
);
912 BUILD_BUG_ON(FAN_CREATE
!= FS_CREATE
);
913 BUILD_BUG_ON(FAN_DELETE
!= FS_DELETE
);
914 BUILD_BUG_ON(FAN_DELETE_SELF
!= FS_DELETE_SELF
);
915 BUILD_BUG_ON(FAN_MOVE_SELF
!= FS_MOVE_SELF
);
916 BUILD_BUG_ON(FAN_EVENT_ON_CHILD
!= FS_EVENT_ON_CHILD
);
917 BUILD_BUG_ON(FAN_Q_OVERFLOW
!= FS_Q_OVERFLOW
);
918 BUILD_BUG_ON(FAN_OPEN_PERM
!= FS_OPEN_PERM
);
919 BUILD_BUG_ON(FAN_ACCESS_PERM
!= FS_ACCESS_PERM
);
920 BUILD_BUG_ON(FAN_ONDIR
!= FS_ISDIR
);
921 BUILD_BUG_ON(FAN_OPEN_EXEC
!= FS_OPEN_EXEC
);
922 BUILD_BUG_ON(FAN_OPEN_EXEC_PERM
!= FS_OPEN_EXEC_PERM
);
923 BUILD_BUG_ON(FAN_FS_ERROR
!= FS_ERROR
);
924 BUILD_BUG_ON(FAN_RENAME
!= FS_RENAME
);
926 BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS
) != 21);
928 mask
= fanotify_group_event_mask(group
, iter_info
, &match_mask
,
929 mask
, data
, data_type
, dir
);
933 pr_debug("%s: group=%p mask=%x report_mask=%x\n", __func__
,
934 group
, mask
, match_mask
);
936 if (fanotify_is_perm_event(mask
)) {
938 * fsnotify_prepare_user_wait() fails if we race with mark
939 * deletion. Just let the operation pass in that case.
941 if (!fsnotify_prepare_user_wait(iter_info
))
945 if (FAN_GROUP_FLAG(group
, FANOTIFY_FID_BITS
)) {
946 fsid
= fanotify_get_fsid(iter_info
);
947 /* Racing with mark destruction or creation? */
948 if (!fsid
.val
[0] && !fsid
.val
[1])
952 event
= fanotify_alloc_event(group
, mask
, data
, data_type
, dir
,
953 file_name
, &fsid
, match_mask
);
955 if (unlikely(!event
)) {
957 * We don't queue overflow events for permission events as
958 * there the access is denied and so no event is in fact lost.
960 if (!fanotify_is_perm_event(mask
))
961 fsnotify_queue_overflow(group
);
965 fsn_event
= &event
->fse
;
966 ret
= fsnotify_insert_event(group
, fsn_event
, fanotify_merge
,
967 fanotify_insert_event
);
969 /* Permission events shouldn't be merged */
970 BUG_ON(ret
== 1 && mask
& FANOTIFY_PERM_EVENTS
);
971 /* Our event wasn't used in the end. Free it. */
972 fsnotify_destroy_event(group
, fsn_event
);
975 } else if (fanotify_is_perm_event(mask
)) {
976 ret
= fanotify_get_response(group
, FANOTIFY_PERM(event
),
980 if (fanotify_is_perm_event(mask
))
981 fsnotify_finish_user_wait(iter_info
);
986 static void fanotify_free_group_priv(struct fsnotify_group
*group
)
988 kfree(group
->fanotify_data
.merge_hash
);
989 if (group
->fanotify_data
.ucounts
)
990 dec_ucount(group
->fanotify_data
.ucounts
,
991 UCOUNT_FANOTIFY_GROUPS
);
993 if (mempool_initialized(&group
->fanotify_data
.error_events_pool
))
994 mempool_exit(&group
->fanotify_data
.error_events_pool
);
997 static void fanotify_free_path_event(struct fanotify_event
*event
)
999 path_put(fanotify_event_path(event
));
1000 kmem_cache_free(fanotify_path_event_cachep
, FANOTIFY_PE(event
));
1003 static void fanotify_free_perm_event(struct fanotify_event
*event
)
1005 path_put(fanotify_event_path(event
));
1006 kmem_cache_free(fanotify_perm_event_cachep
, FANOTIFY_PERM(event
));
1009 static void fanotify_free_fid_event(struct fanotify_event
*event
)
1011 struct fanotify_fid_event
*ffe
= FANOTIFY_FE(event
);
1013 if (fanotify_fh_has_ext_buf(&ffe
->object_fh
))
1014 kfree(fanotify_fh_ext_buf(&ffe
->object_fh
));
1015 kmem_cache_free(fanotify_fid_event_cachep
, ffe
);
/* Free a name event by passing its containing structure to kfree(). */
static void fanotify_free_name_event(struct fanotify_event *event)
{
	struct fanotify_name_event *name_event = FANOTIFY_NE(event);

	kfree(name_event);
}
1023 static void fanotify_free_error_event(struct fsnotify_group
*group
,
1024 struct fanotify_event
*event
)
1026 struct fanotify_error_event
*fee
= FANOTIFY_EE(event
);
1028 mempool_free(fee
, &group
->fanotify_data
.error_events_pool
);
1031 static void fanotify_free_event(struct fsnotify_group
*group
,
1032 struct fsnotify_event
*fsn_event
)
1034 struct fanotify_event
*event
;
1036 event
= FANOTIFY_E(fsn_event
);
1037 put_pid(event
->pid
);
1038 switch (event
->type
) {
1039 case FANOTIFY_EVENT_TYPE_PATH
:
1040 fanotify_free_path_event(event
);
1042 case FANOTIFY_EVENT_TYPE_PATH_PERM
:
1043 fanotify_free_perm_event(event
);
1045 case FANOTIFY_EVENT_TYPE_FID
:
1046 fanotify_free_fid_event(event
);
1048 case FANOTIFY_EVENT_TYPE_FID_NAME
:
1049 fanotify_free_name_event(event
);
1051 case FANOTIFY_EVENT_TYPE_OVERFLOW
:
1054 case FANOTIFY_EVENT_TYPE_FS_ERROR
:
1055 fanotify_free_error_event(group
, event
);
1062 static void fanotify_freeing_mark(struct fsnotify_mark
*mark
,
1063 struct fsnotify_group
*group
)
1065 if (!FAN_GROUP_FLAG(group
, FAN_UNLIMITED_MARKS
))
1066 dec_ucount(group
->fanotify_data
.ucounts
, UCOUNT_FANOTIFY_MARKS
);
1069 static void fanotify_free_mark(struct fsnotify_mark
*fsn_mark
)
1071 kmem_cache_free(fanotify_mark_cache
, fsn_mark
);
1074 const struct fsnotify_ops fanotify_fsnotify_ops
= {
1075 .handle_event
= fanotify_handle_event
,
1076 .free_group_priv
= fanotify_free_group_priv
,
1077 .free_event
= fanotify_free_event
,
1078 .freeing_mark
= fanotify_freeing_mark
,
1079 .free_mark
= fanotify_free_mark
,