1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 1995 Linus Torvalds
8 #include <linux/stddef.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/time.h>
13 #include <linux/errno.h>
14 #include <linux/stat.h>
15 #include <linux/file.h>
17 #include <linux/fsnotify.h>
18 #include <linux/dirent.h>
19 #include <linux/security.h>
20 #include <linux/syscalls.h>
21 #include <linux/unistd.h>
22 #include <linux/compat.h>
23 #include <linux/uaccess.h>
25 #include <asm/unaligned.h>
28 * Some filesystems were never converted to '->iterate_shared()'
29 * and their directory iterators want the inode lock held for
30 * writing. This wrapper allows for converting from the shared
31 * semantics to the exclusive inode use.
33 int wrap_directory_iterator(struct file
*file
,
34 struct dir_context
*ctx
,
35 int (*iter
)(struct file
*, struct dir_context
*))
37 struct inode
*inode
= file_inode(file
);
41 * We'd love to have an 'inode_upgrade_trylock()' operation,
42 * see the comment in mmap_upgrade_trylock() in mm/memory.c.
44 * But considering this is for "filesystems that never got
45 * converted", it really doesn't matter.
47 * Also note that since we have to return with the lock held
48 * for reading, we can't use the "killable()" locking here,
49 * since we do need to get the lock even if we're dying.
51 * We could do the write part killably and then get the read
52 * lock unconditionally if it mattered, but see above on why
53 * this does the very simplistic conversion.
55 up_read(&inode
->i_rwsem
);
56 down_write(&inode
->i_rwsem
);
59 * Since we dropped the inode lock, we should do the
60 * DEADDIR test again. See 'iterate_dir()' below.
62 * Note that we don't need to re-do the f_pos games,
63 * since the file must be locked wrt f_pos anyway.
66 if (!IS_DEADDIR(inode
))
67 ret
= iter(file
, ctx
);
69 downgrade_write(&inode
->i_rwsem
);
72 EXPORT_SYMBOL(wrap_directory_iterator
);
/*
 * Copy a directory entry name of '_len' bytes to the user buffer '_dst'
 * and store a terminating NUL byte at '_dst + _len'.
 *
 * Note the "unsafe_put_user()" semantics: we goto a
 * label for errors.
 *
 * Each argument is evaluated exactly once into a local, so expressions
 * with side effects are safe to pass. The NUL byte is written before
 * the name bytes are copied.
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
	char __user *dst = (_dst);				\
	const char *src = (_src);				\
	size_t len = (_len);					\
	unsafe_put_user(0, dst+len, label);			\
	unsafe_copy_to_user(dst, src, len, label);		\
} while (0)
87 int iterate_dir(struct file
*file
, struct dir_context
*ctx
)
89 struct inode
*inode
= file_inode(file
);
92 if (!file
->f_op
->iterate_shared
)
95 res
= security_file_permission(file
, MAY_READ
);
99 res
= fsnotify_file_perm(file
, MAY_READ
);
103 res
= down_read_killable(&inode
->i_rwsem
);
108 if (!IS_DEADDIR(inode
)) {
109 ctx
->pos
= file
->f_pos
;
110 res
= file
->f_op
->iterate_shared(file
, ctx
);
111 file
->f_pos
= ctx
->pos
;
112 fsnotify_access(file
);
115 inode_unlock_shared(inode
);
119 EXPORT_SYMBOL(iterate_dir
);
/*
 * POSIX says that a dirent name cannot contain NULL or a '/'.
 *
 * It's not 100% clear what we should really do in this case.
 * The filesystem is clearly corrupted, but returning a hard
 * error means that you now don't see any of the other names
 * either, so that isn't a perfect alternative.
 *
 * And if you return an error, what error do you use? Several
 * filesystems seem to have decided on EUCLEAN being the error
 * code for EFSCORRUPTED, and that may be the error to use. Or
 * just EIO, which is perhaps more obvious to users.
 *
 * In order to see the other file names in the directory, the
 * caller might want to make this a "soft" error: skip the
 * entry, and return the error at the end instead.
 *
 * Note that this should likely do a "memchr(name, 0, len)"
 * check too, since that would be filesystem corruption as
 * well. However, that case can't actually confuse user space,
 * which has to do a strlen() on the name anyway to find the
 * filename length, and the above "soft error" worry means
 * that it's probably better left alone until we have that
 * issue clarified.
 *
 * Note the PATH_MAX check - it's arbitrary but the real
 * kernel limit on a possible path component, not NAME_MAX,
 * which is the technical standard limit.
 *
 * @name: entry name, not necessarily NUL-terminated
 * @len:  number of bytes in @name
 *
 * Returns 0 if the name is acceptable, -EIO if @len is not in the
 * range 1..PATH_MAX-1 or if one of the first @len bytes is '/'.
 * The length is checked first, so @name is never scanned with an
 * out-of-range @len.
 */
static int verify_dirent_name(const char *name, int len)
{
	if (len <= 0 || len >= PATH_MAX)
		return -EIO;
	if (memchr(name, '/', len))
		return -EIO;
	return 0;
}
/*
 * Traditional linux readdir() handling..
 *
 * "count=1" is a special case, meaning that the buffer is one
 * dirent-structure in size and that the code can't handle more
 * anyway. Thus the special "fillonedir()" function for that
 * case (the low-level handlers don't need to care about this).
 */
168 #ifdef __ARCH_WANT_OLD_READDIR
/*
 * User-space record written by the old single-entry readdir() call.
 *
 * NOTE(review): restored from a damaged copy; 'd_ino' and 'd_name'
 * were not visible in the struct there but are both written through
 * by fillonedir() - confirm member order and the flexible-array form
 * against a pristine source.
 */
struct old_linux_dirent {
	unsigned long	d_ino;		/* inode number */
	unsigned long	d_offset;	/* offset passed to the fill callback */
	unsigned short	d_namlen;	/* name length, excluding the NUL */
	char		d_name[];	/* NUL-terminated name */
};
177 struct readdir_callback
{
178 struct dir_context ctx
;
179 struct old_linux_dirent __user
* dirent
;
183 static bool fillonedir(struct dir_context
*ctx
, const char *name
, int namlen
,
184 loff_t offset
, u64 ino
, unsigned int d_type
)
186 struct readdir_callback
*buf
=
187 container_of(ctx
, struct readdir_callback
, ctx
);
188 struct old_linux_dirent __user
* dirent
;
193 buf
->result
= verify_dirent_name(name
, namlen
);
197 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
198 buf
->result
= -EOVERFLOW
;
202 dirent
= buf
->dirent
;
203 if (!user_write_access_begin(dirent
,
204 (unsigned long)(dirent
->d_name
+ namlen
+ 1) -
205 (unsigned long)dirent
))
207 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
208 unsafe_put_user(offset
, &dirent
->d_offset
, efault_end
);
209 unsafe_put_user(namlen
, &dirent
->d_namlen
, efault_end
);
210 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
211 user_write_access_end();
214 user_write_access_end();
216 buf
->result
= -EFAULT
;
220 SYSCALL_DEFINE3(old_readdir
, unsigned int, fd
,
221 struct old_linux_dirent __user
*, dirent
, unsigned int, count
)
224 struct fd f
= fdget_pos(fd
);
225 struct readdir_callback buf
= {
226 .ctx
.actor
= fillonedir
,
233 error
= iterate_dir(f
.file
, &buf
.ctx
);
241 #endif /* __ARCH_WANT_OLD_READDIR */
/*
 * New, all-improved, singing, dancing, iBCS2-compliant getdents()
 * interface.
 */
/*
 * User-space record format produced by getdents().
 *
 * filldir() stores the entry type in the last byte of each record
 * (at record start + d_reclen - 1), after the NUL-terminated name.
 *
 * NOTE(review): restored from a damaged copy; only 'd_reclen' was
 * visible in the struct there. 'd_ino', 'd_off' and 'd_name' are
 * written through by filldir() - confirm member order and types
 * against a pristine source.
 */
struct linux_dirent {
	unsigned long	d_ino;		/* inode number */
	unsigned long	d_off;		/* filled in when the next entry is emitted */
	unsigned short	d_reclen;	/* total size of this record */
	char		d_name[];	/* NUL-terminated name */
};
254 struct getdents_callback
{
255 struct dir_context ctx
;
256 struct linux_dirent __user
* current_dir
;
262 static bool filldir(struct dir_context
*ctx
, const char *name
, int namlen
,
263 loff_t offset
, u64 ino
, unsigned int d_type
)
265 struct linux_dirent __user
*dirent
, *prev
;
266 struct getdents_callback
*buf
=
267 container_of(ctx
, struct getdents_callback
, ctx
);
269 int reclen
= ALIGN(offsetof(struct linux_dirent
, d_name
) + namlen
+ 2,
273 buf
->error
= verify_dirent_name(name
, namlen
);
274 if (unlikely(buf
->error
))
276 buf
->error
= -EINVAL
; /* only used if we fail.. */
277 if (reclen
> buf
->count
)
280 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
281 buf
->error
= -EOVERFLOW
;
284 prev_reclen
= buf
->prev_reclen
;
285 if (prev_reclen
&& signal_pending(current
))
287 dirent
= buf
->current_dir
;
288 prev
= (void __user
*) dirent
- prev_reclen
;
289 if (!user_write_access_begin(prev
, reclen
+ prev_reclen
))
292 /* This might be 'dirent->d_off', but if so it will get overwritten */
293 unsafe_put_user(offset
, &prev
->d_off
, efault_end
);
294 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
295 unsafe_put_user(reclen
, &dirent
->d_reclen
, efault_end
);
296 unsafe_put_user(d_type
, (char __user
*) dirent
+ reclen
- 1, efault_end
);
297 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
298 user_write_access_end();
300 buf
->current_dir
= (void __user
*)dirent
+ reclen
;
301 buf
->prev_reclen
= reclen
;
302 buf
->count
-= reclen
;
305 user_write_access_end();
307 buf
->error
= -EFAULT
;
311 SYSCALL_DEFINE3(getdents
, unsigned int, fd
,
312 struct linux_dirent __user
*, dirent
, unsigned int, count
)
315 struct getdents_callback buf
= {
316 .ctx
.actor
= filldir
,
318 .current_dir
= dirent
326 error
= iterate_dir(f
.file
, &buf
.ctx
);
329 if (buf
.prev_reclen
) {
330 struct linux_dirent __user
* lastdirent
;
331 lastdirent
= (void __user
*)buf
.current_dir
- buf
.prev_reclen
;
333 if (put_user(buf
.ctx
.pos
, &lastdirent
->d_off
))
336 error
= count
- buf
.count
;
342 struct getdents_callback64
{
343 struct dir_context ctx
;
344 struct linux_dirent64 __user
* current_dir
;
350 static bool filldir64(struct dir_context
*ctx
, const char *name
, int namlen
,
351 loff_t offset
, u64 ino
, unsigned int d_type
)
353 struct linux_dirent64 __user
*dirent
, *prev
;
354 struct getdents_callback64
*buf
=
355 container_of(ctx
, struct getdents_callback64
, ctx
);
356 int reclen
= ALIGN(offsetof(struct linux_dirent64
, d_name
) + namlen
+ 1,
360 buf
->error
= verify_dirent_name(name
, namlen
);
361 if (unlikely(buf
->error
))
363 buf
->error
= -EINVAL
; /* only used if we fail.. */
364 if (reclen
> buf
->count
)
366 prev_reclen
= buf
->prev_reclen
;
367 if (prev_reclen
&& signal_pending(current
))
369 dirent
= buf
->current_dir
;
370 prev
= (void __user
*)dirent
- prev_reclen
;
371 if (!user_write_access_begin(prev
, reclen
+ prev_reclen
))
374 /* This might be 'dirent->d_off', but if so it will get overwritten */
375 unsafe_put_user(offset
, &prev
->d_off
, efault_end
);
376 unsafe_put_user(ino
, &dirent
->d_ino
, efault_end
);
377 unsafe_put_user(reclen
, &dirent
->d_reclen
, efault_end
);
378 unsafe_put_user(d_type
, &dirent
->d_type
, efault_end
);
379 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
380 user_write_access_end();
382 buf
->prev_reclen
= reclen
;
383 buf
->current_dir
= (void __user
*)dirent
+ reclen
;
384 buf
->count
-= reclen
;
388 user_write_access_end();
390 buf
->error
= -EFAULT
;
394 SYSCALL_DEFINE3(getdents64
, unsigned int, fd
,
395 struct linux_dirent64 __user
*, dirent
, unsigned int, count
)
398 struct getdents_callback64 buf
= {
399 .ctx
.actor
= filldir64
,
401 .current_dir
= dirent
409 error
= iterate_dir(f
.file
, &buf
.ctx
);
412 if (buf
.prev_reclen
) {
413 struct linux_dirent64 __user
* lastdirent
;
414 typeof(lastdirent
->d_off
) d_off
= buf
.ctx
.pos
;
416 lastdirent
= (void __user
*) buf
.current_dir
- buf
.prev_reclen
;
417 if (put_user(d_off
, &lastdirent
->d_off
))
420 error
= count
- buf
.count
;
427 struct compat_old_linux_dirent
{
428 compat_ulong_t d_ino
;
429 compat_ulong_t d_offset
;
430 unsigned short d_namlen
;
434 struct compat_readdir_callback
{
435 struct dir_context ctx
;
436 struct compat_old_linux_dirent __user
*dirent
;
440 static bool compat_fillonedir(struct dir_context
*ctx
, const char *name
,
441 int namlen
, loff_t offset
, u64 ino
,
444 struct compat_readdir_callback
*buf
=
445 container_of(ctx
, struct compat_readdir_callback
, ctx
);
446 struct compat_old_linux_dirent __user
*dirent
;
447 compat_ulong_t d_ino
;
451 buf
->result
= verify_dirent_name(name
, namlen
);
455 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
456 buf
->result
= -EOVERFLOW
;
460 dirent
= buf
->dirent
;
461 if (!user_write_access_begin(dirent
,
462 (unsigned long)(dirent
->d_name
+ namlen
+ 1) -
463 (unsigned long)dirent
))
465 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
466 unsafe_put_user(offset
, &dirent
->d_offset
, efault_end
);
467 unsafe_put_user(namlen
, &dirent
->d_namlen
, efault_end
);
468 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
469 user_write_access_end();
472 user_write_access_end();
474 buf
->result
= -EFAULT
;
478 COMPAT_SYSCALL_DEFINE3(old_readdir
, unsigned int, fd
,
479 struct compat_old_linux_dirent __user
*, dirent
, unsigned int, count
)
482 struct fd f
= fdget_pos(fd
);
483 struct compat_readdir_callback buf
= {
484 .ctx
.actor
= compat_fillonedir
,
491 error
= iterate_dir(f
.file
, &buf
.ctx
);
499 struct compat_linux_dirent
{
500 compat_ulong_t d_ino
;
501 compat_ulong_t d_off
;
502 unsigned short d_reclen
;
506 struct compat_getdents_callback
{
507 struct dir_context ctx
;
508 struct compat_linux_dirent __user
*current_dir
;
514 static bool compat_filldir(struct dir_context
*ctx
, const char *name
, int namlen
,
515 loff_t offset
, u64 ino
, unsigned int d_type
)
517 struct compat_linux_dirent __user
*dirent
, *prev
;
518 struct compat_getdents_callback
*buf
=
519 container_of(ctx
, struct compat_getdents_callback
, ctx
);
520 compat_ulong_t d_ino
;
521 int reclen
= ALIGN(offsetof(struct compat_linux_dirent
, d_name
) +
522 namlen
+ 2, sizeof(compat_long_t
));
525 buf
->error
= verify_dirent_name(name
, namlen
);
526 if (unlikely(buf
->error
))
528 buf
->error
= -EINVAL
; /* only used if we fail.. */
529 if (reclen
> buf
->count
)
532 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
533 buf
->error
= -EOVERFLOW
;
536 prev_reclen
= buf
->prev_reclen
;
537 if (prev_reclen
&& signal_pending(current
))
539 dirent
= buf
->current_dir
;
540 prev
= (void __user
*) dirent
- prev_reclen
;
541 if (!user_write_access_begin(prev
, reclen
+ prev_reclen
))
544 unsafe_put_user(offset
, &prev
->d_off
, efault_end
);
545 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
546 unsafe_put_user(reclen
, &dirent
->d_reclen
, efault_end
);
547 unsafe_put_user(d_type
, (char __user
*) dirent
+ reclen
- 1, efault_end
);
548 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
549 user_write_access_end();
551 buf
->prev_reclen
= reclen
;
552 buf
->current_dir
= (void __user
*)dirent
+ reclen
;
553 buf
->count
-= reclen
;
556 user_write_access_end();
558 buf
->error
= -EFAULT
;
562 COMPAT_SYSCALL_DEFINE3(getdents
, unsigned int, fd
,
563 struct compat_linux_dirent __user
*, dirent
, unsigned int, count
)
566 struct compat_getdents_callback buf
= {
567 .ctx
.actor
= compat_filldir
,
568 .current_dir
= dirent
,
577 error
= iterate_dir(f
.file
, &buf
.ctx
);
580 if (buf
.prev_reclen
) {
581 struct compat_linux_dirent __user
* lastdirent
;
582 lastdirent
= (void __user
*)buf
.current_dir
- buf
.prev_reclen
;
584 if (put_user(buf
.ctx
.pos
, &lastdirent
->d_off
))
587 error
= count
- buf
.count
;