git.ipfire.org Git - thirdparty/man-pages.git/commitdiff
man/man7/pathname.7: Add file documenting pathnames
author Jason Yundt <jason@jasonyundt.email>
Tue, 21 Jan 2025 13:35:20 +0000 (08:35 -0500)
committer Alejandro Colomar <alx@kernel.org>
Tue, 28 Jan 2025 12:24:50 +0000 (13:24 +0100)
The goal of this new manual page is to help people create programs that
do the right thing even in the face of unusual paths.  The information
that I used to create this new manual page came from these sources:

Link: <https://unix.stackexchange.com/a/39179/316181>
Link: <https://sourceware.org/pipermail/libc-help/2024-August/006737.html>
Link: <https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/fs/ext4/ext4.h?h=v6.12.9#n2288>
Link: <man:unix(7)>
Link: <https://unix.stackexchange.com/q/92426/316181>
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Jason Yundt <jason@jasonyundt.email>
Message-ID: <20250121133523.24606-1-jason@jasonyundt.email>
Cc: "G. Branden Robinson" <branden@debian.org>
[alx: wfix, ffix, and other tweaks]
Signed-off-by: Alejandro Colomar <alx@kernel.org>
man/man7/pathname.7 [new file with mode: 0644]

diff --git a/man/man7/pathname.7 b/man/man7/pathname.7
new file mode 100644 (file)
index 0000000..93d938c
--- /dev/null
@@ -0,0 +1,159 @@
+.\" Copyright 2025, Jason Yundt <jason@jasonyundt.email>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH pathname 7 (date) "Linux man-pages (unreleased)"
+.SH NAME
+pathname,
+filename
+\-
+how pathnames are encoded and interpreted
+.SH DESCRIPTION
+Some system calls allow you to pass a pathname as a parameter.
+When writing code that deals with pathnames,
+there are kernel-space requirements that you must comply with,
+and user-space requirements that you should comply with.
+.P
+The kernel stores pathnames as C strings,
+that is,
+sequences of non-null bytes terminated by a null byte.
+The kernel has a few general rules that apply to all pathnames:
+.IP \[bu] 3
+The last byte in the sequence needs to be a null byte.
+.IP \[bu]
+Any other bytes in the sequence need to be non-null bytes.
+.IP \[bu]
+A 0x2F byte
+.RI ( \[aq]/\[aq] )
+is always interpreted as a directory separator,
+and cannot be part of a filename.
+.IP \[bu]
+A pathname can be at most
+.B PATH_MAX
+bytes long
+(see
+.BR limits.h (0p)).
+A pathname that's longer than
+.B PATH_MAX
+bytes
+can be split into multiple smaller pathnames
+and opened piecewise using
+.BR openat (2).
+.IP \[bu]
+A filename can be at most a certain number of bytes long.
+The number is filesystem-specific
+(see
+.B _PC_NAME_MAX
+in
+.BR fpathconf (3)).
+For maximum portability,
+programs should be able to
+handle filenames that are as long as the relevant filesystems will allow.
+For maximum portability,
+programs and users should
+limit the length of their own filenames to
+.B NAME_MAX
+bytes
+(see
+.BR limits.h (0p)).
+.P
+Some filesystems or APIs may apply further restrictions,
+such as requiring shorter filenames,
+or restricting the allowed characters in a filename.
+.P
+User-space programs treat pathnames differently.
+They typically expect pathnames to
+use a consistent character encoding.
+For maximum interoperability,
+programs should use
+.BR nl_langinfo (3)
+to determine the current locale's codeset.
+Pathnames should be encoded and decoded using the current locale's codeset
+in order to help prevent mojibake.
+For maximum interoperability,
+programs and users should also
+limit the characters that they use for their own pathnames to
+characters in the POSIX
+.UR https://pubs.opengroup.org/\:onlinepubs/\:9799919799/\:basedefs/\:V1_chap03.html#tag_03_265
+Portable Filename Character Set
+.UE .
+.SH EXAMPLES
+The following program demonstrates
+how to ensure that a pathname uses the proper encoding.
+The program starts with a UTF-32-encoded pathname.
+It then calls
+.BR nl_langinfo (3)
+in order to determine what the current locale's codeset is.
+After that, it uses
+.BR iconv (3)
+to convert the UTF-32-encoded pathname into a locale-codeset-encoded pathname.
+Finally,
+the program uses the locale-codeset-encoded pathname
+to create a file that contains the message \[lq]Hello, world!\[rq].
+.SS Program source
+.\" SRC BEGIN (pathname_encoding_example.c)
+.EX
+#include <err.h>
+#include <iconv.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <uchar.h>
+\&
+#define NELEMS(a)  (sizeof(a) / sizeof(a[0]))
+\&
+int
+main(void)
+{
+    char      *locale_pathname;
+    char      *in, *out;
+    FILE      *fp;
+    size_t    size;
+    size_t    inbytes, outbytes;
+    iconv_t   cd;
+    char32_t  utf32_pathname[] = U"example";
+\&
+    if (setlocale(LC_ALL, "") == NULL)
+        err(EXIT_FAILURE, "setlocale");
+\&
+    size = NELEMS(utf32_pathname) * MB_CUR_MAX;
+    locale_pathname = malloc(size);
+    if (locale_pathname == NULL)
+        err(EXIT_FAILURE, "malloc");
+\&
+    cd = iconv_open(nl_langinfo(CODESET), "UTF\-32");
+    if (cd == (iconv_t)\-1)
+        err(EXIT_FAILURE, "iconv_open");
+\&
+    in = (char *) utf32_pathname;
+    inbytes = sizeof(utf32_pathname);
+    out = locale_pathname;
+    outbytes = size;
+    if (iconv(cd, &in, &inbytes, &out, &outbytes) == (size_t) \-1)
+        err(EXIT_FAILURE, "iconv");
+\&
+    if (iconv_close(cd) == \-1)
+        err(EXIT_FAILURE, "iconv_close");
+\&
+    fp = fopen(locale_pathname, "w");
+    if (fp == NULL)
+        err(EXIT_FAILURE, "fopen");
+\&
+    fputs("Hello, world!\[rs]n", fp);
+    if (fclose(fp) == EOF)
+        err(EXIT_FAILURE, "fclose");
+\&
+    free(locale_pathname);
+    exit(EXIT_SUCCESS);
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR limits.h (0p),
+.BR open (2),
+.BR fpathconf (3),
+.BR iconv (3),
+.BR nl_langinfo (3),
+.BR path_resolution (7),
+.BR mount (8)