+2017-11-23 Florian Weimer <fweimer@redhat.com>
+
+ Linux: Add memfd_create system call wrapper
+ * sysdeps/unix/sysv/linux/Makefile [misc] (tests): Add
+ tst-memfd_create.
+ * sysdeps/unix/sysv/linux/bits/mman-linux.h [__USE_GNU]
+ (MFD_CLOEXEC, MFD_ALLOW_SEALING, MFD_HUGETLB): Define.
+ [__USE_GNU] (memfd_create): Declare.
+ * sysdeps/unix/sysv/linux/Versions (GLIBC_2.27): Add memfd_create.
+ * sysdeps/unix/sysv/linux/syscalls.list (memfd_create): Add.
+ * sysdeps/unix/sysv/linux/tst-memfd_create.c: New file.
+ * sysdeps/unix/sysv/linux/**.abilist: Update.
+ * manual/llio.texi (Memory-mapped I/O): Document memfd_create.
+
2017-11-22 Joseph Myers <joseph@codesourcery.com>
* localedata/gen-locale.sh: Fix typo in variable name.
are the same interfaces added in version 2.26 for some platforms where
this format is supported but is not the format of long double.
+* glibc now implements the memfd_create function on Linux.
+
Deprecated and removed features, and other changes affecting compatibility:
* On GNU/Linux, the obsolete Linux constant PTRACE_SEIZE_DEVEL is no longer
On failure @code{errno} is set.
@end deftypefn
+@deftypefun int memfd_create (const char *@var{name}, unsigned int @var{flags})
+@standards{Linux, sys/mman.h}
+@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
+The @code{memfd_create} function returns a file descriptor which can be
+used to create memory mappings using the @code{mmap} function. It is
+similar to the @code{shm_open} function in the sense that these mappings
+are not backed by actual files. However, the descriptor returned by
+@code{memfd_create} does not correspond to a named object; the
+@var{name} argument is used for debugging purposes only (e.g., it will
+appear in @file{/proc}), and separate invocations of @code{memfd_create}
+with the same @var{name} will not return descriptors for the same region
+of memory. The descriptor can also be used to create alias mappings
+within the same process.
+
+The descriptor initially refers to a zero-length file. Before mappings
+can be created which are backed by memory, the file size needs to be
+increased with the @code{ftruncate} function. @xref{File Size}.
+
+The @var{flags} argument can be a combination of the following flags:
+
+@vtable @code
+@item MFD_CLOEXEC
+@standards{Linux, sys/mman.h}
+The descriptor is created with the @code{O_CLOEXEC} flag.
+
+@item MFD_ALLOW_SEALING
+@standards{Linux, sys/mman.h}
+The descriptor supports the addition of seals using the @code{fcntl}
+function.
+
+@item MFD_HUGETLB
+@standards{Linux, sys/mman.h}
+This requests that mappings created using the returned file descriptor
+use a larger page size. See @code{MAP_HUGETLB} above for details.
+
+This flag is incompatible with @code{MFD_ALLOW_SEALING}.
+@end vtable
+
+@code{memfd_create} returns a file descriptor on success, and @math{-1}
+on failure.
+
+The following @code{errno} error conditions are defined for this
+function:
+
+@table @code
+@item EINVAL
+An invalid combination is specified in @var{flags}, or @var{name} is
+too long.
+
+@item EFAULT
+The @var{name} argument does not point to a string.
+
+@item EMFILE
+The operation would exceed the file descriptor limit for this process.
+
+@item ENFILE
+The operation would exceed the system-wide file descriptor limit.
+
+@item ENOMEM
+There is not enough memory for the operation.
+@end table
+@end deftypefun
+
@node Waiting for I/O
@section Waiting for Input or Output
@cindex waiting for input or output
tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
- test-errno-linux
+ test-errno-linux tst-memfd_create
# Generate the list of SYS_* macros for the system calls (__NR_*
# macros). The file syscall-names.list contains all possible system
GLIBC_2.15 {
process_vm_readv; process_vm_writev;
}
+ GLIBC_2.27 {
+ memfd_create;
+ }
GLIBC_PRIVATE {
# functions used in other libraries
__syscall_rt_sigqueueinfo;
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.4 GLIBC_2.4 A
GLIBC_2.4 _Exit F
GLIBC_2.4 _IO_2_1_stderr_ D 0xa0
# define MCL_ONFAULT 4 /* Lock all pages that are
faulted in. */
#endif
+
+#ifdef __USE_GNU
+/* Flags for memfd_create.  The flags argument is a combination of
+   these values; MFD_HUGETLB is documented as incompatible with
+   MFD_ALLOW_SEALING. */
+# define MFD_CLOEXEC 1U /* Create the descriptor with O_CLOEXEC. */
+# define MFD_ALLOW_SEALING 2U /* Allow seals to be added using fcntl. */
+# define MFD_HUGETLB 4U /* Request a larger page size for mappings. */
+
+__BEGIN_DECLS
+
+/* Create a new memory file descriptor. NAME is a name for debugging.
+ FLAGS is a combination of the MFD_* constants.  Returns the new
+ file descriptor on success, and -1 on failure. */
+int memfd_create (const char *__name, unsigned int __flags) __THROW;
+
+__END_DECLS
+
+#endif /* __USE_GNU */
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.4 GLIBC_2.4 A
GLIBC_2.4 _Exit F
GLIBC_2.4 _IO_2_1_stderr_ D 0x98
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 _Exit F
GLIBC_2.3 _IO_2_1_stderr_ D 0xe0
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F
process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv
process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev
+memfd_create EXTRA memfd_create i:si memfd_create
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
--- /dev/null
+/* Test for the memfd_create system call.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/xunistd.h>
+#include <sys/mman.h>
+
+/* Fetch the descriptor flags of FD with fcntl (F_GETFD) and report
+   whether FD_CLOEXEC is among them.  A negative fcntl result is
+   flagged through TEST_VERIFY.  */
+static bool
+is_cloexec (int fd)
+{
+  int flags = fcntl (fd, F_GETFD);
+  TEST_VERIFY (flags >= 0);
+  if (flags & FD_CLOEXEC)
+    return true;
+  return false;
+}
+
+/* Return the seals set on FD, as reported by fcntl with F_GET_SEALS.
+   A negative fcntl result is flagged through TEST_VERIFY; the value
+   obtained from fcntl is what gets handed back to the caller. */
+static int
+get_seals (int fd)
+{
+ int flags = fcntl (fd, F_GET_SEALS);
+ TEST_VERIFY (flags >= 0);
+ return flags;
+}
+
+/* Report whether F_SEAL_SEAL is among the seals that get_seals
+   returns for FD.  */
+static bool
+is_sealed (int fd)
+{
+  int seals = get_seals (fd);
+  return (seals & F_SEAL_SEAL) != 0;
+}
+
+/* Exercise memfd_create with every combination of MFD_CLOEXEC and
+   MFD_ALLOW_SEALING.  For each descriptor obtained, check the link
+   target under /proc/self/fd, the close-on-exec state and the seal
+   state, and (when sealing is allowed) that seals can be added.
+   Returns EXIT_UNSUPPORTED if memfd_create failed with ENOSYS, and 0
+   otherwise.  */
+static int
+do_test (void)
+{
+  /* Initialized by the first call to memfd_create to 0 (memfd_create
+     unsupported) or 1 (memfd_create is implemented in the kernel).
+     Subsequent iterations check that the success/failure state is
+     consistent.  */
+  int supported = -1;
+
+  for (int do_cloexec = 0; do_cloexec < 2; ++do_cloexec)
+    for (int do_sealing = 0; do_sealing < 2; ++do_sealing)
+      {
+        int flags = 0;
+        if (do_cloexec)
+          flags |= MFD_CLOEXEC;
+        if (do_sealing)
+          flags |= MFD_ALLOW_SEALING;
+        if (test_verbose > 0)
+          printf ("info: memfd_create with flags=0x%x\n", flags);
+        int fd = memfd_create ("tst-memfd_create", flags);
+        if (fd < 0)
+          {
+            if (errno == ENOSYS)
+              {
+                if (supported < 0)
+                  {
+                    printf ("warning: memfd_create is unsupported\n");
+                    supported = 0;
+                    continue;
+                  }
+                /* ENOSYS after an earlier success would be
+                   inconsistent.  */
+                TEST_VERIFY (supported == 0);
+                continue;
+              }
+            else
+              FAIL_EXIT1 ("memfd_create: %m");
+          }
+        if (supported < 0)
+          supported = 1;
+        /* Success after an earlier ENOSYS would be inconsistent.  */
+        TEST_VERIFY (supported > 0);
+
+        char *fd_path = xasprintf ("/proc/self/fd/%d", fd);
+        char *link = xreadlink (fd_path);
+        if (test_verbose > 0)
+          printf ("info: memfd link: %s\n", link);
+        /* strcmp returns 0 on a match, so the result must be compared
+           against 0; passing the bare strcmp result to TEST_VERIFY
+           succeeds for every non-matching string and checks nothing.
+           The kernel reports the link target of a memfd with a
+           leading slash.  */
+        TEST_VERIFY (strcmp (link, "/memfd:tst-memfd_create (deleted)")
+                     == 0);
+        TEST_VERIFY (is_cloexec (fd) == do_cloexec);
+        /* Without MFD_ALLOW_SEALING the descriptor is expected to
+           start out with F_SEAL_SEAL already set.  */
+        TEST_VERIFY (is_sealed (fd) == !do_sealing);
+        if (do_sealing)
+          {
+            /* Adding an ordinary seal must not set F_SEAL_SEAL; only
+               the explicit F_SEAL_SEAL request below does.  */
+            TEST_VERIFY (fcntl (fd, F_ADD_SEALS, F_SEAL_WRITE) == 0);
+            TEST_VERIFY (!is_sealed (fd));
+            TEST_VERIFY (get_seals (fd) & F_SEAL_WRITE);
+            TEST_VERIFY (fcntl (fd, F_ADD_SEALS, F_SEAL_SEAL) == 0);
+            TEST_VERIFY (is_sealed (fd));
+          }
+        xclose (fd);
+        free (fd_path);
+        free (link);
+      }
+
+  if (supported == 0)
+    return EXIT_UNSUPPORTED;
+  return 0;
+}
+
+#include <support/test-driver.c>
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
+GLIBC_2.27 memfd_create F