Major new features:
- [Add new features here]
+* On Linux, the mseal function has been added. It allows sealing memory
+ mappings to prevent further changes during process execution, such as
+ changing protection permissions, unmapping, moving to another location,
+ or shrinking the size.
Deprecated and removed features, and other changes affecting compatibility:
of the function requires that it is only used with memory regions
returned by @code{mmap} or @code{mmap64}.
+@deftypefun int mseal (void *@var{address}, size_t @var{length}, unsigned long @var{flags})
+@standards{Linux, sys/mman.h}
+@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
+
+A successful call to the @code{mseal} function protects the memory
+range of @var{length} bytes starting at @var{address}, previously
+allocated with @code{mmap} or @code{mremap}, against further metadata
+changes such as:
+
+@itemize @bullet
+@item
+Unmapping, moving to another location, extending or shrinking the size,
+via @code{munmap} and @code{mremap}.
+
+@item
+Moving or expanding a different VMA into the current location, via
+@code{mremap}.
+
+@item
+Modifying the memory range with @code{mmap} along with flag @code{MAP_FIXED}.
+
+@item
+Changing the protection flags with @code{mprotect} or @code{pkey_mprotect}.
+Also, for certain destructive @code{madvise} behaviours (@code{MADV_DONTNEED},
+@code{MADV_FREE}, @code{MADV_DONTNEED_LOCKED}, and @code{MADV_WIPEONFORK}),
+@code{mseal} only blocks the operation if the protection key associated with
+the memory denies write.
+
+@item
+Destructive behaviors on anonymous memory, such as @code{madvise} with
+@code{MADV_DONTNEED}.
+@end itemize
+
+The @var{address} must lie within a virtual memory range allocated by
+@code{mmap} or @code{mremap}, and it must be page aligned. The end address
+(@var{address} plus @var{length}) must be within an allocated virtual memory
+range. There must be no unallocated memory between the start and end of the
+address range.
+
+The @var{flags} argument is currently unused.
+
+The @code{mseal} function returns @math{0} on success and @math{-1} on
+failure.
+
+The following @code{errno} error conditions are defined for this
+function:
+
+@table @code
+@item EPERM
+The system blocked the operation, and the given address range is unmodified
+without a partial update. This error is also returned when @code{mseal}
+is issued on a 32-bit CPU (sealing is currently supported only on
+64-bit CPUs, although 32-bit binaries running on a 64-bit kernel are
+supported).
+
+@item ENOMEM
+Either the @var{address} is not allocated, or the end address is not within the
+allocation, or there is unallocated memory between the start and end addresses.
+
+@item ENOSYS
+The kernel does not support the @code{mseal} syscall.
+
+@strong{NB:} Memory sealing changes the lifetime of a mapping: sealed
+memory cannot be unmapped until the process terminates or replaces the
+process image through the @code{execve} function. Sealed mappings are
+inherited through @code{fork}.
+
+@end table
+@end deftypefun
+
@subsection Memory Protection Keys
@cindex memory protection key
tst-misalign-clone \
tst-mlock2 \
tst-mount \
+ tst-mseal \
+ tst-mseal-pkey \
tst-ntp_adjtime \
tst-ntp_gettime \
tst-ntp_gettimex \
sched_getattr;
sched_setattr;
}
+ GLIBC_2.42 {
+ mseal;
+ }
GLIBC_PRIVATE {
# functions used in other libraries
__syscall_rt_sigqueueinfo;
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.4 xprt_unregister F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 xprt_unregister F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
range. */
int pkey_mprotect (void *__addr, size_t __len, int __prot, int __pkey) __THROW;
+/* Seal the address range to avoid further modifications, such as remap to
+ shrink or expand the VMA, change protection permission with mprotect,
+ unmap with munmap, destructive semantic such as madvise with
+ MADV_DONTNEED.
+ The address range must be a valid VMA, without any gap (unallocated memory)
+ between start and end, and ADDR must be page aligned (LEN will be page
+ aligned implicitly). FLAGS is currently unused.
+ Returns 0 on success and -1 on failure, with errno set to indicate the
+ error. */
+int mseal (void *__addr, size_t __len, unsigned long __flags) __THROW;
+
__END_DECLS
#endif /* __USE_GNU */
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.41 cacheflush F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
# define __ASSUME_FCHMODAT2 0
#endif
+/* The mseal system call was introduced across all architectures in Linux 6.10
+ (although only supported on 64-bit CPUs). __ASSUME_MSEAL is defined to 1
+ when __LINUX_KERNEL_VERSION is at least 0x060A00 (6.10.0), and to 0
+ otherwise. */
+#if __LINUX_KERNEL_VERSION >= 0x060A00
+# define __ASSUME_MSEAL 1
+#else
+# define __ASSUME_MSEAL 0
+#endif
+
#endif /* kernel-features.h */
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.4 xprt_unregister F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.40 swapcontext F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.40 __riscv_hwprobe F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.40 __riscv_hwprobe F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
mount EXTRA mount i:sssUp __mount mount
mount_setattr EXTRA mount_setattr i:isUpU mount_setattr
move_mount EXTRA move_mount i:isisU move_mount
+mseal EXTRA mseal i:bUU __mseal mseal
munlock - munlock i:aU munlock
munlockall - munlockall i: munlockall
nfsservctl EXTRA nfsservctl i:ipp __compat_nfsservctl nfsservctl@GLIBC_2.0:GLIBC_2.28
--- /dev/null
+/* Basic tests for mseal and pkey.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <support/check.h>
+#include <support/xunistd.h>
+/* Check how mseal interacts with memory protection keys. A page tagged
+ with a freshly allocated key is sealed, and the test then verifies which
+ operations succeed or fail with EPERM depending on the access rights set
+ with pkey_set. The initial mseal call is expected to fail: ENOSYS or
+ EPERM marks the test as unsupported, any other error must be EINVAL.
+ Returns 0; failed checks are reported through the support macros. */
+static int
+do_test (void)
+{
+ TEST_VERIFY_EXIT (mseal (MAP_FAILED, 0, 0) == -1);
+ if (errno == ENOSYS || errno == EPERM)
+ FAIL_UNSUPPORTED ("kernel does not support mseal");
+ TEST_COMPARE (errno, EINVAL);
+ /* Allocate a protection key. ENOSYS, EINVAL and ENOSPC mark the test
+ as unsupported; any other failure aborts the test. */
+ int key = pkey_alloc (0, 0);
+ if (key < 0)
+ {
+ if (errno == ENOSYS)
+ FAIL_UNSUPPORTED
+ ("kernel does not support memory protection keys");
+ if (errno == EINVAL)
+ FAIL_UNSUPPORTED
+ ("CPU does not support memory protection keys: %m");
+ if (errno == ENOSPC)
+ FAIL_UNSUPPORTED
+ ("no keys available or kernel does not support memory"
+ " protection keys");
+ FAIL_EXIT1 ("pkey_alloc: %m");
+ }
+
+ long pagesize = xsysconf (_SC_PAGESIZE);
+ /* Map one private anonymous read-write page, tag it with the key and
+ then seal it. */
+ void *page = xmmap (NULL, pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1);
+
+ TEST_COMPARE (pkey_mprotect (page, pagesize, PROT_READ | PROT_WRITE,
+ key), 0);
+
+ TEST_VERIFY_EXIT (mseal (page, pagesize, 0) == 0);
+
+ /* For certain destructive madvise behaviours (MADV_DONTNEED,
+ MADV_FREE, MADV_DONTNEED_LOCKED, and MADV_WIPEONFORK), mseal
+ only blocks the operation if the PKRU denies write. With no
+ restriction set on the key, MADV_DONTNEED is expected to succeed. */
+ TEST_VERIFY_EXIT (pkey_set (key, 0) == 0);
+ TEST_COMPARE (madvise (page, pagesize, MADV_DONTNEED), 0);
+
+ /* The other mapping operations are always blocked, regardless of
+ the PKRU state. */
+ TEST_COMPARE (pkey_mprotect (page, pagesize, PROT_READ, key), -1);
+ TEST_COMPARE (errno, EPERM);
+
+ TEST_COMPARE (mprotect (page, pagesize, PROT_READ), -1);
+ TEST_COMPARE (errno, EPERM);
+ /* With writes disabled through the key, the destructive madvise is
+ expected to be rejected as well. */
+ TEST_VERIFY_EXIT (pkey_set (key, PKEY_DISABLE_WRITE) == 0);
+ TEST_COMPARE (madvise (page, pagesize, MADV_DONTNEED), -1);
+ TEST_COMPARE (errno, EPERM);
+ /* Changing the protection and unmapping are still expected to fail. */
+ TEST_COMPARE (mprotect (page, pagesize, PROT_READ), -1);
+ TEST_COMPARE (errno, EPERM);
+ TEST_COMPARE (munmap (page, pagesize),-1);
+ TEST_COMPARE (errno, EPERM);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Basic tests for mseal.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <support/check.h>
+#include <support/xunistd.h>
+/* Basic mseal checks: argument validation (unaligned address, range that
+ contains a hole) and verification that a sealed page rejects mprotect,
+ munmap, mremap and madvise (MADV_DONTNEED) with EPERM. The initial
+ mseal call is expected to fail: ENOSYS or EPERM marks the test as
+ unsupported, any other error must be EINVAL.
+ Returns 0; failed checks are reported through the support macros. */
+static int
+do_test (void)
+{
+ TEST_VERIFY_EXIT (mseal (MAP_FAILED, 0, 0) == -1);
+ if (errno == ENOSYS || errno == EPERM)
+ FAIL_UNSUPPORTED ("kernel does not support mseal");
+ TEST_COMPARE (errno, EINVAL);
+ /* Map four read-only pages and unmap the third one, which leaves a
+ hole inside the range. */
+ size_t pagesize = getpagesize ();
+ void *p = xmmap (NULL, 4 * pagesize, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1);
+ xmunmap (p + 2 * pagesize, pagesize);
+
+ /* Unaligned address. */
+ TEST_VERIFY_EXIT (mseal (p + 1, pagesize, 0) == -1);
+ TEST_COMPARE (errno, EINVAL);
+
+ /* Length too big: the range runs into the unmapped third page. */
+ TEST_VERIFY_EXIT (mseal (p, 3 * pagesize, 0) == -1);
+ TEST_COMPARE (errno, ENOMEM);
+
+ TEST_VERIFY_EXIT (mseal (p, pagesize, 0) == 0);
+ /* Applying the same seal again should be idempotent. */
+ TEST_VERIFY_EXIT (mseal (p, pagesize, 0) == 0);
+ /* Each attempt below to modify the sealed first page is expected to
+ fail with EPERM. */
+ TEST_VERIFY_EXIT (mprotect (p, pagesize, PROT_WRITE) == -1);
+ TEST_COMPARE (errno, EPERM);
+
+ TEST_VERIFY_EXIT (munmap (p, pagesize) == -1);
+ TEST_COMPARE (errno, EPERM);
+
+ TEST_VERIFY_EXIT (mremap (p, pagesize, 2 * pagesize, 0) == MAP_FAILED);
+ TEST_COMPARE (errno, EPERM);
+
+ TEST_VERIFY_EXIT (madvise (p, pagesize, MADV_DONTNEED) == -1);
+ TEST_COMPARE (errno, EPERM);
+ /* Release the two pages that were never sealed; the sealed first page
+ is left mapped. */
+ xmunmap (p + pagesize, pagesize);
+ xmunmap (p + 3 * pagesize, pagesize);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
+GLIBC_2.42 mseal F