return flags & (O_ACCMODE_STRICT | extra_flags); /* return the flags variable, but remove the noise */
}
-int read_nr_open(void) {
+unsigned read_nr_open(void) {
_cleanup_free_ char *nr_open = NULL;
int r;
if (r < 0)
log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
else {
- int v;
+ unsigned v;
- r = safe_atoi(nr_open, &v);
+ r = safe_atou(nr_open, &v);
if (r < 0)
log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open);
else
}
/* If we fail, fall back to the hard-coded kernel limit of 1024 * 1024. */
- return 1024 * 1024;
+ return NR_OPEN_DEFAULT;
}
int fd_get_diskseq(int fd, uint64_t *ret) {
* around the problems with musl's definition. */
#define O_ACCMODE_STRICT (O_RDONLY|O_WRONLY|O_RDWR)
+/* The default, minimum, and maximum values of /proc/sys/fs/nr_open. See kernel's fs/file.c.
+ * These values have been unchanged since kernel-2.6.26:
+ * https://github.com/torvalds/linux/commit/eceea0b3df05ed262ae32e0c6340cc7a3626632d */
+#define NR_OPEN_DEFAULT ((unsigned) (1024 * 1024))
+#define NR_OPEN_MINIMUM ((unsigned) (sizeof(long) * 8))
+#define NR_OPEN_MAXIMUM ((unsigned) (CONST_MIN((size_t) INT_MAX, SIZE_MAX / __SIZEOF_POINTER__) & ~(sizeof(long) * 8 - 1)))
+
int close_nointr(int fd);
int safe_close(int fd);
void safe_close_pair(int p[static 2]);
return fd_verify_safe_flags_full(fd, 0);
}
-int read_nr_open(void);
+unsigned read_nr_open(void);
int fd_get_diskseq(int fd, uint64_t *ret);
int path_is_root_at(int dir_fd, const char *path);
}
static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
- int r, nr_open;
+ int r;
assert(p);
assert(f);
assert(fds);
- nr_open = read_nr_open();
- if (nr_open < 3)
- nr_open = HIGH_RLIMIT_NOFILE;
- assert(nr_open > 0); /* For compilers/static analyzers */
+ unsigned nr_open = MAX(read_nr_open(), NR_OPEN_MINIMUM);
for (;;) {
_cleanup_free_ char *l = NULL;
if (r < 0)
return r;
- if (p->n_socket_fds > (size_t) nr_open)
+ if (p->n_socket_fds > nr_open)
return -EINVAL; /* too many, someone is playing games with us */
} else if ((val = startswith(l, "exec-parameters-n-storage-fds="))) {
if (p->fds)
if (r < 0)
return r;
- if (p->n_storage_fds > (size_t) nr_open)
+ if (p->n_storage_fds > nr_open)
return -EINVAL; /* too many, someone is playing games with us */
} else if ((val = startswith(l, "exec-parameters-n-extra-fds="))) {
if (p->fds)
if (r < 0)
return r;
- if (p->n_extra_fds > (size_t) nr_open)
+ if (p->n_extra_fds > nr_open)
return -EINVAL; /* too many, someone is playing games with us */
} else if ((val = startswith(l, "exec-parameters-fds="))) {
if (p->n_socket_fds + p->n_storage_fds + p->n_extra_fds == 0)
SYNTHETIC_ERRNO(EINVAL),
"Got exec-parameters-fds= without "
"prior exec-parameters-n-socket-fds= or exec-parameters-n-storage-fds= or exec-parameters-n-extra-fds=");
- if (p->n_socket_fds + p->n_storage_fds + p->n_extra_fds > (size_t) nr_open)
+ if (p->n_socket_fds + p->n_storage_fds + p->n_extra_fds > nr_open)
return -EINVAL; /* too many, someone is playing games with us */
if (p->fds)
#endif
#if BUMP_PROC_SYS_FS_NR_OPEN
- int v = INT_MAX;
+ /* The kernel enforces maximum and minimum values on the fs.nr_open, but they are not directly
+ * exposed, but hardcoded in fs/file.c. Hopefully, these values will not be changed, but not sure.
+ * Let's first try the hardcoded maximum value, and if it does not work, try the half of it. */
- /* Argh! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know
- * what they are. The expression by which the maximum is determined is dependent on the architecture,
- * and is something we don't really want to copy to userspace, as it is dependent on implementation
- * details of the kernel. Since the kernel doesn't expose the maximum value to us, we can only try
- * and hope. Hence, let's start with INT_MAX, and then keep halving the value until we find one that
- * works. Ugly? Yes, absolutely, but kernel APIs are kernel APIs, so what do can we do... 🤯 */
-
- for (;;) {
- int k;
-
- v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
- if (v < 1024) {
- log_warning("Can't bump fs.nr_open, value too small.");
- break;
- }
-
- k = read_nr_open();
- if (k < 0) {
- log_error_errno(k, "Failed to read fs.nr_open: %m");
- break;
- }
+ for (unsigned v = NR_OPEN_MAXIMUM; v >= NR_OPEN_MINIMUM; v /= 2) {
+ unsigned k = read_nr_open();
if (k >= v) { /* Already larger */
log_debug("Skipping bump, value is already larger.");
break;
}
- r = sysctl_writef("fs/nr_open", "%i", v);
+ r = sysctl_writef("fs/nr_open", "%u", v);
if (r == -EINVAL) {
- log_debug("Couldn't write fs.nr_open as %i, halving it.", v);
- v /= 2;
+ log_debug("Couldn't write fs.nr_open as %u, halving it.", v);
continue;
}
if (r < 0) {
break;
}
- log_debug("Successfully bumped fs.nr_open to %i", v);
+ log_debug("Successfully bumped fs.nr_open to %u", v);
break;
}
#endif
static int bump_rlimit_nofile(const struct rlimit *saved_rlimit) {
struct rlimit new_rlimit;
- int r, nr;
+ int r;
/* Get the underlying absolute limit the kernel enforces */
- nr = read_nr_open();
+ unsigned nr = read_nr_open();
/* Calculate the new limits to use for us. Never lower from what we inherited. */
new_rlimit = (struct rlimit) {
* (and thus use poll()/epoll instead of select(), the way everybody should) can
* explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
* we pass. */
- if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) {
- int nr;
-
- /* Get the underlying absolute limit the kernel enforces */
- nr = read_nr_open();
-
- rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
- }
+ if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
+ rl->rlim_max = MIN((rlim_t) read_nr_open(), MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
/* If for some reason we were invoked with a soft limit above 1024 (which should never
* happen!, but who knows what we get passed in from pam_limit when invoked as --user
}
TEST(read_nr_open) {
- log_info("nr-open: %i", read_nr_open());
+ log_info("nr-open: %u", read_nr_open());
}
static size_t validate_fds(