</para>
<para><function>sd_event_source_get_child_pidfd()</function> retrieves the file descriptor referencing
- the watched process ("pidfd") if this functionality is available. On kernels that support the concept the
- event loop will make use of pidfds to watch child processes, regardless of whether the individual event sources
- are allocated via <function>sd_event_add_child()</function> or
- <function>sd_event_add_child_pidfd()</function>. If the latter call was used to allocate the event
- source, this function returns the file descriptor used for allocation. On kernels that do not support the
- pidfd concept this function will fail with <constant>EOPNOTSUPP</constant>. This call takes the event
- source object as the <parameter>source</parameter> parameter and returns the numeric file descriptor.
+ the watched process ("pidfd"). The event loop internally makes use of pidfds to watch child processes,
+ regardless of whether the individual event sources are allocated via <function>sd_event_add_child()</function>
+ or <function>sd_event_add_child_pidfd()</function>. If the latter call was used to allocate the event
+ source, this function returns the original file descriptor used for allocation. This call takes
+ the event source object as the <parameter>source</parameter> parameter and returns the numeric file descriptor.
</para>
<para><function>sd_event_source_get_child_pidfd_own()</function> may be used to query whether the pidfd
exist. This behaviour might change eventually.</para>
<para><function>sd_event_source_send_child_signal()</function> may be used to send a UNIX signal to the
- watched process. If the pidfd concept is supported in the kernel, this is implemented via <citerefentry
- project='man-pages'><refentrytitle>pidfd_send_signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- and otherwise via <citerefentry
- project='man-pages'><refentrytitle>rt_sigqueueinfo</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- (or via <citerefentry
- project='man-pages'><refentrytitle>kill</refentrytitle><manvolnum>2</manvolnum></citerefentry> in case
- <parameter>info</parameter> is <constant>NULL</constant>). The specified parameters match those of these
- underlying system calls, except that the <parameter>info</parameter> is never modified (and is thus
- declared constant). Like for the underlying system calls, the <parameter>flags</parameter> parameter
+ watched process via <citerefentry
+ project='man-pages'><refentrytitle>pidfd_send_signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>.
+ The specified parameters match those of the underlying system call, except that the <parameter>info</parameter>
+ is never modified (and is thus declared constant). Like for the underlying system call, the <parameter>flags</parameter> parameter
currently must be zero.</para>
</refsect1>
<listitem><para>The passed event source is not a child process event source.</para></listitem>
</varlistentry>
- <varlistentry>
- <term><constant>-EOPNOTSUPP</constant></term>
-
- <listitem><para>A pidfd was requested but the kernel does not support this concept.</para>
-
- <xi:include href="version-info.xml" xpointer="v245"/></listitem>
- </varlistentry>
-
</variablelist>
</refsect2>
</refsect1>
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
-static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
+static bool EVENT_SOURCE_WATCH_PIDFD(const sd_event_source *s) {
/* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
return s &&
s->type == SOURCE_CHILD &&
- s->child.pidfd >= 0 &&
s->child.options == WEXITED;
}
/* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
if (s->child.process_owned) {
+ assert(s->child.pid > 0);
+ assert(s->child.pidfd >= 0);
if (!s->child.exited) {
- if (s->child.pidfd >= 0)
- r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
- else
- r = RET_NERRNO(kill(s->child.pid, SIGKILL));
+ r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
if (r < 0 && r != -ESRCH)
log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m",
s->child.pid);
siginfo_t si = {};
/* Reap the child if we can */
- if (s->child.pidfd >= 0)
- (void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
- else
- (void) waitid(P_PID, s->child.pid, &si, WEXITED);
+ (void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
}
}
case SOURCE_CHILD:
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
- /* yes, we have pidfd */
+ /* yes, we can rely on pidfd */
r = source_child_pidfd_register(s, enabled);
if (r < 0)
return r;
} else {
- /* no pidfd, or something other to watch for than WEXITED */
+ /* something other to watch for than WEXITED */
r = event_make_signal_data(s->event, SIGCHLD, NULL);
if (r < 0) {
assert_return(s->type == SOURCE_CHILD, -EDOM);
assert_return(!event_origin_changed(s->event), -ECHILD);
- if (s->child.pidfd < 0)
- return -EOPNOTSUPP;
-
return s->child.pidfd;
}
assert_return(s, -EINVAL);
assert_return(s->type == SOURCE_CHILD, -EDOM);
assert_return(!event_origin_changed(s->event), -ECHILD);
-
- if (s->child.pidfd < 0)
- return -EOPNOTSUPP;
+ assert(s->child.pidfd >= 0);
return s->child.pidfd_owned;
}
assert_return(s, -EINVAL);
assert_return(s->type == SOURCE_CHILD, -EDOM);
assert_return(!event_origin_changed(s->event), -ECHILD);
-
- if (s->child.pidfd < 0)
- return -EOPNOTSUPP;
+ assert(s->child.pidfd >= 0);
s->child.pidfd_owned = own;
return 0;
e->need_process_child = false;
- /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
- * for, instead of using P_ALL. This is because we only want to get child information of very
- * specific child processes, and not all of them. We might not have processed the SIGCHLD event
+ /* So, this is ugly. We iteratively invoke waitid() + WNOHANG with each child process we shall wait for,
+ * instead of using P_ALL. This is because we only want to get child information of very specific
+ * child processes, and not all of them. We might not have processed the SIGCHLD event
* of a previous invocation and we don't want to maintain a unbounded *per-child* event queue,
* hence we really don't want anything flushed out of the kernel's queue that we don't care
* about. Since this is O(n) this means that if you have a lot of processes you probably want
HASHMAP_FOREACH(s, e->child_sources) {
assert(s->type == SOURCE_CHILD);
+ assert(s->child.pidfd >= 0);
if (s->priority > threshold)
continue;
continue;
zero(s->child.siginfo);
- if (waitid(P_PID, s->child.pid, &s->child.siginfo,
+ if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo,
WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
return negative_errno();
if (zombie)
s->child.exited = true;
-
- if (!zombie && (s->child.options & WEXITED)) {
- /* If the child isn't dead then let's immediately remove the state
- * change from the queue, since there's no benefit in leaving it
- * queued. */
+ else if (s->child.options & WEXITED) {
+ /* If the child isn't dead then let's immediately remove the state change
+ * from the queue, since there's no benefit in leaving it queued. */
assert(s->child.options & (WSTOPPED|WCONTINUED));
- (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+ (void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
}
r = source_set_pending(s, true);
assert(e);
assert(s);
assert(s->type == SOURCE_CHILD);
+ assert(s->child.pidfd >= 0);
if (s->pending)
return 0;
if (!EVENT_SOURCE_WATCH_PIDFD(s))
return 0;
+ /* Note that pidfd would also generate EPOLLHUP when the process gets reaped. But at this point we
+ * only permit EPOLLIN, under the assumption that upon EPOLLHUP the child source should already
+ * be set to pending, and we would have returned early above. */
+ assert(!s->child.exited);
+
zero(s->child.siginfo);
- if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
+ if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
return -errno;
if (s->child.siginfo.si_pid == 0)
/* Now, reap the PID for good. */
if (zombie) {
- (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+ (void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|WEXITED);
s->child.waited = true;
}