]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
Merge pull request #6902 from keszybz/two-property-printing-fixes
authorLennart Poettering <lennart@poettering.net>
Tue, 26 Sep 2017 16:09:23 +0000 (18:09 +0200)
committerGitHub <noreply@github.com>
Tue, 26 Sep 2017 16:09:23 +0000 (18:09 +0200)
Two property printing fixes

83 files changed:
ENVIRONMENT.md
TODO
man/systemd-system.conf.xml
man/systemd.resource-control.xml
man/systemd.slice.xml
man/systemd.special.xml
meson.build
mkosi.build
src/basic/bpf-program.c [new file with mode: 0644]
src/basic/bpf-program.h [new file with mode: 0644]
src/basic/cgroup-util.c
src/basic/in-addr-util.c
src/basic/in-addr-util.h
src/basic/io-util.h
src/basic/journal-importer.c
src/basic/log.c
src/basic/log.h
src/basic/meson.build
src/basic/missing_syscall.h
src/basic/rlimit-util.c
src/basic/socket-label.c
src/basic/string-util.h
src/core/bpf-firewall.c [new file with mode: 0644]
src/core/bpf-firewall.h [new file with mode: 0644]
src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-cgroup.c
src/core/dbus-unit.c
src/core/dynamic-user.c
src/core/execute.c
src/core/ip-address-access.c [new file with mode: 0644]
src/core/ip-address-access.h [new file with mode: 0644]
src/core/job.c
src/core/load-fragment-gperf.gperf.m4
src/core/main.c
src/core/manager.c
src/core/manager.h
src/core/meson.build
src/core/mount.c
src/core/mount.h
src/core/scope.c
src/core/service.c
src/core/service.h
src/core/show-status.c
src/core/slice.c
src/core/socket.c
src/core/socket.h
src/core/swap.c
src/core/swap.h
src/core/system.conf
src/core/unit.c
src/core/unit.h
src/coredump/coredump.c
src/journal/journal-send.c
src/journal/journal-verify.c
src/journal/journald-audit.c
src/journal/journald-console.c
src/journal/journald-kmsg.c
src/journal/journald-native.c
src/journal/journald-server.c
src/journal/journald-stream.c
src/journal/journald-syslog.c
src/libsystemd-network/sd-dhcp-lease.c
src/libsystemd-network/sd-dhcp-server.c
src/network/networkd-address.c
src/network/networkd-dhcp4.c
src/nspawn/nspawn.c
src/shared/bus-unit-util.c
src/shared/firewall-util.c
src/shared/install.c
src/shared/linux/bpf.h [new file with mode: 0644]
src/shared/linux/bpf_common.h [new file with mode: 0644]
src/shared/linux/libbpf.h [new file with mode: 0644]
src/systemctl/systemctl.c
src/systemd/sd-messages.h
src/test/meson.build
src/test/test-bpf.c [new file with mode: 0644]
src/test/test-cpu-set-util.c
src/test/test-fileio.c
src/test/test-in-addr-util.c [new file with mode: 0644]
test/hwdb-test.sh
test/meson.build
test/sysv-generator-test.py

index e542d4ec6fda3bd4f07b3e1cdb81992e83e75aa0..4ae561a8924624c59ac64d161ac60a98d5d05644 100644 (file)
@@ -64,3 +64,17 @@ installed systemd tests:
 
 * `$SYSTEMD_TEST_DATA` — override the location of test data. This is useful if
   a test executable is moved to an arbitrary location.
+
+nss-systemd:
+
+* `$SYSTEMD_NSS_BYPASS_SYNTHETIC=1` — if set, `nss-systemd` won't synthesize
+  user/group records for the `root` and `nobody` users if they are missing from
+  `/etc/passwd`.
+
+* `$SYSTEMD_NSS_DYNAMIC_BYPASS=1` — if set, `nss-systemd` won't return
+  user/group records for dynamically registered service users (i.e. users
+  registered through `DynamicUser=1`).
+
+* `$SYSTEMD_NSS_BYPASS_BUS=1` — if set, `nss-systemd` won't use D-Bus to do
+  dynamic user lookups. This is primarily useful to make `nss-systemd` work
+  safely from within `dbus-daemon`.
diff --git a/TODO b/TODO
index 2de99823a86f99a3144de5f1a2c8a7f086f5aadf..b638ab95c85e72b52713f049402e4d4e659f0c73 100644 (file)
--- a/TODO
+++ b/TODO
@@ -26,6 +26,15 @@ Features:
 
 * replace all uses of fgets() + LINE_MAX by read_line()
 
+* fix logging in execute.c: extend log.c to have an optional mode where
+  log_open() is implicitly done before each log line and log_close() right
+  after. This way we don't have open fds around but logs will still
+  work. Because it is slow this mode should be used exclusively in the execute.c
+  case.
+
+* set IPAddressDeny=any on all services that shouldn't do networking (possibly
+  combined with IPAddressAllow=localhost).
+
 * dissect: when we discover squashfs, don't claim we had a "writable" partition
   in systemd-dissect
 
index 336c7a5fd114db19a390b2b9ba80b401835bcb08..81f1b1ef8d0dc60348678fa868ff95c4dba5e65e 100644 (file)
         <term><varname>DefaultBlockIOAccounting=</varname></term>
         <term><varname>DefaultMemoryAccounting=</varname></term>
         <term><varname>DefaultTasksAccounting=</varname></term>
+        <term><varname>DefaultIPAccounting=</varname></term>
 
-        <listitem><para>Configure the default resource accounting
-        settings, as configured per-unit by
-        <varname>CPUAccounting=</varname>,
-        <varname>BlockIOAccounting=</varname>,
-        <varname>MemoryAccounting=</varname> and
-        <varname>TasksAccounting=</varname>. See
+        <listitem><para>Configure the default resource accounting settings, as configured per-unit by
+        <varname>CPUAccounting=</varname>, <varname>BlockIOAccounting=</varname>, <varname>MemoryAccounting=</varname>,
+        <varname>TasksAccounting=</varname> and <varname>IPAccounting=</varname>. See
         <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-        for details on the per-unit
-        settings. <varname>DefaultTasksAccounting=</varname> defaults
-        to on, the other three settings to off.</para></listitem>
+        for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to on, the other
+        four settings to off.</para></listitem>
       </varlistentry>
 
       <varlistentry>
index bb69599f9981fe21437a054c59b45eca9f5e5a47..0c0c91608ac82d8063752f1414a07c6c8a73b4d1 100644 (file)
         </listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>IPAccounting=</varname></term>
+
+        <listitem>
+          <para>Takes a boolean argument. If true, turns on IPv4 and IPv6 network traffic accounting for packets sent
+          or received by the unit. When this option is turned on, all IPv4 and IPv6 sockets created by any process of
+          the unit are accounted for. When this option is used in socket units, it applies to all IPv4 and IPv6 sockets
+          associated with it (including both listening and connection sockets where this applies). Note that for
+          socket-activated services, this configuration setting and the accounting data of the service unit and the
+          socket unit are kept separate, and displayed separately. No propagation of the setting and the collected
+          statistics is done, in either direction. Moreover, any traffic sent or received on any of the socket unit's
+          sockets is accounted to the socket unit — and never to the service unit it might have activated, even if the
+          socket is used by it. Note that IP accounting is currently not supported for slice units, and enabling this
+          option for them has no effect. The system default for this setting may be controlled with
+          <varname>DefaultIPAccounting=</varname> in
+          <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>IPAddressAllow=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
+        <term><varname>IPAddressDeny=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
+
+        <listitem>
+          <para>Turn on address range network traffic filtering for packets sent and received over AF_INET and AF_INET6
+          sockets.  Both directives take a space separated list of IPv4 or IPv6 addresses, each optionally suffixed
+          with an address prefix length (separated by a <literal>/</literal> character). If the latter is omitted, the
+          address is considered a host address, i.e. the prefix covers the whole address (32 for IPv4, 128 for IPv6).
+          </para>
+
+          <para>The access lists configured with this option are applied to all sockets created by processes of this
+          unit (or in the case of socket units, associated with it). The lists are implicitly combined with any lists
+          configured for any of the parent slice units this unit might be a member of. By default all access lists are
+          empty. When configured the lists are enforced as follows:</para>
+
+          <itemizedlist>
+            <listitem><para>Access will be granted in case its destination/source address matches any entry in the
+            <varname>IPAddressAllow=</varname> setting.</para></listitem>
+
+            <listitem><para>Otherwise, access will be denied in case its destination/source address matches any entry
+            in the <varname>IPAddressDeny=</varname> setting.</para></listitem>
+
+            <listitem><para>Otherwise, access will be granted.</para></listitem>
+          </itemizedlist>
+
+          <para>In order to implement a whitelisting IP firewall, it is recommended to use a
+          <varname>IPAddressDeny=</varname><constant>any</constant> setting on an upper-level slice unit (such as the
+          root slice <filename>-.slice</filename> or the slice containing all system services
+          <filename>system.slice</filename> – see
+          <citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+          details on these slice units), plus individual per-service <varname>IPAddressAllow=</varname> lines
+          permitting network access to relevant services, and only them.</para>
+
+          <para>Note that for socket-activated services, the IP access list configured on the socket unit applies to
+          all sockets associated with it directly, but not to any sockets created by the ultimately activated services
+          for it. Conversely, the IP access list configured for the service is not applied to any sockets passed into
+          the service via socket activation. Thus, it is usually a good idea to replicate the IP access lists on both
+          the socket and the service unit, however it often makes sense to maintain one list more open and the other
+          one more restricted, depending on the use case.</para>
+
+          <para>If these settings are used multiple times in the same unit the specified lists are combined. If an
+          empty string is assigned to these settings the specific access list is reset and all previous settings undone.</para>
+
+          <para>In place of explicit IPv4 or IPv6 address and prefix length specifications a small set of symbolic
+          names may be used. The following names are defined:</para>
+
+          <table>
+            <title>Special address/network names</title>
+
+            <tgroup cols='3'>
+              <colspec colname='name'/>
+              <colspec colname='definition'/>
+              <colspec colname='meaning'/>
+
+              <thead>
+                <row>
+                  <entry>Symbolic Name</entry>
+                  <entry>Definition</entry>
+                  <entry>Meaning</entry>
+                </row>
+              </thead>
+
+            <tbody>
+              <row>
+                <entry><constant>any</constant></entry>
+                <entry>0.0.0.0/0 ::/0</entry>
+                <entry>Any host</entry>
+              </row>
+
+              <row>
+                <entry><constant>localhost</constant></entry>
+                <entry>127.0.0.0/8 ::1/128</entry>
+                <entry>All addresses on the local loopback</entry>
+              </row>
+
+              <row>
+                <entry><constant>link-local</constant></entry>
+                <entry>169.254.0.0/16 fe80::/64</entry>
+                <entry>All link-local IP addresses</entry>
+              </row>
+
+              <row>
+                <entry><constant>multicast</constant></entry>
+                <entry>224.0.0.0/4 ff00::/8</entry>
+                <entry>All IP multicasting addresses</entry>
+              </row>
+            </tbody>
+            </tgroup>
+          </table>
+
+          <para>Note that these settings might not be supported on some systems (for example if eBPF control group
+          support is not enabled in the underlying kernel or container manager). These settings will have no effect in
+          that case. If compatibility with such systems is desired it is hence recommended to not exclusively rely on
+          them for IP security.</para>
+        </listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>DeviceAllow=</varname></term>
 
index 67f7a934481e95db5e3f69b66caa6074002bcda4..c46ba7a2e167c026bd6e08c5abaf27bc587a00c8 100644 (file)
   <refsect1>
     <title>Description</title>
 
-    <para>A unit configuration file whose name ends in
-    <literal>.slice</literal> encodes information about a slice which
-    is a concept for hierarchically managing resources of a group of
-    processes. This management is performed by creating a node in the
-    Linux Control Group (cgroup) tree. Units that manage processes
-    (primarily scope and service units) may be assigned to a specific
-    slice. For each slice, certain resource limits may be set that
-    apply to all processes of all units contained in that
-    slice. Slices are organized hierarchically in a tree. The name of
-    the slice encodes the location in the tree. The name consists of a
-    dash-separated series of names, which describes the path to the
-    slice from the root slice. The root slice is named,
-    <filename>-.slice</filename>. Example:
-    <filename>foo-bar.slice</filename> is a slice that is located
-    within <filename>foo.slice</filename>, which in turn is located in
-    the root slice <filename>-.slice</filename>.
+    <para>A unit configuration file whose name ends in <literal>.slice</literal> encodes information about a slice
+    unit. A slice unit is a concept for hierarchically managing resources of a group of processes. This management is
+    performed by creating a node in the Linux Control Group (cgroup) tree. Units that manage processes (primarily scope
+    and service units) may be assigned to a specific slice. For each slice, certain resource limits may be set that
+    apply to all processes of all units contained in that slice. Slices are organized hierarchically in a tree. The
+    name of the slice encodes the location in the tree. The name consists of a dash-separated series of names, which
+    describes the path to the slice from the root slice. The root slice is named <filename>-.slice</filename>. Example:
+    <filename>foo-bar.slice</filename> is a slice that is located within <filename>foo.slice</filename>, which in turn
+    is located in the root slice <filename>-.slice</filename>.
     </para>
 
     <para>Note that slice units cannot be templated, nor is possible to add multiple names to a slice unit by creating
index 73e1e720e9c4c4e8f7d861088447a05627ef7e35..dc0b313b0ca65e79741c9884d8ac47800bf15a72 100644 (file)
@@ -48,8 +48,7 @@
   </refnamediv>
 
   <refsynopsisdiv><para>
-    <!-- sort alphabetically, targets first -->
-    <filename>basic.target</filename>,
+    <!-- sort alphabetically, targets first --><filename>basic.target</filename>,
     <filename>bluetooth.target</filename>,
     <filename>cryptsetup-pre.target</filename>,
     <filename>cryptsetup.target</filename>,
     <filename>time-sync.target</filename>,
     <filename>timers.target</filename>,
     <filename>umount.target</filename>,
-    <!-- slices -->
-    <filename>-.slice</filename>,
+    <!-- slices --><filename>-.slice</filename>,
     <filename>system.slice</filename>,
     <filename>user.slice</filename>,
     <filename>machine.slice</filename>,
-    <!-- the rest -->
+    <!-- the rest --><filename>-.mount</filename>,
     <filename>dbus.service</filename>,
     <filename>dbus.socket</filename>,
     <filename>display-manager.service</filename>,
+    <filename>init.scope</filename>,
     <filename>system-update-cleanup.service</filename>
   </para></refsynopsisdiv>
 
     <title>Special System Units</title>
 
     <variablelist>
+      <varlistentry>
+        <term><filename>-.mount</filename></term>
+        <listitem>
+          <para>The root mount point, i.e. the mount unit for the <filename>/</filename> path. This unit is
+          unconditionally active, during the entire time the system is up, as this mount point is where the basic
+          userspace is running from.</para>
+        </listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><filename>basic.target</filename></term>
         <listitem>
           directly.</para>
         </listitem>
       </varlistentry>
+      <varlistentry>
+        <term><filename>init.scope</filename></term>
+        <listitem>
+          <para>This scope unit is where the system and service manager (PID 1) itself resides. It is active as long as
+          the system is running.</para>
+        </listitem>
+      </varlistentry>
       <varlistentry>
         <term><filename>initrd-fs.target</filename></term>
         <listitem>
@@ -1009,17 +1024,17 @@ PartOf=graphical-session.target
   <refsect1>
     <title>Special Slice Units</title>
 
-    <para>There are four <literal>.slice</literal> units which form
-    the basis of the hierarchy for assignment of resources for
-    services, users, and virtual machines or containers.</para>
+    <para>There are four <literal>.slice</literal> units which form the basis of the hierarchy for assignment of
+    resources for services, users, and virtual machines or containers. See
+    <citerefentry><refentrytitle>systemd.slice</refentrytitle><manvolnum>5</manvolnum></citerefentry> for details about slice
+    units.</para>
 
     <variablelist>
       <varlistentry>
         <term><filename>-.slice</filename></term>
         <listitem>
-          <para>The root slice is the root of the hierarchy. It
-          usually does not contain units directly, but may be used to
-          set defaults for the whole tree.</para>
+          <para>The root slice is the root of the slice hierarchy. It usually does not contain units directly, but may
+          be used to set defaults for the whole tree.</para>
         </listitem>
       </varlistentry>
 
index 3e85442a6fefff8c63ba66b76ba5c4b1f0d38d40..d72fc6f1486e876f3e188a04539878a71551b023 100644 (file)
@@ -443,6 +443,8 @@ foreach ident : [
                                  #include <keyutils.h>'''],
         ['copy_file_range',   '''#include <sys/syscall.h>
                                  #include <unistd.h>'''],
+        ['bpf',               '''#include <sys/syscall.h>
+                                 #include <unistd.h>'''],
         ['explicit_bzero' ,   '''#include <string.h>'''],
 ]
 
index 12e88b909ca3d46b5a0adc9e6c43f7a996b5667d..92eb55b130c2c4b71c5bd18500469c2809ddae21 100755 (executable)
@@ -28,7 +28,7 @@ export LC_CTYPE=C.UTF-8
 
 [ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR"
 ninja -C "$BUILDDIR" all
-[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test
+[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
 ninja -C "$BUILDDIR" install
 
 mkdir -p "$DESTDIR"/etc
diff --git a/src/basic/bpf-program.c b/src/basic/bpf-program.c
new file mode 100644 (file)
index 0000000..ce6f9e4
--- /dev/null
@@ -0,0 +1,183 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "log.h"
+#include "missing.h"
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+
+        p = new0(BPFProgram, 1);
+        if (!p)
+                return log_oom();
+
+        p->prog_type = prog_type;
+        p->kernel_fd = -1;
+
+        *ret = p;
+        p = NULL;
+        return 0;
+}
+
+BPFProgram *bpf_program_unref(BPFProgram *p) {
+        if (!p)
+                return NULL;
+
+        safe_close(p->kernel_fd);
+        free(p->instructions);
+
+        return mfree(p);
+}
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+
+        assert(p);
+
+        if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
+                return -ENOMEM;
+
+        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
+        p->n_instructions += count;
+
+        return 0;
+}
+
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+        union bpf_attr attr;
+
+        assert(p);
+
+        if (p->kernel_fd >= 0)
+                return -EBUSY;
+
+        attr = (union bpf_attr) {
+                .prog_type = p->prog_type,
+                .insns = PTR_TO_UINT64(p->instructions),
+                .insn_cnt = p->n_instructions,
+                .license = PTR_TO_UINT64("GPL"),
+                .log_buf = PTR_TO_UINT64(log_buf),
+                .log_level = !!log_buf,
+                .log_size = log_size,
+        };
+
+        p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+        if (p->kernel_fd < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+        _cleanup_close_ int fd = -1;
+        union bpf_attr attr;
+
+        assert(p);
+        assert(type >= 0);
+        assert(path);
+
+        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        attr = (union bpf_attr) {
+                .attach_type = type,
+                .target_fd = fd,
+                .attach_bpf_fd = p->kernel_fd,
+                .attach_flags = flags,
+        };
+
+        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_program_cgroup_detach(int type, const char *path) {
+        _cleanup_close_ int fd = -1;
+        union bpf_attr attr;
+
+        assert(path);
+
+        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        attr = (union bpf_attr) {
+                .attach_type = type,
+                .target_fd = fd,
+        };
+
+        if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+        union bpf_attr attr = {
+                .map_type = type,
+                .key_size = key_size,
+                .value_size = value_size,
+                .max_entries = max_entries,
+                .map_flags = flags,
+        };
+        int fd;
+
+        fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+        if (fd < 0)
+                return -errno;
+
+        return fd;
+}
+
+int bpf_map_update_element(int fd, const void *key, void *value) {
+
+        union bpf_attr attr = {
+                .map_fd = fd,
+                .key = PTR_TO_UINT64(key),
+                .value = PTR_TO_UINT64(value),
+        };
+
+        if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+
+        union bpf_attr attr = {
+                .map_fd = fd,
+                .key = PTR_TO_UINT64(key),
+                .value = PTR_TO_UINT64(value),
+        };
+
+        if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
diff --git a/src/basic/bpf-program.h b/src/basic/bpf-program.h
new file mode 100644 (file)
index 0000000..35a41ff
--- /dev/null
@@ -0,0 +1,55 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+
+  [Except for the stuff copy/pasted from the kernel sources, see below]
+***/
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+struct BPFProgram {
+        int kernel_fd;
+        uint32_t prog_type;
+
+        size_t n_instructions;
+        size_t allocated;
+        struct bpf_insn *instructions;
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(int type, const char *path);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
index 7cd92005e368df8c61dbf559213067525274a650..f3f6a21576926e7b082722b9032f94210cc30324 100644 (file)
@@ -103,9 +103,12 @@ int cg_read_pid(FILE *f, pid_t *_pid) {
         return 1;
 }
 
-int cg_read_event(const char *controller, const char *path, const char *event,
-                  char **val)
-{
+int cg_read_event(
+                const char *controller,
+                const char *path,
+                const char *event,
+                char **val) {
+
         _cleanup_free_ char *events = NULL, *content = NULL;
         char *p, *line;
         int r;
index d52fdad3ac974c670783a8c8bd4ef00bd040b2ae..e27faba75fac9231f5060ac6f2c1f5b685397aac 100644 (file)
@@ -308,22 +308,22 @@ int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
         return 0;
 }
 
-int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret) {
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
         int r;
 
         assert(s);
 
         r = in_addr_from_string(AF_INET, s, ret);
         if (r >= 0) {
-                if (family)
-                        *family = AF_INET;
+                if (ret_family)
+                        *ret_family = AF_INET;
                 return 0;
         }
 
         r = in_addr_from_string(AF_INET6, s, ret);
         if (r >= 0) {
-                if (family)
-                        *family = AF_INET6;
+                if (ret_family)
+                        *ret_family = AF_INET6;
                 return 0;
         }
 
@@ -371,13 +371,13 @@ int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_u
         return r;
 }
 
-unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr) {
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
         assert(addr);
 
         return 32 - u32ctz(be32toh(addr->s_addr));
 }
 
-struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
         assert(addr);
         assert(prefixlen <= 32);
 
@@ -390,7 +390,7 @@ struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char
         return addr;
 }
 
-int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
         uint8_t msb_octet = *(uint8_t*) addr;
 
         /* addr may not be aligned, so make sure we only access it byte-wise */
@@ -414,18 +414,18 @@ int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixl
         return 0;
 }
 
-int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
         unsigned char prefixlen;
         int r;
 
         assert(addr);
         assert(mask);
 
-        r = in_addr_default_prefixlen(addr, &prefixlen);
+        r = in4_addr_default_prefixlen(addr, &prefixlen);
         if (r < 0)
                 return r;
 
-        in_addr_prefixlen_to_netmask(mask, prefixlen);
+        in4_addr_prefixlen_to_netmask(mask, prefixlen);
         return 0;
 }
 
@@ -435,7 +435,7 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
         if (family == AF_INET) {
                 struct in_addr mask;
 
-                if (!in_addr_prefixlen_to_netmask(&mask, prefixlen))
+                if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
                         return -EINVAL;
 
                 addr->in.s_addr &= mask.s_addr;
@@ -465,10 +465,57 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
         return -EAFNOSUPPORT;
 }
 
-int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen) {
+int in_addr_prefix_covers(int family,
+                          const union in_addr_union *prefix,
+                          unsigned char prefixlen,
+                          const union in_addr_union *address) {
+
+        union in_addr_union masked_prefix, masked_address;
+        int r;
+
+        assert(prefix);
+        assert(address);
+
+        masked_prefix = *prefix;
+        r = in_addr_mask(family, &masked_prefix, prefixlen);
+        if (r < 0)
+                return r;
+
+        masked_address = *address;
+        r = in_addr_mask(family, &masked_address, prefixlen);
+        if (r < 0)
+                return r;
+
+        return in_addr_equal(family, &masked_prefix, &masked_address);
+}
+
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
+        uint8_t u;
+        int r;
+
+        if (!IN_SET(family, AF_INET, AF_INET6))
+                return -EAFNOSUPPORT;
+
+        r = safe_atou8(p, &u);
+        if (r < 0)
+                return r;
+
+        if (u > FAMILY_ADDRESS_SIZE(family) * 8)
+                return -ERANGE;
+
+        *ret = u;
+        return 0;
+}
+
+int in_addr_prefix_from_string(
+                const char *p,
+                int family,
+                union in_addr_union *ret_prefix,
+                unsigned char *ret_prefixlen) {
+
         union in_addr_union buffer;
         const char *e, *l;
-        uint8_t k;
+        unsigned char k;
         int r;
 
         assert(p);
@@ -486,23 +533,58 @@ int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *r
         if (r < 0)
                 return r;
 
-        k = FAMILY_ADDRESS_SIZE(family) * 8;
-
         if (e) {
-                uint8_t n;
-
-                r = safe_atou8(e + 1, &n);
+                r = in_addr_parse_prefixlen(family, e+1, &k);
                 if (r < 0)
                         return r;
+        } else
+                k = FAMILY_ADDRESS_SIZE(family) * 8;
 
-                if (n > k)
-                        return -ERANGE;
+        if (ret_prefix)
+                *ret_prefix = buffer;
+        if (ret_prefixlen)
+                *ret_prefixlen = k;
 
-                k = n;
-        }
+        return 0;
+}
+
+int in_addr_prefix_from_string_auto(
+                const char *p,
+                int *ret_family,
+                union in_addr_union *ret_prefix,
+                unsigned char *ret_prefixlen) {
+
+        /* Parses a string of the form "ADDRESS" or "ADDRESS/PREFIXLEN". The address family is determined
+         * by in_addr_from_string_auto(). If no "/PREFIXLEN" part is present the prefix length defaults
+         * to the full width of an address of the detected family, in bits. Each of the three return
+         * parameters may be NULL if the caller is not interested in it. Returns 0 on success, or the
+         * negative error of the failing parser. */
+
+        union in_addr_union buffer;
+        const char *e, *l;
+        unsigned char k;
+        int family, r;
+
+        assert(p);
+
+        /* Split off the address part in front of the first '/', if there is one */
+        e = strchr(p, '/');
+        if (e)
+                l = strndupa(p, e - p);
+        else
+                l = p;
 
-        *ret_prefix = buffer;
-        *ret_prefixlen = k;
+        r = in_addr_from_string_auto(l, &family, &buffer);
+        if (r < 0)
+                return r;
+
+        if (e) {
+                /* The prefix length is validated against the family detected above */
+                r = in_addr_parse_prefixlen(family, e+1, &k);
+                if (r < 0)
+                        return r;
+        } else
+                k = FAMILY_ADDRESS_SIZE(family) * 8;
+
+        /* Only write the results once everything has been parsed successfully */
+        if (ret_family)
+                *ret_family = family;
+        if (ret_prefix)
+                *ret_prefix = buffer;
+        if (ret_prefixlen)
+                *ret_prefixlen = k;
 
         return 0;
+
 }
index 14e27246b59684d6f7594afaef7d423902a83311..d129bf5585483c339227f7655b54ac905881edc8 100644 (file)
@@ -53,14 +53,17 @@ int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
 int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
 int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
 int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
-int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret);
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
 int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
-unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr);
-struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
-int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
-int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
 int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
-int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen);
+int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
+int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
 
 static inline size_t FAMILY_ADDRESS_SIZE(int family) {
         assert(family == AF_INET || family == AF_INET6);
index 4684ed3bfc782919b71600521ba7a66f4b04043d..d9b69adde9abb03d05b7a4e7ed81e503fa58ee7b 100644 (file)
@@ -40,14 +40,6 @@ int fd_wait_for_event(int fd, int event, usec_t timeout);
 
 ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
 
-#define IOVEC_SET_STRING(i, s)                  \
-        do {                                    \
-                struct iovec *_i = &(i);        \
-                char *_s = (char *)(s);         \
-                _i->iov_base = _s;              \
-                _i->iov_len = strlen(_s);       \
-        } while (false)
-
 static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, unsigned n) {
         unsigned j;
         size_t r = 0;
@@ -93,3 +85,8 @@ static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
         return FILE_SIZE_VALID(l);
 
 }
+
+/* Initializer for a struct iovec covering 'len' bytes at 'base', for use in declarations */
+#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
+/* The same as a compound literal, for use wherever an expression of type struct iovec is needed */
+#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
+/* String variants: the length is determined with strlen(), i.e. the trailing NUL byte is not covered.
+ * Note that 'string' is expanded twice, hence it must be free of side effects. The argument is
+ * parenthesized so that the cast applies to the whole expression passed in. */
+#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) (string), strlen(string))
+#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)
index 7d72effdea9f30c57e8d4ca9b284f3359e3fbd64..38ac8deaf34828d3dbb1de225d7e79d8489d04f5 100644 (file)
@@ -20,8 +20,9 @@
 #include <unistd.h>
 
 #include "alloc-util.h"
-#include "journal-importer.h"
 #include "fd-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
 #include "parse-util.h"
 #include "string-util.h"
 #include "unaligned.h"
@@ -38,7 +39,7 @@ static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
         if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
                 return log_oom();
 
-        iovw->iovec[iovw->count++] = (struct iovec) {data, len};
+        iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
         return 0;
 }
 
index 421ae52dc5f69d09ce5f9d4636e69bdd885f56cd..168c6c37ceb0e432395c67e9366f6fa613340d8c 100644 (file)
@@ -351,22 +351,22 @@ static int write_to_console(
 
         if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
                 xsprintf(prefix, "<%i>", level);
-                IOVEC_SET_STRING(iovec[n++], prefix);
+                iovec[n++] = IOVEC_MAKE_STRING(prefix);
         }
 
         highlight = LOG_PRI(level) <= LOG_ERR && show_color;
 
         if (show_location) {
                 snprintf(location, sizeof(location), "(%s:%i) ", file, line);
-                IOVEC_SET_STRING(iovec[n++], location);
+                iovec[n++] = IOVEC_MAKE_STRING(location);
         }
 
         if (highlight)
-                IOVEC_SET_STRING(iovec[n++], ANSI_HIGHLIGHT_RED);
-        IOVEC_SET_STRING(iovec[n++], buffer);
+                iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
+        iovec[n++] = IOVEC_MAKE_STRING(buffer);
         if (highlight)
-                IOVEC_SET_STRING(iovec[n++], ANSI_NORMAL);
-        IOVEC_SET_STRING(iovec[n++], "\n");
+                iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
+        iovec[n++] = IOVEC_MAKE_STRING("\n");
 
         if (writev(console_fd, iovec, n) < 0) {
 
@@ -425,11 +425,11 @@ static int write_to_syslog(
 
         xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
 
-        IOVEC_SET_STRING(iovec[0], header_priority);
-        IOVEC_SET_STRING(iovec[1], header_time);
-        IOVEC_SET_STRING(iovec[2], program_invocation_short_name);
-        IOVEC_SET_STRING(iovec[3], header_pid);
-        IOVEC_SET_STRING(iovec[4], buffer);
+        iovec[0] = IOVEC_MAKE_STRING(header_priority);
+        iovec[1] = IOVEC_MAKE_STRING(header_time);
+        iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
+        iovec[3] = IOVEC_MAKE_STRING(header_pid);
+        iovec[4] = IOVEC_MAKE_STRING(buffer);
 
         /* When using syslog via SOCK_STREAM separate the messages by NUL chars */
         if (syslog_is_stream)
@@ -470,11 +470,11 @@ static int write_to_kmsg(
         xsprintf(header_priority, "<%i>", level);
         xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
 
-        IOVEC_SET_STRING(iovec[0], header_priority);
-        IOVEC_SET_STRING(iovec[1], program_invocation_short_name);
-        IOVEC_SET_STRING(iovec[2], header_pid);
-        IOVEC_SET_STRING(iovec[3], buffer);
-        IOVEC_SET_STRING(iovec[4], "\n");
+        iovec[0] = IOVEC_MAKE_STRING(header_priority);
+        iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
+        iovec[2] = IOVEC_MAKE_STRING(header_pid);
+        iovec[3] = IOVEC_MAKE_STRING(buffer);
+        iovec[4] = IOVEC_MAKE_STRING("\n");
 
         if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
                 return -errno;
@@ -547,10 +547,10 @@ static int write_to_journal(
 
         log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
 
-        IOVEC_SET_STRING(iovec[0], header);
-        IOVEC_SET_STRING(iovec[1], "MESSAGE=");
-        IOVEC_SET_STRING(iovec[2], buffer);
-        IOVEC_SET_STRING(iovec[3], "\n");
+        iovec[0] = IOVEC_MAKE_STRING(header);
+        iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
+        iovec[2] = IOVEC_MAKE_STRING(buffer);
+        iovec[3] = IOVEC_MAKE_STRING("\n");
 
         mh.msg_iov = iovec;
         mh.msg_iovlen = ELEMENTSOF(iovec);
@@ -872,7 +872,7 @@ int log_format_iovec(
                  * the next format string */
                 VA_FORMAT_ADVANCE(format, ap);
 
-                IOVEC_SET_STRING(iovec[(*n)++], m);
+                iovec[(*n)++] = IOVEC_MAKE_STRING(m);
 
                 if (newline_separator) {
                         iovec[*n].iov_base = (char*) &nl;
@@ -893,9 +893,9 @@ int log_struct_internal(
                 const char *func,
                 const char *format, ...) {
 
+        LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
         char buf[LINE_MAX];
         bool found = false;
-        LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
         PROTECT_ERRNO;
         va_list ap;
 
@@ -926,7 +926,7 @@ int log_struct_internal(
 
                 /* If the journal is available do structured logging */
                 log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
-                IOVEC_SET_STRING(iovec[n++], header);
+                iovec[n++] = IOVEC_MAKE_STRING(header);
 
                 va_start(ap, format);
                 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
@@ -975,6 +975,73 @@ int log_struct_internal(
         return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
 }
 
+int log_struct_iovec_internal(
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const struct iovec input_iovec[],
+                size_t n_input_iovec) {
+
+        /* Logs a set of structured fields that the caller already formatted, one field per entry of
+         * input_iovec[]. If a journal target is selected and the journal socket is open, the fields are
+         * sent there, prefixed by the usual header and each followed by a newline. Otherwise the first
+         * field starting with "MESSAGE=" is looked up and its payload is passed to
+         * log_dispatch_internal() as plain text.
+         *
+         * Returns -error (with 'error' normalized to a positive value first) if the message is
+         * suppressed, was delivered to the journal, or contains no MESSAGE= field; otherwise returns
+         * what log_dispatch_internal() returns. */
+
+        LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+        PROTECT_ERRNO;
+        size_t i;
+        char *m;
+
+        /* Accept both positive and negative errno-style values */
+        if (error < 0)
+                error = -error;
+
+        if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+                return -error;
+
+        if (log_target == LOG_TARGET_NULL)
+                return -error;
+
+        /* No facility encoded in the level? Then use the configured default one */
+        if ((level & LOG_FACMASK) == 0)
+                level = log_facility | LOG_PRI(level);
+
+        if (IN_SET(log_target, LOG_TARGET_AUTO,
+                               LOG_TARGET_JOURNAL_OR_KMSG,
+                               LOG_TARGET_JOURNAL) &&
+            journal_fd >= 0) {
+
+                /* One slot for the header, plus two per input field: the field itself and a newline */
+                struct iovec iovec[1 + n_input_iovec*2];
+                char header[LINE_MAX];
+                struct msghdr mh = {
+                        .msg_iov = iovec,
+                        .msg_iovlen = 1 + n_input_iovec*2,
+                };
+
+                log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+                iovec[0] = IOVEC_MAKE_STRING(header);
+
+                for (i = 0; i < n_input_iovec; i++) {
+                        iovec[1+i*2] = input_iovec[i];
+                        iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
+                }
+
+                if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
+                        return -error;
+
+                /* Sending failed: fall through to the plain text path below */
+        }
+
+        /* Find the first field that carries the human readable message */
+        for (i = 0; i < n_input_iovec; i++) {
+                if (input_iovec[i].iov_len < strlen("MESSAGE="))
+                        continue;
+
+                if (memcmp(input_iovec[i].iov_base, "MESSAGE=", strlen("MESSAGE=")) == 0)
+                        break;
+        }
+
+        if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
+                return -error;
+
+        /* iov_base is a void*, hence cast it before doing pointer arithmetic on it. The field is not
+         * necessarily NUL terminated, which is why a bounded copy is made. */
+        m = strndupa((const char*) input_iovec[i].iov_base + strlen("MESSAGE="),
+                     input_iovec[i].iov_len - strlen("MESSAGE="));
+
+        return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
+}
+
 int log_set_target_from_string(const char *e) {
         LogTarget t;
 
index 186747ff8e288a8ed16ece52fbda807e6caa1f31..e3fd3203d04d75148e9a2b511a29c94f5007fb06 100644 (file)
@@ -187,6 +187,15 @@ int log_format_iovec(
                 const char *format,
                 va_list ap) _printf_(6, 0);
 
+int log_struct_iovec_internal(
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const struct iovec input_iovec[],
+                size_t n_input_iovec);
+
 /* This modifies the buffer passed! */
 int log_dump_internal(
                 int level,
@@ -270,6 +279,11 @@ void log_assert_failed_return_realm(
                             error, __FILE__, __LINE__, __func__, __VA_ARGS__)
 #define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
 
+#define log_struct_iovec_errno(level, error, iovec, n_iovec)            \
+        log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+                                  error, __FILE__, __LINE__, __func__, iovec, n_iovec)
+#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
+
 /* This modifies the buffer passed! */
 #define log_dump(level, buffer) \
         log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
index 67cc27274d65df4b023b50e2f141468bc53261ed..994336fde2d327781a7265c89ade48498b5a562e 100644 (file)
@@ -1,4 +1,6 @@
 basic_sources_plain = files('''
+        MurmurHash2.c
+        MurmurHash2.h
         af-list.c
         af-list.h
         alloc-util.c
@@ -16,6 +18,8 @@ basic_sources_plain = files('''
         bitmap.c
         bitmap.h
         blkid-util.h
+        bpf-program.c
+        bpf-program.h
         btrfs-ctree.h
         btrfs-util.c
         btrfs-util.h
@@ -24,10 +28,10 @@ basic_sources_plain = files('''
         bus-label.h
         calendarspec.c
         calendarspec.h
-        capability-util.c
-        capability-util.h
         cap-list.c
         cap-list.h
+        capability-util.c
+        capability-util.h
         cgroup-util.c
         cgroup-util.h
         chattr-util.c
@@ -61,10 +65,10 @@ basic_sources_plain = files('''
         extract-word.h
         fd-util.c
         fd-util.h
-        fileio.c
-        fileio.h
         fileio-label.c
         fileio-label.h
+        fileio.c
+        fileio.h
         format-util.h
         fs-util.c
         fs-util.h
@@ -82,9 +86,9 @@ basic_sources_plain = files('''
         hostname-util.h
         in-addr-util.c
         in-addr-util.h
-        ioprio.h
         io-util.c
         io-util.h
+        ioprio.h
         journal-importer.c
         journal-importer.h
         khash.c
@@ -106,13 +110,11 @@ basic_sources_plain = files('''
         mempool.c
         mempool.h
         missing_syscall.h
+        mkdir-label.c
         mkdir.c
         mkdir.h
-        mkdir-label.c
         mount-util.c
         mount-util.h
-        MurmurHash2.c
-        MurmurHash2.h
         nss-util.h
         ordered-set.c
         ordered-set.h
@@ -138,9 +140,9 @@ basic_sources_plain = files('''
         rlimit-util.h
         rm-rf.c
         rm-rf.h
-        securebits.h
         securebits-util.c
         securebits-util.h
+        securebits.h
         selinux-util.c
         selinux-util.h
         set.h
index 898116c7b31040909f79d7bb04ceb3fc3435a03c..17cde5e74f022d52614226233946a2c944320d21 100644 (file)
@@ -22,6 +22,8 @@
 
 /* Missing glibc definitions to access certain kernel APIs */
 
+#include <sys/types.h>
+
 #if !HAVE_DECL_PIVOT_ROOT
 static inline int pivot_root(const char *new_root, const char *put_old) {
         return syscall(SYS_pivot_root, new_root, put_old);
@@ -316,3 +318,33 @@ static inline ssize_t copy_file_range(int fd_in, loff_t *off_in,
 #  endif
 }
 #endif
+
+#if !HAVE_DECL_BPF
+#  ifndef __NR_bpf
+#    if defined __i386__
+#      define __NR_bpf 357
+#    elif defined __x86_64__
+#      define __NR_bpf 321
+#    elif defined __aarch64__
+#      define __NR_bpf 280
+#    elif defined __sparc__
+#      define __NR_bpf 349
+#    elif defined __s390__
+#      define __NR_bpf 351
+#    else
+#      warning "__NR_bpf not defined for your architecture"
+#    endif
+#  endif
+
+union bpf_attr;
+
+static inline int bpf(int cmd, union bpf_attr *attr, size_t size) {
+        /* Fallback wrapper for the bpf() system call. If no syscall number is known for this
+         * architecture the call fails with ENOSYS. */
+#ifndef __NR_bpf
+        errno = ENOSYS;
+        return -1;
+#else
+        return (int) syscall(__NR_bpf, cmd, attr, size);
+#endif
+}
+
+#endif
index ca834df621311b52c85a5972ea2eb27bb51a54cb..5c41429f01c20496a4672c3842f3ab87f152299e 100644 (file)
@@ -42,7 +42,8 @@ int setrlimit_closest(int resource, const struct rlimit *rlim) {
 
         /* So we failed to set the desired setrlimit, then let's try
          * to get as close as we can */
-        assert_se(getrlimit(resource, &highest) == 0);
+        if (getrlimit(resource, &highest) < 0)
+                return -errno;
 
         fixed.rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max);
         fixed.rlim_max = MIN(rlim->rlim_max, highest.rlim_max);
index 6d1dc83874f5ac7ee26c312e3494fc8a29c7cc7b..6e7cdaac633fbd9dfb2e67ac88e26e8f77e8db66 100644 (file)
@@ -83,7 +83,7 @@ int socket_address_listen(
                         return -errno;
         }
 
-        if (socket_address_family(a) == AF_INET || socket_address_family(a) == AF_INET6) {
+        if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
                 if (bind_to_device)
                         if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
                                 return -errno;
index 34eb952ce956eec4d59920f6deae8c5d5012043d..c511aeee68330b8f597a1b8ef88659748ceca6fa 100644 (file)
@@ -120,7 +120,7 @@ char *strjoin_real(const char *x, ...) _sentinel_;
         ({                                                              \
                 const char *_appendees_[] = { a, __VA_ARGS__ };         \
                 char *_d_, *_p_;                                        \
-                int _len_ = 0;                                          \
+                size_t _len_ = 0;                                          \
                 unsigned _i_;                                           \
                 for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
                         _len_ += strlen(_appendees_[_i_]);              \
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
new file mode 100644 (file)
index 0000000..909c1c8
--- /dev/null
@@ -0,0 +1,680 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/libbpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "ip-address-access.h"
+#include "unit.h"
+
+enum {
+        MAP_KEY_PACKETS,
+        MAP_KEY_BYTES,
+};
+
+enum {
+        ACCESS_ALLOWED = 1,
+        ACCESS_DENIED  = 2,
+};
+
+/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
+
+static int add_lookup_instructions(
+                BPFProgram *p,
+                int map_fd,
+                int protocol,
+                bool is_ingress,
+                int verdict) {
+
+        /* Appends to 'p' the instructions that look up the peer address of the packet in the map
+         * 'map_fd' and, on a match, OR 'verdict' into R8. 'protocol' is ETH_P_IP or ETH_P_IPV6 and
+         * selects address size and header offset; for ingress the source address is checked, for
+         * egress the destination address. Returns -EAFNOSUPPORT for any other protocol. The
+         * generated code expects the skb pointer in R6 and skb->protocol in R7. */
+
+        int r, addr_offset, addr_size;
+
+        assert(p);
+        assert(map_fd >= 0);
+
+        switch (protocol) {
+
+        case ETH_P_IP:
+                addr_size = sizeof(uint32_t);
+                addr_offset = is_ingress ?
+                        offsetof(struct iphdr, saddr) :
+                        offsetof(struct iphdr, daddr);
+                break;
+
+        case ETH_P_IPV6:
+                addr_size = 4 * sizeof(uint32_t);
+                addr_offset = is_ingress ?
+                        offsetof(struct ip6_hdr, ip6_src.s6_addr) :
+                        offsetof(struct ip6_hdr, ip6_dst.s6_addr);
+                break;
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+
+        do {
+                /* The lookup block for the selected protocol; addr_size and addr_offset were chosen above */
+                struct bpf_insn insn[] = {
+                        /* If skb->protocol (in R7) is not the requested protocol, skip this whole block. The offset will be set later. */
+                        BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
+
+                        /*
+                         * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
+                         *
+                         * R1: Pointer to the skb
+                         * R2: Data offset
+                         * R3: Destination buffer on the stack (r10 - addr_size)
+                         * R4: Number of bytes to read (addr_size)
+                         */
+
+                        BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+                        BPF_MOV32_IMM(BPF_REG_2, addr_offset),
+
+                        BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
+
+                        BPF_MOV32_IMM(BPF_REG_4, addr_size),
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+
+                        /*
+                         * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
+                         * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
+                         * has to be set to the maximum possible value.
+                         *
+                         * The key is built on the stack: a 32bit prefix length (addr_size * 8) is stored
+                         * right in front of the address bytes loaded above, and R2 points to it.
+                         *
+                         * On success, the looked up value is stored in R0. For this application, the actual
+                         * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
+                         * matching value.
+                         */
+
+                        BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
+                        BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
+
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                        /* No entry found (R0 == 0)? Then skip the one instruction that records the verdict */
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+                        BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+                };
+
+                /* Jump label fixup: make the protocol check in insn[0] jump over all remaining instructions of this block */
+                insn[0].off = ELEMENTSOF(insn) - 1;
+
+                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+                if (r < 0)
+                        return r;
+
+        } while (false);
+
+        return 0;
+}
+
+static int bpf_firewall_compile_bpf(
+                Unit *u,
+                bool is_ingress,
+                BPFProgram **ret) {
+
+        /* Assembles the eBPF program for one direction (ingress or egress) of unit 'u' from the
+         * map file descriptors stored in the unit: prologue, one address lookup block per configured
+         * allow/deny map, verdict computation, optional accounting, exit. If neither accounting nor
+         * any access map is configured, *ret is set to NULL and 0 is returned. On success the new
+         * program is returned in *ret; on failure a negative error is returned. */
+
+        struct bpf_insn pre_insn[] = {
+                /*
+                 * When the eBPF program is entered, R1 contains the address of the skb.
+                 * However, R1-R5 are scratch registers that are not preserved when calling
+                 * into kernel functions, so we need to save anything that's supposed to
+                 * stay around to R6-R9. Save the skb to R6.
+                 */
+                BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+                /*
+                 * Although we cannot access the skb data directly from eBPF programs used in this
+                 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
+                 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
+                 * for later use.
+                 */
+                BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
+
+                /*
+                 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
+                 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
+                 */
+                BPF_MOV32_IMM(BPF_REG_8, 0),
+        };
+
+        /*
+         * The access checkers compiled for the configured allowance and denial lists
+         * write to R8 at runtime. The following code prepares for an early exit that
+         * skips the accounting if the packet is denied.
+         *
+         * R0 = 1
+         * if (R8 == ACCESS_DENIED)
+         *     R0 = 0
+         *
+         * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
+         * is allowed to pass.
+         */
+        struct bpf_insn post_insn[] = {
+                BPF_MOV64_IMM(BPF_REG_0, 1),
+                BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
+                BPF_MOV64_IMM(BPF_REG_0, 0),
+        };
+
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+        int accounting_map_fd, r;
+        bool access_enabled;
+
+        assert(u);
+        assert(ret);
+
+        accounting_map_fd = is_ingress ?
+                u->ip_accounting_ingress_map_fd :
+                u->ip_accounting_egress_map_fd;
+
+        access_enabled =
+                u->ipv4_allow_map_fd >= 0 ||
+                u->ipv6_allow_map_fd >= 0 ||
+                u->ipv4_deny_map_fd >= 0 ||
+                u->ipv6_deny_map_fd >= 0;
+
+        /* Nothing to account and nothing to filter? Then no program is needed at all */
+        if (accounting_map_fd < 0 && !access_enabled) {
+                *ret = NULL;
+                return 0;
+        }
+
+        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+        if (r < 0)
+                return r;
+
+        r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
+        if (r < 0)
+                return r;
+
+        if (access_enabled) {
+                /*
+                 * The simple rule this function translates into eBPF instructions is:
+                 *
+                 * - Access will be granted when an address matches an entry in one of the allow maps
+                 * - Otherwise, access will be denied when an address matches an entry in one of the deny maps
+                 * - Otherwise, access will be granted
+                 *
+                 * Each lookup block only ORs its verdict bit into R8, the decision itself is taken
+                 * by post_insn[] afterwards.
+                 */
+
+                if (u->ipv4_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (u->ipv6_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (u->ipv4_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (u->ipv6_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
+        if (r < 0)
+                return r;
+
+        if (accounting_map_fd >= 0) {
+                struct bpf_insn insn[] = {
+                        /*
+                         * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
+                         * The jump label will be fixed up later.
+                         */
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+
+                        /* Count packets */
+                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* no counter slot? skip the increment */
+                        BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+                        /* Count bytes */
+                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* no counter slot? skip the increment */
+                        BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+                        /* Allow the packet to pass */
+                        BPF_MOV64_IMM(BPF_REG_0, 1),
+                };
+
+                /* Jump label fixup: a denied packet jumps over the whole accounting block, leaving R0 == 0 */
+                insn[0].off = ELEMENTSOF(insn) - 1;
+
+                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+                if (r < 0)
+                        return r;
+        }
+
+        do {
+                /*
+                 * Exit from the eBPF program, R0 contains the verdict.
+                 * 0 means the packet is denied, 1 means the packet may pass.
+                 */
+                struct bpf_insn insn[] = {
+                        BPF_EXIT_INSN()
+                };
+
+                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+                if (r < 0)
+                        return r;
+        } while (false);
+
+        /* Hand the program over to the caller; clear 'p' so that the _cleanup_ handler leaves it alone */
+        *ret = p;
+        p = NULL;
+
+        return 0;
+}
+
+static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
+        IPAddressAccessItem *item;
+
+        /* Walks 'list' and increments *n_ipv4 or *n_ipv6 once for every entry of the respective address family.
+         * The counters are only ever added to, never reset, hence callers can accumulate over several lists.
+         * Returns 0 on success, or -EAFNOSUPPORT as soon as an entry of any other family is encountered (entries
+         * seen before that one remain counted). */
+
+        assert(n_ipv4);
+        assert(n_ipv6);
+
+        LIST_FOREACH(items, item, list) {
+                if (item->family == AF_INET)
+                        (*n_ipv4)++;
+                else if (item->family == AF_INET6)
+                        (*n_ipv6)++;
+                else
+                        return -EAFNOSUPPORT;
+        }
+
+        return 0;
+}
+
+static int bpf_firewall_add_access_items(
+                IPAddressAccessItem *list,
+                int ipv4_map_fd,
+                int ipv6_map_fd,
+                int verdict) {
+
+        struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
+        uint64_t value = verdict;
+        IPAddressAccessItem *item;
+        int r;
+
+        /* Inserts every entry of 'list' into the map matching its address family. The map key consists of the
+         * entry's prefix length followed by its address, the map value is the verdict. Returns 0 on success,
+         * -EAFNOSUPPORT if an entry is neither IPv4 nor IPv6, or the error of the failing map update. Entries
+         * inserted before a failure stay in the maps. */
+
+        /* One zero-initialized scratch key per family: the trie key header plus 4 resp. 16 bytes of address */
+        key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
+        key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
+
+        LIST_FOREACH(items, item, list) {
+                struct bpf_lpm_trie_key *key;
+                size_t address_size;
+                int map_fd;
+
+                /* Pick scratch key, address width and destination map according to the family */
+                if (item->family == AF_INET) {
+                        key = key_ipv4;
+                        address_size = sizeof(uint32_t);
+                        map_fd = ipv4_map_fd;
+                } else if (item->family == AF_INET6) {
+                        key = key_ipv6;
+                        address_size = 4 * sizeof(uint32_t);
+                        map_fd = ipv6_map_fd;
+                } else
+                        return -EAFNOSUPPORT;
+
+                key->prefixlen = item->prefixlen;
+                memcpy(key->data, &item->address, address_size);
+
+                r = bpf_map_update_element(map_fd, key, &value);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static int bpf_firewall_prepare_access_maps(
+                Unit *u,
+                int verdict,
+                int *ret_ipv4_map_fd,
+                int *ret_ipv6_map_fd) {
+
+        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
+        size_t n_ipv4 = 0, n_ipv6 = 0;
+        Unit *p;
+        int r;
+
+        /* Creates and populates the IPv4 and IPv6 LPM trie maps for one verdict: ACCESS_ALLOWED selects the
+         * ip_address_allow lists, any other verdict the ip_address_deny lists. The lists of the unit itself and
+         * of all slices it is contained in are merged into the same maps.
+         *
+         * On success returns 0 and stores the map fds in ret_ipv4_map_fd/ret_ipv6_map_fd; a family without any
+         * entries gets no map, and -1 is stored for it. On failure a negative error is returned, the return
+         * parameters are left untouched and the local fds are released through _cleanup_close_. */
+
+        assert(ret_ipv4_map_fd);
+        assert(ret_ipv6_map_fd);
+
+        /* First pass: count the entries per family, so that the maps can be sized to fit */
+        for (p = u; p; p = UNIT_DEREF(p->slice)) {
+                CGroupContext *cc;
+
+                cc = unit_get_cgroup_context(p);
+                if (!cc)
+                        continue;
+
+                r = bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
+                if (r < 0) /* Entry of an unsupported address family: fail early, before allocating any map */
+                        return r;
+        }
+
+        if (n_ipv4 > 0) {
+                ipv4_map_fd = bpf_map_new(
+                                BPF_MAP_TYPE_LPM_TRIE,
+                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
+                                sizeof(uint64_t),
+                                n_ipv4,
+                                BPF_F_NO_PREALLOC);
+                if (ipv4_map_fd < 0)
+                        return ipv4_map_fd;
+        }
+
+        if (n_ipv6 > 0) {
+                ipv6_map_fd = bpf_map_new(
+                                BPF_MAP_TYPE_LPM_TRIE,
+                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
+                                sizeof(uint64_t),
+                                n_ipv6,
+                                BPF_F_NO_PREALLOC);
+                if (ipv6_map_fd < 0)
+                        return ipv6_map_fd;
+        }
+
+        /* Second pass: walk the very same lists again and insert their entries into the maps */
+        for (p = u; p; p = UNIT_DEREF(p->slice)) {
+                CGroupContext *cc;
+
+                cc = unit_get_cgroup_context(p);
+                if (!cc)
+                        continue;
+
+                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
+                                                  ipv4_map_fd, ipv6_map_fd, verdict);
+                if (r < 0)
+                        return r;
+        }
+
+        *ret_ipv4_map_fd = ipv4_map_fd;
+        *ret_ipv6_map_fd = ipv6_map_fd;
+
+        /* Ownership was passed to the caller, make sure the cleanup handler leaves the fds alone */
+        ipv4_map_fd = ipv6_map_fd = -1;
+        return 0;
+}
+
+static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
+        int fd;
+
+        assert(fd_ingress);
+        assert(fd_egress);
+
+        /* Brings the two accounting maps in line with the accounting setting. If accounting is disabled both
+         * maps are closed. If it is enabled, a map is allocated for each direction that does not have one yet
+         * (i.e. whose fd is negative), while already existing maps are kept as they are. Returns 0 on success,
+         * or the error returned by bpf_map_new(). */
+
+        if (!enabled) {
+                *fd_ingress = safe_close(*fd_ingress);
+                *fd_egress = safe_close(*fd_egress);
+                return 0;
+        }
+
+        if (*fd_ingress < 0) {
+                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+                if (fd < 0)
+                        return fd;
+
+                *fd_ingress = fd;
+        }
+
+        if (*fd_egress < 0) {
+                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+                if (fd < 0)
+                        return fd;
+
+                *fd_egress = fd;
+        }
+
+        return 0;
+}
+
+int bpf_firewall_compile(Unit *u) {
+        CGroupContext *cc;
+        int r;
+
+        assert(u);
+
+        /* Rebuilds the in-memory firewall state of the unit: the allow and deny access maps, the accounting maps
+         * and the ingress and egress BPF programs. Nothing is attached to a cgroup here, that is left to
+         * bpf_firewall_install(). Returns 0 on success, -EOPNOTSUPP if BPF firewalling is not supported, -EINVAL
+         * if the unit has no cgroup context, or the error of the failing step as passed through
+         * log_error_errno(). */
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
+                return -EOPNOTSUPP;
+        }
+
+        /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
+         * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
+         * configuration, but we don't flush out the accounting unnecessarily */
+
+        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+
+        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+
+        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+        /* Note that the old programs and access maps are already gone at this point, even if we bail out below */
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -EINVAL;
+
+        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");
+
+        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");
+
+        /* Allocates the accounting maps if accounting is on and they don't exist yet, closes them if it is off */
+        r = bpf_firewall_prepare_accounting_maps(cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");
+
+        /* The maps have to be in place before the programs are generated */
+        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
+        if (r < 0)
+                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");
+
+        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
+        if (r < 0)
+                return log_error_errno(r, "Compilation for egress BPF program failed: %m");
+
+        return 0;
+}
+
+int bpf_firewall_install(Unit *u) {
+        _cleanup_free_ char *path = NULL;
+        CGroupContext *cc;
+        int r;
+
+        assert(u);
+
+        /* Synchronizes the unit's cgroup with the programs bpf_firewall_compile() left in u->ip_bpf_egress and
+         * u->ip_bpf_ingress: for each direction the program is loaded into the kernel and attached to the cgroup
+         * if there is one, and whatever is attached for that direction is detached if there is none. Returns 0
+         * on success, -EINVAL if the unit has no cgroup path or no cgroup context, -EOPNOTSUPP if BPF
+         * firewalling is not supported, or the error of the failing step. Processing stops at the first
+         * failure, hence an egress failure means the ingress side is not touched at all. */
+
+        if (!u->cgroup_path)
+                return -EINVAL;
+
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -EINVAL;
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
+                return -EOPNOTSUPP;
+        }
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+        if (u->ip_bpf_egress) {
+                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
+                if (r < 0)
+                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");
+
+                /* Units with delegation turned on get the program attached with BPF_F_ALLOW_OVERRIDE, presumably
+                 * so that the delegate may attach programs of its own further down -- see the kernel docs */
+                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+                if (r < 0)
+                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
+        } else {
+                /* -ENOENT is only logged at debug level, but is passed on to the caller like any other error */
+                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, path);
+                if (r < 0)
+                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+                                              "Detaching egress BPF program from cgroup failed: %m");
+        }
+
+        /* Same procedure for the ingress side */
+        if (u->ip_bpf_ingress) {
+                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
+                if (r < 0)
+                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");
+
+                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+                if (r < 0)
+                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
+        } else {
+                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, path);
+                if (r < 0)
+                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+                                              "Detaching ingress BPF program from cgroup failed: %m");
+        }
+
+        return 0;
+}
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
+        uint64_t packets;
+        uint32_t key;
+        int r;
+
+        /* Reads the byte and/or packet counter from an accounting map. Either return parameter may be NULL, in
+         * which case the respective counter is not looked up. Returns 0 on success, -EBADF if map_fd is
+         * negative, or the error of the failing lookup. The packet counter is buffered locally and only written
+         * out at the very end, so that *ret_packets stays untouched if the byte lookup fails. */
+
+        /* The accounting maps are created with a key size of sizeof(int), and the BPF program stores its key as a
+         * 32bit word, hence the key has to be a 32bit variable here too. Assuming bpf_map_lookup_element() hands
+         * the key pointer to the kernel as is, a 64bit variable would only address the right entry on
+         * little-endian machines. */
+
+        if (map_fd < 0)
+                return -EBADF;
+
+        if (ret_packets) {
+                key = MAP_KEY_PACKETS;
+                r = bpf_map_lookup_element(map_fd, &key, &packets);
+                if (r < 0)
+                        return r;
+        }
+
+        if (ret_bytes) {
+                key = MAP_KEY_BYTES;
+                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
+                if (r < 0)
+                        return r;
+        }
+
+        if (ret_packets)
+                *ret_packets = packets;
+
+        return 0;
+}
+
+int bpf_firewall_reset_accounting(int map_fd) {
+        uint64_t value = 0;
+        uint32_t key;
+        int r;
+
+        /* Resets the packet and the byte counter of an accounting map to zero. Returns -EBADF if map_fd is
+         * negative, otherwise the result of the map updates. If resetting the packet counter fails the byte
+         * counter is left alone. */
+
+        /* The key is a 32bit variable since the accounting maps are created with a key size of sizeof(int).
+         * Assuming bpf_map_update_element() hands the key pointer to the kernel as is, a 64bit variable would
+         * only address the right entry on little-endian machines. */
+
+        if (map_fd < 0)
+                return -EBADF;
+
+        key = MAP_KEY_PACKETS;
+        r = bpf_map_update_element(map_fd, &key, &value);
+        if (r < 0)
+                return r;
+
+        key = MAP_KEY_BYTES;
+        return bpf_map_update_element(map_fd, &key, &value);
+}
+
+
+int bpf_firewall_supported(void) {
+        static int supported = -1;
+        int fd, r;
+
+        /* Checks whether BPF firewalling is supported. For this, we check three things:
+         *
+         * a) whether we are privileged
+         * b) whether the unified hierarchy is being used
+         * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
+         *
+         * Returns > 0 if BPF firewalling is supported, 0 if it is not, and a negative error if the cgroup setup
+         * could not be determined. The positive and the zero result are cached in a static variable, errors are
+         * not, hence the check is repeated on the next call after a failure. */
+
+        if (supported >= 0)
+                return supported;
+
+        if (geteuid() != 0)
+                return supported = false;
+
+        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+        if (r < 0)
+                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+        if (r == 0)
+                return supported = false;
+
+        /* Probe for LPM trie support by allocating a minimal throw-away map */
+        fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
+                         offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
+                         sizeof(uint64_t),
+                         1,
+                         BPF_F_NO_PREALLOC);
+        if (fd < 0) {
+                /* Log the error bpf_map_new() returned: 'r' still carries the positive result of the hierarchy
+                 * check above and says nothing about why the allocation failed. */
+                log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
+                return supported = false;
+        }
+
+        safe_close(fd);
+
+        return supported = true;
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
new file mode 100644 (file)
index 0000000..870e314
--- /dev/null
@@ -0,0 +1,32 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+int bpf_firewall_supported(void); /* > 0: supported, 0: not supported, < 0: error while checking */
+
+int bpf_firewall_compile(Unit *u); /* (re)builds the unit's access/accounting maps and BPF programs */
+int bpf_firewall_install(Unit *u); /* attaches the compiled programs to the unit's cgroup, or detaches */
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets); /* rets may be NULL */
+int bpf_firewall_reset_accounting(int map_fd); /* sets the packet and the byte counter to zero */
index c806d6b7cb420138204a6ef606852dfa66d26574..9a0d374aa8dcc3e501c3898bee57ba049b356d27 100644 (file)
@@ -21,6 +21,7 @@
 #include <fnmatch.h>
 
 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "cgroup-util.h"
 #include "cgroup.h"
 #include "fd-util.h"
@@ -30,9 +31,9 @@
 #include "path-util.h"
 #include "process-util.h"
 #include "special.h"
+#include "stdio-util.h"
 #include "string-table.h"
 #include "string-util.h"
-#include "stdio-util.h"
 
 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
 
@@ -141,6 +142,9 @@ void cgroup_context_done(CGroupContext *c) {
 
         while (c->device_allow)
                 cgroup_context_free_device_allow(c, c->device_allow);
+
+        c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
+        c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
 }
 
 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@@ -149,6 +153,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
         CGroupBlockIODeviceBandwidth *b;
         CGroupBlockIODeviceWeight *w;
         CGroupDeviceAllow *a;
+        IPAddressAccessItem *iaai;
         char u[FORMAT_TIMESPAN_MAX];
 
         assert(c);
@@ -162,6 +167,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sBlockIOAccounting=%s\n"
                 "%sMemoryAccounting=%s\n"
                 "%sTasksAccounting=%s\n"
+                "%sIPAccounting=%s\n"
                 "%sCPUWeight=%" PRIu64 "\n"
                 "%sStartupCPUWeight=%" PRIu64 "\n"
                 "%sCPUShares=%" PRIu64 "\n"
@@ -184,6 +190,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->blockio_accounting),
                 prefix, yes_no(c->memory_accounting),
                 prefix, yes_no(c->tasks_accounting),
+                prefix, yes_no(c->ip_accounting),
                 prefix, c->cpu_weight,
                 prefix, c->startup_cpu_weight,
                 prefix, c->cpu_shares,
@@ -253,6 +260,20 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                                 b->path,
                                 format_bytes(buf, sizeof(buf), b->wbps));
         }
+
+        LIST_FOREACH(items, iaai, c->ip_address_allow) {
+                _cleanup_free_ char *k = NULL;
+
+                (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+                fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+        }
+
+        LIST_FOREACH(items, iaai, c->ip_address_deny) {
+                _cleanup_free_ char *k = NULL;
+
+                (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+                fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+        }
 }
 
 static int lookup_block_device(const char *p, dev_t *dev) {
@@ -645,7 +666,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
                               "Failed to set %s: %m", file);
 }
 
-static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
+static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {
+        /* Compiles the unit's BPF firewall and installs it. Failures of either step are not reported back. */
+
+        /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is not recursive we
+         * don't ever touch the bpf on them */
+        if (u->type == UNIT_SLICE)
+                return;
+
+        /* Without a successfully compiled firewall there is nothing to install */
+        if (bpf_firewall_compile(u) < 0)
+                return;
+
+        (void) bpf_firewall_install(u);
+}
+
+static void cgroup_context_apply(
+                Unit *u,
+                CGroupMask apply_mask,
+                bool apply_bpf,
+                ManagerState state) {
+
         const char *path;
         CGroupContext *c;
         bool is_root;
@@ -659,7 +700,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
         assert(c);
         assert(path);
 
-        if (mask == 0)
+        /* Nothing to do? Exit early! */
+        if (apply_mask == 0 && !apply_bpf)
                 return;
 
         /* Some cgroup attributes are not supported on the root cgroup,
@@ -673,9 +715,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
          * cgroup trees (assuming we are running in a container then),
          * and missing cgroups, i.e. EROFS and ENOENT. */
 
-        if ((mask & CGROUP_MASK_CPU) && !is_root) {
-                bool has_weight = cgroup_context_has_cpu_weight(c);
-                bool has_shares = cgroup_context_has_cpu_shares(c);
+        if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
+                bool has_weight, has_shares;
+
+                has_weight = cgroup_context_has_cpu_weight(c);
+                has_shares = cgroup_context_has_cpu_shares(c);
 
                 if (cg_all_unified() > 0) {
                         uint64_t weight;
@@ -712,7 +756,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 }
         }
 
-        if (mask & CGROUP_MASK_IO) {
+        if (apply_mask & CGROUP_MASK_IO) {
                 bool has_io = cgroup_context_has_io_config(c);
                 bool has_blockio = cgroup_context_has_blockio_config(c);
 
@@ -789,7 +833,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 }
         }
 
-        if (mask & CGROUP_MASK_BLKIO) {
+        if (apply_mask & CGROUP_MASK_BLKIO) {
                 bool has_io = cgroup_context_has_io_config(c);
                 bool has_blockio = cgroup_context_has_blockio_config(c);
 
@@ -856,7 +900,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 }
         }
 
-        if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
+        if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
                 if (cg_all_unified() > 0) {
                         uint64_t max, swap_max = CGROUP_LIMIT_MAX;
 
@@ -896,7 +940,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 }
         }
 
-        if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
+        if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
                 CGroupDeviceAllow *a;
 
                 /* Changing the devices list of a populated cgroup
@@ -960,7 +1004,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 }
         }
 
-        if ((mask & CGROUP_MASK_PIDS) && !is_root) {
+        if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {
 
                 if (c->tasks_max != CGROUP_LIMIT_MAX) {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@@ -974,6 +1018,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set pids.max: %m");
         }
+
+        if (apply_bpf)
+                cgroup_apply_firewall(u, c);
 }
 
 CGroupMask cgroup_context_get_mask(CGroupContext *c) {
@@ -1120,6 +1167,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
         return mask;
 }
 
+bool unit_get_needs_bpf(Unit *u) {
+        CGroupContext *cc;
+        Unit *parent;
+
+        assert(u);
+
+        /* Returns true if a BPF program is needed for this unit, i.e. if IP accounting or an IP access list is
+         * configured on the unit itself, or an IP access list on any of the slices it is contained in. */
+
+        /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at
+         * the moment. */
+        if (u->type == UNIT_SLICE)
+                return false;
+
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return false;
+
+        if (cc->ip_accounting || cc->ip_address_allow || cc->ip_address_deny)
+                return true;
+
+        /* If any parent slice has an IP access list defined, it applies too. Accounting of parents does not. */
+        for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice)) {
+                cc = unit_get_cgroup_context(parent);
+                if (!cc)
+                        return false;
+                if (cc->ip_address_allow || cc->ip_address_deny)
+                        return true;
+        }
+
+        return false;
+}
+
 /* Recurse from a unit up through its containing slices, propagating
  * mask bits upward. A unit is also member of itself. */
 void unit_update_cgroup_members_masks(Unit *u) {
@@ -1295,7 +1375,8 @@ int unit_watch_cgroup(Unit *u) {
 static int unit_create_cgroup(
                 Unit *u,
                 CGroupMask target_mask,
-                CGroupMask enable_mask) {
+                CGroupMask enable_mask,
+                bool needs_bpf) {
 
         CGroupContext *c;
         int r;
@@ -1337,6 +1418,7 @@ static int unit_create_cgroup(
         u->cgroup_realized = true;
         u->cgroup_realized_mask = target_mask;
         u->cgroup_enabled_mask = enable_mask;
+        u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
 
         if (u->type != UNIT_SLICE && !c->delegate) {
 
@@ -1386,10 +1468,19 @@ static void cgroup_xattr_apply(Unit *u) {
                 log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
 }
 
-static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
+static bool unit_has_mask_realized(
+                Unit *u,
+                CGroupMask target_mask,
+                CGroupMask enable_mask,
+                bool needs_bpf) {
+
         assert(u);
 
-        return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
+        return u->cgroup_realized &&
+                u->cgroup_realized_mask == target_mask &&
+                u->cgroup_enabled_mask == enable_mask &&
+                ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
+                 (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
 }
 
 /* Check if necessary controllers and attributes for a unit are in place.
@@ -1400,6 +1491,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
  * Returns 0 on success and < 0 on failure. */
 static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
         CGroupMask target_mask, enable_mask;
+        bool needs_bpf, apply_bpf;
         int r;
 
         assert(u);
@@ -1411,10 +1503,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
 
         target_mask = unit_get_target_mask(u);
         enable_mask = unit_get_enable_mask(u);
+        needs_bpf = unit_get_needs_bpf(u);
 
-        if (unit_has_mask_realized(u, target_mask, enable_mask))
+        if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
                 return 0;
 
+        /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
+         * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
+         * this will trickle down properly to cgroupfs. */
+        apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
+
         /* First, realize parents */
         if (UNIT_ISSET(u->slice)) {
                 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@@ -1423,18 +1521,19 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
         }
 
         /* And then do the real work */
-        r = unit_create_cgroup(u, target_mask, enable_mask);
+        r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
         if (r < 0)
                 return r;
 
         /* Finally, apply the necessary attributes. */
-        cgroup_context_apply(u, target_mask, state);
+        cgroup_context_apply(u, target_mask, apply_bpf, state);
         cgroup_xattr_apply(u);
 
         return 0;
 }
 
 static void unit_add_to_cgroup_queue(Unit *u) {
+        assert(u);
 
         if (u->in_cgroup_queue)
                 return;
@@ -1492,7 +1591,10 @@ static void unit_queue_siblings(Unit *u) {
                         /* If the unit doesn't need any new controllers
                          * and has current ones realized, it doesn't need
                          * any changes. */
-                        if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
+                        if (unit_has_mask_realized(m,
+                                                   unit_get_target_mask(m),
+                                                   unit_get_enable_mask(m),
+                                                   unit_get_needs_bpf(m)))
                                 continue;
 
                         unit_add_to_cgroup_queue(m);
@@ -1756,6 +1858,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
 
 int manager_setup_cgroup(Manager *m) {
         _cleanup_free_ char *path = NULL;
+        const char *scope_path;
         CGroupController c;
         int r, all_unified;
         char *e;
@@ -1813,73 +1916,66 @@ int manager_setup_cgroup(Manager *m) {
                         log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
         }
 
-        if (!m->test_run_flags) {
-                const char *scope_path;
-
-                /* 3. Install agent */
-                if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+        /* 3. Install agent */
+        if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
 
-                        /* In the unified hierarchy we can get
-                         * cgroup empty notifications via inotify. */
+                /* In the unified hierarchy we can get
+                 * cgroup empty notifications via inotify. */
 
-                        m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
-                        safe_close(m->cgroup_inotify_fd);
+                m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+                safe_close(m->cgroup_inotify_fd);
 
-                        m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
-                        if (m->cgroup_inotify_fd < 0)
-                                return log_error_errno(errno, "Failed to create control group inotify object: %m");
+                m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+                if (m->cgroup_inotify_fd < 0)
+                        return log_error_errno(errno, "Failed to create control group inotify object: %m");
 
-                        r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to watch control group inotify object: %m");
-
-                        /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
-                         * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
-                        r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to set priority of inotify event source: %m");
+                r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to watch control group inotify object: %m");
 
-                        (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
+                /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
+                 * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
+                r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to set priority of inotify event source: %m");
 
-                } else if (MANAGER_IS_SYSTEM(m)) {
+                (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
 
-                        /* On the legacy hierarchy we only get
-                         * notifications via cgroup agents. (Which
-                         * isn't really reliable, since it does not
-                         * generate events when control groups with
-                         * children run empty. */
+        } else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) {
 
-                        r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
-                        if (r < 0)
-                                log_warning_errno(r, "Failed to install release agent, ignoring: %m");
-                        else if (r > 0)
-                                log_debug("Installed release agent.");
-                        else if (r == 0)
-                                log_debug("Release agent already installed.");
-                }
+                /* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
+                 * since it does not generate events when control groups with children run empty.) */
 
-                /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
-                scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
-                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+                r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
                 if (r < 0)
-                        return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
+                        log_warning_errno(r, "Failed to install release agent, ignoring: %m");
+                else if (r > 0)
+                        log_debug("Installed release agent.");
+                else if (r == 0)
+                        log_debug("Release agent already installed.");
+        }
 
-                /* also, move all other userspace processes remaining
-                 * in the root cgroup into that scope. */
-                r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
-                if (r < 0)
-                        log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
+        /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
+        scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
 
-                /* 5. And pin it, so that it cannot be unmounted */
-                safe_close(m->pin_cgroupfs_fd);
-                m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
-                if (m->pin_cgroupfs_fd < 0)
-                        return log_error_errno(errno, "Failed to open pin file: %m");
+        /* also, move all other userspace processes remaining
+         * in the root cgroup into that scope. */
+        r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+        if (r < 0)
+                log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
 
-                /* 6.  Always enable hierarchical support if it exists... */
-                if (!all_unified)
-                        (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
-        }
+        /* 5. And pin it, so that it cannot be unmounted */
+        safe_close(m->pin_cgroupfs_fd);
+        m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
+        if (m->pin_cgroupfs_fd < 0)
+                return log_error_errno(errno, "Failed to open pin file: %m");
+
+        /* 6.  Always enable hierarchical support if it exists... */
+        if (!all_unified && m->test_run_flags == 0)
+                (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
 
         /* 7. Figure out which controllers are supported */
         r = cg_mask_supported(&m->cgroup_supported);
@@ -1992,11 +2088,18 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
 
 int unit_get_memory_current(Unit *u, uint64_t *ret) {
         _cleanup_free_ char *v = NULL;
+        CGroupContext *cc;
         int r;
 
         assert(u);
         assert(ret);
 
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -ENODATA;
+        if (!cc->memory_accounting)
+                return -ENODATA;
+
         if (!u->cgroup_path)
                 return -ENODATA;
 
@@ -2020,11 +2123,18 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
 
 int unit_get_tasks_current(Unit *u, uint64_t *ret) {
         _cleanup_free_ char *v = NULL;
+        CGroupContext *cc;
         int r;
 
         assert(u);
         assert(ret);
 
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -ENODATA;
+        if (!cc->tasks_accounting)
+                return -ENODATA;
+
         if (!u->cgroup_path)
                 return -ENODATA;
 
@@ -2091,6 +2201,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
 }
 
 int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
+        CGroupContext *cc;
         nsec_t ns;
         int r;
 
@@ -2100,6 +2211,12 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
          * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
          * call this function with a NULL return value. */
 
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -ENODATA;
+        if (!cc->cpu_accounting)
+                return -ENODATA;
+
         r = unit_get_cpu_usage_raw(u, &ns);
         if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
                 /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
@@ -2124,7 +2241,57 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
         return 0;
 }
 
-int unit_reset_cpu_usage(Unit *u) {
+/* Reads one IP accounting counter of the unit and stores it in *ret.
+ *
+ * Returns -ENODATA if the unit is a slice, has no cgroup context, has IP accounting turned off, or has no
+ * accounting map for the requested direction. A negative return value of bpf_firewall_read_accounting() is passed
+ * through unchanged; on success its (non-negative) return value is returned. */
+int unit_get_ip_accounting(
+                Unit *u,
+                CGroupIPAccountingMetric metric,
+                uint64_t *ret) {
+
+        bool want_ingress, want_bytes;
+        CGroupContext *context;
+        uint64_t counter;
+        int map_fd, r;
+
+        assert(u);
+        assert(metric >= 0);
+        assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+        assert(ret);
+
+        /* IP accounting is currently not recursive, hence no data is returned for slice nodes. Slices are inner
+         * cgroup nodes without processes directly attached, so their counters would be zero anyway. Refusing now
+         * leaves room to open this up later, should the kernel learn recursive BPF cgroup filters. */
+        if (u->type == UNIT_SLICE)
+                return -ENODATA;
+
+        context = unit_get_cgroup_context(u);
+        if (!context || !context->ip_accounting)
+                return -ENODATA;
+
+        /* The metric encodes two independent choices: which map to read, and which of its two counters. */
+        want_ingress = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS);
+        want_bytes = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES);
+
+        map_fd = want_ingress ? u->ip_accounting_ingress_map_fd : u->ip_accounting_egress_map_fd;
+        if (map_fd < 0)
+                return -ENODATA;
+
+        /* Exactly one of the two output pointers is set; the other is NULL, as only one counter is wanted. */
+        r = bpf_firewall_read_accounting(map_fd,
+                                         want_bytes ? &counter : NULL,
+                                         want_bytes ? NULL : &counter);
+        if (r < 0)
+                return r;
+
+        /* Add in the counters of a previous runtime. When the daemon is reexecuted/reloaded all BPF programs and
+         * maps are compiled anew, but the old counters are serialized. On deserialization they end up in
+         * ip_accounting_extra[], and are transparently added here. */
+        *ret = counter + u->ip_accounting_extra[metric];
+
+        return r;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
         nsec_t ns;
         int r;
 
@@ -2142,6 +2309,22 @@ int unit_reset_cpu_usage(Unit *u) {
         return 0;
 }
 
+/* Resets the ingress and egress IP accounting maps of the unit (where present) and clears the extra counters kept
+ * in ip_accounting_extra[]. Both maps are always attempted; the ingress error takes precedence over the egress
+ * result in the return value. */
+int unit_reset_ip_accounting(Unit *u) {
+        int ingress_result = 0, egress_result = 0;
+
+        assert(u);
+
+        if (u->ip_accounting_ingress_map_fd >= 0)
+                ingress_result = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+        if (u->ip_accounting_egress_map_fd >= 0)
+                egress_result = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+        /* Cleared unconditionally, even if resetting one of the maps failed. */
+        zero(u->ip_accounting_extra);
+
+        if (ingress_result < 0)
+                return ingress_result;
+
+        return egress_result;
+}
+
 bool unit_cgroup_delegate(Unit *u) {
         CGroupContext *c;
 
@@ -2167,6 +2350,9 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
         if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
                 m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
 
+        if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
+                m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
+
         if ((u->cgroup_realized_mask & m) == 0)
                 return;
 
@@ -2174,6 +2360,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
         unit_add_to_cgroup_queue(u);
 }
 
+/* Marks the BPF state of the unit as invalidated and adds the unit to the cgroup queue. For slice units the
+ * invalidation is propagated to the units that are members of the slice. */
+void unit_invalidate_cgroup_bpf(Unit *u) {
+        Unit *child;
+        Iterator it;
+
+        assert(u);
+
+        if (!UNIT_HAS_CGROUP_CONTEXT(u))
+                return;
+
+        /* Already marked, nothing left to do for this unit. */
+        if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
+                return;
+
+        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+        unit_add_to_cgroup_queue(u);
+
+        if (u->type != UNIT_SLICE)
+                return;
+
+        /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access
+         * list of our children includes our own. */
+        SET_FOREACH(child, u->dependencies[UNIT_BEFORE], it) {
+
+                /* Only consider other units whose slice is this very unit. */
+                if (child != u && UNIT_DEREF(child->slice) == u)
+                        unit_invalidate_cgroup_bpf(child);
+        }
+}
+
 void manager_invalidate_startup_units(Manager *m) {
         Iterator i;
         Unit *u;
index 4cd168f63e5274c4d0f58c9333813922a03a0322..fcbf8d01ca07fa8d6b38dec1d93d1ffb4c047787 100644 (file)
 
 #include <stdbool.h>
 
+#include "cgroup-util.h"
+#include "ip-address-access.h"
 #include "list.h"
 #include "time-util.h"
-#include "cgroup-util.h"
 
 typedef struct CGroupContext CGroupContext;
 typedef struct CGroupDeviceAllow CGroupDeviceAllow;
@@ -87,6 +88,7 @@ struct CGroupContext {
         bool blockio_accounting;
         bool memory_accounting;
         bool tasks_accounting;
+        bool ip_accounting;
 
         /* For unified hierarchy */
         uint64_t cpu_weight;
@@ -103,6 +105,9 @@ struct CGroupContext {
         uint64_t memory_max;
         uint64_t memory_swap_max;
 
+        LIST_HEAD(IPAddressAccessItem, ip_address_allow);
+        LIST_HEAD(IPAddressAccessItem, ip_address_deny);
+
         /* For legacy hierarchies */
         uint64_t cpu_shares;
         uint64_t startup_cpu_shares;
@@ -123,6 +128,16 @@ struct CGroupContext {
         bool delegate;
 };
 
+/* Used when querying IP accounting data. Selects both the traffic direction (ingress/egress) and the kind of
+ * counter (bytes/packets) that unit_get_ip_accounting() shall return. The values are also used as index into the
+ * per-unit ip_accounting_extra[] array. */
+typedef enum CGroupIPAccountingMetric {
+        CGROUP_IP_INGRESS_BYTES,                   /* byte counter of the ingress accounting map */
+        CGROUP_IP_INGRESS_PACKETS,                 /* packet counter of the ingress accounting map */
+        CGROUP_IP_EGRESS_BYTES,                    /* byte counter of the egress accounting map */
+        CGROUP_IP_EGRESS_PACKETS,                  /* packet counter of the egress accounting map */
+        _CGROUP_IP_ACCOUNTING_METRIC_MAX,          /* number of metrics; exclusive upper bound for valid values */
+        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1, /* marker for "no valid metric" */
+} CGroupIPAccountingMetric;
+
 #include "unit.h"
 
 void cgroup_context_init(CGroupContext *c);
@@ -145,6 +160,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
 CGroupMask unit_get_target_mask(Unit *u);
 CGroupMask unit_get_enable_mask(Unit *u);
 
+bool unit_get_needs_bpf(Unit *u);
+
 void unit_update_cgroup_members_masks(Unit *u);
 
 char *unit_default_cgroup_path(Unit *u);
@@ -172,7 +189,10 @@ int unit_watch_all_pids(Unit *u);
 int unit_get_memory_current(Unit *u, uint64_t *ret);
 int unit_get_tasks_current(Unit *u, uint64_t *ret);
 int unit_get_cpu_usage(Unit *u, nsec_t *ret);
-int unit_reset_cpu_usage(Unit *u);
+int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+
+int unit_reset_cpu_accounting(Unit *u);
+int unit_reset_ip_accounting(Unit *u);
 
 bool unit_cgroup_delegate(Unit *u);
 
@@ -180,6 +200,7 @@ int unit_notify_cgroup_empty(Unit *u);
 int manager_notify_cgroup_empty(Manager *m, const char *group);
 
 void unit_invalidate_cgroup(Unit *u, CGroupMask m);
+void unit_invalidate_cgroup_bpf(Unit *u);
 
 void manager_invalidate_startup_units(Manager *m);
 
index c1026e3f5b483937219d304bb245164a04b433c9..f61ca08fcbac4e27da89f692b6e765f1569f723d 100644 (file)
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+#include <arpa/inet.h>
+
+#include "af-list.h"
 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-util.h"
 #include "cgroup-util.h"
 #include "cgroup.h"
@@ -206,6 +210,48 @@ static int property_get_device_allow(
         return sd_bus_message_close_container(reply);
 }
 
+/* Serializes a single access list entry into the reply as one "(iayu)" struct: address family, raw address bytes
+ * (sized according to the family) and prefix length. Returns a negative value as soon as one sd-bus call fails. */
+static int append_ip_address_access_item(sd_bus_message *reply, const IPAddressAccessItem *item) {
+        int r;
+
+        r = sd_bus_message_open_container(reply, 'r', "iayu");
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append(reply, "i", item->family);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append_array(reply, 'y', &item->address, FAMILY_ADDRESS_SIZE(item->family));
+        if (r < 0)
+                return r;
+
+        /* prefixlen is stored as unsigned char, but goes over the wire as 32bit unsigned ("u") */
+        r = sd_bus_message_append(reply, "u", (uint32_t) item->prefixlen);
+        if (r < 0)
+                return r;
+
+        return sd_bus_message_close_container(reply);
+}
+
+/* D-Bus property getter shared by the IP access list properties: emits the list whose head pointer userdata points
+ * to as an array of "(iayu)" structs. */
+static int property_get_ip_address_access(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        IPAddressAccessItem **head = userdata, *item;
+        int r;
+
+        r = sd_bus_message_open_container(reply, 'a', "(iayu)");
+        if (r < 0)
+                return r;
+
+        LIST_FOREACH(items, item, *head) {
+                r = append_ip_address_access_item(reply, item);
+                if (r < 0)
+                        return r;
+        }
+
+        return sd_bus_message_close_container(reply);
+}
+
 const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@@ -239,6 +285,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
         SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
         SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
+        SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
+        SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
+        SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
         SD_BUS_VTABLE_END
 };
 
@@ -1133,6 +1182,7 @@ int bus_cgroup_set_property(
                 }
 
                 return 1;
+
         } else if (streq(name, "TasksMaxScale")) {
                 uint64_t limit;
                 uint32_t raw;
@@ -1152,6 +1202,137 @@ int bus_cgroup_set_property(
                                                           (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
                 }
 
+                return 1;
+
+        } else if (streq(name, "IPAccounting")) {
+                int b;
+
+                r = sd_bus_message_read(message, "b", &b);
+                if (r < 0)
+                        return r;
+
+                if (mode != UNIT_CHECK) {
+                        c->ip_accounting = b;
+
+                        unit_invalidate_cgroup_bpf(u);
+                        unit_write_drop_in_private(u, mode, name, b ? "IPAccounting=yes" : "IPAccounting=no");
+                }
+
+                return 1;
+
+        } else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
+                IPAddressAccessItem **list;
+                size_t n = 0;
+
+                list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
+
+                r = sd_bus_message_enter_container(message, 'a', "(iayu)");
+                if (r < 0)
+                        return r;
+
+                for (;;) {
+                        const void *ap;
+                        int32_t family;
+                        uint32_t prefixlen;
+                        size_t an;
+
+                        r = sd_bus_message_enter_container(message, 'r', "iayu");
+                        if (r < 0)
+                                return r;
+                        if (r == 0)
+                                break;
+
+                        r = sd_bus_message_read(message, "i", &family);
+                        if (r < 0)
+                                return r;
+
+                        if (!IN_SET(family, AF_INET, AF_INET6))
+                                return sd_bus_error_set_errnof(error, EINVAL, "IPAddressAllow= expects IPv4 or IPv6 addresses only.");
+
+                        r = sd_bus_message_read_array(message, 'y', &ap, &an);
+                        if (r < 0)
+                                return r;
+
+                        if (an != FAMILY_ADDRESS_SIZE(family))
+                                return sd_bus_error_set_errnof(error, EINVAL, "IP address has wrong size for family (%s, expected %zu, got %zu)",
+                                                               af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
+
+                        r = sd_bus_message_read(message, "u", &prefixlen);
+                        if (r < 0)
+                                return r;
+
+                        if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
+                                return sd_bus_error_set_errnof(error, EINVAL, "Prefix length too large for family.");
+
+                        if (mode != UNIT_CHECK) {
+                                IPAddressAccessItem *item;
+
+                                item = new0(IPAddressAccessItem, 1);
+                                if (!item)
+                                        return -ENOMEM;
+
+                                item->family = family;
+                                item->prefixlen = prefixlen;
+                                memcpy(&item->address, ap, an);
+
+                                LIST_PREPEND(items, *list, item);
+                        }
+
+                        r = sd_bus_message_exit_container(message);
+                        if (r < 0)
+                                return r;
+
+                        n++;
+                }
+
+                r = sd_bus_message_exit_container(message);
+                if (r < 0)
+                        return r;
+
+                *list = ip_address_access_reduce(*list);
+
+                if (mode != UNIT_CHECK) {
+                        _cleanup_free_ char *buf = NULL;
+                        _cleanup_fclose_ FILE *f = NULL;
+                        IPAddressAccessItem *item;
+                        size_t size = 0;
+
+                        if (n == 0)
+                                *list = ip_address_access_free_all(*list);
+
+                        unit_invalidate_cgroup_bpf(u);
+                        f = open_memstream(&buf, &size);
+                        if (!f)
+                                return -ENOMEM;
+
+                        fputs_unlocked(name, f);
+                        fputs_unlocked("=\n", f);
+
+                        LIST_FOREACH(items, item, *list) {
+                                char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+
+                                errno = 0;
+                                if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
+                                        return errno > 0 ? -errno : -EINVAL;
+
+                                fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
+                        }
+
+                        r = fflush_and_check(f);
+                        if (r < 0)
+                                return r;
+                        unit_write_drop_in_private(u, mode, name, buf);
+
+                        if (*list) {
+                                r = bpf_firewall_supported();
+                                if (r < 0)
+                                        return r;
+                                if (r == 0)
+                                        log_warning("Transient unit %s configures an IP firewall, but the local system does not support BPF/cgroup firewalling.\n"
+                                                    "Proceeding WITHOUT firewalling in effect!", u->id);
+                        }
+                }
+
                 return 1;
         }
 
index b0645ce294071619932c09e2184edf8cb9cad511..8d2ae964d819b7ce72f522779c25dcc26e929880 100644 (file)
@@ -20,6 +20,7 @@
 #include "sd-bus.h"
 
 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-common-errors.h"
 #include "cgroup-util.h"
 #include "dbus-job.h"
@@ -1051,6 +1052,39 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
         return sd_bus_send(NULL, reply, NULL);
 }
 
+/* D-Bus property getter shared by the four IP accounting counter properties. The property name selects the metric.
+ * If the counter cannot be read the return value of unit_get_ip_accounting() is ignored and (uint64_t) -1 is
+ * reported instead. */
+static int property_get_ip_counter(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        static const struct {
+                const char *name;
+                CGroupIPAccountingMetric metric;
+        } table[] = {
+                { "IPIngressBytes",   CGROUP_IP_INGRESS_BYTES   },
+                { "IPIngressPackets", CGROUP_IP_INGRESS_PACKETS },
+                { "IPEgressBytes",    CGROUP_IP_EGRESS_BYTES    },
+        };
+
+        /* Anything not listed in the table is expected to be "IPEgressPackets" */
+        CGroupIPAccountingMetric metric = CGROUP_IP_EGRESS_PACKETS;
+        uint64_t counter = (uint64_t) -1;
+        Unit *u = userdata;
+        bool found = false;
+        size_t k;
+
+        assert(bus);
+        assert(reply);
+        assert(property);
+        assert(u);
+
+        for (k = 0; k < sizeof(table) / sizeof(table[0]); k++)
+                if (streq(property, table[k].name)) {
+                        metric = table[k].metric;
+                        found = true;
+                        break;
+                }
+
+        if (!found)
+                assert(streq(property, "IPEgressPackets"));
+
+        /* On failure the counter is left at its initial value, which is what gets sent then. */
+        (void) unit_get_ip_accounting(u, metric, &counter);
+        return sd_bus_message_append(reply, "t", counter);
+}
+
 const sd_bus_vtable bus_unit_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
@@ -1058,6 +1092,10 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
         SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
         SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
         SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
+        SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
+        SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
+        SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
+        SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
         SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_VTABLE_END
 };
index e1846e1adbee485b885f15c327bdd655070874f9..9b0dbaf248f8f8dccd26eed342b7f25908c3d492 100644 (file)
 
 #include "dynamic-user.h"
 #include "fd-util.h"
+#include "fileio.h"
 #include "fs-util.h"
+#include "io-util.h"
 #include "parse-util.h"
 #include "random-util.h"
 #include "stdio-util.h"
 #include "string-util.h"
 #include "user-util.h"
-#include "fileio.h"
 
 /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
 #define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
@@ -245,8 +246,8 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                 /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
                 l = pwritev(lock_fd,
                             (struct iovec[2]) {
-                                    { .iov_base = (char*) name, .iov_len = strlen(name) },
-                                    { .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
+                                    IOVEC_INIT_STRING(name),
+                                    IOVEC_INIT((char[1]) { '\n' }, 1),
                             }, 2, 0);
                 if (l < 0) {
                         (void) unlink(lock_path);
@@ -271,10 +272,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
 
 static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
         uid_t uid = UID_INVALID;
-        struct iovec iov = {
-                .iov_base = &uid,
-                .iov_len = sizeof(uid),
-        };
+        struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
         union {
                 struct cmsghdr cmsghdr;
                 uint8_t buf[CMSG_SPACE(sizeof(int))];
@@ -314,10 +312,7 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
 }
 
 static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
-        struct iovec iov = {
-                .iov_base = &uid,
-                .iov_len = sizeof(uid),
-        };
+        struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
         union {
                 struct cmsghdr cmsghdr;
                 uint8_t buf[CMSG_SPACE(sizeof(int))];
index 28c6b2fc389bd34e381d6413124eecddfffc3af2..0b49be20007a070513eddb0961080b36bc3d5c0a 100644 (file)
@@ -2351,9 +2351,9 @@ static int send_user_lookup(
 
         if (writev(user_lookup_fd,
                (struct iovec[]) {
-                           { .iov_base = &uid, .iov_len = sizeof(uid) },
-                           { .iov_base = &gid, .iov_len = sizeof(gid) },
-                           { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+                           IOVEC_INIT(&uid, sizeof(uid)),
+                           IOVEC_INIT(&gid, sizeof(gid)),
+                           IOVEC_INIT_STRING(unit->id) }, 3) < 0)
                 return -errno;
 
         return 0;
@@ -3150,6 +3150,7 @@ static int exec_child(
                                    "EXECUTABLE=%s", command->path,
                                    LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
                                    LOG_UNIT_ID(unit),
+                                   LOG_UNIT_INVOCATION_ID(unit),
                                    NULL);
                         log_close();
                 }
@@ -3223,6 +3224,7 @@ int exec_spawn(Unit *unit,
                    LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
                    "EXECUTABLE=%s", command->path,
                    LOG_UNIT_ID(unit),
+                   LOG_UNIT_INVOCATION_ID(unit),
                    NULL);
         pid = fork();
         if (pid < 0)
@@ -3254,6 +3256,7 @@ int exec_spawn(Unit *unit,
                                 log_struct_errno(LOG_ERR, r,
                                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                                  LOG_UNIT_ID(unit),
+                                                 LOG_UNIT_INVOCATION_ID(unit),
                                                  LOG_UNIT_MESSAGE(unit, "%s: %m",
                                                                   error_message),
                                                  "EXECUTABLE=%s", command->path,
@@ -3262,6 +3265,7 @@ int exec_spawn(Unit *unit,
                                 log_struct_errno(LOG_INFO, r,
                                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                                  LOG_UNIT_ID(unit),
+                                                 LOG_UNIT_INVOCATION_ID(unit),
                                                  LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
                                                                   command->path),
                                                  "EXECUTABLE=%s", command->path,
@@ -3270,6 +3274,7 @@ int exec_spawn(Unit *unit,
                                 log_struct_errno(LOG_ERR, r,
                                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                                  LOG_UNIT_ID(unit),
+                                                 LOG_UNIT_INVOCATION_ID(unit),
                                                  LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
                                                                   exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
                                                                   command->path),
diff --git a/src/core/ip-address-access.c b/src/core/ip-address-access.c
new file mode 100644 (file)
index 0000000..cfb7d51
--- /dev/null
@@ -0,0 +1,217 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "extract-word.h"
+#include "hostname-util.h"
+#include "ip-address-access.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+/* Configuration parser callback for IP access list settings. 'data' points to the head pointer of the list to
+ * modify, 'rvalue' carries the words to parse.
+ *
+ * An empty rvalue flushes the whole list. Otherwise each word is either one of the keywords "any", a localhost
+ * name (as recognized by is_localhost()), "link-local", "multicast" -- each of which expands to one IPv4 plus one
+ * IPv6 entry -- or an address prefix parsed by in_addr_prefix_from_string_auto(). New entries are appended to the
+ * list, which is then passed through ip_address_access_reduce().
+ *
+ * Returns 0 on success and when a syntax error was logged and ignored, the result of log_oom() on allocation
+ * failure, or the negative return value of bpf_firewall_supported(). */
+int config_parse_ip_address_access(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        IPAddressAccessItem **list = data;
+        const char *p;
+        int r;
+
+        assert(list);
+
+        /* An empty assignment resets the list */
+        if (isempty(rvalue)) {
+                *list = ip_address_access_free_all(*list);
+                return 0;
+        }
+
+        p = rvalue;
+
+        for (;;) {
+                /* 'a' is owned by this scope until it is appended to the list; on every early exit below it is
+                 * either NULL or not yet linked into the list. */
+                _cleanup_free_ IPAddressAccessItem *a = NULL;
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, NULL, 0);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        /* Stop parsing here, but keep (and reduce) what was appended so far */
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+                        break;
+                }
+
+                a = new0(IPAddressAccessItem, 1);
+                if (!a)
+                        return log_oom();
+
+                /* For each keyword below the IPv4 entry is filled in and appended right away, then a second item
+                 * is allocated for the IPv6 entry, which is appended by the common code at the end of the loop
+                 * body. */
+
+                if (streq(word, "any")) {
+                        /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+                        /* Address and prefix length remain zero, as initialized by new0() */
+                        a->family = AF_INET;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+
+                } else if (is_localhost(word)) {
+                        /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32(0x7f000000);
+                        a->prefixlen = 8;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+                        a->prefixlen = 128;
+
+                } else if (streq(word, "link-local")) {
+
+                        /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+                        a->prefixlen = 16;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        /* NOTE(review): __in6_u/__u6_addr32 look like libc-internal member names -- confirm this
+                         * builds on all supported C libraries */
+                        a->address.in6 = (struct in6_addr) {
+                                .__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
+                        };
+                        a->prefixlen = 64;
+
+                } else if (streq(word, "multicast")) {
+
+                        /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
+                        a->prefixlen = 4;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        a->address.in6 = (struct in6_addr) {
+                                .__in6_u.__u6_addr32[0] = htobe32(0xff000000)
+                        };
+                        a->prefixlen = 8;
+
+                } else {
+                        r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
+                        if (r < 0) {
+                                /* Returns right away: entries appended for earlier words stay in the list, and
+                                 * neither the reduction nor the firewall support check below is run. */
+                                log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
+                                return 0;
+                        }
+                }
+
+                /* The list owns the item now; clear the pointer so that it is not freed when leaving the scope */
+                LIST_APPEND(items, *list, a);
+                a = NULL;
+        }
+
+        /* Drop entries that are fully covered by other entries */
+        *list = ip_address_access_reduce(*list);
+
+        if (*list) {
+                /* A non-empty list only has an effect if BPF/cgroup firewalling is available; warn if it is not */
+                r = bpf_firewall_supported();
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        log_warning("File %s:%u configures an IP firewall (%s=%s), but the local system does not support BPF/cgroup based firewalling.\n"
+                                    "Proceeding WITHOUT firewalling in effect!", filename, line, lvalue, rvalue);
+        }
+
+        return 0;
+}
+
+/* Frees every item of the list starting at 'first'. Always returns NULL, so that callers can reset their list head
+ * with the return value. Passing NULL is fine and does nothing. */
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {
+
+        while (first) {
+                IPAddressAccessItem *doomed = first;
+
+                /* Advance before freeing, the link lives inside the item */
+                first = doomed->items_next;
+                free(doomed);
+        }
+
+        return NULL;
+}
+
+/* Drops all entries from the list that are covered by another entry in full, thus removing all redundant entries.
+ * As a side effect the address of every visited entry is masked according to its prefix length.
+ *
+ * Takes the head of the list and returns the new head, which differs from the old one if the first entry was
+ * dropped. */
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
+        IPAddressAccessItem *a, *b, *tmp;
+        int r;
+
+        /* The outer loop uses the _SAFE variant, since 'a' may be removed and freed while iterating */
+        LIST_FOREACH_SAFE(items, a, tmp, first) {
+
+                /* Drop irrelevant bits */
+                (void) in_addr_mask(a->family, &a->address, a->prefixlen);
+
+                LIST_FOREACH(items, b, first) {
+
+                        if (a == b)
+                                continue;
+
+                        if (a->family != b->family)
+                                continue;
+
+                        /* A longer (more specific) prefix can never cover a shorter one */
+                        if (b->prefixlen > a->prefixlen)
+                                continue;
+
+                        r = in_addr_prefix_covers(b->family,
+                                                  &b->address,
+                                                  b->prefixlen,
+                                                  &a->address);
+                        if (r <= 0)
+                                continue;
+
+                        /* b covers a fully, then let's drop a */
+
+                        LIST_REMOVE(items, first, a);
+                        free(a);
+
+                        /* 'a' is gone now, hence stop comparing it against the remaining entries: continuing
+                         * would read freed memory, and might remove and free the same item a second time. */
+                        break;
+                }
+        }
+
+        return first;
+}
diff --git a/src/core/ip-address-access.h b/src/core/ip-address-access.h
new file mode 100644 (file)
index 0000000..9aeab1f
--- /dev/null
@@ -0,0 +1,38 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct IPAddressAccessItem IPAddressAccessItem;
+
+struct IPAddressAccessItem { /* One address prefix entry, chained into a list of such entries */
+        int family; /* Address family of 'address'; handed to the in_addr_*() helpers together with it */
+        unsigned char prefixlen; /* Prefix length that goes with 'address' */
+        union in_addr_union address;
+        LIST_FIELDS(IPAddressAccessItem, items); /* List linkage, accessed through the LIST_*() macros under the name 'items' */
+};
+
+int config_parse_ip_address_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);
index 8e2039d321473223d6133bd7cb769e12a0126e29..f04c8a21683e43b167cd922ca2eaa0783777b400 100644 (file)
@@ -806,21 +806,26 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
         default:
                 log_struct(job_result_log_level[result],
                            LOG_MESSAGE("%s", buf),
-                           "RESULT=%s", job_result_to_string(result),
+                           "JOB_TYPE=%s", job_type_to_string(t),
+                           "JOB_RESULT=%s", job_result_to_string(result),
                            LOG_UNIT_ID(u),
+                           LOG_UNIT_INVOCATION_ID(u),
                            NULL);
                 return;
         }
 
         log_struct(job_result_log_level[result],
                    LOG_MESSAGE("%s", buf),
-                   "RESULT=%s", job_result_to_string(result),
+                   "JOB_TYPE=%s", job_type_to_string(t),
+                   "JOB_RESULT=%s", job_result_to_string(result),
                    LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                    mid,
                    NULL);
 }
 
 static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
+        assert(u);
 
         /* No message if the job did not actually do anything due to failed condition. */
         if (t == JOB_START && result == JOB_DONE && !u->condition_result)
@@ -903,7 +908,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
          * the unit itself. We don't treat JOB_CANCELED as failure in
          * this context. And JOB_FAILURE is already handled by the
          * unit itself. */
-        if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
+        if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
                 log_struct(LOG_NOTICE,
                            "JOB_TYPE=%s", job_type_to_string(t),
                            "JOB_RESULT=%s", job_result_to_string(result),
index f7d5f24861923af76d0c0b000d99cca0be86f859..cc8aad05a08bd8fa33ef713be823ef1f640f512e 100644 (file)
@@ -174,6 +174,9 @@ $1.BlockIOWriteBandwidth,        config_parse_blockio_bandwidth,     0,
 $1.TasksAccounting,              config_parse_bool,                  0,                             offsetof($1, cgroup_context.tasks_accounting)
 $1.TasksMax,                     config_parse_tasks_max,             0,                             offsetof($1, cgroup_context.tasks_max)
 $1.Delegate,                     config_parse_bool,                  0,                             offsetof($1, cgroup_context.delegate)
+$1.IPAccounting,                 config_parse_bool,                  0,                             offsetof($1, cgroup_context.ip_accounting)
+$1.IPAddressAllow,               config_parse_ip_address_access,     0,                             offsetof($1, cgroup_context.ip_address_allow)
+$1.IPAddressDeny,                config_parse_ip_address_access,     0,                             offsetof($1, cgroup_context.ip_address_deny)
 $1.NetClass,                     config_parse_warn_compat,           DISABLED_LEGACY,               0'
 )m4_dnl
 Unit.Description,                config_parse_unit_string_printf,    0,                             offsetof(Unit, description)
index fbf8876a2de02f155cbc24c6f7b2a1e2d5203987..2dfd48005b75012e3a63ca09e59741bfe929093c 100644 (file)
@@ -128,6 +128,7 @@ static Set* arg_syscall_archs = NULL;
 static FILE* arg_serialization = NULL;
 static bool arg_default_cpu_accounting = false;
 static bool arg_default_io_accounting = false;
+static bool arg_default_ip_accounting = false;
 static bool arg_default_blockio_accounting = false;
 static bool arg_default_memory_accounting = false;
 static bool arg_default_tasks_accounting = true;
@@ -748,6 +749,7 @@ static int parse_config_file(void) {
                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            RLIMIT_RTTIME, arg_default_rlimit         },
                 { "Manager", "DefaultCPUAccounting",      config_parse_bool,             0, &arg_default_cpu_accounting            },
                 { "Manager", "DefaultIOAccounting",       config_parse_bool,             0, &arg_default_io_accounting             },
+                { "Manager", "DefaultIPAccounting",       config_parse_bool,             0, &arg_default_ip_accounting             },
                 { "Manager", "DefaultBlockIOAccounting",  config_parse_bool,             0, &arg_default_blockio_accounting        },
                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
                 { "Manager", "DefaultTasksAccounting",    config_parse_bool,             0, &arg_default_tasks_accounting          },
@@ -792,6 +794,7 @@ static void manager_set_defaults(Manager *m) {
         m->default_start_limit_burst = arg_default_start_limit_burst;
         m->default_cpu_accounting = arg_default_cpu_accounting;
         m->default_io_accounting = arg_default_io_accounting;
+        m->default_ip_accounting = arg_default_ip_accounting;
         m->default_blockio_accounting = arg_default_blockio_accounting;
         m->default_memory_accounting = arg_default_memory_accounting;
         m->default_tasks_accounting = arg_default_tasks_accounting;
@@ -1202,6 +1205,26 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
         return 0;
 }
 
+static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
+        const rlim_t wanted = 1024ULL*1024ULL*16ULL;
+        struct rlimit new_rlimit;
+        int r;
+
+        assert(saved_rlimit);
+        assert(getuid() == 0);
+
+        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
+         * should normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence
+         * let's bump the value high enough for the root user.
+         *
+         * The limit found in effect is stored in *saved_rlimit. Returns 0 on success; on failure a warning is
+         * logged and the result of log_warning_errno() is returned. */
+
+        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
+                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
+
+        /* Only ever raise the limit, never lower one that is already more generous than what we need. rlim_t is
+         * unsigned and on Linux RLIM_INFINITY is its largest value, hence a plain comparison also does the right
+         * thing for "unlimited". */
+        new_rlimit.rlim_cur = saved_rlimit->rlim_cur > wanted ? saved_rlimit->rlim_cur : wanted;
+        new_rlimit.rlim_max = saved_rlimit->rlim_max > wanted ? saved_rlimit->rlim_max : wanted;
+
+        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
+        if (r < 0)
+                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
+
+        return 0;
+}
+
 static void test_usr(void) {
 
         /* Check that /usr is not a separate fs */
@@ -1385,7 +1408,7 @@ int main(int argc, char *argv[]) {
         bool queue_default_job = false;
         bool empty_etc = false;
         char *switch_root_dir = NULL, *switch_root_init = NULL;
-        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
+        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
         const char *error_message = NULL;
 
 #ifdef HAVE_SYSV_COMPAT
@@ -1812,9 +1835,11 @@ int main(int argc, char *argv[]) {
                         if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
                                 log_warning_errno(errno, "Failed to make us a subreaper: %m");
 
-                if (arg_system)
+                if (arg_system) {
                         /* Bump up RLIMIT_NOFILE for systemd itself */
                         (void) bump_rlimit_nofile(&saved_rlimit_nofile);
+                        (void) bump_rlimit_memlock(&saved_rlimit_memlock);
+                }
         }
 
         r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
@@ -2048,6 +2073,8 @@ finish:
                  * its child processes */
                 if (saved_rlimit_nofile.rlim_cur > 0)
                         (void) setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
+                if (saved_rlimit_memlock.rlim_cur != (rlim_t) -1)
+                        (void) setrlimit(RLIMIT_MEMLOCK, &saved_rlimit_memlock);
 
                 if (switch_root_dir) {
                         /* Kill all remaining processes from the
index 46036aa50c7e289f8573e6b35c11f73af7420919..5cf4bc4ee6045b80e2a0d18a2a0d67aa0740e3ed 100644 (file)
@@ -616,6 +616,9 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
         m->default_timer_accuracy_usec = USEC_PER_MINUTE;
         m->default_tasks_accounting = true;
         m->default_tasks_max = UINT64_MAX;
+        m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
+        m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
+        m->default_restart_usec = DEFAULT_RESTART_USEC;
 
 #ifdef ENABLE_EFI
         if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@@ -628,13 +631,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
                 m->unit_log_format_string = "UNIT=%s";
 
                 m->invocation_log_field = "INVOCATION_ID=";
-                m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
+                m->invocation_log_format_string = "INVOCATION_ID=%s";
         } else {
                 m->unit_log_field = "USER_UNIT=";
                 m->unit_log_format_string = "USER_UNIT=%s";
 
                 m->invocation_log_field = "USER_INVOCATION_ID=";
-                m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
+                m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
         }
 
         m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
index 713d2db70cd71ed8eda64ab11ea7ab451a423b35..e8a62674711b758dc6776f5b358ea94394d7e042 100644 (file)
@@ -29,6 +29,7 @@
 #include "cgroup-util.h"
 #include "fdset.h"
 #include "hashmap.h"
+#include "ip-address-access.h"
 #include "list.h"
 #include "ratelimit.h"
 
@@ -271,6 +272,7 @@ struct Manager {
         bool default_io_accounting;
         bool default_blockio_accounting;
         bool default_tasks_accounting;
+        bool default_ip_accounting;
 
         uint64_t default_tasks_max;
         usec_t default_timer_accuracy_usec;
index 569eed9cad394e5dd1180aa25cfb839962b1cb12..ac600be117df9bd6ff54a2264a7249c53676a4f5 100644 (file)
 libcore_la_sources = '''
-        unit.c
-        unit.h
-        unit-printf.c
-        unit-printf.h
-        job.c
-        job.h
-        manager.c
-        manager.h
-        transaction.c
-        transaction.h
-        load-fragment.c
-        load-fragment.h
-        service.c
-        service.h
-        socket.c
-        socket.h
-        target.c
-        target.h
-        device.c
-        device.h
-        mount.c
-        mount.h
+        audit-fd.c
+        audit-fd.h
         automount.c
         automount.h
-        swap.c
-        swap.h
-        timer.c
-        timer.h
-        path.c
-        path.h
-        slice.c
-        slice.h
-        scope.c
-        scope.h
-        load-dropin.c
-        load-dropin.h
-        execute.c
-        execute.h
-        dynamic-user.c
-        dynamic-user.h
-        kill.c
-        kill.h
-        dbus.c
-        dbus.h
-        dbus-manager.c
-        dbus-manager.h
-        dbus-unit.c
-        dbus-unit.h
+        bpf-firewall.c
+        bpf-firewall.h
+        cgroup.c
+        cgroup.h
+        dbus-automount.c
+        dbus-automount.h
+        dbus-cgroup.c
+        dbus-cgroup.h
+        dbus-device.c
+        dbus-device.h
+        dbus-execute.c
+        dbus-execute.h
         dbus-job.c
         dbus-job.h
+        dbus-kill.c
+        dbus-kill.h
+        dbus-manager.c
+        dbus-manager.h
+        dbus-mount.c
+        dbus-mount.h
+        dbus-path.c
+        dbus-path.h
+        dbus-scope.c
+        dbus-scope.h
         dbus-service.c
         dbus-service.h
+        dbus-slice.c
+        dbus-slice.h
         dbus-socket.c
         dbus-socket.h
-        dbus-target.c
-        dbus-target.h
-        dbus-device.c
-        dbus-device.h
-        dbus-mount.c
-        dbus-mount.h
-        dbus-automount.c
-        dbus-automount.h
         dbus-swap.c
         dbus-swap.h
+        dbus-target.c
+        dbus-target.h
         dbus-timer.c
         dbus-timer.h
-        dbus-path.c
-        dbus-path.h
-        dbus-slice.c
-        dbus-slice.h
-        dbus-scope.c
-        dbus-scope.h
-        dbus-execute.c
-        dbus-execute.h
-        dbus-kill.c
-        dbus-kill.h
-        dbus-cgroup.c
-        dbus-cgroup.h
-        cgroup.c
-        cgroup.h
-        selinux-access.c
-        selinux-access.h
-        selinux-setup.c
-        selinux-setup.h
-        smack-setup.c
-        smack-setup.h
+        dbus-unit.c
+        dbus-unit.h
+        dbus.c
+        dbus.h
+        device.c
+        device.h
+        dynamic-user.c
+        dynamic-user.h
+        emergency-action.c
+        emergency-action.h
+        execute.c
+        execute.h
+        hostname-setup.c
+        hostname-setup.h
         ima-setup.c
         ima-setup.h
-        locale-setup.h
+        ip-address-access.c
+        ip-address-access.h
+        job.c
+        job.h
+        kill.c
+        kill.h
+        killall.c
+        killall.h
+        kmod-setup.c
+        kmod-setup.h
+        load-dropin.c
+        load-dropin.h
+        load-fragment.c
+        load-fragment.h
         locale-setup.c
-        hostname-setup.c
-        hostname-setup.h
+        locale-setup.h
+        loopback-setup.c
+        loopback-setup.h
         machine-id-setup.c
         machine-id-setup.h
+        manager.c
+        manager.h
         mount-setup.c
         mount-setup.h
-        kmod-setup.c
-        kmod-setup.h
-        loopback-setup.h
-        loopback-setup.c
+        mount.c
+        mount.h
         namespace.c
         namespace.h
-        killall.h
-        killall.c
-        audit-fd.c
-        audit-fd.h
+        path.c
+        path.h
+        scope.c
+        scope.h
+        selinux-access.c
+        selinux-access.h
+        selinux-setup.c
+        selinux-setup.h
+        service.c
+        service.h
         show-status.c
         show-status.h
-        emergency-action.c
-        emergency-action.h
+        slice.c
+        slice.h
+        smack-setup.c
+        smack-setup.h
+        socket.c
+        socket.h
+        swap.c
+        swap.h
+        target.c
+        target.h
+        timer.c
+        timer.h
+        transaction.c
+        transaction.h
+        unit-printf.c
+        unit-printf.h
+        unit.c
+        unit.h
 '''.split()
 
 load_fragment_gperf_gperf = custom_target(
index c3805ee055fb658c8f035ce3d2c76c0b5bad0407..46bcf37ae06cfca72a484d54979fe0613a86de50 100644 (file)
@@ -736,6 +736,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
 
         exec_context_dump(&m->exec_context, f, prefix);
         kill_context_dump(&m->kill_context, f, prefix);
+        cgroup_context_dump(&m->cgroup_context, f, prefix);
 }
 
 static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
@@ -753,9 +754,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
         assert(_pid);
 
         (void) unit_realize_cgroup(UNIT(m));
-        if (m->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(m));
-                m->reset_cpu_usage = false;
+        if (m->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(m));
+                (void) unit_reset_ip_accounting(UNIT(m));
+                m->reset_accounting = false;
         }
 
         r = unit_setup_exec_runtime(UNIT(m));
@@ -1043,7 +1045,7 @@ static int mount_start(Unit *u) {
 
         m->result = MOUNT_SUCCESS;
         m->reload_result = MOUNT_SUCCESS;
-        m->reset_cpu_usage = true;
+        m->reset_accounting = true;
 
         mount_enter_mounting(m);
         return 1;
index 9f7326ba6ada3b74a940ef06d16891bf0fb7b5bc..f81e4217dfbb3431814e1b03638e8f94db9f4520 100644 (file)
@@ -67,7 +67,7 @@ struct Mount {
         bool just_mounted:1;
         bool just_changed:1;
 
-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;
 
         bool sloppy_options;
 
index a1d5c1cfd541122bcfbe607009aadd6f484de5ba..8f9df3b9b7f62d090deea13d6e9f2b33946be4f0 100644 (file)
@@ -333,7 +333,8 @@ static int scope_start(Unit *u) {
                 return r;
 
         (void) unit_realize_cgroup(u);
-        (void) unit_reset_cpu_usage(u);
+        (void) unit_reset_cpu_accounting(u);
+        (void) unit_reset_ip_accounting(u);
 
         r = unit_attach_pids_to_cgroup(u);
         if (r < 0) {
index c9a7222cc641ace38611295c19ec60bb8bceb730..21fc4e2abec3c4bb87546bf33bde86c402567485 100644 (file)
@@ -803,6 +803,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
                         "%sFile Descriptor Store Current: %u\n",
                         prefix, s->n_fd_store_max,
                         prefix, s->n_fd_store);
+
+        cgroup_context_dump(&s->cgroup_context, f, prefix);
 }
 
 static int service_load_pid_file(Service *s, bool may_warn) {
@@ -1242,9 +1244,10 @@ static int service_spawn(
         }
 
         (void) unit_realize_cgroup(UNIT(s));
-        if (s->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(s));
-                s->reset_cpu_usage = false;
+        if (s->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(s));
+                (void) unit_reset_ip_accounting(UNIT(s));
+                s->reset_accounting = false;
         }
 
         r = unit_setup_exec_runtime(UNIT(s));
@@ -1953,6 +1956,7 @@ static void service_enter_restart(Service *s) {
         log_struct(LOG_INFO,
                    "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
                    LOG_UNIT_ID(UNIT(s)),
+                   LOG_UNIT_INVOCATION_ID(UNIT(s)),
                    LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
                    "N_RESTARTS=%u", s->n_restarts,
                    NULL);
@@ -2136,7 +2140,7 @@ static int service_start(Unit *u) {
         s->main_pid_known = false;
         s->main_pid_alien = false;
         s->forbid_restart = false;
-        s->reset_cpu_usage = true;
+        s->reset_accounting = true;
 
         s->status_text = mfree(s->status_text);
         s->status_errno = 0;
@@ -2948,6 +2952,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                            "EXIT_CODE=%s", sigchld_code_to_string(code),
                            "EXIT_STATUS=%i", status,
                            LOG_UNIT_ID(u),
+                           LOG_UNIT_INVOCATION_ID(u),
                            NULL);
 
                 if (s->result == SERVICE_SUCCESS)
index 0ac8bc9a675ad591a67e6f9ee06879ce0c015e1c..16b700637c2f3fe33fef9ff0cb8f733fe91908a2 100644 (file)
@@ -165,7 +165,7 @@ struct Service {
         bool forbid_restart:1;
         bool start_timeout_defined:1;
 
-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;
 
         char *bus_name;
         char *bus_name_owner; /* unique name of the current owner */
index 65f9cb888af00d60b4e52ff4ea40bc7e07ef67f7..8c94573844013d24a4188090d959cdf4883fc88a 100644 (file)
@@ -93,21 +93,21 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
         }
 
         if (prev_ephemeral)
-                IOVEC_SET_STRING(iovec[n++], "\r" ANSI_ERASE_TO_END_OF_LINE);
+                iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE);
         prev_ephemeral = ephemeral;
 
         if (status) {
                 if (!isempty(status)) {
-                        IOVEC_SET_STRING(iovec[n++], "[");
-                        IOVEC_SET_STRING(iovec[n++], status);
-                        IOVEC_SET_STRING(iovec[n++], "] ");
+                        iovec[n++] = IOVEC_MAKE_STRING("[");
+                        iovec[n++] = IOVEC_MAKE_STRING(status);
+                        iovec[n++] = IOVEC_MAKE_STRING("] ");
                 } else
-                        IOVEC_SET_STRING(iovec[n++], status_indent);
+                        iovec[n++] = IOVEC_MAKE_STRING(status_indent);
         }
 
-        IOVEC_SET_STRING(iovec[n++], s);
+        iovec[n++] = IOVEC_MAKE_STRING(s);
         if (!ephemeral)
-                IOVEC_SET_STRING(iovec[n++], "\n");
+                iovec[n++] = IOVEC_MAKE_STRING("\n");
 
         if (writev(fd, iovec, n) < 0)
                 return -errno;
index ed5d3fd701a024ad0026dcf057cd81299abc0f77..b15f751c82d1d993591703d3252193ec0983f96a 100644 (file)
@@ -222,7 +222,8 @@ static int slice_start(Unit *u) {
                 return r;
 
         (void) unit_realize_cgroup(u);
-        (void) unit_reset_cpu_usage(u);
+        (void) unit_reset_cpu_accounting(u);
+        (void) unit_reset_ip_accounting(u);
 
         slice_set_state(t, SLICE_ACTIVE);
         return 1;
index 5993ce0d0028bb39edf8ab91cb81fe1e7d0c3bbd..3b84ffa2a7941872261aaf8a0463101294a35ce5 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/sctp.h>
 
 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-error.h"
 #include "bus-util.h"
 #include "copy.h"
@@ -37,6 +38,7 @@
 #include "exit-status.h"
 #include "fd-util.h"
 #include "format-util.h"
+#include "in-addr-util.h"
 #include "io-util.h"
 #include "label.h"
 #include "log.h"
@@ -56,7 +58,6 @@
 #include "unit-name.h"
 #include "unit.h"
 #include "user-util.h"
-#include "in-addr-util.h"
 
 struct SocketPeer {
         unsigned n_ref;
@@ -852,6 +853,8 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
 
                 exec_command_dump_list(s->exec_command[c], f, prefix2);
         }
+
+        cgroup_context_dump(&s->cgroup_context, f, prefix);
 }
 
 static int instance_from_socket(int fd, unsigned nr, char **instance) {
@@ -1435,6 +1438,102 @@ no_label:
         return 0;
 }
 
+static int socket_address_listen_do(
+                Socket *s,
+                const SocketAddress *address,
+                const char *label) {
+
+        int fd;
+
+        assert(s);
+        assert(address);
+
+        /* Invokes socket_address_listen() for the address, taking all remaining parameters from the fields of s.
+         * The result is passed through to the caller unchanged. */
+        fd = socket_address_listen(
+                        address,
+                        SOCK_CLOEXEC|SOCK_NONBLOCK,
+                        s->backlog,
+                        s->bind_ipv6_only,
+                        s->bind_to_device,
+                        s->reuse_port,
+                        s->free_bind,
+                        s->transparent,
+                        s->directory_mode,
+                        s->socket_mode,
+                        label);
+
+        return fd;
+}
+
+static int socket_address_listen_in_cgroup(
+                Socket *s,
+                const SocketAddress *address,
+                const char *label) {
+
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        int fd, r;
+        pid_t pid;
+
+        assert(s);
+        assert(address);
+
+        /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
+         * in which the socket is actually created. This way we ensure the socket is actually properly attached to the
+         * unit's cgroup for the purpose of BPF filtering and such.
+         *
+         * Returns the listening fd on success, a negative value on failure. */
+
+        if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
+                goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
+                goto shortcut;
+
+        /* The helper hands the socket it created back to us over this pair */
+        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
+        if (r == 0) {
+                /* Child */
+
+                pair[0] = safe_close(pair[0]);
+
+                fd = socket_address_listen_do(s, address, label);
+                if (fd < 0) {
+                        log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(pair[1], fd, 0);
+                if (r < 0) {
+                        log_unit_error_errno(UNIT(s), r, "Failed to send listening socket to parent: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: pick up the fd first, then reap the helper */
+        pair[1] = safe_close(pair[1]);
+        fd = receive_one_fd(pair[0], 0);
+
+        /* We synchronously wait for the helper, as it shouldn't be slow */
+        r = wait_for_terminate_and_warn("listen-cgroup-helper", pid, false);
+        if (r < 0) {
+                safe_close(fd);
+                return r;
+        }
+
+        if (fd < 0)
+                return log_unit_error_errno(UNIT(s), fd, "Failed to receive listening socket: %m");
+
+        return fd;
+
+shortcut:
+        fd = socket_address_listen_do(s, address, label);
+        if (fd < 0) /* Log on behalf of the unit, like all other failure paths of this function do */
+                return log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
+
+        return fd;
+}
+
 static int socket_open_fds(Socket *s) {
         _cleanup_(mac_selinux_freep) char *label = NULL;
         bool know_label = false;
@@ -1478,18 +1577,7 @@ static int socket_open_fds(Socket *s) {
                                 break;
                         }
 
-                        r = socket_address_listen(
-                                        &p->address,
-                                        SOCK_CLOEXEC|SOCK_NONBLOCK,
-                                        s->backlog,
-                                        s->bind_ipv6_only,
-                                        s->bind_to_device,
-                                        s->reuse_port,
-                                        s->free_bind,
-                                        s->transparent,
-                                        s->directory_mode,
-                                        s->socket_mode,
-                                        label);
+                        r = socket_address_listen_in_cgroup(s, &p->address, label);
                         if (r < 0)
                                 goto rollback;
 
@@ -1773,9 +1861,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
         assert(_pid);
 
         (void) unit_realize_cgroup(UNIT(s));
-        if (s->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(s));
-                s->reset_cpu_usage = false;
+        if (s->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(s));
+                (void) unit_reset_ip_accounting(UNIT(s));
+                s->reset_accounting = false;
         }
 
         r = unit_setup_exec_runtime(UNIT(s));
@@ -1826,27 +1915,23 @@ static int socket_chown(Socket *s, pid_t *_pid) {
         /* We have to resolve the user names out-of-process, hence
          * let's fork here. It's messy, but well, what can we do? */
 
-        pid = fork();
-        if (pid < 0)
-                return -errno;
-
-        if (pid == 0) {
-                SocketPort *p;
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return r;
+        if (r == 0) {
                 uid_t uid = UID_INVALID;
                 gid_t gid = GID_INVALID;
-                int ret;
+                SocketPort *p;
 
-                (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
-                (void) ignore_signals(SIGPIPE, -1);
-                log_forget_fds();
+                /* Child */
 
                 if (!isempty(s->user)) {
                         const char *user = s->user;
 
                         r = get_user_creds(&user, &uid, &gid, NULL, NULL);
                         if (r < 0) {
-                                ret = EXIT_USER;
-                                goto fail_child;
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
+                                _exit(EXIT_USER);
                         }
                 }
 
@@ -1855,8 +1940,8 @@ static int socket_chown(Socket *s, pid_t *_pid) {
 
                         r = get_group_creds(&group, &gid);
                         if (r < 0) {
-                                ret = EXIT_GROUP;
-                                goto fail_child;
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
+                                _exit(EXIT_GROUP);
                         }
                 }
 
@@ -1872,19 +1957,12 @@ static int socket_chown(Socket *s, pid_t *_pid) {
                                 continue;
 
                         if (chown(path, uid, gid) < 0) {
-                                r = -errno;
-                                ret = EXIT_CHOWN;
-                                goto fail_child;
+                                log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
+                                _exit(EXIT_CHOWN);
                         }
                 }
 
-                _exit(0);
-
-        fail_child:
-                log_open();
-                log_error_errno(r, "Failed to chown socket at step %s: %m", exit_status_to_string(ret, EXIT_STATUS_SYSTEMD));
-
-                _exit(ret);
+                _exit(EXIT_SUCCESS);
         }
 
         r = unit_watch_pid(UNIT(s), pid);
@@ -2371,7 +2449,7 @@ static int socket_start(Unit *u) {
                 return r;
 
         s->result = SOCKET_SUCCESS;
-        s->reset_cpu_usage = true;
+        s->reset_accounting = true;
 
         socket_enter_start_pre(s);
         return 1;
@@ -2696,6 +2774,97 @@ _pure_ static bool socket_check_gc(Unit *u) {
         return s->n_connections > 0;
 }
 
+static int socket_accept_do(Socket *s, int fd) {
+        int cfd;
+
+        assert(s);
+        assert(fd >= 0);
+
+        for (;;) {
+                cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
+                if (cfd < 0) {
+                        if (errno == EINTR)
+                                continue;
+
+                        return -errno;
+                }
+
+                break;
+        }
+
+        return cfd;
+}
+
+static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        int cfd, r;
+        pid_t pid;
+
+        assert(s);
+        assert(p);
+        assert(fd >= 0);
+
+        /* Similar to socket_address_listen_in_cgroup(), but for accept() rather than socket(): make sure that any
+         * connection socket is also properly associated with the cgroup. */
+
+        if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
+                goto shortcut;
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0)
+                goto shortcut;
+
+        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m");
+        if (r == 0) {
+                /* Child */
+
+                pair[0] = safe_close(pair[0]);
+
+                cfd = socket_accept_do(s, fd);
+                if (cfd < 0) {
+                        log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(pair[1], cfd, 0);
+                if (r < 0) {
+                        log_unit_error_errno(UNIT(s), r, "Failed to send connection socket to parent: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        pair[1] = safe_close(pair[1]);
+        cfd = receive_one_fd(pair[0], 0);
+
+        /* We synchronously wait for the helper, as it shouldn't be slow */
+        r = wait_for_terminate_and_warn("accept-cgroup-helper", pid, false);
+        if (r < 0) {
+                safe_close(cfd);
+                return r;
+        }
+
+        if (cfd < 0)
+                return log_unit_error_errno(UNIT(s), cfd, "Failed to receive connection socket: %m");
+
+        return cfd;
+
+shortcut:
+        cfd = socket_accept_do(s, fd);
+        if (cfd < 0)
+                return log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+
+        return cfd;
+}
+
 static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
         SocketPort *p = userdata;
         int cfd = -1;
@@ -2721,20 +2890,9 @@ static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
             p->type == SOCKET_SOCKET &&
             socket_address_can_accept(&p->address)) {
 
-                for (;;) {
-
-                        cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
-                        if (cfd < 0) {
-
-                                if (errno == EINTR)
-                                        continue;
-
-                                log_unit_error_errno(UNIT(p->socket), errno, "Failed to accept socket: %m");
-                                goto fail;
-                        }
-
-                        break;
-                }
+                cfd = socket_accept_in_cgroup(p->socket, p, fd);
+                if (cfd < 0)
+                        goto fail;
 
                 socket_apply_socket_options(p->socket, cfd);
         }
index 89f4664510b0a492af520fd23b7eea1d4a90217b..8c263963c482b96c3d3fc8606de858177bbecc02 100644 (file)
@@ -161,7 +161,7 @@ struct Socket {
 
         char *user, *group;
 
-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;
 
         char *fdname;
 
index 9553ee16a83b027931fab1324f8229224b85bd9c..d58f68458baab36175c50e45655f0189f0974fa1 100644 (file)
@@ -602,6 +602,7 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
 
         exec_context_dump(&s->exec_context, f, prefix);
         kill_context_dump(&s->kill_context, f, prefix);
+        cgroup_context_dump(&s->cgroup_context, f, prefix);
 }
 
 static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
@@ -619,9 +620,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
         assert(_pid);
 
         (void) unit_realize_cgroup(UNIT(s));
-        if (s->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(s));
-                s->reset_cpu_usage = false;
+        if (s->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(s));
+                (void) unit_reset_ip_accounting(UNIT(s));
+                s->reset_accounting = false;
         }
 
         r = unit_setup_exec_runtime(UNIT(s));
@@ -860,7 +862,7 @@ static int swap_start(Unit *u) {
                 return r;
 
         s->result = SWAP_SUCCESS;
-        s->reset_cpu_usage = true;
+        s->reset_accounting = true;
 
         swap_enter_activating(s);
         return 1;
index b0ef50f1e8fb6be16077db2b49daa115fe568868..45da63c5e2dc5c87b14faaf01c10f3c2e5ad653d 100644 (file)
@@ -70,7 +70,7 @@ struct Swap {
         bool is_active:1;
         bool just_activated:1;
 
-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;
 
         SwapResult result;
 
index 746572b7ff25db7c0c54028a3a2b49ce9a152777..6b86eac33db0005ce6f0b64267e5da4f43be40a7 100644 (file)
@@ -40,6 +40,7 @@
 #DefaultEnvironment=
 #DefaultCPUAccounting=no
 #DefaultIOAccounting=no
+#DefaultIPAccounting=no
 #DefaultBlockIOAccounting=no
 #DefaultMemoryAccounting=no
 #DefaultTasksAccounting=yes
@@ -60,3 +61,5 @@
 #DefaultLimitNICE=
 #DefaultLimitRTPRIO=
 #DefaultLimitRTTIME=
+#IPAddressAllow=
+#IPAddressDeny=
index f1936bdf0b0b2dae09d5ed74ba62b027cd4e5f73..0fe881436ea31e660d734cef6fb6a1261c8cabf5 100644 (file)
 #include "dropin.h"
 #include "escape.h"
 #include "execute.h"
+#include "fd-util.h"
 #include "fileio-label.h"
 #include "format-util.h"
 #include "id128-util.h"
+#include "io-util.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
 #include "log.h"
@@ -103,6 +105,13 @@ Unit *unit_new(Manager *m, size_t size) {
         u->ref_gid = GID_INVALID;
         u->cpu_usage_last = NSEC_INFINITY;
 
+        u->ip_accounting_ingress_map_fd = -1;
+        u->ip_accounting_egress_map_fd = -1;
+        u->ipv4_allow_map_fd = -1;
+        u->ipv6_allow_map_fd = -1;
+        u->ipv4_deny_map_fd = -1;
+        u->ipv6_deny_map_fd = -1;
+
         RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
         RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
 
@@ -153,9 +162,11 @@ static void unit_init(Unit *u) {
 
                 cc->cpu_accounting = u->manager->default_cpu_accounting;
                 cc->io_accounting = u->manager->default_io_accounting;
+                cc->ip_accounting = u->manager->default_ip_accounting;
                 cc->blockio_accounting = u->manager->default_blockio_accounting;
                 cc->memory_accounting = u->manager->default_memory_accounting;
                 cc->tasks_accounting = u->manager->default_tasks_accounting;
+                cc->ip_accounting = u->manager->default_ip_accounting;
 
                 if (u->type != UNIT_SLICE)
                         cc->tasks_max = u->manager->default_tasks_max;
@@ -610,6 +621,17 @@ void unit_free(Unit *u) {
         while (u->refs)
                 unit_ref_unset(u->refs);
 
+        safe_close(u->ip_accounting_ingress_map_fd);
+        safe_close(u->ip_accounting_egress_map_fd);
+
+        safe_close(u->ipv4_allow_map_fd);
+        safe_close(u->ipv6_allow_map_fd);
+        safe_close(u->ipv4_deny_map_fd);
+        safe_close(u->ipv6_deny_map_fd);
+
+        bpf_program_unref(u->ip_bpf_ingress);
+        bpf_program_unref(u->ip_bpf_egress);
+
         free(u);
 }
 
@@ -1523,6 +1545,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
         log_struct(LOG_INFO,
                    LOG_MESSAGE("%s", buf),
                    LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                    mid,
                    NULL);
 }
@@ -1979,6 +2002,134 @@ void unit_trigger_notify(Unit *u) {
                         UNIT_VTABLE(other)->trigger_notify(other, u);
 }
 
+static int unit_log_resources(Unit *u) {
+
+        struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
+        size_t n_message_parts = 0, n_iovec = 0;
+        char* message_parts[3 + 1], *t;
+        nsec_t nsec = NSEC_INFINITY;
+        CGroupIPAccountingMetric m;
+        size_t i;
+        int r;
+        const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+                [CGROUP_IP_INGRESS_BYTES]   = "IP_METRIC_INGRESS_BYTES",
+                [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
+                [CGROUP_IP_EGRESS_BYTES]    = "IP_METRIC_EGRESS_BYTES",
+                [CGROUP_IP_EGRESS_PACKETS]  = "IP_METRIC_EGRESS_PACKETS",
+        };
+
+        assert(u);
+
+        /* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
+         * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
+         * information and the complete data in structured fields. */
+
+        (void) unit_get_cpu_usage(u, &nsec);
+        if (nsec != NSEC_INFINITY) {
+                char buf[FORMAT_TIMESPAN_MAX] = "";
+
+                /* Format the CPU time for inclusion in the structured log message */
+                if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
+                        r = log_oom();
+                        goto finish;
+                }
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+                /* Format the CPU time for inclusion in the human language message string */
+                format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
+                t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time");
+                if (!t) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                message_parts[n_message_parts++] = t;
+        }
+
+        for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+                char buf[FORMAT_BYTES_MAX] = "";
+                uint64_t value = UINT64_MAX;
+
+                assert(ip_fields[m]);
+
+                (void) unit_get_ip_accounting(u, m, &value);
+                if (value == UINT64_MAX)
+                        continue;
+
+                /* Format IP accounting data for inclusion in the structured log message */
+                if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
+                        r = log_oom();
+                        goto finish;
+                }
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+                /* Format the IP accounting data for inclusion in the human language message string, but only for the
+                 * bytes counters (and not for the packets counters) */
+                if (m == CGROUP_IP_INGRESS_BYTES)
+                        t = strjoin(n_message_parts > 0 ? "received " : "Received ",
+                                    format_bytes(buf, sizeof(buf), value),
+                                    " IP traffic");
+                else if (m == CGROUP_IP_EGRESS_BYTES)
+                        t = strjoin(n_message_parts > 0 ? "sent " : "Sent ",
+                                    format_bytes(buf, sizeof(buf), value),
+                                    " IP traffic");
+                else
+                        continue;
+                if (!t) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                message_parts[n_message_parts++] = t;
+        }
+
+        /* Is there any accounting data available at all? */
+        if (n_iovec == 0) {
+                r = 0;
+                goto finish;
+        }
+
+        if (n_message_parts == 0)
+                t = strjoina("MESSAGE=", u->id, ": Completed");
+        else {
+                _cleanup_free_ char *joined;
+
+                message_parts[n_message_parts] = NULL;
+
+                joined = strv_join(message_parts, ", ");
+                if (!joined) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                t = strjoina("MESSAGE=", u->id, ": ", joined);
+        }
+
+        /* The following four fields are either allocated on the stack or static strings; we hence don't want to free
+         * them, and therefore don't increase n_iovec for them */
+        iovec[n_iovec] = IOVEC_MAKE_STRING(t);
+        iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
+
+        t = strjoina(u->manager->unit_log_field, u->id);
+        iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
+
+        t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
+        iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+
+        log_struct_iovec(LOG_INFO, iovec, n_iovec + 4);
+        r = 0;
+
+finish:
+        for (i = 0; i < n_message_parts; i++)
+                free(message_parts[i]);
+
+        for (i = 0; i < n_iovec; i++)
+                free(iovec[i].iov_base);
+
+        return r;
+
+}
+
 void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) {
         Manager *m;
         bool unexpected;
@@ -2150,28 +2301,33 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
                         manager_send_unit_plymouth(m, u);
 
         } else {
+                /* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it
+                 * anyway. */
 
-                /* We don't care about D-Bus here, since we'll get an
-                 * asynchronous notification for it anyway. */
+                if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
+                    !UNIT_IS_INACTIVE_OR_FAILED(os)
+                    && !MANAGER_IS_RELOADING(m)) {
 
-                if (u->type == UNIT_SERVICE &&
-                    UNIT_IS_INACTIVE_OR_FAILED(ns) &&
-                    !UNIT_IS_INACTIVE_OR_FAILED(os) &&
-                    !MANAGER_IS_RELOADING(m)) {
+                        /* This unit just stopped/failed. */
+                        if (u->type == UNIT_SERVICE) {
 
-                        /* Hmm, if there was no start record written
-                         * write it now, so that we always have a nice
-                         * pair */
-                        if (!u->in_audit) {
-                                manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
+                                /* Hmm, if there was no start record written
+                                 * write it now, so that we always have a nice
+                                 * pair */
+                                if (!u->in_audit) {
+                                        manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
 
-                                if (ns == UNIT_INACTIVE)
-                                        manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
-                        } else
-                                /* Write audit record if we have just finished shutting down */
-                                manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+                                        if (ns == UNIT_INACTIVE)
+                                                manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
+                                } else
+                                        /* Write audit record if we have just finished shutting down */
+                                        manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+
+                                u->in_audit = false;
+                        }
 
-                        u->in_audit = false;
+                        /* Write a log message about consumed resources */
+                        unit_log_resources(u);
                 }
         }
 
@@ -2749,7 +2905,15 @@ static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask)
         return r;
 }
 
+static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IP_INGRESS_BYTES] = "ip-accounting-ingress-bytes",
+        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+        [CGROUP_IP_EGRESS_BYTES] = "ip-accounting-egress-bytes",
+        [CGROUP_IP_EGRESS_PACKETS] = "ip-accounting-egress-packets",
+};
+
 int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
+        CGroupIPAccountingMetric m;
         int r;
 
         assert(u);
@@ -2798,6 +2962,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
         unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
         (void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
         (void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+        unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
 
         if (uid_is_valid(u->ref_uid))
                 unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
@@ -2809,6 +2974,14 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
 
         bus_track_serialize(u->bus_track, f, "ref");
 
+        for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+                uint64_t v;
+
+                r = unit_get_ip_accounting(u, m, &v);
+                if (r >= 0)
+                        unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
+        }
+
         if (serialize_jobs) {
                 if (u->job) {
                         fprintf(f, "job\n");
@@ -2915,6 +3088,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
 
         for (;;) {
                 char line[LINE_MAX], *l, *v;
+                CGroupIPAccountingMetric m;
                 size_t k;
 
                 if (!fgets(line, sizeof(line), f)) {
@@ -3069,6 +3243,20 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                 log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
                         continue;
 
+                } else if (streq(l, "cgroup-bpf-realized")) {
+                        int i;
+
+                        r = safe_atoi(v, &i);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
+                        else
+                                u->cgroup_bpf_state =
+                                        i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
+                                        i > 0 ? UNIT_CGROUP_BPF_ON :
+                                        UNIT_CGROUP_BPF_OFF;
+
+                        continue;
+
                 } else if (streq(l, "ref-uid")) {
                         uid_t uid;
 
@@ -3111,6 +3299,21 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                         continue;
                 }
 
+                /* Check if this is an IP accounting metric serialization field */
+                for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
+                        if (streq(l, ip_accounting_metric_field[m]))
+                                break;
+                if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
+                        uint64_t c;
+
+                        r = safe_atou64(v, &c);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
+                        else
+                                u->ip_accounting_extra[m] = c;
+                        continue;
+                }
+
                 if (unit_can_serialize(u)) {
                         if (rt) {
                                 r = exec_runtime_deserialize_item(u, rt, l, v, fds);
@@ -3137,6 +3340,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
         if (!dual_timestamp_is_set(&u->state_change_timestamp))
                 dual_timestamp_get(&u->state_change_timestamp);
 
+        /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
+         * after we are done. For that we invalidate anything already realized, so that we can realize it again. */
+        unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
+        unit_invalidate_cgroup_bpf(u);
+
         return 0;
 }
 
@@ -4169,6 +4377,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
         log_struct(LOG_NOTICE,
                    "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
                    LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                    LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
                    "WHERE=%s", where,
                    NULL);
@@ -4191,6 +4400,7 @@ int unit_fail_if_symlink(Unit *u, const char* where) {
         log_struct(LOG_ERR,
                    "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
                    LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                    LOG_UNIT_MESSAGE(u, "Mount on symlink %s not allowed.", where),
                    "WHERE=%s", where,
                    NULL);
@@ -4436,3 +4646,43 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
         p->cgroup_path = u->cgroup_path;
         SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
 }
+
+int unit_fork_helper_process(Unit *u, pid_t *ret) {
+        pid_t pid;
+        int r;
+
+        assert(u);
+        assert(ret);
+
+        /* Forks off a helper process and makes sure it is a member of the unit's cgroup. Returns == 0 in the child,
+         * > 0 in the parent, and < 0 (-errno) if fork() fails. On success, *ret is set to the child's PID. */
+
+        (void) unit_realize_cgroup(u);
+
+        pid = fork();
+        if (pid < 0)
+                return -errno;
+
+        if (pid == 0) {
+
+                (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
+                (void) ignore_signals(SIGPIPE, -1);
+
+                log_close();
+                log_open();
+
+                if (u->cgroup_path) {
+                        r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+                        if (r < 0) {
+                                log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", u->cgroup_path);
+                                _exit(EXIT_CGROUP);
+                        }
+                }
+
+                *ret = getpid_cached();
+                return 0;
+        }
+
+        *ret = pid;
+        return 1;
+}
index 4d9751a4069c738cb87b5ffe0070e3c65a98aa97..9aa00b056f1b664d858dd23a61c1e5d7b28390d9 100644 (file)
@@ -28,11 +28,13 @@ typedef struct UnitVTable UnitVTable;
 typedef struct UnitRef UnitRef;
 typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
 
+#include "bpf-program.h"
 #include "condition.h"
 #include "emergency-action.h"
 #include "install.h"
 #include "list.h"
 #include "unit-name.h"
+#include "cgroup.h"
 
 typedef enum KillOperation {
         KILL_TERMINATE,
@@ -70,6 +72,12 @@ struct UnitRef {
         LIST_FIELDS(UnitRef, refs);
 };
 
+typedef enum UnitCGroupBPFState {
+        UNIT_CGROUP_BPF_OFF = 0,
+        UNIT_CGROUP_BPF_ON = 1,
+        UNIT_CGROUP_BPF_INVALIDATED = -1,
+} UnitCGroupBPFState;
+
 struct Unit {
         Manager *manager;
 
@@ -205,6 +213,20 @@ struct Unit {
         CGroupMask cgroup_members_mask;
         int cgroup_inotify_wd;
 
+        /* IP BPF Firewalling/accounting */
+        int ip_accounting_ingress_map_fd;
+        int ip_accounting_egress_map_fd;
+
+        int ipv4_allow_map_fd;
+        int ipv6_allow_map_fd;
+        int ipv4_deny_map_fd;
+        int ipv6_deny_map_fd;
+
+        BPFProgram *ip_bpf_ingress;
+        BPFProgram *ip_bpf_egress;
+
+        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
         /* How to start OnFailure units */
         JobMode on_failure_job_mode;
 
@@ -254,6 +276,8 @@ struct Unit {
         bool cgroup_members_mask_valid:1;
         bool cgroup_subtree_mask_valid:1;
 
+        UnitCGroupBPFState cgroup_bpf_state:2;
+
         bool start_limit_hit:1;
 
         /* Did we already invoke unit_coldplug() for this unit? */
@@ -661,6 +685,8 @@ bool unit_shall_confirm_spawn(Unit *u);
 
 void unit_set_exec_params(Unit *s, ExecParameters *p);
 
+int unit_fork_helper_process(Unit *u, pid_t *ret);
+
 /* Macros which append UNIT= or USER_UNIT= to the message */
 
 #define log_unit_full(unit, level, error, ...)                          \
@@ -684,3 +710,4 @@ void unit_set_exec_params(Unit *s, ExecParameters *p);
 
 #define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
 #define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
+#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string
index 57d1af454a3e424bc3454572c2bfc8e8ed79a98b..96a4d400f89677779d7fd3fac16cf294e2191eaf 100644 (file)
@@ -749,7 +749,7 @@ static int submit_coredump(
                 const char *coredump_filename;
 
                 coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
-                IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename);
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(coredump_filename);
         } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
                 log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
                          coredump_size, arg_external_size_max);
@@ -804,10 +804,10 @@ log:
                 return 0;
         }
 
-        IOVEC_SET_STRING(iovec[n_iovec++], core_message);
+        iovec[n_iovec++] = IOVEC_MAKE_STRING(core_message);
 
         if (truncated)
-                IOVEC_SET_STRING(iovec[n_iovec++], "COREDUMP_TRUNCATED=1");
+                iovec[n_iovec++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
 
         /* Optionally store the entire coredump in the journal */
         if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
@@ -817,11 +817,9 @@ log:
                         /* Store the coredump itself in the journal */
 
                         r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
-                        if (r >= 0) {
-                                iovec[n_iovec].iov_base = coredump_data;
-                                iovec[n_iovec].iov_len = sz;
-                                n_iovec++;
-                        } else
+                        if (r >= 0)
+                                iovec[n_iovec++] = IOVEC_MAKE(coredump_data, sz);
+                        else
                                 log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
                 } else
                         log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
@@ -1070,7 +1068,7 @@ static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char
 
         x = strappend(field, value);
         if (x)
-                IOVEC_SET_STRING(iovec[(*n_iovec)++], x);
+                iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
         return x;
 }
 
@@ -1162,7 +1160,7 @@ static int gather_pid_metadata(
         if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
                 r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
                 if (r > 0)
-                        IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
+                        iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
         }
 
         if (sd_pid_get_slice(pid, &t) >= 0)
@@ -1218,7 +1216,7 @@ static int gather_pid_metadata(
 
         t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL);
         if (t)
-                IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
+                iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
 
         if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
                 set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
@@ -1253,10 +1251,10 @@ static int process_kernel(int argc, char* argv[]) {
 
         n_iovec = n_to_free;
 
-        IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
 
         assert_cc(2 == LOG_CRIT);
-        IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
 
         assert(n_iovec <= ELEMENTSOF(iovec));
 
@@ -1344,15 +1342,15 @@ static int process_backtrace(int argc, char *argv[]) {
                         r = log_oom();
                         goto finish;
                 }
-                IOVEC_SET_STRING(iovec[n_iovec++], message);
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
         } else {
                 for (i = 0; i < importer.iovw.count; i++)
                         iovec[n_iovec++] = importer.iovw.iovec[i];
         }
 
-        IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
         assert_cc(2 == LOG_CRIT);
-        IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
 
         assert(n_iovec <= n_allocated);
 
index 440fba67ca6cc98bb16dd8f973718feb5ffc851f..5d8b394752a74e0102a4fd961b0d3b0cb6e03cda 100644 (file)
@@ -114,9 +114,8 @@ _public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
         if (isempty(buffer+8))
                 return 0;
 
-        zero(iov);
-        IOVEC_SET_STRING(iov[0], buffer);
-        IOVEC_SET_STRING(iov[1], p);
+        iov[0] = IOVEC_MAKE_STRING(buffer);
+        iov[1] = IOVEC_MAKE_STRING(p);
 
         return sd_journal_sendv(iov, 2);
 }
@@ -167,7 +166,7 @@ _printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int
 
                 (void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */
 
-                IOVEC_SET_STRING(iov[i++], buffer);
+                iov[i++] = IOVEC_MAKE_STRING(buffer);
 
                 format = va_arg(ap, char *);
         }
@@ -259,27 +258,19 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
                          * newline, then the size (64bit LE), followed
                          * by the data and a final newline */
 
-                        w[j].iov_base = iov[i].iov_base;
-                        w[j].iov_len = c - (char*) iov[i].iov_base;
-                        j++;
-
-                        IOVEC_SET_STRING(w[j++], "\n");
+                        w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
+                        w[j++] = IOVEC_MAKE_STRING("\n");
 
                         l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
-                        w[j].iov_base = &l[i];
-                        w[j].iov_len = sizeof(uint64_t);
-                        j++;
-
-                        w[j].iov_base = c + 1;
-                        w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
-                        j++;
+                        w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
 
+                        w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
                 } else
                         /* Nothing special? Then just add the line and
                          * append a newline */
                         w[j++] = iov[i];
 
-                IOVEC_SET_STRING(w[j++], "\n");
+                w[j++] = IOVEC_MAKE_STRING("\n");
         }
 
         if (!have_syslog_identifier &&
@@ -291,9 +282,9 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
                  * since everything else is much nicer to retrieve
                  * from the outside. */
 
-                IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
-                IOVEC_SET_STRING(w[j++], program_invocation_short_name);
-                IOVEC_SET_STRING(w[j++], "\n");
+                w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
+                w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
+                w[j++] = IOVEC_MAKE_STRING("\n");
         }
 
         fd = journal_fd();
@@ -380,9 +371,9 @@ static int fill_iovec_perror_and_send(const char *message, int skip, struct iove
                         xsprintf(error, "ERRNO=%i", _saved_errno_);
 
                         assert_cc(3 == LOG_ERR);
-                        IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
-                        IOVEC_SET_STRING(iov[skip+1], buffer);
-                        IOVEC_SET_STRING(iov[skip+2], error);
+                        iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
+                        iov[skip+1] = IOVEC_MAKE_STRING(buffer);
+                        iov[skip+2] = IOVEC_MAKE_STRING(error);
 
                         return sd_journal_sendv(iov, skip + 3);
                 }
@@ -492,20 +483,19 @@ _public_ int sd_journal_printv_with_location(int priority, const char *file, con
          * CODE_FUNC=, hence let's do it manually here. */
         ALLOCA_CODE_FUNC(f, func);
 
-        zero(iov);
-        IOVEC_SET_STRING(iov[0], buffer);
-        IOVEC_SET_STRING(iov[1], p);
-        IOVEC_SET_STRING(iov[2], file);
-        IOVEC_SET_STRING(iov[3], line);
-        IOVEC_SET_STRING(iov[4], f);
+        iov[0] = IOVEC_MAKE_STRING(buffer);
+        iov[1] = IOVEC_MAKE_STRING(p);
+        iov[2] = IOVEC_MAKE_STRING(file);
+        iov[3] = IOVEC_MAKE_STRING(line);
+        iov[4] = IOVEC_MAKE_STRING(f);
 
         return sd_journal_sendv(iov, ELEMENTSOF(iov));
 }
 
 _public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
+        _cleanup_free_ struct iovec *iov = NULL;
         int r, i, j;
         va_list ap;
-        struct iovec *iov = NULL;
         char *f;
 
         va_start(ap, format);
@@ -519,9 +509,9 @@ _public_ int sd_journal_send_with_location(const char *file, const char *line, c
 
         ALLOCA_CODE_FUNC(f, func);
 
-        IOVEC_SET_STRING(iov[0], file);
-        IOVEC_SET_STRING(iov[1], line);
-        IOVEC_SET_STRING(iov[2], f);
+        iov[0] = IOVEC_MAKE_STRING(file);
+        iov[1] = IOVEC_MAKE_STRING(line);
+        iov[2] = IOVEC_MAKE_STRING(f);
 
         r = sd_journal_sendv(iov, i);
 
@@ -529,8 +519,6 @@ finish:
         for (j = 3; j < i; j++)
                 free(iov[j].iov_base);
 
-        free(iov);
-
         return r;
 }
 
@@ -550,9 +538,9 @@ _public_ int sd_journal_sendv_with_location(
 
         ALLOCA_CODE_FUNC(f, func);
 
-        IOVEC_SET_STRING(niov[n++], file);
-        IOVEC_SET_STRING(niov[n++], line);
-        IOVEC_SET_STRING(niov[n++], f);
+        niov[n++] = IOVEC_MAKE_STRING(file);
+        niov[n++] = IOVEC_MAKE_STRING(line);
+        niov[n++] = IOVEC_MAKE_STRING(f);
 
         return sd_journal_sendv(niov, n);
 }
@@ -567,9 +555,9 @@ _public_ int sd_journal_perror_with_location(
 
         ALLOCA_CODE_FUNC(f, func);
 
-        IOVEC_SET_STRING(iov[0], file);
-        IOVEC_SET_STRING(iov[1], line);
-        IOVEC_SET_STRING(iov[2], f);
+        iov[0] = IOVEC_MAKE_STRING(file);
+        iov[1] = IOVEC_MAKE_STRING(line);
+        iov[2] = IOVEC_MAKE_STRING(f);
 
         return fill_iovec_perror_and_send(message, 3, iov);
 }
index 3f4c38ccde1bc3b4e1d6986ca6b4b846371e008c..0904b758c84bb2f99795315cc9bc406cd12bbfa6 100644 (file)
@@ -150,7 +150,7 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         warning(offset, "Unused data (entry_offset==0)");
 
                 if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) {
-                        error(offset, "Bad n_entries: %"PRIu64, o->data.n_entries);
+                        error(offset, "Bad n_entries: %"PRIu64, le64toh(o->data.n_entries));
                         return -EBADMSG;
                 }
 
@@ -187,15 +187,15 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         return -EBADMSG;
                 }
 
-                if (!VALID64(o->data.next_hash_offset) ||
-                    !VALID64(o->data.next_field_offset) ||
-                    !VALID64(o->data.entry_offset) ||
-                    !VALID64(o->data.entry_array_offset)) {
+                if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+                    !VALID64(le64toh(o->data.next_field_offset)) ||
+                    !VALID64(le64toh(o->data.entry_offset)) ||
+                    !VALID64(le64toh(o->data.entry_array_offset))) {
                         error(offset, "Invalid offset (next_hash_offset="OFSfmt", next_field_offset="OFSfmt", entry_offset="OFSfmt", entry_array_offset="OFSfmt,
-                              o->data.next_hash_offset,
-                              o->data.next_field_offset,
-                              o->data.entry_offset,
-                              o->data.entry_array_offset);
+                              le64toh(o->data.next_hash_offset),
+                              le64toh(o->data.next_field_offset),
+                              le64toh(o->data.entry_offset),
+                              le64toh(o->data.entry_array_offset));
                         return -EBADMSG;
                 }
 
@@ -211,12 +211,12 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         return -EBADMSG;
                 }
 
-                if (!VALID64(o->field.next_hash_offset) ||
-                    !VALID64(o->field.head_data_offset)) {
+                if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+                    !VALID64(le64toh(o->field.head_data_offset))) {
                         error(offset,
                               "Invalid offset (next_hash_offset="OFSfmt", head_data_offset="OFSfmt,
-                              o->field.next_hash_offset,
-                              o->field.head_data_offset);
+                              le64toh(o->field.next_hash_offset),
+                              le64toh(o->field.head_data_offset));
                         return -EBADMSG;
                 }
                 break;
@@ -259,12 +259,12 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                 }
 
                 for (i = 0; i < journal_file_entry_n_items(o); i++) {
-                        if (o->entry.items[i].object_offset == 0 ||
-                            !VALID64(o->entry.items[i].object_offset)) {
+                        if (le64toh(o->entry.items[i].object_offset) == 0 ||
+                            !VALID64(le64toh(o->entry.items[i].object_offset))) {
                                 error(offset,
                                       "Invalid entry item (%"PRIu64"/%"PRIu64" offset: "OFSfmt,
                                       i, journal_file_entry_n_items(o),
-                                      o->entry.items[i].object_offset);
+                                      le64toh(o->entry.items[i].object_offset));
                                 return -EBADMSG;
                         }
                 }
@@ -325,10 +325,10 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         return -EBADMSG;
                 }
 
-                if (!VALID64(o->entry_array.next_entry_array_offset)) {
+                if (!VALID64(le64toh(o->entry_array.next_entry_array_offset))) {
                         error(offset,
                               "Invalid object entry array next_entry_array_offset: "OFSfmt,
-                              o->entry_array.next_entry_array_offset);
+                              le64toh(o->entry_array.next_entry_array_offset));
                         return -EBADMSG;
                 }
 
@@ -352,10 +352,10 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         return -EBADMSG;
                 }
 
-                if (!VALID_EPOCH(o->tag.epoch)) {
+                if (!VALID_EPOCH(le64toh(o->tag.epoch))) {
                         error(offset,
                               "Invalid object tag epoch: %"PRIu64,
-                              o->tag.epoch);
+                              le64toh(o->tag.epoch));
                         return -EBADMSG;
                 }
 
@@ -1109,7 +1109,7 @@ int journal_file_verify(
 
                                 debug(p, "Checking tag %"PRIu64"...", le64toh(o->tag.seqnum));
 
-                                rt = f->fss_start_usec + o->tag.epoch * f->fss_interval_usec;
+                                rt = f->fss_start_usec + le64toh(o->tag.epoch) * f->fss_interval_usec;
                                 if (entry_realtime_set && entry_realtime >= rt + f->fss_interval_usec) {
                                         error(p, "tag/entry realtime timestamp out of synchronization");
                                         r = -EBADMSG;
index 38ac3befddc8ca76a2eec531bd7ef908e03fd4db..869c996aefe5616b74ec6b9b575588efc9d7218f 100644 (file)
@@ -383,26 +383,26 @@ static void process_audit_string(Server *s, int type, const char *data, size_t s
                 return;
         }
 
-        IOVEC_SET_STRING(iov[n_iov++], "_TRANSPORT=audit");
+        iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");
 
         sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
                 (usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
-        IOVEC_SET_STRING(iov[n_iov++], source_time_field);
+        iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);
 
         sprintf(type_field, "_AUDIT_TYPE=%i", type);
-        IOVEC_SET_STRING(iov[n_iov++], type_field);
+        iov[n_iov++] = IOVEC_MAKE_STRING(type_field);
 
         sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
-        IOVEC_SET_STRING(iov[n_iov++], id_field);
+        iov[n_iov++] = IOVEC_MAKE_STRING(id_field);
 
         assert_cc(4 == LOG_FAC(LOG_AUTH));
-        IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_FACILITY=4");
-        IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_IDENTIFIER=audit");
+        iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
+        iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");
 
         type_name = audit_type_name_alloca(type);
 
         m = strjoina("MESSAGE=", type_name, " ", p);
-        IOVEC_SET_STRING(iov[n_iov++], m);
+        iov[n_iov++] = IOVEC_MAKE_STRING(m);
 
         z = n_iov;
 
index 5fbcdb43c2ba63623d93a56494771c86974bb55e..039f1a68cef9cde23e3127c0bd65599eca385db1 100644 (file)
@@ -59,9 +59,10 @@ void server_forward_console(
         struct timespec ts;
         char tbuf[sizeof("[] ")-1 + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
         char header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t)];
-        int n = 0, fd;
         _cleanup_free_ char *ident_buf = NULL;
+        _cleanup_close_ int fd = -1;
         const char *tty;
+        int n = 0;
 
         assert(s);
         assert(message);
@@ -75,7 +76,8 @@ void server_forward_console(
                 xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
                          ts.tv_sec,
                          (nsec_t)ts.tv_nsec / 1000);
-                IOVEC_SET_STRING(iovec[n++], tbuf);
+
+                iovec[n++] = IOVEC_MAKE_STRING(tbuf);
         }
 
         /* Second: identifier and PID */
@@ -88,19 +90,19 @@ void server_forward_console(
                 xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
 
                 if (identifier)
-                        IOVEC_SET_STRING(iovec[n++], identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(identifier);
 
-                IOVEC_SET_STRING(iovec[n++], header_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
         } else if (identifier) {
-                IOVEC_SET_STRING(iovec[n++], identifier);
-                IOVEC_SET_STRING(iovec[n++], ": ");
+                iovec[n++] = IOVEC_MAKE_STRING(identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(": ");
         }
 
         /* Fourth: message */
-        IOVEC_SET_STRING(iovec[n++], message);
-        IOVEC_SET_STRING(iovec[n++], "\n");
+        iovec[n++] = IOVEC_MAKE_STRING(message);
+        iovec[n++] = IOVEC_MAKE_STRING("\n");
 
-        tty = s->tty_path ? s->tty_path : "/dev/console";
+        tty = s->tty_path ?: "/dev/console";
 
         /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
          * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
@@ -115,6 +117,4 @@ void server_forward_console(
 
         if (writev(fd, iovec, n) < 0)
                 log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
-
-        safe_close(fd);
 }
index 2be82be5f64c62004616e3a892b17cde8356d729..1bad7cb2eeb191093b33f78adaf07ae23ae89492 100644 (file)
@@ -26,6 +26,7 @@
 #include "libudev.h"
 #include "sd-messages.h"
 
+#include "alloc-util.h"
 #include "escape.h"
 #include "fd-util.h"
 #include "format-util.h"
@@ -45,11 +46,11 @@ void server_forward_kmsg(
         const char *message,
         const struct ucred *ucred) {
 
+        _cleanup_free_ char *ident_buf = NULL;
         struct iovec iovec[5];
         char header_priority[DECIMAL_STR_MAX(priority) + 3],
              header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
         int n = 0;
-        char *ident_buf = NULL;
 
         assert(s);
         assert(priority >= 0);
@@ -68,7 +69,7 @@ void server_forward_kmsg(
 
         /* First: priority field */
         xsprintf(header_priority, "<%i>", priority);
-        IOVEC_SET_STRING(iovec[n++], header_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(header_priority);
 
         /* Second: identifier and PID */
         if (ucred) {
@@ -80,22 +81,20 @@ void server_forward_kmsg(
                 xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
 
                 if (identifier)
-                        IOVEC_SET_STRING(iovec[n++], identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(identifier);
 
-                IOVEC_SET_STRING(iovec[n++], header_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
         } else if (identifier) {
-                IOVEC_SET_STRING(iovec[n++], identifier);
-                IOVEC_SET_STRING(iovec[n++], ": ");
+                iovec[n++] = IOVEC_MAKE_STRING(identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(": ");
         }
 
         /* Fourth: message */
-        IOVEC_SET_STRING(iovec[n++], message);
-        IOVEC_SET_STRING(iovec[n++], "\n");
+        iovec[n++] = IOVEC_MAKE_STRING(message);
+        iovec[n++] = IOVEC_MAKE_STRING("\n");
 
         if (writev(s->dev_kmsg_fd, iovec, n) < 0)
                 log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
-
-        free(ident_buf);
 }
 
 static bool is_us(const char *pid) {
@@ -111,11 +110,11 @@ static bool is_us(const char *pid) {
 
 static void dev_kmsg_record(Server *s, const char *p, size_t l) {
         struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
-        char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
+        _cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
         int priority, r;
         unsigned n = 0, z = 0, j;
         unsigned long long usec;
-        char *identifier = NULL, *pid = NULL, *e, *f, *k;
+        char *e, *f, *k;
         uint64_t serial;
         size_t pl;
         char *kernel_device = NULL;
@@ -216,7 +215,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                 if (startswith(m, "_KERNEL_DEVICE="))
                         kernel_device = m + 15;
 
-                IOVEC_SET_STRING(iovec[n++], m);
+                iovec[n++] = IOVEC_MAKE_STRING(m);
                 z++;
 
                 l -= (e - k) + 1;
@@ -236,7 +235,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                         if (g) {
                                 b = strappend("_UDEV_DEVNODE=", g);
                                 if (b) {
-                                        IOVEC_SET_STRING(iovec[n++], b);
+                                        iovec[n++] = IOVEC_MAKE_STRING(b);
                                         z++;
                                 }
                         }
@@ -245,7 +244,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                         if (g) {
                                 b = strappend("_UDEV_SYSNAME=", g);
                                 if (b) {
-                                        IOVEC_SET_STRING(iovec[n++], b);
+                                        iovec[n++] = IOVEC_MAKE_STRING(b);
                                         z++;
                                 }
                         }
@@ -261,7 +260,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                                 if (g) {
                                         b = strappend("_UDEV_DEVLINK=", g);
                                         if (b) {
-                                                IOVEC_SET_STRING(iovec[n++], b);
+                                                iovec[n++] = IOVEC_MAKE_STRING(b);
                                                 z++;
                                         }
                                 }
@@ -274,18 +273,18 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
         }
 
         if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
-                IOVEC_SET_STRING(iovec[n++], source_time);
+                iovec[n++] = IOVEC_MAKE_STRING(source_time);
 
-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
 
         if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
-                IOVEC_SET_STRING(iovec[n++], syslog_priority);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
 
         if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
-                IOVEC_SET_STRING(iovec[n++], syslog_facility);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
 
         if ((priority & LOG_FACMASK) == LOG_KERN)
-                IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
+                iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
         else {
                 pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
 
@@ -297,33 +296,24 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                 if (identifier) {
                         syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
                         if (syslog_identifier)
-                                IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+                                iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
                 }
 
                 if (pid) {
                         syslog_pid = strappend("SYSLOG_PID=", pid);
                         if (syslog_pid)
-                                IOVEC_SET_STRING(iovec[n++], syslog_pid);
+                                iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
                 }
         }
 
         if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
-                IOVEC_SET_STRING(iovec[n++], message);
+                iovec[n++] = IOVEC_MAKE_STRING(message);
 
         server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);
 
 finish:
         for (j = 0; j < z; j++)
                 free(iovec[j].iov_base);
-
-        free(message);
-        free(syslog_priority);
-        free(syslog_identifier);
-        free(syslog_pid);
-        free(syslog_facility);
-        free(source_time);
-        free(identifier);
-        free(pid);
 }
 
 static int server_read_dev_kmsg(Server *s) {
index 23afe59bd532cd37d1a8fd5511448d89808a8747..554f91460d45f0597cb4bfb3682cd5ede236e343 100644 (file)
@@ -282,7 +282,7 @@ static int server_process_entry(
         }
 
         tn = n++;
-        IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
+        iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
         entry_size += strlen("_TRANSPORT=journal");
 
         if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
index 27c2571cfc9ff11b37c1eb42b537fc0cdbe7d457..2d51be7c89ec0cdd48e4c8eda12329ce26a68eaa 100644 (file)
@@ -724,14 +724,14 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                 char *k;                                                \
                 k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
                 sprintf(k, field "=" format, value);                    \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
         }
 
 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
         if (!isempty(value)) {                                          \
                 char *k;                                                \
                 k = strjoina(field "=", value);                         \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
         }
 
 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
@@ -739,7 +739,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                 char *k;                                                \
                 k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
                 sd_id128_to_string(value, stpcpy(k, field "="));        \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
         }
 
 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
@@ -747,7 +747,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                 char *k;                                                \
                 k = newa(char, strlen(field "=") + value_size + 1);     \
                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
         }                                                               \
 
 static void dispatch_message_real(
@@ -826,20 +826,20 @@ static void dispatch_message_real(
 
         if (tv) {
                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
-                IOVEC_SET_STRING(iovec[n++], source_time);
+                iovec[n++] = IOVEC_MAKE_STRING(source_time);
         }
 
         /* Note that strictly speaking storing the boot id here is
          * redundant since the entry includes this in-line
          * anyway. However, we need this indexed, too. */
         if (!isempty(s->boot_id_field))
-                IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
+                iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 
         if (!isempty(s->machine_id_field))
-                IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
+                iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 
         if (!isempty(s->hostname_field))
-                IOVEC_SET_STRING(iovec[n++], s->hostname_field);
+                iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 
         assert(n <= m);
 
@@ -870,15 +870,15 @@ void server_driver_message(Server *s, const char *message_id, const char *format
         assert(format);
 
         assert_cc(3 == LOG_FAC(LOG_DAEMON));
-        IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
-        IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
+        iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
+        iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
 
-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
         assert_cc(6 == LOG_INFO);
-        IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
+        iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
 
         if (message_id)
-                IOVEC_SET_STRING(iovec[n++], message_id);
+                iovec[n++] = IOVEC_MAKE_STRING(message_id);
         m = n;
 
         va_start(ap, format);
@@ -899,8 +899,8 @@ void server_driver_message(Server *s, const char *message_id, const char *format
                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
 
                 n = 3;
-                IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
-                IOVEC_SET_STRING(iovec[n++], buf);
+                iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
+                iovec[n++] = IOVEC_MAKE_STRING(buf);
                 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
         }
 }
index a44c540f67e0d2da050cf778c305883eaac6df64..d0b95ea02ccbc2ee60eec884e8133cd76ba36b1e 100644 (file)
@@ -282,22 +282,21 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
         if (s->server->forward_to_wall)
                 server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
 
-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
-
-        IOVEC_SET_STRING(iovec[n++], s->id_field);
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
+        iovec[n++] = IOVEC_MAKE_STRING(s->id_field);
 
         syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
-        IOVEC_SET_STRING(iovec[n++], syslog_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
 
         if (priority & LOG_FACMASK) {
                 xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
-                IOVEC_SET_STRING(iovec[n++], syslog_facility);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
         }
 
         if (s->identifier) {
                 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
                 if (syslog_identifier)
-                        IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
         }
 
         if (line_break != LINE_BREAK_NEWLINE) {
@@ -309,12 +308,12 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
                 c =     line_break == LINE_BREAK_NUL ?      "_LINE_BREAK=nul" :
                         line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
                                                             "_LINE_BREAK=eof";
-                IOVEC_SET_STRING(iovec[n++], c);
+                iovec[n++] = IOVEC_MAKE_STRING(c);
         }
 
         message = strappend("MESSAGE=", p);
         if (message)
-                IOVEC_SET_STRING(iovec[n++], message);
+                iovec[n++] = IOVEC_MAKE_STRING(message);
 
         if (s->context)
                 (void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);
index a03c36df34764ee320301e5dfb0538b314b820db..fa597e47a234bc7d9ef39378eedbe83c4391d4fe 100644 (file)
@@ -124,7 +124,7 @@ static void forward_syslog_raw(Server *s, int priority, const char *buffer, cons
         if (LOG_PRI(priority) > s->max_level_syslog)
                 return;
 
-        IOVEC_SET_STRING(iovec, buffer);
+        iovec = IOVEC_MAKE_STRING(buffer);
         forward_syslog_iovec(s, &iovec, 1, ucred, tv);
 }
 
@@ -135,7 +135,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
         int n = 0;
         time_t t;
         struct tm *tm;
-        char *ident_buf = NULL;
+        _cleanup_free_ char *ident_buf = NULL;
 
         assert(s);
         assert(priority >= 0);
@@ -147,7 +147,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
 
         /* First: priority field */
         xsprintf(header_priority, "<%i>", priority);
-        IOVEC_SET_STRING(iovec[n++], header_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(header_priority);
 
         /* Second: timestamp */
         t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
@@ -156,7 +156,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
                 return;
         if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
                 return;
-        IOVEC_SET_STRING(iovec[n++], header_time);
+        iovec[n++] = IOVEC_MAKE_STRING(header_time);
 
         /* Third: identifier and PID */
         if (ucred) {
@@ -168,20 +168,18 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
                 xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
 
                 if (identifier)
-                        IOVEC_SET_STRING(iovec[n++], identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(identifier);
 
-                IOVEC_SET_STRING(iovec[n++], header_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
         } else if (identifier) {
-                IOVEC_SET_STRING(iovec[n++], identifier);
-                IOVEC_SET_STRING(iovec[n++], ": ");
+                iovec[n++] = IOVEC_MAKE_STRING(identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(": ");
         }
 
         /* Fourth: message */
-        IOVEC_SET_STRING(iovec[n++], message);
+        iovec[n++] = IOVEC_MAKE_STRING(message);
 
         forward_syslog_iovec(s, iovec, n, ucred, tv);
-
-        free(ident_buf);
 }
 
 int syslog_fixup_facility(int priority) {
@@ -353,29 +351,29 @@ void server_process_syslog_message(
         if (s->forward_to_wall)
                 server_forward_wall(s, priority, identifier, buf, ucred);
 
-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");
 
         xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
-        IOVEC_SET_STRING(iovec[n++], syslog_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
 
         if (priority & LOG_FACMASK) {
                 xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
-                IOVEC_SET_STRING(iovec[n++], syslog_facility);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
         }
 
         if (identifier) {
                 syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier);
-                IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
         }
 
         if (pid) {
                 syslog_pid = strjoina("SYSLOG_PID=", pid);
-                IOVEC_SET_STRING(iovec[n++], syslog_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
         }
 
         message = strjoina("MESSAGE=", buf);
         if (message)
-                IOVEC_SET_STRING(iovec[n++], message);
+                iovec[n++] = IOVEC_MAKE_STRING(message);
 
         if (ucred && pid_is_valid(ucred->pid)) {
                 r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
index 6f0e51720a118ddca387b20739fc380bd015a1ad..1ab569765d8caaa5801334376bae770fe6310447 100644 (file)
@@ -471,7 +471,7 @@ static int lease_parse_routes(
                 struct sd_dhcp_route *route = *routes + *routes_size;
                 int r;
 
-                r = in_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
+                r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
                 if (r < 0) {
                         log_debug("Failed to determine destination prefix length from class based IP, ignoring");
                         continue;
@@ -1253,7 +1253,7 @@ int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
         address.s_addr = lease->address;
 
         /* fall back to the default subnet masks based on address class */
-        r = in_addr_default_subnet_mask(&address, &mask);
+        r = in4_addr_default_subnet_mask(&address, &mask);
         if (r < 0)
                 return r;
 
index 5a59c377f8c210d30a58907937dbcfecf4e6d617..727cc16ab569b3e33c689fe8c178ba8fe27f57bd 100644 (file)
@@ -56,7 +56,7 @@ int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *addres
         assert_return(address->s_addr != INADDR_ANY, -EINVAL);
         assert_return(prefixlen <= 32, -ERANGE);
 
-        assert_se(in_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
+        assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
         netmask = netmask_addr.s_addr;
 
         server_off = be32toh(address->s_addr & ~netmask);
index 7f536b4ba9a5574bd1b61c73e4c7276473a434b4..8f625975fbb33aa1826921eccb940982eb366ebd 100644 (file)
@@ -768,7 +768,7 @@ int config_parse_address(const char *unit,
         }
 
         if (!e && f == AF_INET) {
-                r = in_addr_default_prefixlen(&buffer.in, &n->prefixlen);
+                r = in4_addr_default_prefixlen(&buffer.in, &n->prefixlen);
                 if (r < 0) {
                         log_syntax(unit, LOG_ERR, filename, line, r, "Prefix length not specified, and a default one can not be deduced for '%s', ignoring assignment", address);
                         return 0;
index 9c69979c7bbbfaa75dbd1305be995fdf4d7aa160..3b5bacd13bfaf65eba730da0f632a68da820be72 100644 (file)
@@ -237,7 +237,7 @@ static int dhcp_lease_lost(Link *link) {
                 if (r >= 0) {
                         r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
                         if (r >= 0)
-                                prefixlen = in_addr_netmask_to_prefixlen(&netmask);
+                                prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
 
                         address->family = AF_INET;
                         address->in_addr.in = addr;
@@ -316,7 +316,7 @@ static int dhcp4_update_address(Link *link,
         assert(netmask);
         assert(lifetime);
 
-        prefixlen = in_addr_netmask_to_prefixlen(netmask);
+        prefixlen = in4_addr_netmask_to_prefixlen(netmask);
 
         r = address_new(&addr);
         if (r < 0)
@@ -406,7 +406,7 @@ static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
         if (r < 0)
                 return log_link_error_errno(link, r, "DHCP error: No netmask: %m");
 
-        prefixlen = in_addr_netmask_to_prefixlen(&netmask);
+        prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
 
         r = sd_dhcp_lease_get_router(lease, &gateway);
         if (r < 0 && r != -ENODATA)
index cf804ed1b356b9840cca4d457c12b438903be427..5ba09a994a96114c81c91e7887d3bd63959a734d 100644 (file)
@@ -1616,6 +1616,27 @@ static int setup_dev_console(const char *dest, const char *console) {
         return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
 }
 
+static int setup_keyring(void) {
+        key_serial_t keyring;
+
+        /* Allocate a new session keyring for the container. This makes sure the keyring of the session systemd-nspawn
+         * was invoked from doesn't leak into the container. Note that by default we block keyctl() and request_key()
+         * anyway via seccomp so doing this operation isn't strictly necessary, but in case people explicitly whitelist
+         * these system calls let's make sure we don't leak anything into the container. */
+
+        keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+        if (keyring == -1) {
+                if (errno == ENOSYS)
+                        log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
+                else if (IN_SET(errno, EACCES, EPERM))
+                        log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
+                else
+                        return log_error_errno(errno, "Setting up kernel keyring failed: %m");
+        }
+
+        return 0;
+}
+
 static int setup_kmsg(const char *dest, int kmsg_socket) {
         const char *from, *to;
         _cleanup_umask_ mode_t u;
@@ -2642,6 +2663,10 @@ static int outer_child(
         if (r < 0)
                 return r;
 
+        r = setup_keyring();
+        if (r < 0)
+                return r;
+
         r = setup_seccomp(arg_caps_retain, arg_syscall_whitelist, arg_syscall_blacklist);
         if (r < 0)
                 return r;
index d6b119987c0b33481ce27c5349137f2dce9cfe4b..d216df465f45e8222a665be21d472f05d2f8a1dd 100644 (file)
@@ -28,6 +28,8 @@
 #include "errno-list.h"
 #include "escape.h"
 #include "hashmap.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
 #include "list.h"
 #include "locale-util.h"
 #include "mount-util.h"
@@ -66,6 +68,31 @@ int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
                         &u->job_path);
 }
 
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+        int r;
+
+        assert(m);
+        assert(prefix);
+
+        r = sd_bus_message_open_container(m, 'r', "iayu");
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append(m, "i", family);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append(m, "u", prefixlen);
+        if (r < 0)
+                return r;
+
+        return sd_bus_message_close_container(m);
+}
+
 int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignment) {
         const char *eq, *field;
         UnitDependency dep;
@@ -207,13 +234,13 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                 r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur);
 
         } else if (STR_IN_SET(field,
-                              "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting",
-                              "SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
-                              "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "RemainAfterExit",
-                              "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
-                              "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
-                              "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
+                              "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
+                              "TasksAccounting", "IPAccounting", "SendSIGHUP", "SendSIGKILL", "WakeSystem",
+                              "DefaultDependencies", "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
+                              "RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+                              "NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse",
+                              "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
+                              "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
                               "CPUSchedulingResetOnFork", "LockPersonality")) {
 
                 r = parse_boolean(eq);
@@ -433,6 +460,98 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                         r = sd_bus_message_append(m, "v", "a(st)", 1, path, u);
                 }
 
+        } else if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
+
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "v", "a(iayu)", 0);
+                else {
+                        unsigned char prefixlen;
+                        union in_addr_union prefix = {};
+                        int family;
+
+                        r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        r = sd_bus_message_open_container(m, 'a', "(iayu)");
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        if (streq(eq, "any")) {
+                                /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else if (is_localhost(eq)) {
+                                /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+                                prefix.in.s_addr = htobe32(0x7f000000);
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+                                if (r < 0)
+                                        return r;
+
+                        } else if (streq(eq, "link-local")) {
+
+                                /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+                                prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                prefix.in6 = (struct in6_addr) {
+                                        .__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
+                                };
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else if (streq(eq, "multicast")) {
+
+                                /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+                                prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                prefix.in6 = (struct in6_addr) {
+                                        .__in6_u.__u6_addr32[0] = htobe32(0xff000000)
+                                };
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else {
+                                r = in_addr_prefix_from_string_auto(eq, &family, &prefix, &prefixlen);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to parse IP address prefix: %s", eq);
+
+                                r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+                        }
+
+                        r = sd_bus_message_close_container(m);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        r = sd_bus_message_close_container(m);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                }
+
         } else if (streq(field, "CPUSchedulingPolicy")) {
                 int n;
 
index 952fc48c4590e24d7e1102a3a6b40e42d397145e..3a6e987ee18b94e522235472415ef7e21f7b6497 100644 (file)
@@ -72,7 +72,7 @@ static int entry_fill_basics(
         }
         if (source) {
                 entry->ip.src = source->in;
-                in_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+                in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
         }
 
         if (out_interface) {
@@ -84,7 +84,7 @@ static int entry_fill_basics(
         }
         if (destination) {
                 entry->ip.dst = destination->in;
-                in_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+                in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
         }
 
         return 0;
index e14a869321e0e1d59f07672ac0ae9ca5927dc18a..0a8628c26ade3fe08fed7d2dcfe8205595be4724 100644 (file)
@@ -847,7 +847,6 @@ static int find_symlinks_fd(
 
                                 /* Check if symlink name is in the set of names used by [Install] */
                                 q = is_symlink_with_known_name(i, de->d_name);
-                                log_info("is_symlink_with_known_name(%s, %s) → %d", i->name, de->d_name, q);
                                 if (q < 0)
                                         return q;
                                 if (q > 0)
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h
new file mode 100644 (file)
index 0000000..8477b44
--- /dev/null
@@ -0,0 +1,673 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64      0x07    /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW         0x18    /* double word */
+#define BPF_XADD       0xc0    /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV                0xb0    /* mov reg to reg */
+#define BPF_ARSH       0xc0    /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END                0xd0    /* flags for endianness conversion: */
+#define BPF_TO_LE      0x00    /* convert to little-endian */
+#define BPF_TO_BE      0x08    /* convert to big-endian */
+#define BPF_FROM_LE    BPF_TO_LE
+#define BPF_FROM_BE    BPF_TO_BE
+
+#define BPF_JNE                0x50    /* jump != */
+#define BPF_JSGT       0x60    /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE       0x70    /* SGE is signed '>=', GE in x86 */
+#define BPF_CALL       0x80    /* function call */
+#define BPF_EXIT       0x90    /* function return */
+
+/* Register numbers */
+enum {
+        BPF_REG_0 = 0,
+        BPF_REG_1,
+        BPF_REG_2,
+        BPF_REG_3,
+        BPF_REG_4,
+        BPF_REG_5,
+        BPF_REG_6,
+        BPF_REG_7,
+        BPF_REG_8,
+        BPF_REG_9,
+        BPF_REG_10,
+        __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG    __MAX_BPF_REG
+
+struct bpf_insn {
+        __u8   code;           /* opcode */
+        __u8   dst_reg:4;      /* dest register */
+        __u8   src_reg:4;      /* source register */
+        __s16  off;            /* signed offset */
+        __s32  imm;            /* signed immediate constant */
+};
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+        __u32  prefixlen;      /* up to 32 for AF_INET, 128 for AF_INET6 */
+        __u8   data[0];        /* Arbitrary size */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+        BPF_MAP_CREATE,
+        BPF_MAP_LOOKUP_ELEM,
+        BPF_MAP_UPDATE_ELEM,
+        BPF_MAP_DELETE_ELEM,
+        BPF_MAP_GET_NEXT_KEY,
+        BPF_PROG_LOAD,
+        BPF_OBJ_PIN,
+        BPF_OBJ_GET,
+        BPF_PROG_ATTACH,
+        BPF_PROG_DETACH,
+        BPF_PROG_TEST_RUN,
+};
+
+enum bpf_map_type {
+        BPF_MAP_TYPE_UNSPEC,
+        BPF_MAP_TYPE_HASH,
+        BPF_MAP_TYPE_ARRAY,
+        BPF_MAP_TYPE_PROG_ARRAY,
+        BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+        BPF_MAP_TYPE_PERCPU_HASH,
+        BPF_MAP_TYPE_PERCPU_ARRAY,
+        BPF_MAP_TYPE_STACK_TRACE,
+        BPF_MAP_TYPE_CGROUP_ARRAY,
+        BPF_MAP_TYPE_LRU_HASH,
+        BPF_MAP_TYPE_LRU_PERCPU_HASH,
+        BPF_MAP_TYPE_LPM_TRIE,
+        BPF_MAP_TYPE_ARRAY_OF_MAPS,
+        BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
+enum bpf_prog_type {
+        BPF_PROG_TYPE_UNSPEC,
+        BPF_PROG_TYPE_SOCKET_FILTER,
+        BPF_PROG_TYPE_KPROBE,
+        BPF_PROG_TYPE_SCHED_CLS,
+        BPF_PROG_TYPE_SCHED_ACT,
+        BPF_PROG_TYPE_TRACEPOINT,
+        BPF_PROG_TYPE_XDP,
+        BPF_PROG_TYPE_PERF_EVENT,
+        BPF_PROG_TYPE_CGROUP_SKB,
+        BPF_PROG_TYPE_CGROUP_SOCK,
+        BPF_PROG_TYPE_LWT_IN,
+        BPF_PROG_TYPE_LWT_OUT,
+        BPF_PROG_TYPE_LWT_XMIT,
+};
+
+enum bpf_attach_type {
+        BPF_CGROUP_INET_INGRESS,
+        BPF_CGROUP_INET_EGRESS,
+        BPF_CGROUP_INET_SOCK_CREATE,
+        __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE   (1U << 0)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+#define BPF_PSEUDO_MAP_FD      1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY                0 /* create new element or update existing */
+#define BPF_NOEXIST    1 /* create new element if it didn't exist */
+#define BPF_EXIST      2 /* update existing element */
+
+#define BPF_F_NO_PREALLOC      (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU    (1U << 1)
+
+union bpf_attr {
+        struct { /* anonymous struct used by BPF_MAP_CREATE command */
+                __u32  map_type;       /* one of enum bpf_map_type */
+                __u32  key_size;       /* size of key in bytes */
+                __u32  value_size;     /* size of value in bytes */
+                __u32  max_entries;    /* max number of entries in a map */
+                __u32  map_flags;      /* prealloc or not */
+                __u32  inner_map_fd;   /* fd pointing to the inner map */
+        };
+
+        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+                __u32          map_fd;
+                __aligned_u64  key;
+                union {
+                        __aligned_u64 value;
+                        __aligned_u64 next_key;
+                };
+                __u64          flags;
+        };
+
+        struct { /* anonymous struct used by BPF_PROG_LOAD command */
+                __u32          prog_type;      /* one of enum bpf_prog_type */
+                __u32          insn_cnt;
+                __aligned_u64  insns;
+                __aligned_u64  license;
+                __u32          log_level;      /* verbosity level of verifier */
+                __u32          log_size;       /* size of user buffer */
+                __aligned_u64  log_buf;        /* user supplied buffer */
+                __u32          kern_version;   /* checked when prog_type=kprobe */
+                __u32          prog_flags;
+        };
+
+        struct { /* anonymous struct used by BPF_OBJ_* commands */
+                __aligned_u64  pathname;
+                __u32          bpf_fd;
+        };
+
+        struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+                __u32          target_fd;      /* container object to attach to */
+                __u32          attach_bpf_fd;  /* eBPF program to attach */
+                __u32          attach_type;
+                __u32          attach_flags;
+        };
+
+        struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+                __u32          prog_fd;
+                __u32          retval;
+                __u32          data_size_in;
+                __u32          data_size_out;
+                __aligned_u64  data_in;
+                __aligned_u64  data_out;
+                __u32          repeat;
+                __u32          duration;
+        } test;
+} __attribute__((aligned(8)));
+
+/* BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(&map, &key)
+ *     Return: Map value or NULL
+ *
+ * int bpf_map_update_elem(&map, &key, &value, flags)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_map_delete_elem(&map, &key)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_probe_read(void *dst, int size, void *src)
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ *     Return: current ktime
+ *
+ * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
+ *     Return: length of buffer written or negative error
+ *
+ * u32 bpf_prandom_u32(void)
+ *     Return: random value
+ *
+ * u32 bpf_raw_smp_processor_id(void)
+ *     Return: SMP processor ID
+ *
+ * int bpf_skb_store_bytes(skb, offset, from, len, flags)
+ *     store bytes into packet
+ *     @skb: pointer to skb
+ *     @offset: offset within packet from skb->mac_header
+ *     @from: pointer where to copy bytes from
+ *     @len: number of bytes to store into packet
+ *     @flags: bit 0 - if true, recompute skb->csum
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_l3_csum_replace(skb, offset, from, to, flags)
+ *     recompute IP checksum
+ *     @skb: pointer to skb
+ *     @offset: offset within packet where IP checksum is located
+ *     @from: old value of header field
+ *     @to: new value of header field
+ *     @flags: bits 0-3 - size of header field
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_l4_csum_replace(skb, offset, from, to, flags)
+ *     recompute TCP/UDP checksum
+ *     @skb: pointer to skb
+ *     @offset: offset within packet where TCP/UDP checksum is located
+ *     @from: old value of header field
+ *     @to: new value of header field
+ *     @flags: bits 0-3 - size of header field
+ *             bit 4 - is pseudo header
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_tail_call(ctx, prog_array_map, index)
+ *     jump into another BPF program
+ *     @ctx: context pointer passed to next program
+ *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+ *     @index: index inside array that selects specific program to run
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_clone_redirect(skb, ifindex, flags)
+ *     redirect to another netdev
+ *     @skb: pointer to skb
+ *     @ifindex: ifindex of the net device
+ *     @flags: bit 0 - if set, redirect to ingress instead of egress
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ *     Return: current->tgid << 32 | current->pid
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ *     Return: current_gid << 32 | current_uid
+ *
+ * int bpf_get_current_comm(char *buf, int size_of_buf)
+ *     stores current->comm into buf
+ *     Return: 0 on success or negative error
+ *
+ * u32 bpf_get_cgroup_classid(skb)
+ *     retrieve a proc's classid
+ *     @skb: pointer to skb
+ *     Return: classid if != 0
+ *
+ * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_vlan_pop(skb)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_get_tunnel_key(skb, key, size, flags)
+ * int bpf_skb_set_tunnel_key(skb, key, size, flags)
+ *     retrieve or populate tunnel metadata
+ *     @skb: pointer to skb
+ *     @key: pointer to 'struct bpf_tunnel_key'
+ *     @size: size of 'struct bpf_tunnel_key'
+ *     @flags: room for future extensions
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_perf_event_read(&map, index)
+ *     Return: Number events read or error code
+ *
+ * int bpf_redirect(ifindex, flags)
+ *     redirect to another netdev
+ *     @ifindex: ifindex of the net device
+ *     @flags: bit 0 - if set, redirect to ingress instead of egress
+ *             other bits - reserved
+ *     Return: TC_ACT_REDIRECT
+ *
+ * u32 bpf_get_route_realm(skb)
+ *     retrieve a dst's tclassid
+ *     @skb: pointer to skb
+ *     Return: realm if != 0
+ *
+ * int bpf_perf_event_output(ctx, map, index, data, size)
+ *     output perf raw sample
+ *     @ctx: struct pt_regs*
+ *     @map: pointer to perf_event_array map
+ *     @index: index of event in the map
+ *     @data: data on stack to be output as raw data
+ *     @size: size of data
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_get_stackid(ctx, map, flags)
+ *     walk user or kernel stack and return id
+ *     @ctx: struct pt_regs*
+ *     @map: pointer to stack_trace map
+ *     @flags: bits 0-7 - number of stack frames to skip
+ *             bit 8 - collect user stack instead of kernel
+ *             bit 9 - compare stacks by hash only
+ *             bit 10 - if two different stacks hash into the same stackid
+ *                      discard old
+ *             other bits - reserved
+ *     Return: >= 0 stackid on success or negative error
+ *
+ * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
+ *     calculate csum diff
+ *     @from: raw from buffer
+ *     @from_size: length of from buffer
+ *     @to: raw to buffer
+ *     @to_size: length of to buffer
+ *     @seed: optional seed
+ *     Return: csum result or negative error code
+ *
+ * int bpf_skb_get_tunnel_opt(skb, opt, size)
+ *     retrieve tunnel options metadata
+ *     @skb: pointer to skb
+ *     @opt: pointer to raw tunnel option data
+ *     @size: size of @opt
+ *     Return: option size
+ *
+ * int bpf_skb_set_tunnel_opt(skb, opt, size)
+ *     populate tunnel options metadata
+ *     @skb: pointer to skb
+ *     @opt: pointer to raw tunnel option data
+ *     @size: size of @opt
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_proto(skb, proto, flags)
+ *     Change protocol of the skb. Currently supported is v4 -> v6,
+ *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
+ *     program is expected to fill the new headers via skb_store_bytes
+ *     and lX_csum_replace.
+ *     @skb: pointer to skb
+ *     @proto: new skb->protocol type
+ *     @flags: reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_type(skb, type)
+ *     Change packet type of skb.
+ *     @skb: pointer to skb
+ *     @type: new skb->pkt_type type
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_under_cgroup(skb, map, index)
+ *     Check cgroup2 membership of skb
+ *     @skb: pointer to skb
+ *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ *     @index: index of the cgroup in the bpf_map
+ *     Return:
+ *       == 0 skb failed the cgroup2 descendant test
+ *       == 1 skb succeeded the cgroup2 descendant test
+ *        < 0 error
+ *
+ * u32 bpf_get_hash_recalc(skb)
+ *     Retrieve and possibly recalculate skb->hash.
+ *     @skb: pointer to skb
+ *     Return: hash
+ *
+ * u64 bpf_get_current_task(void)
+ *     Returns current task_struct
+ *     Return: current
+ *
+ * int bpf_probe_write_user(void *dst, void *src, int len)
+ *     safely attempt to write to a location
+ *     @dst: destination address in userspace
+ *     @src: source address on stack
+ *     @len: number of bytes to copy
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_current_task_under_cgroup(map, index)
+ *     Check cgroup2 membership of current task
+ *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ *     @index: index of the cgroup in the bpf_map
+ *     Return:
+ *       == 0 current failed the cgroup2 descendant test
+ *       == 1 current succeeded the cgroup2 descendant test
+ *        < 0 error
+ *
+ * int bpf_skb_change_tail(skb, len, flags)
+ *     The helper will resize the skb to the given new size, to be used f.e.
+ *     with control messages.
+ *     @skb: pointer to skb
+ *     @len: new skb length
+ *     @flags: reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_pull_data(skb, len)
+ *     The helper will pull in non-linear data in case the skb is non-linear
+ *     and not all of len are part of the linear section. Only needed for
+ *     read/write with direct packet access.
+ *     @skb: pointer to skb
+ *     @len: len to make read/writeable
+ *     Return: 0 on success or negative error
+ *
+ * s64 bpf_csum_update(skb, csum)
+ *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+ *     @skb: pointer to skb
+ *     @csum: csum to add
+ *     Return: csum on success or negative error
+ *
+ * void bpf_set_hash_invalid(skb)
+ *     Invalidate current skb->hash.
+ *     @skb: pointer to skb
+ *
+ * int bpf_get_numa_node_id()
+ *     Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head(skb, len, flags)
+ *     Grows headroom of skb and adjusts MAC header offset accordingly.
+ *     Will extend/reallocate as required automatically.
+ *     May change skb data pointer and will thus invalidate any check
+ *     performed for direct packet access.
+ *     @skb: pointer to skb
+ *     @len: length of header to be pushed in front
+ *     @flags: Flags (unused for now)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ *     Adjust the xdp_md.data by delta
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A positive/negative integer to be added to xdp_md.data
+ *     Return: 0 on success or negative on error
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ *     Copy a NUL terminated string from unsafe address. In case the string
+ *     length is smaller than size, the target is not padded with further NUL
+ *     bytes. In case the string length is larger than size, just size-1
+ *     bytes are copied and the last byte is set to NUL.
+ *     @dst: destination address
+ *     @size: maximum number of bytes to copy, including the trailing NUL
+ *     @unsafe_ptr: unsafe address
+ *     Return:
+ *       > 0 length of the string including the trailing NUL on success
+ *       < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ *     Get the cookie for the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ *     field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ *     Get the owner uid of the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: uid of the socket owner on success or overflowuid if failed.
+ */
+#define __BPF_FUNC_MAPPER(FN)          \
+        FN(unspec),                    \
+        FN(map_lookup_elem),           \
+        FN(map_update_elem),           \
+        FN(map_delete_elem),           \
+        FN(probe_read),                        \
+        FN(ktime_get_ns),              \
+        FN(trace_printk),              \
+        FN(get_prandom_u32),           \
+        FN(get_smp_processor_id),      \
+        FN(skb_store_bytes),           \
+        FN(l3_csum_replace),           \
+        FN(l4_csum_replace),           \
+        FN(tail_call),                 \
+        FN(clone_redirect),            \
+        FN(get_current_pid_tgid),      \
+        FN(get_current_uid_gid),       \
+        FN(get_current_comm),          \
+        FN(get_cgroup_classid),                \
+        FN(skb_vlan_push),             \
+        FN(skb_vlan_pop),              \
+        FN(skb_get_tunnel_key),                \
+        FN(skb_set_tunnel_key),                \
+        FN(perf_event_read),           \
+        FN(redirect),                  \
+        FN(get_route_realm),           \
+        FN(perf_event_output),         \
+        FN(skb_load_bytes),            \
+        FN(get_stackid),               \
+        FN(csum_diff),                 \
+        FN(skb_get_tunnel_opt),                \
+        FN(skb_set_tunnel_opt),                \
+        FN(skb_change_proto),          \
+        FN(skb_change_type),           \
+        FN(skb_under_cgroup),          \
+        FN(get_hash_recalc),           \
+        FN(get_current_task),          \
+        FN(probe_write_user),          \
+        FN(current_task_under_cgroup), \
+        FN(skb_change_tail),           \
+        FN(skb_pull_data),             \
+        FN(csum_update),               \
+        FN(set_hash_invalid),          \
+        FN(get_numa_node_id),          \
+        FN(skb_change_head),           \
+        FN(xdp_adjust_head),           \
+        FN(probe_read_str),            \
+        FN(get_socket_cookie),         \
+        FN(get_socket_uid),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ *
+ * The enumerators are generated from __BPF_FUNC_MAPPER: __BPF_ENUM_FN pastes
+ * the BPF_FUNC_ prefix onto each listed name, giving BPF_FUNC_unspec,
+ * BPF_FUNC_map_lookup_elem, ... in the order they appear in the mapper. No
+ * explicit values are assigned, so a helper's ID is its position in that list
+ * (starting at 0 for 'unspec').
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+        __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+        /* Follows the last helper in the mapper, i.e. one past the highest helper ID. */
+        __BPF_FUNC_MAX_ID,
+};
+/* The pasting helper is only needed to build the enum above. */
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM           (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH          (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK           0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR               (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0           (1ULL << 5)
+#define BPF_F_MARK_ENFORCE             (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS                  (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6             (1ULL << 0)
+
+/* BPF_FUNC_get_stackid flags. */
+#define BPF_F_SKIP_FIELD_MASK          0xffULL
+#define BPF_F_USER_STACK               (1ULL << 8)
+#define BPF_F_FAST_STACK_CMP           (1ULL << 9)
+#define BPF_F_REUSE_STACKID            (1ULL << 10)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT            (1ULL << 2)
+
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+#define BPF_F_INDEX_MASK               0xffffffffULL
+#define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+        __u32 len;
+        __u32 pkt_type;
+        __u32 mark;
+        __u32 queue_mapping;
+        __u32 protocol;
+        __u32 vlan_present;
+        __u32 vlan_tci;
+        __u32 vlan_proto;
+        __u32 priority;
+        __u32 ingress_ifindex;
+        __u32 ifindex;
+        __u32 tc_index;
+        __u32 cb[5];
+        __u32 hash;
+        __u32 tc_classid;
+        __u32 data;
+        __u32 data_end;
+        __u32 napi_id;
+};
+
+struct bpf_tunnel_key {
+        __u32 tunnel_id;
+        union {
+                __u32 remote_ipv4;
+                __u32 remote_ipv6[4];
+        };
+        __u8 tunnel_tos;
+        __u8 tunnel_ttl;
+        __u16 tunnel_ext;
+        __u32 tunnel_label;
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+        BPF_OK = 0,
+        /* 1 reserved */
+        BPF_DROP = 2,
+        /* 3-6 reserved */
+        BPF_REDIRECT = 7,
+        /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+        __u32 bound_dev_if;
+        __u32 family;
+        __u32 type;
+        __u32 protocol;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+        XDP_ABORTED = 0,
+        XDP_DROP,
+        XDP_PASS,
+        XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+        __u32 data;
+        __u32 data_end;
+};
+
+#endif /* __LINUX_BPF_H__ */
diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h
new file mode 100644 (file)
index 0000000..afe7433
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef __LINUX_BPF_COMMON_H__
+#define __LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define                BPF_LD          0x00
+#define                BPF_LDX         0x01
+#define                BPF_ST          0x02
+#define                BPF_STX         0x03
+#define                BPF_ALU         0x04
+#define                BPF_JMP         0x05
+#define                BPF_RET         0x06
+#define                BPF_MISC        0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)  ((code) & 0x18)
+#define                BPF_W           0x00
+#define                BPF_H           0x08
+#define                BPF_B           0x10
+#define BPF_MODE(code)  ((code) & 0xe0)
+#define                BPF_IMM         0x00
+#define                BPF_ABS         0x20
+#define                BPF_IND         0x40
+#define                BPF_MEM         0x60
+#define                BPF_LEN         0x80
+#define                BPF_MSH         0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code)    ((code) & 0xf0)
+#define                BPF_ADD         0x00
+#define                BPF_SUB         0x10
+#define                BPF_MUL         0x20
+#define                BPF_DIV         0x30
+#define                BPF_OR          0x40
+#define                BPF_AND         0x50
+#define                BPF_LSH         0x60
+#define                BPF_RSH         0x70
+#define                BPF_NEG         0x80
+#define                BPF_MOD         0x90
+#define                BPF_XOR         0xa0
+
+#define                BPF_JA          0x00
+#define                BPF_JEQ         0x10
+#define                BPF_JGT         0x20
+#define                BPF_JGE         0x30
+#define                BPF_JSET        0x40
+#define BPF_SRC(code)   ((code) & 0x08)
+#define                BPF_K           0x00
+#define                BPF_X           0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* __LINUX_BPF_COMMON_H__ */
diff --git a/src/shared/linux/libbpf.h b/src/shared/linux/libbpf.h
new file mode 100644 (file)
index 0000000..1989e3a
--- /dev/null
@@ -0,0 +1,198 @@
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+#include <linux/bpf.h>
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC)                            \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,       \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = 0,                                    \
+                .imm   = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC)                            \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU | BPF_OP(OP) | BPF_X,         \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = 0,                                    \
+                .imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM)                            \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,       \
+                .dst_reg = DST,                                        \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM)                            \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU | BPF_OP(OP) | BPF_K,         \
+                .dst_reg = DST,                                        \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC)                                        \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU64 | BPF_MOV | BPF_X,          \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = 0,                                    \
+                .imm   = 0 })
+
+#define BPF_MOV32_REG(DST, SRC)                                        \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU | BPF_MOV | BPF_X,            \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = 0,                                    \
+                .imm   = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM)                                        \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU64 | BPF_MOV | BPF_K,          \
+                .dst_reg = DST,                                        \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM)                                        \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ALU | BPF_MOV | BPF_K,            \
+                .dst_reg = DST,                                        \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+/* It is BPF_LD_IMM64_RAW with src_reg fixed to 0; see the note there about
+ * how many array slots the expansion occupies. */
+#define BPF_LD_IMM64(DST, IMM)                                 \
+        BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+/* Expands to TWO comma-separated struct bpf_insn compound literals, so inside
+ * an instruction array initializer it takes up two consecutive elements:
+ *   - the first carries the BPF_LD | BPF_DW | BPF_IMM opcode, DST/SRC and the
+ *     low 32 bits of IMM (the (__u32) cast) in .imm;
+ *   - the second has opcode 0 and carries the upper 32 bits of IMM
+ *     (IMM widened to __u64 and shifted right by 32) in .imm.
+ * IMM is evaluated twice, once per element.
+ */
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)                                \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_LD | BPF_DW | BPF_IMM,            \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = 0,                                    \
+                .imm   = (__u32) (IMM) }),                     \
+        ((struct bpf_insn) {                                   \
+                .code  = 0, /* zero is reserved opcode */      \
+                .dst_reg = 0,                                  \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = ((__u64) (IMM)) >> 32 })
+
+/* Only defined here when the included headers did not already provide it. */
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD     1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+/* Same two-element expansion as BPF_LD_IMM64_RAW, with src_reg set to
+ * BPF_PSEUDO_MAP_FD and the fd passed as the immediate. */
+#define BPF_LD_MAP_FD(DST, MAP_FD)                             \
+        BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM)                                  \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,    \
+                .dst_reg = 0,                                  \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)                       \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,   \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = OFF,                                  \
+                .imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)                       \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,   \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = OFF,                                  \
+                .imm   = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)                      \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,  \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = OFF,                                  \
+                .imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)                                \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,    \
+                .dst_reg = DST,                                        \
+                .src_reg = 0,                                  \
+                .off   = OFF,                                  \
+                .imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF)                         \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_JMP | BPF_OP(OP) | BPF_X,         \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = OFF,                                  \
+                .imm   = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)                         \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_JMP | BPF_OP(OP) | BPF_K,         \
+                .dst_reg = DST,                                        \
+                .src_reg = 0,                                  \
+                .off   = OFF,                                  \
+                .imm   = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)                 \
+        ((struct bpf_insn) {                                   \
+                .code  = CODE,                                 \
+                .dst_reg = DST,                                        \
+                .src_reg = SRC,                                        \
+                .off   = OFF,                                  \
+                .imm   = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN()                                                \
+        ((struct bpf_insn) {                                   \
+                .code  = BPF_JMP | BPF_EXIT,                   \
+                .dst_reg = 0,                                  \
+                .src_reg = 0,                                  \
+                .off   = 0,                                    \
+                .imm   = 0 })
+
+#endif
index 318cd35bf606f9e1e43e40da8e239c1eabf1be4e..897fc48b9895e06242fc01378aa645f2daf93c2a 100644 (file)
@@ -3878,6 +3878,9 @@ typedef struct UnitStatusInfo {
         uint64_t tasks_current;
         uint64_t tasks_max;
 
+        uint64_t ip_ingress_bytes;
+        uint64_t ip_egress_bytes;
+
         LIST_HEAD(ExecStatusInfo, exec);
 } UnitStatusInfo;
 
@@ -4194,6 +4197,14 @@ static void print_status_info(
         if (i->status_errno > 0)
                 printf("    Error: %i (%s)\n", i->status_errno, strerror(i->status_errno));
 
+        if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
+                char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
+
+                printf("       IP: %s in, %s out\n",
+                        format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
+                        format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
+        }
+
         if (i->tasks_current != (uint64_t) -1) {
                 printf("    Tasks: %" PRIu64, i->tasks_current);
 
@@ -4484,6 +4495,10 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
                         i->next_elapse_monotonic = u;
                 else if (streq(name, "NextElapseUSecRealtime"))
                         i->next_elapse_real = u;
+                else if (streq(name, "IPIngressBytes"))
+                        i->ip_ingress_bytes = u;
+                else if (streq(name, "IPEgressBytes"))
+                        i->ip_egress_bytes = u;
 
                 break;
         }
@@ -4998,6 +5013,8 @@ static int show_one(
                 .cpu_usage_nsec = (uint64_t) -1,
                 .tasks_current = (uint64_t) -1,
                 .tasks_max = (uint64_t) -1,
+                .ip_ingress_bytes = (uint64_t) -1,
+                .ip_egress_bytes = (uint64_t) -1,
         };
         int r;
 
index 4bc248a4b1668943db00fca6d58ab5ee671a9e7e..8c23486779fe08db133247ece8cf9485d85787ac 100644 (file)
@@ -103,6 +103,9 @@ _SD_BEGIN_DECLARATIONS;
 #define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR                   \
                                           SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
 
+#define SD_MESSAGE_UNIT_RESOURCES         SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+#define SD_MESSAGE_UNIT_RESOURCES_STR     SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+
 #define SD_MESSAGE_SPAWN_FAILED           SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
 #define SD_MESSAGE_SPAWN_FAILED_STR       SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
 
index 57f76559a7f97136495a0c871512cd44c4bdf002..1f3db65781ebca787ab0e9e58dd79fe2dc39af8f 100644 (file)
@@ -277,6 +277,10 @@ tests += [
          [],
          []],
 
+        [['src/test/test-in-addr-util.c'],
+         [],
+         []],
+
         [['src/test/test-barrier.c'],
          [],
          []],
@@ -335,6 +339,17 @@ tests += [
          [libbasic],
          []],
 
+        [['src/test/test-bpf.c',
+          'src/test/test-helper.c'],
+         [libcore,
+          libshared],
+         [libmount,
+          threads,
+          librt,
+          libseccomp,
+          libselinux,
+          libblkid]],
+
         [['src/test/test-hashmap.c',
           'src/test/test-hashmap-plain.c',
           test_hashmap_ordered_c],
diff --git a/src/test/test-bpf.c b/src/test/test-bpf.c
new file mode 100644 (file)
index 0000000..74e9d50
--- /dev/null
@@ -0,0 +1,162 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <linux/libbpf.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "load-fragment.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+
+/* End-to-end test for the BPF program helpers and the per-unit IP firewall.
+ *
+ * Stage 1: build a trivial two-instruction program (set R0 to 1, exit) and,
+ * if running as root on a kernel where bpf_firewall_supported() reports
+ * support, load it into the kernel.
+ *
+ * Stage 2: create a fake "foo.service" with IP accounting enabled plus
+ * IPAddressAllow=/IPAddressDeny= entries, compile its ingress/egress programs,
+ * load both, then start the unit. It runs two pings: the first command
+ * (127.0.0.2) must exit successfully, the second (127.0.0.3) must not.
+ *
+ * argc/argv are unused.
+ *
+ * Returns EXIT_TEST_SKIP when not root, when BPF firewalling is unsupported,
+ * or when compiling the firewall fails with ENOTTY/ENOSYS/EPERM; 0 on success.
+ *
+ * Every check uses assert_se(), as the side-effecting checks in this function
+ * already did. Plain assert() is compiled out under NDEBUG, which would have
+ * dropped the unit_start() call entirely and left the event loop below waiting
+ * for a state change that never comes.
+ */
+int main(int argc, char *argv[]) {
+        struct bpf_insn exit_insn[] = {
+                BPF_MOV64_IMM(BPF_REG_0, 1),
+                BPF_EXIT_INSN()
+        };
+
+        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+        CGroupContext *cc = NULL;
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+        Manager *m = NULL;
+        Unit *u;
+        char log_buf[65535];
+        int r;
+
+        log_set_max_level(LOG_DEBUG);
+        log_parse_environment();
+        log_open();
+
+        enter_cgroup_subroot();
+        assert_se(set_unit_path(get_testdata_dir("")) >= 0);
+        assert_se(runtime_dir = setup_fake_runtime_dir());
+
+        /* Building the program happens before the root check below. */
+        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+        assert_se(r == 0);
+
+        r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
+        assert_se(r == 0);
+
+        if (getuid() != 0) {
+                log_notice("Not running as root, skipping kernel related tests.");
+                return EXIT_TEST_SKIP;
+        }
+
+        r = bpf_firewall_supported();
+        if (r == 0) {
+                log_notice("BPF firewalling not supported, skipping");
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r > 0);
+
+        r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
+        assert_se(r >= 0);
+
+        p = bpf_program_unref(p);
+
+        /* The simple tests succeeded. Now let's try full unit-based use-case. */
+
+        assert_se(manager_new(UNIT_FILE_USER, true, &m) >= 0);
+        assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+        assert_se(u = unit_new(m, sizeof(Service)));
+        assert_se(unit_add_name(u, "foo.service") == 0);
+        assert_se(cc = unit_get_cgroup_context(u));
+        u->perpetual = true;
+
+        cc->ip_accounting = true;
+
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);
+
+        /* Two allow entries were added; expect exactly two list items. */
+        assert_se(cc->ip_address_allow);
+        assert_se(cc->ip_address_allow->items_next);
+        assert_se(!cc->ip_address_allow->items_next->items_next);
+
+        /* The deny list is defined redundantly, let's ensure it got properly reduced */
+        assert_se(cc->ip_address_deny);
+        assert_se(cc->ip_address_deny->items_next);
+        assert_se(!cc->ip_address_deny->items_next->items_next);
+
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+        /* Exactly two ExecStart commands must be queued. */
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
+        assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);
+
+        SERVICE(u)->type = SERVICE_ONESHOT;
+        u->load_state = UNIT_LOADED;
+
+        unit_dump(u, stdout, NULL);
+
+        r = bpf_firewall_compile(u);
+        if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM)) {
+                /* Kernel doesn't support the necessary bpf bits, or masked out via seccomp? */
+                manager_free(m);
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r >= 0);
+
+        assert_se(u->ip_bpf_ingress);
+        assert_se(u->ip_bpf_egress);
+
+        r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
+
+        /* Print the log buffer before checking r, so it is visible on failure too. */
+        log_notice("log:");
+        log_notice("-------");
+        log_notice("%s", log_buf);
+        log_notice("-------");
+
+        assert_se(r >= 0);
+
+        r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
+
+        log_notice("log:");
+        log_notice("-------");
+        log_notice("%s", log_buf);
+        log_notice("-------");
+
+        assert_se(r >= 0);
+
+        /* Must not be a plain assert(): the call itself is the point. */
+        assert_se(unit_start(u) >= 0);
+
+        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+                assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+        /* First command (ping 127.0.0.2, on the allow list) has to exit cleanly... */
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED &&
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);
+
+        /* ...while the second (ping 127.0.0.3, on the deny list) must not. */
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
+
+        manager_free(m);
+
+        return 0;
+}
index 8818d1ffb7dd4477fb90e76c3afbb1bfa44becd7..e510633584db0d39ad85b3ec4df0454ec7ed7197 100644 (file)
@@ -130,7 +130,7 @@ static void test_parse_cpu_set(void) {
         assert_se(ncpus == 0);  /* empty string returns 0 */
         assert_se(!c);
 
-        /* Runnaway quoted string */
+        /* Runaway quoted string */
         ncpus = parse_cpu_set_and_warn("0 1 2 3 \"4 5 6 7 ", &c, NULL, "fake", 1, "CPUAffinity");
         assert_se(ncpus < 0);
         assert_se(!c);
index 4f319b3c6b0587acfebf6aa4e5238628539a4862..b5b6391cdd81e191d7deac7f91fe14714b43c3df 100644 (file)
@@ -609,9 +609,9 @@ static void test_writing_tmpfile(void) {
         int fd, r;
         struct iovec iov[3];
 
-        IOVEC_SET_STRING(iov[0], "abc\n");
-        IOVEC_SET_STRING(iov[1], ALPHANUMERICAL "\n");
-        IOVEC_SET_STRING(iov[2], "");
+        iov[0] = IOVEC_MAKE_STRING("abc\n");
+        iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
+        iov[2] = IOVEC_MAKE_STRING("");
 
         fd = mkostemp_safe(name);
         printf("tmpfile: %s", name);
diff --git a/src/test/test-in-addr-util.c b/src/test/test-in-addr-util.c
new file mode 100644 (file)
index 0000000..8b7a122
--- /dev/null
@@ -0,0 +1,75 @@
+/***
+  This file is part of systemd
+
+  Copyright 2017 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <netinet/in.h>
+
+#include "in-addr-util.h"
+
+static void test_in_addr_prefix_from_string(const char *p, int family, int ret, const union in_addr_union *u, unsigned char prefixlen) { /* Check one parse case:
+         * in_addr_prefix_from_string(p, family, ...) must return exactly ret. When that result is
+         * non-negative, the parsed address must equal *u and the parsed prefix length must equal
+         * prefixlen, and in_addr_prefix_from_string_auto() must then succeed on the same string and
+         * report the same family, address and prefix length. u is only read on that success path. */
+        union in_addr_union q;
+        unsigned char l;
+        int r;
+
+        r = in_addr_prefix_from_string(p, family, &q, &l);
+        assert_se(r == ret); /* exact match, so expected error codes are verified too */
+
+        if (r >= 0) { /* q and l are only inspected when the parse succeeded */
+                int f;
+
+                assert_se(in_addr_equal(family, &q, u));
+                assert_se(l == prefixlen);
+
+                r = in_addr_prefix_from_string_auto(p, &f, &q, &l); /* same string again; the family is an output (f) here */
+                assert_se(r >= 0);
+
+                assert_se(f == family);
+                assert_se(in_addr_equal(family, &q, u));
+                assert_se(l == prefixlen);
+        }
+}
+
+int main(int argc, char *argv[]) { /* Runs a fixed table of cases through
+         * test_in_addr_prefix_from_string(), first for AF_INET and then for AF_INET6. Arguments of
+         * each call: input string, address family, expected return value, expected address (NULL
+         * where an error is expected) and expected prefix length. Returns 0 after the last case. */
+        test_in_addr_prefix_from_string("", AF_INET, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/", AF_INET, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/8", AF_INET, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("1.2.3.4", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32); /* no "/N" suffix: expected prefix length is 32 */
+        test_in_addr_prefix_from_string("1.2.3.4/0", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0);
+        test_in_addr_prefix_from_string("1.2.3.4/1", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1);
+        test_in_addr_prefix_from_string("1.2.3.4/2", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2);
+        test_in_addr_prefix_from_string("1.2.3.4/32", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
+        test_in_addr_prefix_from_string("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0); /* one past the largest accepted AF_INET prefix length in this table */
+        test_in_addr_prefix_from_string("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0);
+        test_in_addr_prefix_from_string("::1", AF_INET, -EINVAL, NULL, 0); /* IPv6 literal requested as AF_INET */
+
+        test_in_addr_prefix_from_string("", AF_INET6, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/", AF_INET6, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/8", AF_INET6, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("::1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128); /* no "/N" suffix: expected prefix length is 128 */
+        test_in_addr_prefix_from_string("::1/0", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
+        test_in_addr_prefix_from_string("::1/1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1);
+        test_in_addr_prefix_from_string("::1/2", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2);
+        test_in_addr_prefix_from_string("::1/32", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32);
+        test_in_addr_prefix_from_string("::1/33", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33); /* 33 is expected to succeed for AF_INET6, unlike the AF_INET case above */
+        test_in_addr_prefix_from_string("::1/64", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64);
+        test_in_addr_prefix_from_string("::1/128", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
+        test_in_addr_prefix_from_string("::1/129", AF_INET6, -ERANGE, NULL, 0); /* one past the largest accepted AF_INET6 prefix length in this table */
+        test_in_addr_prefix_from_string("::1/-1", AF_INET6, -ERANGE, NULL, 0);
+
+        return 0;
+}
index 5373930df44430c7e374439e4c9bac57082f6e24..2221c0d7fd67448a38dc6db0613b590e3e9b2ac2 100755 (executable)
@@ -21,7 +21,7 @@
 set -e
 
 ROOTDIR=$(dirname $(dirname $(readlink -f $0)))
-SYSTEMD_HWDB=${builddir:-.}/systemd-hwdb
+SYSTEMD_HWDB=./systemd-hwdb
 
 if [ ! -x "$SYSTEMD_HWDB" ]; then
     echo "$SYSTEMD_HWDB does not exist, please build first"
index bddeeb62b44ba929b13ca12b57c76d75dde2a384..9fdcd2d8d2c9b7f854686e7ece0d8e3686e14c16 100644 (file)
@@ -165,3 +165,21 @@ endif
 sysv_generator_test_py = find_program('sysv-generator-test.py')
 test('sysv-generator-test',
      sysv_generator_test_py)
+
+############################################################
+
+# prepare test/sys tree
+sys_script_py = find_program('sys-script.py')
+custom_target(
+        'sys',
+        command : [sys_script_py, meson.current_build_dir()],
+        output : 'sys',
+        build_by_default : true)
+
+udev_test_pl = find_program('udev-test.pl')
+test('udev-test',
+     udev_test_pl)
+
+hwdb_test_sh = find_program('hwdb-test.sh')
+test('hwdb-test',
+     hwdb_test_sh)
index 99e323db3b2c36a13c8ca9de83553a3b4e788ece..18bb40f8128151b3aaf6bdafd33ddf1ce0a00203 100755 (executable)
@@ -26,14 +26,9 @@ import tempfile
 import shutil
 from glob import glob
 import collections
+from configparser import RawConfigParser
 
-try:
-    from configparser import RawConfigParser
-except ImportError:
-    # python 2
-    from ConfigParser import RawConfigParser
-
-sysv_generator = os.path.join(os.environ.get('builddir', '.'), 'systemd-sysv-generator')
+sysv_generator = './systemd-sysv-generator'
 
 class MultiDict(collections.OrderedDict):
     def __setitem__(self, key, value):