]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: firewall integration of cgroups with NFTSet=
authorTopi Miettinen <toiwoton@gmail.com>
Sat, 2 Sep 2023 18:55:36 +0000 (21:55 +0300)
committerTopi Miettinen <topimiettinen@users.noreply.github.com>
Wed, 27 Sep 2023 18:10:11 +0000 (18:10 +0000)
New directive `NFTSet=` provides a method for integrating dynamic cgroup IDs
into firewall rules with NFT sets. The benefit of using this setting is to be
able to use a control group as a selector in firewall rules easily, and this in
turn allows more fine-grained filtering. Also, NFT rules for cgroup matching
use numeric cgroup IDs, which change every time a service is restarted, making
them hard to use in a systemd environment.

This option expects a whitespace separated list of NFT set definitions. Each
definition consists of a colon-separated tuple of source type (only "cgroup"),
NFT address family (one of "arp", "bridge", "inet", "ip", "ip6", or "netdev"),
table name and set name. The names of tables and sets must conform to lexical
restrictions of NFT table names. The type of the element used in the NFT filter
must be "cgroupsv2". When a control group for a unit is realized, the cgroup ID
will be appended to the NFT sets and it will be removed when the control
group is removed.  systemd only inserts elements to (or removes from) the sets,
so the related NFT rules, tables and sets must be prepared elsewhere in
advance.  Failures to manage the sets will be ignored.

If the firewall rules are reinstalled so that the contents of NFT sets are
destroyed, the command `systemctl daemon-reload` can be used to refill the sets.

Example:

```
table inet filter {
...
        set timesyncd {
                type cgroupsv2
        }

        chain ntp_output {
                socket cgroupv2 level 2 != @timesyncd counter drop
                accept
        }
...
}
```

/etc/systemd/system/systemd-timesyncd.service.d/override.conf
```
[Service]
NFTSet=cgroup:inet:filter:timesyncd
```

```
$ sudo nft list set inet filter timesyncd
table inet filter {
        set timesyncd {
                type cgroupsv2
                elements = { "system.slice/systemd-timesyncd.service" }
        }
}
```

17 files changed:
man/org.freedesktop.systemd1.xml
man/systemd.resource-control.xml
src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-cgroup.c
src/core/load-fragment-gperf.gperf.in
src/core/load-fragment.c
src/core/load-fragment.h
src/core/manager.c
src/core/manager.h
src/core/socket.c
src/core/unit.c
src/network/networkd-network-gperf.gperf
src/shared/bus-unit-util.c
src/shared/firewall-util-nft.c
src/shared/firewall-util.h
src/test/test-nft-set.c

index 8a033c92be60f628fd248bcfc1d6ac0ab3f45b8e..ba7728b61459c8a681e008991041d69fe0a4436a 100644 (file)
@@ -2920,6 +2920,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
       readonly s MemoryPressureWatch = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t MemoryPressureThresholdUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+      readonly a(iiss) NFTSet = [...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly as Environment = ['...', ...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@@ -3538,6 +3540,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--property MemoryPressureThresholdUSec is not documented!-->
 
+    <!--property NFTSet is not documented!-->
+
     <!--property EnvironmentFiles is not documented!-->
 
     <!--property PassEnvironment is not documented!-->
@@ -4168,6 +4172,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@@ -4957,6 +4963,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
       readonly s MemoryPressureWatch = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t MemoryPressureThresholdUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+      readonly a(iiss) NFTSet = [...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly as Environment = ['...', ...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@@ -5585,6 +5593,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <!--property MemoryPressureThresholdUSec is not documented!-->
 
+    <!--property NFTSet is not documented!-->
+
     <!--property EnvironmentFiles is not documented!-->
 
     <!--property PassEnvironment is not documented!-->
@@ -6197,6 +6207,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@@ -6860,6 +6872,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
       readonly s MemoryPressureWatch = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t MemoryPressureThresholdUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+      readonly a(iiss) NFTSet = [...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly as Environment = ['...', ...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@@ -7416,6 +7430,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <!--property MemoryPressureThresholdUSec is not documented!-->
 
+    <!--property NFTSet is not documented!-->
+
     <!--property EnvironmentFiles is not documented!-->
 
     <!--property PassEnvironment is not documented!-->
@@ -7942,6 +7958,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@@ -8728,6 +8746,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
       readonly s MemoryPressureWatch = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t MemoryPressureThresholdUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+      readonly a(iiss) NFTSet = [...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly as Environment = ['...', ...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@@ -9270,6 +9290,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <!--property MemoryPressureThresholdUSec is not documented!-->
 
+    <!--property NFTSet is not documented!-->
+
     <!--property EnvironmentFiles is not documented!-->
 
     <!--property PassEnvironment is not documented!-->
@@ -9782,6 +9804,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@@ -10427,6 +10451,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
       readonly s MemoryPressureWatch = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t MemoryPressureThresholdUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+      readonly a(iiss) NFTSet = [...];
   };
   interface org.freedesktop.DBus.Peer { ... };
   interface org.freedesktop.DBus.Introspectable { ... };
@@ -10597,6 +10623,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
 
     <!--property MemoryPressureThresholdUSec is not documented!-->
 
+    <!--property NFTSet is not documented!-->
+
     <!--Autogenerated cross-references for systemd.directives, do not edit-->
 
     <variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
@@ -10775,6 +10803,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
+
     <!--End of Autogenerated section-->
 
     <refsect2>
@@ -10976,6 +11006,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
       readonly s MemoryPressureWatch = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
       readonly t MemoryPressureThresholdUSec = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+      readonly a(iiss) NFTSet = [...];
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s KillMode = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@@ -11166,6 +11198,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
 
     <!--property MemoryPressureThresholdUSec is not documented!-->
 
+    <!--property NFTSet is not documented!-->
+
     <!--property KillMode is not documented!-->
 
     <!--property KillSignal is not documented!-->
@@ -11374,6 +11408,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>
index 6673f0c3a0e00402d9836e84a759d47a1a9ded3f..038a58e3dc7562b5e515f370b376b7b865a8c521 100644 (file)
@@ -1500,6 +1500,73 @@ DeviceAllow=/dev/loop-control
 
         <xi:include href="version-info.xml" xpointer="v254"/></listitem>
       </varlistentry>
+
+      <varlistentry>
+        <term><varname>NFTSet=</varname><replaceable>source</replaceable>:<replaceable>family</replaceable>:<replaceable>table</replaceable>:<replaceable>set</replaceable></term>
+        <listitem>
+          <para>This setting provides a method for integrating dynamic cgroup IDs into firewall rules with
+          <ulink url="https://netfilter.org/projects/nftables/index.html">NFT</ulink> sets. The benefit of
+          using this setting is to be able to use the IDs as selectors in firewall rules easily and this in
+          turn allows more fine grained filtering. NFT rules for cgroup matching use numeric cgroup IDs,
+          which change every time a service is restarted, making them hard to use in a systemd environment
+          otherwise.</para>
+
+          <para>This option expects a whitespace separated list of NFT set definitions. Each definition
+          consists of a colon-separated tuple of source type (only <literal>cgroup</literal>), NFT address
+          family (one of <literal>arp</literal>, <literal>bridge</literal>, <literal>inet</literal>,
+          <literal>ip</literal>, <literal>ip6</literal>, or <literal>netdev</literal>), table name and set
+          name. The names of tables and sets must conform to lexical restrictions of NFT table names. The
+          type of the element used in the NFT filter must match the type implied by the directive
+          (<literal>cgroup</literal>) as shown in the table below. When a control group is realized, the
+          corresponding ID will be appended to the NFT sets and it will be removed when the control group
+          is removed. <command>systemd</command> only inserts elements to (or removes from) the sets, so the
+          related NFT rules, tables and sets must be prepared elsewhere in advance. Failures to manage the
+          sets will be ignored.</para>
+
+          <table>
+            <title>Defined <varname>source type</varname> values</title>
+            <tgroup cols='3'>
+              <colspec colname='source type'/>
+              <colspec colname='description'/>
+              <colspec colname='NFT type name'/>
+              <thead>
+                <row>
+                  <entry>Source type</entry>
+                  <entry>Description</entry>
+                  <entry>Corresponding NFT type name</entry>
+                </row>
+              </thead>
+
+              <tbody>
+                <row>
+                  <entry><literal>cgroup</literal></entry>
+                  <entry>control group ID</entry>
+                  <entry><literal>cgroupsv2</literal></entry>
+                </row>
+              </tbody>
+            </tgroup>
+          </table>
+
+          <para>If the firewall rules are reinstalled so that the contents of NFT sets are destroyed, the command
+          <command>systemctl daemon-reload</command> can be used to refill the sets.</para>
+
+          <para>Example:
+          <programlisting>[Service]
+NFTSet=cgroup:inet:filter:my_service
+</programlisting>
+          Corresponding NFT rules:
+          <programlisting>table inet filter {
+        set my_service {
+                type cgroupsv2
+        }
+        chain x {
+                socket cgroupv2 level 2 @my_service accept
+                drop
+        }
+}</programlisting>
+          </para>
+        <xi:include href="version-info.xml" xpointer="v255"/></listitem>
+      </varlistentry>
     </variablelist>
     </refsect2>
   </refsect1>
index b304b39e8c1e5c5ca395572302f70dceadd0e744..32c78a449b99c29d09d70e8b6b39d6409c9229c0 100644 (file)
@@ -20,6 +20,7 @@
 #include "devnum-util.h"
 #include "fd-util.h"
 #include "fileio.h"
+#include "firewall-util.h"
 #include "in-addr-prefix-util.h"
 #include "inotify-util.h"
 #include "io-util.h"
@@ -291,6 +292,8 @@ void cgroup_context_done(CGroupContext *c) {
         cpu_set_reset(&c->startup_cpuset_mems);
 
         c->delegate_subgroup = mfree(c->delegate_subgroup);
+
+        nft_set_context_clear(&c->nft_set_context);
 }
 
 static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
@@ -664,6 +667,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
                 SET_FOREACH(iface, c->restrict_network_interfaces)
                         fprintf(f, "%sRestrictNetworkInterfaces: %s\n", prefix, iface);
         }
+
+        FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets)
+                fprintf(f, "%sNFTSet: %s:%s:%s:%s\n", prefix, nft_set_source_to_string(nft_set->source),
+                        nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set);
 }
 
 void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f) {
@@ -1342,6 +1349,43 @@ static void cgroup_apply_firewall(Unit *u) {
         (void) bpf_firewall_install(u);
 }
 
+void cgroup_modify_nft_set(Unit *u, bool add) {
+        int r;
+        CGroupContext *c;
+
+        assert(u);
+
+        if (!MANAGER_IS_SYSTEM(u->manager))
+                return;
+
+        if (cg_all_unified() <= 0)
+                return;
+
+        assert_se(c = unit_get_cgroup_context(u));
+        if (u->cgroup_id == 0)
+                return;
+
+        if (!u->manager->fw_ctx) {
+                r = fw_ctx_new_full(&u->manager->fw_ctx, /* init_tables= */ false);
+                if (r < 0)
+                        return;
+
+                assert(u->manager->fw_ctx);
+        }
+
+        FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets) {
+                uint64_t element = u->cgroup_id;
+
+                r = nft_set_element_modify_any(u->manager->fw_ctx, add, nft_set->nfproto, nft_set->table, nft_set->set, &element, sizeof(element));
+                if (r < 0)
+                        log_warning_errno(r, "Failed to %s NFT set: family %s, table %s, set %s, cgroup %" PRIu64 ", ignoring: %m",
+                                          add? "add" : "delete", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
+                else
+                        log_debug("%s NFT set: family %s, table %s, set %s, cgroup %" PRIu64,
+                                  add? "Added" : "Deleted", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
+        }
+}
+
 static void cgroup_apply_socket_bind(Unit *u) {
         assert(u);
 
@@ -1781,6 +1825,8 @@ static void cgroup_context_apply(
 
         if (apply_mask & CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES)
                 cgroup_apply_restrict_network_interfaces(u);
+
+        cgroup_modify_nft_set(u, /* add = */ true);
 }
 
 static bool unit_get_needs_bpf_firewall(Unit *u) {
@@ -2950,6 +2996,8 @@ void unit_prune_cgroup(Unit *u) {
         (void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
 #endif
 
+        cgroup_modify_nft_set(u, /* add = */ false);
+
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
index bbbf9408cc24a830f8d28a645c4bf3a07bb43420..90159fd84ccc2bd1d8ad0ea3bec1f2b353822ceb 100644 (file)
@@ -6,6 +6,7 @@
 #include "bpf-lsm.h"
 #include "cgroup-util.h"
 #include "cpu-set-util.h"
+#include "firewall-util.h"
 #include "list.h"
 #include "time-util.h"
 
@@ -223,6 +224,8 @@ struct CGroupContext {
         usec_t memory_pressure_threshold_usec;
         /* NB: For now we don't make the period configurable, not the type, nor do we allow multiple
          * triggers, nor triggers for non-memory pressure. We might add that later. */
+
+        NFTSetContext nft_set_context;
 };
 
 /* Used when querying IP accounting data */
@@ -277,6 +280,8 @@ int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const
 void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);
 int cgroup_log_xattr_apply(Unit *u, const char *cgroup_path);
 
+void cgroup_modify_nft_set(Unit *u, bool add);
+
 CGroupMask unit_get_own_mask(Unit *u);
 CGroupMask unit_get_delegate_mask(Unit *u);
 CGroupMask unit_get_members_mask(Unit *u);
index 5347525844e27e7e8bfa55bdb79c1eef82529076..edb5dfa13e58a18d1e0d49fe34ece919ac234237 100644 (file)
 #include "dbus-cgroup.h"
 #include "dbus-util.h"
 #include "errno-util.h"
+#include "escape.h"
 #include "fd-util.h"
 #include "fileio.h"
+#include "firewall-util.h"
 #include "in-addr-prefix-util.h"
 #include "ip-protocol-list.h"
 #include "limits-util.h"
@@ -423,6 +425,34 @@ static int property_get_restrict_network_interfaces(
         return sd_bus_message_close_container(reply);
 }
 
+static int property_get_cgroup_nft_set(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+        int r;
+        CGroupContext *c = userdata;
+
+        assert(bus);
+        assert(reply);
+        assert(c);
+
+        r = sd_bus_message_open_container(reply, 'a', "(iiss)");
+        if (r < 0)
+                return r;
+
+        FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets) {
+                r = sd_bus_message_append(reply, "(iiss)", nft_set->source, nft_set->nfproto, nft_set->table, nft_set->set);
+                if (r < 0)
+                        return r;
+        }
+
+        return sd_bus_message_close_container(reply);
+}
+
 const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@@ -490,6 +520,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("RestrictNetworkInterfaces", "(bas)", property_get_restrict_network_interfaces, 0, 0),
         SD_BUS_PROPERTY("MemoryPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(CGroupContext, memory_pressure_watch), 0),
         SD_BUS_PROPERTY("MemoryPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, memory_pressure_threshold_usec), 0),
+        SD_BUS_PROPERTY("NFTSet", "a(iiss)", property_get_cgroup_nft_set, 0, 0),
         SD_BUS_VTABLE_END
 };
 
@@ -2192,6 +2223,75 @@ int bus_cgroup_set_property(
                 return 1;
         }
 
+        if (streq(name, "NFTSet")) {
+                int source, nfproto;
+                const char *table, *set;
+                bool empty = true;
+
+                r = sd_bus_message_enter_container(message, 'a', "(iiss)");
+                if (r < 0)
+                        return r;
+
+                while ((r = sd_bus_message_read(message, "(iiss)", &source, &nfproto, &table, &set)) > 0) {
+                        const char *source_name, *nfproto_name;
+
+                        if (source != NFT_SET_SOURCE_CGROUP)
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid source %d.", source);
+
+                        source_name = nft_set_source_to_string(source);
+                        assert(source_name);
+
+                        nfproto_name = nfproto_to_string(nfproto);
+                        if (!nfproto_name)
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid protocol %d.", nfproto);
+
+                        if (!nft_identifier_valid(table)) {
+                                _cleanup_free_ char *esc = NULL;
+
+                                esc = cescape(table);
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NFT table name %s.", strna(esc));
+                        }
+
+                        if (!nft_identifier_valid(set)) {
+                                _cleanup_free_ char *esc = NULL;
+
+                                esc = cescape(set);
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NFT set name %s.", strna(esc));
+                        }
+
+                        if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                                r = nft_set_add(&c->nft_set_context, source, nfproto, table, set);
+                                if (r < 0)
+                                        return r;
+
+                                unit_write_settingf(
+                                                u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+                                                "%s=%s:%s:%s:%s",
+                                                name,
+                                                source_name,
+                                                nfproto_name,
+                                                table,
+                                                set);
+                        }
+
+                        empty = false;
+                }
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_exit_container(message);
+                if (r < 0)
+                        return r;
+
+                if (empty && !UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        nft_set_context_clear(&c->nft_set_context);
+                        unit_write_settingf(u, flags, name, "%s=", name);
+                }
+
+                return 1;
+        }
+
+        /* must be last */
         if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB))
                 return bus_cgroup_set_transient_property(u, c, name, message, flags, error);
 
index 0d1ee9c231aa1c91eccb6dd63e1c6c8c0dcb5d77..6cdf13197517b5f030bce56ecef84a12ca85028a 100644 (file)
 {{type}}.RestrictNetworkInterfaces,        config_parse_restrict_network_interfaces,    0,                                  offsetof({{type}}, cgroup_context)
 {{type}}.MemoryPressureThresholdSec,       config_parse_sec,                            0,                                  offsetof({{type}}, cgroup_context.memory_pressure_threshold_usec)
 {{type}}.MemoryPressureWatch,              config_parse_memory_pressure_watch,          0,                                  offsetof({{type}}, cgroup_context.memory_pressure_watch)
+{{type}}.NFTSet,                           config_parse_cgroup_nft_set,                 NFT_SET_PARSE_CGROUP,               offsetof({{type}}, cgroup_context)
 {%- endmacro -%}
 
 %{
index d89f5ca229ebc3a6d62dc91d36f588f159776c56..935a4ef35da81f97a998086c6eae7f9a07684e19 100644 (file)
 #include "errno-list.h"
 #include "escape.h"
 #include "exec-credential.h"
+#include "execute.h"
 #include "fd-util.h"
 #include "fileio.h"
+#include "firewall-util.h"
 #include "fs-util.h"
 #include "hexdecoct.h"
 #include "io-util.h"
@@ -6696,3 +6698,21 @@ int config_parse_open_file(
 
         return 0;
 }
+
+int config_parse_cgroup_nft_set(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        CGroupContext *c = ASSERT_PTR(data);
+        Unit *u = ASSERT_PTR(userdata);
+
+        return config_parse_nft_set(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &c->nft_set_context, u);
+}
index 39378b3a3c76a56f78f1bf8c311ee62bd41d8066..69198050eadde6365affb6439a821ba4beb0602f 100644 (file)
@@ -158,6 +158,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tty_size);
 CONFIG_PARSER_PROTOTYPE(config_parse_log_filter_patterns);
 CONFIG_PARSER_PROTOTYPE(config_parse_open_file);
 CONFIG_PARSER_PROTOTYPE(config_parse_memory_pressure_watch);
+CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_nft_set);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
index 07845af6d17b9e5e889ca023c95bc89d3d1b4e9e..aed2c988af3bd29a8f58dfa12407d2601246c7c9 100644 (file)
@@ -1679,6 +1679,8 @@ Manager* manager_free(Manager *m) {
         free(m->watchdog_pretimeout_governor);
         free(m->watchdog_pretimeout_governor_overridden);
 
+        m->fw_ctx = fw_ctx_free(m->fw_ctx);
+
 #if BPF_FRAMEWORK
         lsm_bpf_destroy(m->restrict_fs);
 #endif
index ee2ace70a812d080e3979385868fbe048c9435a2..55543703cda778d71ee0acf25654c3ce30c2ad81 100644 (file)
@@ -486,6 +486,9 @@ struct Manager {
         RateLimit dump_ratelimit;
 
         sd_event_source *memory_pressure_event_source;
+
+        /* For NFTSet= */
+        FirewallContext *fw_ctx;
 };
 
 static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
index 43f412bc895fcd0f362cbafde9926c9ce771755f..0c905afddc427a833a5c608f9224b103efe6a77b 100644 (file)
@@ -1489,14 +1489,22 @@ static int socket_address_listen_do(
                 log_unit_error_errno(u, error, fmt, strna(_t));  \
         })
 
-static int fork_needed(const SocketAddress *address, const ExecContext *context) {
+static int fork_needed(const SocketAddress *address, Socket *s) {
         int r;
 
         assert(address);
-        assert(context);
+        assert(s);
 
         /* Check if we need to do the cgroup or netns stuff. If not we can do things much simpler. */
 
+        /* If there are any NFTSet= directives with cgroup source, we need the cgroup */
+        Unit *u = UNIT(s);
+        CGroupContext *c = unit_get_cgroup_context(u);
+        if (c)
+                FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets)
+                        if (nft_set->source == NFT_SET_SOURCE_CGROUP)
+                                return true;
+
         if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) {
                 r = bpf_firewall_supported();
                 if (r < 0)
@@ -1505,7 +1513,7 @@ static int fork_needed(const SocketAddress *address, const ExecContext *context)
                         return true;
         }
 
-        return exec_needs_network_namespace(context);
+        return exec_needs_network_namespace(&s->exec_context);
 }
 
 static int socket_address_listen_in_cgroup(
@@ -1525,7 +1533,7 @@ static int socket_address_listen_in_cgroup(
          * the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
          * such. */
 
-        r = fork_needed(address, &s->exec_context);
+        r = fork_needed(address, s);
         if (r < 0)
                 return r;
         if (r == 0) {
index 5fd66e7b6e042bde4759fe0e98e029a1fa07345e..80ac7870226b89aecc2edb0e3ea20e8b2943dde6 100644 (file)
@@ -3820,6 +3820,10 @@ int unit_coldplug(Unit *u) {
                         r = q;
         }
 
+        CGroupContext *c = unit_get_cgroup_context(u);
+        if (c)
+                cgroup_modify_nft_set(u, /* add = */ true);
+
         return r;
 }
 
index bbb87e00bd22fc4c68f66e2776a7b9eead11797c..ab456efb9e98549cb7be74fc7b31bc3eb163a4d8 100644 (file)
@@ -161,7 +161,7 @@ Address.DuplicateAddressDetection,           config_parse_duplicate_address_dete
 Address.Scope,                               config_parse_address_scope,                               0,                             0
 Address.RouteMetric,                         config_parse_address_route_metric,                        0,                             0
 Address.NetLabel,                            config_parse_address_netlabel,                            0,                             0
-Address.NFTSet,                              config_parse_address_ip_nft_set,                          0,                             0
+Address.NFTSet,                              config_parse_address_ip_nft_set,                          NFT_SET_PARSE_NETWORK,         0
 IPv6AddressLabel.Prefix,                     config_parse_address_label_prefix,                        0,                             0
 IPv6AddressLabel.Label,                      config_parse_address_label,                               0,                             0
 Neighbor.Address,                            config_parse_neighbor_address,                            0,                             0
@@ -258,7 +258,7 @@ DHCPv4.InitialAdvertisedReceiveWindow,       config_parse_tcp_window,
 DHCPv4.FallbackLeaseLifetimeSec,             config_parse_dhcp_fallback_lease_lifetime,                0,                             0
 DHCPv4.Use6RD,                               config_parse_bool,                                        0,                             offsetof(Network, dhcp_use_6rd)
 DHCPv4.NetLabel,                             config_parse_string,                                      CONFIG_PARSE_STRING_SAFE,      offsetof(Network, dhcp_netlabel)
-DHCPv4.NFTSet,                               config_parse_nft_set,                                     0,                             offsetof(Network, dhcp_nft_set_context)
+DHCPv4.NFTSet,                               config_parse_nft_set,                                     NFT_SET_PARSE_NETWORK,         offsetof(Network, dhcp_nft_set_context)
 DHCPv6.UseAddress,                           config_parse_bool,                                        0,                             offsetof(Network, dhcp6_use_address)
 DHCPv6.UseDelegatedPrefix,                   config_parse_bool,                                        0,                             offsetof(Network, dhcp6_use_pd_prefix)
 DHCPv6.UseDNS,                               config_parse_dhcp_use_dns,                                AF_INET6,                      0
@@ -280,7 +280,7 @@ DHCPv6.DUIDRawData,                          config_parse_duid_rawdata,
 DHCPv6.RapidCommit,                          config_parse_bool,                                        0,                             offsetof(Network, dhcp6_use_rapid_commit)
 DHCPv6.NetLabel,                             config_parse_string,                                      CONFIG_PARSE_STRING_SAFE,      offsetof(Network, dhcp6_netlabel)
 DHCPv6.SendRelease,                          config_parse_bool,                                        0,                             offsetof(Network, dhcp6_send_release)
-DHCPv6.NFTSet,                               config_parse_nft_set,                                     0,                             offsetof(Network, dhcp6_nft_set_context)
+DHCPv6.NFTSet,                               config_parse_nft_set,                                     NFT_SET_PARSE_NETWORK,         offsetof(Network, dhcp6_nft_set_context)
 IPv6AcceptRA.UseGateway,                     config_parse_bool,                                        0,                             offsetof(Network, ipv6_accept_ra_use_gateway)
 IPv6AcceptRA.UseRoutePrefix,                 config_parse_bool,                                        0,                             offsetof(Network, ipv6_accept_ra_use_route_prefix)
 IPv6AcceptRA.UseAutonomousPrefix,            config_parse_bool,                                        0,                             offsetof(Network, ipv6_accept_ra_use_autonomous_prefix)
@@ -304,7 +304,7 @@ IPv6AcceptRA.RouteAllowList,                 config_parse_in_addr_prefixes,
 IPv6AcceptRA.RouteDenyList,                  config_parse_in_addr_prefixes,                            AF_INET6,                      offsetof(Network, ndisc_deny_listed_route_prefix)
 IPv6AcceptRA.Token,                          config_parse_address_generation_type,                     0,                             offsetof(Network, ndisc_tokens)
 IPv6AcceptRA.NetLabel,                       config_parse_string,                                      CONFIG_PARSE_STRING_SAFE,      offsetof(Network, ndisc_netlabel)
-IPv6AcceptRA.NFTSet,                         config_parse_nft_set,                                     0,                             offsetof(Network, ndisc_nft_set_context)
+IPv6AcceptRA.NFTSet,                         config_parse_nft_set,                                     NFT_SET_PARSE_NETWORK,         offsetof(Network, ndisc_nft_set_context)
 DHCPServer.ServerAddress,                    config_parse_dhcp_server_address,                         0,                             0
 DHCPServer.UplinkInterface,                  config_parse_uplink,                                      0,                             0
 DHCPServer.RelayTarget,                      config_parse_in_addr_non_null,                            AF_INET,                       offsetof(Network, dhcp_server_relay_target)
@@ -372,7 +372,7 @@ DHCPPrefixDelegation.ManageTemporaryAddress, config_parse_bool,
 DHCPPrefixDelegation.Token,                  config_parse_address_generation_type,                     0,                             offsetof(Network, dhcp_pd_tokens)
 DHCPPrefixDelegation.RouteMetric,            config_parse_uint32,                                      0,                             offsetof(Network, dhcp_pd_route_metric)
 DHCPPrefixDelegation.NetLabel,               config_parse_string,                                      CONFIG_PARSE_STRING_SAFE,      offsetof(Network, dhcp_pd_netlabel)
-DHCPPrefixDelegation.NFTSet,                 config_parse_nft_set,                                     0,                             offsetof(Network, dhcp_pd_nft_set_context)
+DHCPPrefixDelegation.NFTSet,                 config_parse_nft_set,                                     NFT_SET_PARSE_NETWORK,         offsetof(Network, dhcp_pd_nft_set_context)
 IPv6SendRA.RouterLifetimeSec,                config_parse_router_lifetime,                             0,                             offsetof(Network, router_lifetime_usec)
 IPv6SendRA.RetransmitSec,                    config_parse_router_retransmit,                           0,                             offsetof(Network, router_retransmit_usec)
 IPv6SendRA.Managed,                          config_parse_bool,                                        0,                             offsetof(Network, router_managed)
index e2d6bfebf320fb335ad3f6dbbe33cfd3e101cef4..47cfc199d3a1548067b055f917381308c3c74f1a 100644 (file)
@@ -17,6 +17,7 @@
 #include "exec-util.h"
 #include "exit-status.h"
 #include "fileio.h"
+#include "firewall-util.h"
 #include "hexdecoct.h"
 #include "hostname-util.h"
 #include "in-addr-util.h"
@@ -449,6 +450,91 @@ static int bus_append_ip_address_access(sd_bus_message *m, int family, const uni
         return sd_bus_message_close_container(m);
 }
 
+static int bus_append_nft_set(sd_bus_message *m, const char *field, const char *eq) { /* Appends field to m as an "(sv)" struct whose variant holds an a(iiss) array parsed from eq; returns 1 on success. */
+        int r;
+
+        assert(m);
+        assert(field);
+        assert(eq);
+
+        if (isempty(eq)) { /* Empty assignment: send the property with a zero-length array. */
+                r = sd_bus_message_append(m, "(sv)", field, "a(iiss)", 0);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); /* Non-empty value: open struct, variant and array one by one so the loop below can fill the array. */
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_open_container(m, 'v', "a(iiss)");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_open_container(m, 'a', "(iiss)");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        for (const char *p = eq;;) { /* One iteration per tuple word extracted from eq. */
+                _cleanup_free_ char *tuple = NULL, *source_str = NULL, *nfproto_str = NULL, *table = NULL, *set = NULL;
+                const char *q = NULL;
+                int source, nfproto;
+
+                r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse %s: %m", field);
+                if (r == 0) /* No further word was extracted: input is exhausted. */
+                        break;
+                if (isempty(tuple))
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);
+
+                q = tuple;
+                r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE, &source_str, &nfproto_str, &table, &set, NULL); /* Split the tuple on ':' into source, family, table and set. */
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r != 4 || !isempty(q)) /* Require exactly four fields with nothing left over. */
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);
+
+                assert(source_str);
+                assert(nfproto_str);
+                assert(table);
+                assert(set);
+
+                source = nft_set_source_from_string(source_str);
+                if (source != NFT_SET_SOURCE_CGROUP) /* Any source other than cgroup (including an unparsable one) is rejected here. */
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);
+
+                nfproto = nfproto_from_string(nfproto_str);
+                if (nfproto < 0 || !nft_identifier_valid(table) || !nft_identifier_valid(set)) /* Validate family and both names before anything is appended. */
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);
+
+                r = sd_bus_message_append(m, "(iiss)", source, nfproto, table, set);
+                if (r < 0)
+                        return bus_log_create_error(r);
+        }
+        r = sd_bus_message_close_container(m); /* Closes the a(iiss) array. */
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_close_container(m); /* Closes the variant. */
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_close_container(m); /* Closes the outer (sv) struct. */
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        return 1;
+}
+
 static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) {
         int r;
 
@@ -914,6 +1000,9 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
         if (streq(field, "MemoryPressureThresholdSec"))
                 return bus_append_parse_sec_rename(m, field, eq);
 
+        if (streq(field, "NFTSet"))
+                return bus_append_nft_set(m, field, eq);
+
         return 0;
 }
 
index 39f9188de29d07c0b9685c290c7665549159d252..a71ea060e34f078d522064c802d38149a1cea467 100644 (file)
@@ -1202,6 +1202,7 @@ static const char *const nft_set_source_table[] = {
         [NFT_SET_SOURCE_ADDRESS] = "address",
         [NFT_SET_SOURCE_PREFIX]  = "prefix",
         [NFT_SET_SOURCE_IFINDEX] = "ifindex",
+        [NFT_SET_SOURCE_CGROUP]  = "cgroup",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(nft_set_source, int);
@@ -1218,11 +1219,11 @@ void nft_set_context_clear(NFTSetContext *s) {
         s->sets = mfree(s->sets);
 }
 
-static int nft_set_add(NFTSetContext *s, NFTSetSource source, int nfproto, const char *table, const char *set) {
+int nft_set_add(NFTSetContext *s, NFTSetSource source, int nfproto, const char *table, const char *set) {
         _cleanup_free_ char *table_dup = NULL, *set_dup = NULL;
 
         assert(s);
-        assert(IN_SET(source, NFT_SET_SOURCE_ADDRESS, NFT_SET_SOURCE_PREFIX, NFT_SET_SOURCE_IFINDEX));
+        assert(IN_SET(source, NFT_SET_SOURCE_ADDRESS, NFT_SET_SOURCE_PREFIX, NFT_SET_SOURCE_IFINDEX, NFT_SET_SOURCE_CGROUP));
         assert(nfproto_is_valid(nfproto));
         assert(table);
         assert(set);
@@ -1285,6 +1286,7 @@ int config_parse_nft_set(
         assert(lvalue);
         assert(rvalue);
         assert(nft_set_context);
+        assert(IN_SET(ltype, NFT_SET_PARSE_NETWORK, NFT_SET_PARSE_CGROUP));
 
         if (isempty(rvalue)) {
                 nft_set_context_clear(nft_set_context);
@@ -1328,7 +1330,9 @@ int config_parse_nft_set(
                 assert(set);
 
                 source = nft_set_source_from_string(source_str);
-                if (source < 0) {
+                if (source < 0 ||
+                    (ltype == NFT_SET_PARSE_NETWORK && !IN_SET(source, NFT_SET_SOURCE_ADDRESS, NFT_SET_SOURCE_PREFIX, NFT_SET_SOURCE_IFINDEX)) ||
+                    (ltype == NFT_SET_PARSE_CGROUP && source != NFT_SET_SOURCE_CGROUP)) {
                         _cleanup_free_ char *esc = NULL;
 
                         esc = cescape(source_str);
index bb35e76f21efd094c95c24f205c218454751ba90..e45f51fab4c0d679542c1b7611f878e424462e16 100644 (file)
@@ -36,6 +36,7 @@ typedef enum NFTSetSource {
         NFT_SET_SOURCE_ADDRESS,
         NFT_SET_SOURCE_PREFIX,
         NFT_SET_SOURCE_IFINDEX,
+        NFT_SET_SOURCE_CGROUP,
         _NFT_SET_SOURCE_MAX,
         _NFT_SET_SOURCE_INVALID = -EINVAL,
 }  NFTSetSource;
@@ -89,4 +90,11 @@ int nft_set_element_modify_any(
                 const void *element,
                 size_t element_size);
 
+int nft_set_add(NFTSetContext *s, NFTSetSource source, int nfproto, const char *table, const char *set);
+
+typedef enum NFTSetParseFlags { /* Passed as ltype to config_parse_nft_set() to select which set sources it accepts. */
+        NFT_SET_PARSE_NETWORK, /* Accept only the address, prefix and ifindex sources. */
+        NFT_SET_PARSE_CGROUP, /* Accept only the cgroup source. */
+} NFTSetParseFlags;
+
 CONFIG_PARSER_PROTOTYPE(config_parse_nft_set);
index df8e93aaa2e3a47e1672c54b8a9dada3c9b9e594..bb0c902493e5f2dc06804c712e9e72e56f7a4cef 100644 (file)
@@ -43,6 +43,14 @@ int main(int argc, char **argv) {
                 r = safe_atou32(argv[6], &element);
                 assert_se(r == 0);
 
+                r = nft_set_element_modify_any(ctx, add, nfproto, table, set, &element, sizeof(element));
+                assert_se(r == 0);
+        } else if (streq(argv[5], "uint64")) {
+                uint64_t element;
+
+                r = safe_atou64(argv[6], &element);
+                assert_se(r == 0);
+
                 r = nft_set_element_modify_any(ctx, add, nfproto, table, set, &element, sizeof(element));
                 assert_se(r == 0);
         } else if (streq(argv[5], "in_addr")) {