]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
network/netdev: add support to create IPoIB subinterface
authorYu Watanabe <watanabe.yu+github@gmail.com>
Sun, 7 Nov 2021 21:34:43 +0000 (06:34 +0900)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Sat, 4 Dec 2021 06:06:58 +0000 (15:06 +0900)
13 files changed:
man/systemd.netdev.xml
man/systemd.network.xml
src/network/meson.build
src/network/netdev/ipoib.c [new file with mode: 0644]
src/network/netdev/ipoib.h [new file with mode: 0644]
src/network/netdev/netdev-gperf.gperf
src/network/netdev/netdev.c
src/network/netdev/netdev.h
src/network/netdev/veth.c
src/network/networkd-network-gperf.gperf
src/network/networkd-network.c
test/fuzz/fuzz-netdev-parser/directives.netdev
test/fuzz/fuzz-network-parser/directives.network

index e4e7e611e7783db9c78fd01ac2feea1e78c5f136..255b85f4049c16e96a38dfbc6264ae0a60774fe1 100644 (file)
 
           <row><entry><varname>batadv</varname></entry>
           <entry><ulink url="https://www.open-mesh.org/projects/open-mesh/wiki">B.A.T.M.A.N. Advanced</ulink> is a routing protocol for multi-hop mobile ad-hoc networks which operates on layer 2.</entry></row>
+
+          <row><entry><varname>ipoib</varname></entry>
+          <entry>An IP over Infiniband subinterface.</entry></row>
         </tbody>
       </tgroup>
     </table>
     </variablelist>
   </refsect1>
 
+  <refsect1>
+    <title>[IPoIB] Section Options</title>
+    <para>The [IPoIB] section only applies for netdevs of kind <literal>ipoib</literal> and accepts the
+    following keys:</para>
+
+    <variablelist class='network-directives'>
+      <varlistentry>
+        <term><varname>PartitionKey=</varname></term>
+        <listitem>
+          <para>Takes an integer in the range 1…0xffff, except for 0x8000. Defaults to unset, and the
+          kernel's default is used.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry id='ipoib_mode'>
+        <term><varname>Mode=</varname></term>
+        <listitem>
+          <para>Takes one of the special values <literal>datagram</literal> or
+          <literal>connected</literal>. Defaults to unset, and the kernel's default is used.</para>
+
+          <para>When <literal>datagram</literal>, the Infiniband unreliable datagram (UD) transport is
+          used, and so the interface MTU is equal to the IB L2 MTU minus the IPoIB encapsulation
+          header (4 bytes). For example, in a typical IB fabric with a 2K MTU, the IPoIB MTU will be
+          2048 - 4 = 2044 bytes.</para>
+
+          <para>When <literal>connected</literal>, the Infiniband reliable connected (RC) transport is
+          used. Connected mode takes advantage of the connected nature of the IB transport and allows
+          an MTU up to the maximal IP packet size of 64K, which reduces the number of IP packets needed
+          for handling large UDP datagrams, TCP segments, etc., and increases the performance for large
+          messages.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry id='ipoib_umcast'>
+        <term><varname>IgnoreUserspaceMulticastGroups=</varname></term>
+        <listitem>
+          <para>Takes a boolean value. When true, the kernel ignores multicast groups handled by
+          userspace. Defaults to unset, and the kernel's default is used.</para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
   <refsect1>
     <title>Examples</title>
     <example>
index 50367ecdcd70582d06205f9703c10f6bacfbf61f..1de7bb05387b516d6f9df64282495c0895fc5910 100644 (file)
@@ -902,6 +902,7 @@ Table=1234</programlisting></para>
           </listitem>
         </varlistentry>
         <varlistentry>
+          <term><varname>IPoIB=</varname></term>
           <term><varname>IPVLAN=</varname></term>
           <term><varname>IPVTAP=</varname></term>
           <term><varname>L2TP=</varname></term>
@@ -913,8 +914,8 @@ Table=1234</programlisting></para>
           <term><varname>VXLAN=</varname></term>
           <term><varname>Xfrm=</varname></term>
           <listitem>
-            <para>The name of an IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN, VXLAN, or
-            Xfrm to be created on the link. See
+            <para>The name of an IPoIB, IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN,
+            VXLAN, or Xfrm to be created on the link. See
             <citerefentry><refentrytitle>systemd.netdev</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
             This option may be specified more than once.</para>
           </listitem>
index cfa16a8ecf08a4481d03c7f53a07b36eaff49739..c1cf227ffcb2739b9482ee902db59be2696cd57b 100644 (file)
@@ -13,6 +13,8 @@ sources = files('''
         netdev/dummy.h
         netdev/ifb.c
         netdev/ifb.h
+        netdev/ipoib.c
+        netdev/ipoib.h
         netdev/ipvlan.c
         netdev/ipvlan.h
         netdev/macvlan.c
diff --git a/src/network/netdev/ipoib.c b/src/network/netdev/ipoib.c
new file mode 100644 (file)
index 0000000..b341001
--- /dev/null
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if_arp.h>
+#include <linux/if_link.h>
+
+#include "ipoib.h"
+#include "parse-util.h"
+#include "string-table.h"
+/* IPoIBMode values are appended to the netlink message as-is (IFLA_IPOIB_MODE), so make sure
+ * they stay identical to the IPOIB_MODE_* constants from the kernel headers. */
+assert_cc((int) IP_OVER_INFINIBAND_MODE_DATAGRAM  == (int) IPOIB_MODE_DATAGRAM);
+assert_cc((int) IP_OVER_INFINIBAND_MODE_CONNECTED == (int) IPOIB_MODE_CONNECTED);
+
+static void netdev_ipoib_init(NetDev *netdev) {
+        IPoIB *ipoib;
+        /* Installed as the .init hook of ipoib_vtable. */
+        assert(netdev);
+
+        ipoib = IPOIB(netdev);
+
+        assert(ipoib);
+        /* Start with Mode= and the umcast tristate "unset" (negative), so that
+         * netdev_ipoib_fill_message_create() only sends them when explicitly configured. */
+        ipoib->mode = _IP_OVER_INFINIBAND_MODE_INVALID;
+        ipoib->umcast = -1;
+}
+
+static int netdev_ipoib_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+        IPoIB *ipoib;
+        int r;
+        /* Installed as the .fill_message_create hook of ipoib_vtable: appends the IPoIB attributes
+         * to m, skipping each one that is unset. Returns 0 on success; on failure the append error
+         * is logged and the value of log_netdev_error_errno() is returned. */
+        assert(netdev);
+        assert(link);
+        assert(m);
+
+        ipoib = IPOIB(netdev);
+
+        assert(ipoib);
+
+        if (ipoib->pkey > 0) { /* 0 means PartitionKey= is unset */
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_PKEY, ipoib->pkey);
+                if (r < 0)
+                        return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_PKEY attribute: %m");
+        }
+
+        if (ipoib->mode >= 0) { /* negative means Mode= is unset */
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_MODE, ipoib->mode);
+                if (r < 0)
+                        return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_MODE attribute: %m");
+        }
+
+        if (ipoib->umcast >= 0) { /* negative means the tristate is unset */
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_UMCAST, ipoib->umcast);
+                if (r < 0)
+                        return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_UMCAST attribute: %m");
+        }
+
+        return 0;
+}
+
+static const char * const ipoib_mode_table[_IP_OVER_INFINIBAND_MODE_MAX] = { /* strings accepted by Mode= */
+        [IP_OVER_INFINIBAND_MODE_DATAGRAM]  = "datagram",
+        [IP_OVER_INFINIBAND_MODE_CONNECTED] = "connected",
+};
+/* NOTE(review): presumably these expand to a string -> IPoIBMode lookup over the table above and
+ * to the config_parse_ipoib_mode() parser prototyped in ipoib.h — confirm against the macros. */
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(ipoib_mode, IPoIBMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipoib_mode, ipoib_mode, IPoIBMode, "Failed to parse IPoIB mode");
+
+int config_parse_ipoib_pkey(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+        /* Config parser for IPoIB.PartitionKey=; data points to the uint16_t the key is stored in.
+         * An empty value resets the key to 0 (unset). Values that fail to parse, 0 and 0x8000 are
+         * logged as warnings and ignored, leaving *pkey untouched. Always returns 0. */
+        uint16_t u, *pkey = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (isempty(rvalue)) {
+                *pkey = 0; /* 0 means unset. */
+                return 0;
+        }
+
+        r = safe_atou16(rvalue, &u);
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r,
+                           "Failed to parse IPoIB pkey '%s', ignoring assignment: %m",
+                           rvalue);
+                return 0;
+        }
+        if (u == 0 || u == 0x8000) { /* NOTE(review): 0x8000 is presumably the bare membership bit — confirm */
+                log_syntax(unit, LOG_WARNING, filename, line, 0,
+                           "IPoIB pkey cannot be 0 nor 0x8000, ignoring assignment: %s",
+                           rvalue);
+                return 0;
+        }
+
+        *pkey = u;
+        return 0;
+}
+
+/* NetDev vtable for Kind=ipoib; hooked up as netdev_vtable[NETDEV_KIND_IPOIB] in netdev.c. */
+const NetDevVTable ipoib_vtable = {
+        .object_size = sizeof(IPoIB),
+        .sections = NETDEV_COMMON_SECTIONS "IPoIB\0",
+        .init = netdev_ipoib_init,
+        .fill_message_create = netdev_ipoib_fill_message_create,
+        .create_type = NETDEV_CREATE_STACKED, /* requested from a .network file through IPoIB= */
+        .iftype = ARPHRD_INFINIBAND,
+        .generate_mac = true, /* netdev_generate_hw_addr() skips address generation when this is false */
+};
diff --git a/src/network/netdev/ipoib.h b/src/network/netdev/ipoib.h
new file mode 100644 (file)
index 0000000..d2f5d93
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+
+#include "conf-parser.h"
+#include "netdev.h"
+
+typedef enum IPoIBMode {
+        IP_OVER_INFINIBAND_MODE_DATAGRAM,           /* "datagram"; asserted equal to IPOIB_MODE_DATAGRAM in ipoib.c */
+        IP_OVER_INFINIBAND_MODE_CONNECTED,          /* "connected"; asserted equal to IPOIB_MODE_CONNECTED in ipoib.c */
+        _IP_OVER_INFINIBAND_MODE_MAX,               /* number of valid modes; sizes ipoib_mode_table */
+        _IP_OVER_INFINIBAND_MODE_INVALID = -EINVAL, /* negative; also the "unset" default set by netdev_ipoib_init() */
+} IPoIBMode;
+
+typedef struct IPoIB {
+        NetDev meta;
+        /* Settings of the [IPoIB] section; see netdev-gperf.gperf for the key -> field mapping. */
+        uint16_t pkey;  /* PartitionKey=; 0 means unset */
+        IPoIBMode mode; /* Mode=; negative means unset */
+        int umcast;     /* IgnoreUserspaceMulticastGroups= tristate; negative means unset */
+} IPoIB;
+
+DEFINE_NETDEV_CAST(IPOIB, IPoIB); /* NOTE(review): presumably provides the IPOIB() cast used in ipoib.c — confirm in netdev.h */
+extern const NetDevVTable ipoib_vtable; /* defined in ipoib.c */
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_pkey); /* parser for PartitionKey= */
+CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_mode); /* parser for Mode= */
index 37a0d9fa5d55a528c9b56f8e663eff866fe6c706..a948ec2c8a0eed435e32162e623f1a36bc8aab68 100644 (file)
@@ -11,6 +11,7 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
 #include "conf-parser.h"
 #include "fou-tunnel.h"
 #include "geneve.h"
+#include "ipoib.h"
 #include "ipvlan.h"
 #include "l2tp-tunnel.h"
 #include "macsec.h"
@@ -253,3 +254,6 @@ BatmanAdvanced.GatewayBandwidthUp,        config_parse_badadv_bandwidth,
 BatmanAdvanced.HopPenalty,                config_parse_uint8,                        0,                             offsetof(BatmanAdvanced, hop_penalty)
 BatmanAdvanced.OriginatorIntervalSec,     config_parse_sec,                          0,                             offsetof(BatmanAdvanced, originator_interval)
 BatmanAdvanced.RoutingAlgorithm,          config_parse_batadv_routing_algorithm,     0,                             offsetof(BatmanAdvanced, routing_algorithm)
+IPoIB.PartitionKey,                       config_parse_ipoib_pkey,                   0,                             offsetof(IPoIB, pkey)
+IPoIB.Mode,                               config_parse_ipoib_mode,                   0,                             offsetof(IPoIB, mode)
+IPoIB.IgnoreUserspaceMulticastGroups,     config_parse_tristate,                     0,                             offsetof(IPoIB, umcast)
index 6c6b4c306857770ba0973ff8b058d835d54fe827..f67329259406866cd829193eeeddb7ffcd36073c 100644 (file)
@@ -18,6 +18,7 @@
 #include "fou-tunnel.h"
 #include "geneve.h"
 #include "ifb.h"
+#include "ipoib.h"
 #include "ipvlan.h"
 #include "l2tp-tunnel.h"
 #include "list.h"
@@ -64,6 +65,7 @@ const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX] = {
         [NETDEV_KIND_IP6GRETAP] = &ip6gretap_vtable,
         [NETDEV_KIND_IP6TNL]    = &ip6tnl_vtable,
         [NETDEV_KIND_IPIP]      = &ipip_vtable,
+        [NETDEV_KIND_IPOIB]     = &ipoib_vtable,
         [NETDEV_KIND_IPVLAN]    = &ipvlan_vtable,
         [NETDEV_KIND_IPVTAP]    = &ipvtap_vtable,
         [NETDEV_KIND_L2TP]      = &l2tptnl_vtable,
@@ -103,6 +105,7 @@ static const char* const netdev_kind_table[_NETDEV_KIND_MAX] = {
         [NETDEV_KIND_IP6GRETAP] = "ip6gretap",
         [NETDEV_KIND_IP6TNL]    = "ip6tnl",
         [NETDEV_KIND_IPIP]      = "ipip",
+        [NETDEV_KIND_IPOIB]     = "ipoib",
         [NETDEV_KIND_IPVLAN]    = "ipvlan",
         [NETDEV_KIND_IPVTAP]    = "ipvtap",
         [NETDEV_KIND_L2TP]      = "l2tp",
@@ -393,6 +396,7 @@ int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *message) {
 
 int netdev_generate_hw_addr(
                 NetDev *netdev,
+                Link *parent,
                 const char *name,
                 const struct hw_addr_data *hw_addr,
                 struct hw_addr_data *ret) {
@@ -419,7 +423,7 @@ int netdev_generate_hw_addr(
                 if (!NETDEV_VTABLE(netdev)->generate_mac)
                         goto finalize;
 
-                if (NETDEV_VTABLE(netdev)->iftype != ARPHRD_ETHER)
+                if (!IN_SET(NETDEV_VTABLE(netdev)->iftype, ARPHRD_ETHER, ARPHRD_INFINIBAND))
                         goto finalize;
 
                 r = net_get_unique_predictable_data_from_name(name, &HASH_KEY, &result);
@@ -430,21 +434,42 @@ int netdev_generate_hw_addr(
                 }
 
                 a.length = arphrd_to_hw_addr_len(NETDEV_VTABLE(netdev)->iftype);
-                assert(a.length <= sizeof(result));
-                memcpy(a.bytes, &result, a.length);
 
-                if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) {
-                        log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
-                                                 "Failed to generate persistent MAC address, ignoring: %m");
-                        a = HW_ADDR_NULL;
-                        goto finalize;
+                switch (NETDEV_VTABLE(netdev)->iftype) {
+                case ARPHRD_ETHER:
+                        assert(a.length <= sizeof(result));
+                        memcpy(a.bytes, &result, a.length);
+
+                        if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) {
+                                log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+                                                         "Failed to generate persistent MAC address, ignoring: %m");
+                                a = HW_ADDR_NULL;
+                                goto finalize;
+                        }
+
+                        break;
+                case ARPHRD_INFINIBAND:
+                        if (result == 0) {
+                                log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+                                                         "Failed to generate persistent MAC address: %m");
+                                goto finalize;
+                        }
+
+                        assert(a.length >= sizeof(result));
+                        memzero(a.bytes, a.length - sizeof(result));
+                        memcpy(a.bytes + a.length - sizeof(result), &result, sizeof(result));
+                        break;
+                default:
+                        assert_not_reached();
                 }
+
         } else {
                 a = *hw_addr;
                 warn_invalid = true;
         }
 
-        r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype, NULL, &a);
+        r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype,
+                                        parent ? &parent->hw_addr : NULL, &a);
         if (r < 0)
                 return r;
 
@@ -481,7 +506,7 @@ static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handle
         if (r < 0)
                 return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
 
-        r = netdev_generate_hw_addr(netdev, netdev->ifname, &netdev->hw_addr, &hw_addr);
+        r = netdev_generate_hw_addr(netdev, link, netdev->ifname, &netdev->hw_addr, &hw_addr);
         if (r < 0)
                 return r;
 
index b226cf20a7a5e18d6d2f6d5acd24b7cb9a17d702..c7262f550a527cf6095e8bbbee16970b86332da9 100644 (file)
@@ -22,6 +22,7 @@
         "-Bridge\0"                               \
         "-FooOverUDP\0"                           \
         "-GENEVE\0"                               \
+        "-IPoIB\0"                                \
         "-IPVLAN\0"                               \
         "-IPVTAP\0"                               \
         "-L2TP\0"                                 \
@@ -60,6 +61,7 @@ typedef enum NetDevKind {
         NETDEV_KIND_IP6GRETAP,
         NETDEV_KIND_IP6TNL,
         NETDEV_KIND_IPIP,
+        NETDEV_KIND_IPOIB,
         NETDEV_KIND_IPVLAN,
         NETDEV_KIND_IPVTAP,
         NETDEV_KIND_L2TP,
@@ -201,7 +203,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_unref);
 bool netdev_is_managed(NetDev *netdev);
 int netdev_get(Manager *manager, const char *name, NetDev **ret);
 int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *newlink);
-int netdev_generate_hw_addr(NetDev *netdev, const char *name,
+int netdev_generate_hw_addr(NetDev *netdev, Link *link, const char *name,
                             const struct hw_addr_data *hw_addr, struct hw_addr_data *ret);
 int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t cb);
 
index 5dd8586a3a7eccceacd45043bb9b3b568d5b7f99..c946e81fc0a3a36f080d6026f23e05ce3157f1dd 100644 (file)
@@ -32,7 +32,7 @@ static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlin
                         return log_netdev_error_errno(netdev, r, "Failed to add netlink interface name: %m");
         }
 
-        r = netdev_generate_hw_addr(netdev, v->ifname_peer, &v->hw_addr_peer, &hw_addr);
+        r = netdev_generate_hw_addr(netdev, NULL, v->ifname_peer, &v->hw_addr_peer, &hw_addr);
         if (r < 0)
                 return r;
 
index df9721a9bcbed1ccdb80d85b8ebe92cb350f3458..4ac58a26ad01e68ac7620cf8b37be96639735af7 100644 (file)
@@ -87,6 +87,7 @@ Network.BatmanAdvanced,                      config_parse_ifname,
 Network.Bond,                                config_parse_ifname,                                      0,                             offsetof(Network, bond_name)
 Network.Bridge,                              config_parse_ifname,                                      0,                             offsetof(Network, bridge_name)
 Network.VRF,                                 config_parse_ifname,                                      0,                             offsetof(Network, vrf_name)
+Network.IPoIB,                               config_parse_stacked_netdev,                              NETDEV_KIND_IPOIB,             offsetof(Network, stacked_netdev_names)
 Network.IPVLAN,                              config_parse_stacked_netdev,                              NETDEV_KIND_IPVLAN,            offsetof(Network, stacked_netdev_names)
 Network.IPVTAP,                              config_parse_stacked_netdev,                              NETDEV_KIND_IPVTAP,            offsetof(Network, stacked_netdev_names)
 Network.L2TP,                                config_parse_stacked_netdev,                              NETDEV_KIND_L2TP,              offsetof(Network, stacked_netdev_names)
index 443222f61062054d682aeedfbc51123a32008c46..7640429f461d6c38e417a6b0cebe3d39ba86c429 100644 (file)
@@ -852,6 +852,7 @@ int config_parse_stacked_netdev(
         assert(rvalue);
         assert(data);
         assert(IN_SET(kind,
+                      NETDEV_KIND_IPOIB,
                       NETDEV_KIND_IPVLAN,
                       NETDEV_KIND_IPVTAP,
                       NETDEV_KIND_L2TP,
index e34d16af117c677846aff0f372ad8d56d03b4baf..f5fa2418feeb25b0bad0c8c6233e70077972e9b3 100644 (file)
@@ -241,3 +241,7 @@ GatewayBandwithUp=
 GatewayBandwidthDown=
 GatewayBandwidthUp=
 RoutingAlgorithm=
+[IPoIB]
+PartitionKey=
+Mode=
+IgnoreUserspaceMulticastGroups=
index 5b5a4f8c60e93e3633b9c4abedd6a1718287645a..68cf1ba6919f0b584490203a8ca6a33936cd4527 100644 (file)
@@ -242,6 +242,7 @@ IgnoreCarrierLoss=
 KeepConfiguration=
 DHCPv6PrefixDelegation=
 BatmanAdvanced=
+IPoIB=
 [IPv6Prefix]
 Prefix=
 OnLink=