]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
network: introduce TrafficControl
authorSusant Sahani <ssahani@vmware.com>
Mon, 7 Oct 2019 14:19:00 +0000 (16:19 +0200)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Wed, 30 Oct 2019 00:33:51 +0000 (09:33 +0900)
Add network delay to an interface

19 files changed:
man/systemd.network.xml
src/basic/linux/pkt_sched.h [new file with mode: 0644]
src/libsystemd/sd-netlink/netlink-types.c
src/libsystemd/sd-netlink/netlink-util.h
src/libsystemd/sd-netlink/rtnl-message.c
src/network/meson.build
src/network/networkd-link.c
src/network/networkd-link.h
src/network/networkd-network-gperf.gperf
src/network/networkd-network.c
src/network/networkd-network.h
src/network/tc/netem.c [new file with mode: 0644]
src/network/tc/netem.h [new file with mode: 0644]
src/network/tc/qdisc.c [new file with mode: 0644]
src/network/tc/qdisc.h [new file with mode: 0644]
src/network/tc/tc-util.c [new file with mode: 0644]
src/network/tc/tc-util.h [new file with mode: 0644]
src/systemd/sd-netlink.h
test/fuzz/fuzz-network-parser/directives.network

index 04cbde391e06756e6f125ee81c2b10b9cdd57c6b..6f08edb369689425efe8e000db239dd2403d7ed6 100644 (file)
       </variablelist>
   </refsect1>
 
+  <refsect1>
+    <title>[TrafficControlQueueingDiscipline] Section Options</title>
+    <para>The <literal>[TrafficControlQueueingDiscipline]</literal> section manages traffic control. It can be used
+    to configure the kernel packet scheduler, to simulate packet delay and loss for UDP or TCP applications,
+    or to limit the bandwidth usage of a particular service in order to simulate internet connections.</para>
+
+    <variablelist class='network-directives'>
+      <varlistentry>
+        <term><varname>Parent=</varname></term>
+        <listitem>
+          <para>Specifies the parent Queueing Discipline (qdisc). Takes one of <literal>root</literal>
+          or <literal>clsact</literal>. Defaults to <literal>root</literal>.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>NetworkEmulatorDelaySec=</varname></term>
+        <listitem>
+          <para>Specifies the fixed amount of delay to be added to all packets going out of the
+          interface. Defaults to unset.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>NetworkEmulatorDelayJitterSec=</varname></term>
+        <listitem>
+          <para>Specifies the random jitter to be applied to the delay of packets going out of the
+          network interface. Defaults to unset.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>NetworkEmulatorPacketLimit=</varname></term>
+        <listitem>
+          <para>Specifies the maximum number of packets the qdisc may hold queued at a time.
+          Takes an unsigned integer in the range from 0 to 4294967294. Defaults to 1000.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>NetworkEmulatorLossRate=</varname></term>
+        <listitem>
+          <para>Specifies an independent loss probability to be added to the packets outgoing from the
+          network interface. Takes a percentage value, suffixed with "%". Defaults to unset.</para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
   <refsect1>
     <title>[BridgeVLAN] Section Options</title>
       <para>The <literal>[BridgeVLAN]</literal> section manages the VLAN ID configuration of a bridge port and accepts
diff --git a/src/basic/linux/pkt_sched.h b/src/basic/linux/pkt_sched.h
new file mode 100644 (file)
index 0000000..daf6057
--- /dev/null
@@ -0,0 +1,1184 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_SCHED_H
+#define __LINUX_PKT_SCHED_H
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+/* Logical priority bands that do not depend on a specific packet scheduler.
+   Every scheduler will map them to real traffic classes, if it has
+   no more precise mechanism to classify packets.
+
+   These numbers have no special meaning, though their coincidence
+   with obsolete IPv6 values is not accidental :-). New IPv6 drafts
+   preferred full anarchy inspired by the diffserv group.
+
+   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
+   class; in fact, as a rule it will be handled with more care than
+   filler or even bulk.
+*/
+
+#define TC_PRIO_BESTEFFORT              0
+#define TC_PRIO_FILLER                  1
+#define TC_PRIO_BULK                    2
+#define TC_PRIO_INTERACTIVE_BULK        4
+#define TC_PRIO_INTERACTIVE             6
+#define TC_PRIO_CONTROL                 7
+
+#define TC_PRIO_MAX                     15
+
+/* Generic queue statistics, available for all the elements.
+   Particular schedulers may have also their private records.
+*/
+
+struct tc_stats {
+        __u64   bytes;                  /* Number of enqueued bytes */
+        __u32   packets;                /* Number of enqueued packets   */
+        __u32   drops;                  /* Packets dropped because of lack of resources */
+        __u32   overlimits;             /* Number of throttle events when this
+                                         * flow goes out of allocated bandwidth */
+        __u32   bps;                    /* Current flow byte rate */
+        __u32   pps;                    /* Current flow packet rate */
+        __u32   qlen;
+        __u32   backlog;
+};
+
+struct tc_estimator {
+        signed char     interval;
+        unsigned char   ewma_log;
+};
+
+/* "Handles"
+   ---------
+
+   All traffic control objects have 32-bit identifiers, or "handles".
+
+   From the user API viewpoint they can be treated as opaque numbers,
+   but they actually always consist of two fields, a major and a
+   minor number, which the kernel interprets specially. Applications
+   may rely on this split, though doing so is not recommended.
+
+   For example, qdisc handles always have a minor number equal to zero;
+   classes (or flows) have a major equal to the parent qdisc's major and
+   a minor that uniquely identifies the class inside the qdisc.
+
+   Macros to manipulate handles:
+*/
+
+#define TC_H_MAJ_MASK (0xFFFF0000U) /* upper 16 bits of a handle: the major number */
+#define TC_H_MIN_MASK (0x0000FFFFU) /* lower 16 bits of a handle: the minor number */
+#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) /* major part of h, left in the upper 16 bits (not shifted down) */
+#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) /* minor part of h */
+#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) /* combine; maj must already sit in the upper 16 bits, min in the lower 16 */
+
+#define TC_H_UNSPEC     (0U)
+#define TC_H_ROOT       (0xFFFFFFFFU) /* all 32 bits set */
+#define TC_H_INGRESS    (0xFFFFFFF1U)
+#define TC_H_CLSACT     TC_H_INGRESS /* alias: same handle value as TC_H_INGRESS */
+
+#define TC_H_MIN_PRIORITY       0xFFE0U
+#define TC_H_MIN_INGRESS        0xFFF2U
+#define TC_H_MIN_EGRESS         0xFFF3U
+
+/* Needs to correspond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+                    TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+                    TC_LINKLAYER_ETHERNET,
+                    TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
+struct tc_ratespec {
+        unsigned char   cell_log;
+        __u8            linklayer; /* lower 4 bits */
+        unsigned short  overhead;
+        short           cell_align;
+        unsigned short  mpu;
+        __u32           rate;
+};
+
+#define TC_RTAB_SIZE    1024
+
+struct tc_sizespec {
+        unsigned char   cell_log;
+        unsigned char   size_log;
+        short           cell_align;
+        int             overhead;
+        unsigned int    linklayer;
+        unsigned int    mpu;
+        unsigned int    mtu;
+        unsigned int    tsize;
+};
+
+enum {
+      TCA_STAB_UNSPEC,
+      TCA_STAB_BASE,
+      TCA_STAB_DATA,
+      __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
+/* FIFO section */
+
+struct tc_fifo_qopt {
+        __u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
+};
+
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map the DS field of IPv4 and IPv6 headers one to one.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+        __u32   limit;          /* Queue length in packets. */
+};
+
+/* PRIO section */
+
+#define TCQ_PRIO_BANDS  16
+#define TCQ_MIN_PRIO_BANDS 2
+
+struct tc_prio_qopt {
+        int     bands;                  /* Number of bands */
+        __u8    priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+};
+
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+        __u16   bands;                  /* Number of bands */
+        __u16   max_bands;              /* Maximum number of queues */
+};
+
+/* PLUG section */
+
+#define TCQ_PLUG_BUFFER                0
+#define TCQ_PLUG_RELEASE_ONE           1
+#define TCQ_PLUG_RELEASE_INDEFINITE    2
+#define TCQ_PLUG_LIMIT                 3
+
+struct tc_plug_qopt {
+        /* TCQ_PLUG_BUFFER: Insert a plug into the queue and
+         *  buffer any incoming packets
+         * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
+         *   to beginning of the next plug.
+         * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
+         *   Stop buffering packets until the next TCQ_PLUG_BUFFER
+         *   command is received (just act as a pass-thru queue).
+         * TCQ_PLUG_LIMIT: Increase/decrease queue size
+         */
+        int             action; /* one of the TCQ_PLUG_* commands described above */
+        __u32           limit;
+};
+
+/* TBF section */
+
+struct tc_tbf_qopt {
+        struct tc_ratespec rate;
+        struct tc_ratespec peakrate;
+        __u32           limit;
+        __u32           buffer;
+        __u32           mtu;
+};
+
+enum {
+      TCA_TBF_UNSPEC,
+      TCA_TBF_PARMS,
+      TCA_TBF_RTAB,
+      TCA_TBF_PTAB,
+      TCA_TBF_RATE64,
+      TCA_TBF_PRATE64,
+      TCA_TBF_BURST,
+      TCA_TBF_PBURST,
+      TCA_TBF_PAD,
+      __TCA_TBF_MAX,
+};
+
+#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
+
+
+/* TEQL section */
+
+/* TEQL does not require any parameters */
+
+/* SFQ section */
+
+struct tc_sfq_qopt {
+        unsigned        quantum;        /* Bytes per round allocated to flow */
+        int             perturb_period; /* Period of hash perturbation */
+        __u32           limit;          /* Maximal packets in queue */
+        unsigned        divisor;        /* Hash divisor  */
+        unsigned        flows;          /* Maximal number of flows  */
+};
+
+struct tc_sfqred_stats {
+        __u32           prob_drop;      /* Early drops, below max threshold */
+        __u32           forced_drop;    /* Early drops, after max threshold */
+        __u32           prob_mark;      /* Marked packets, below max threshold */
+        __u32           forced_mark;    /* Marked packets, after max threshold */
+        __u32           prob_mark_head; /* Marked packets, below max threshold (NOTE(review): same text as prob_mark; presumably the head-of-queue variant - confirm) */
+        __u32           forced_mark_head;/* Marked packets, after max threshold (NOTE(review): same text as forced_mark; presumably the head-of-queue variant - confirm) */
+};
+
+struct tc_sfq_qopt_v1 {
+        struct tc_sfq_qopt v0;
+        unsigned int    depth;          /* max number of packets per flow */
+        unsigned int    headdrop;
+        /* SFQRED parameters */
+        __u32           limit;          /* HARD maximal flow queue length (bytes) */
+        __u32           qth_min;        /* Min average length threshold (bytes) */
+        __u32           qth_max;        /* Max average length threshold (bytes) */
+        unsigned char   Wlog;           /* log(W)               */
+        unsigned char   Plog;           /* log(P_max/(qth_max-qth_min)) */
+        unsigned char   Scell_log;      /* cell size for idle damping */
+        unsigned char   flags;
+        __u32           max_P;          /* probability, high resolution */
+        /* SFQRED stats */
+        struct tc_sfqred_stats stats;
+};
+
+
+struct tc_sfq_xstats {
+        __s32           allot;
+};
+
+/* RED section */
+
+enum {
+      TCA_RED_UNSPEC,
+      TCA_RED_PARMS,
+      TCA_RED_STAB,
+      TCA_RED_MAX_P,
+      __TCA_RED_MAX,
+};
+
+#define TCA_RED_MAX (__TCA_RED_MAX - 1)
+
+struct tc_red_qopt {
+        __u32           limit;          /* HARD maximal queue length (bytes)    */
+        __u32           qth_min;        /* Min average length threshold (bytes) */
+        __u32           qth_max;        /* Max average length threshold (bytes) */
+        unsigned char   Wlog;           /* log(W)               */
+        unsigned char   Plog;           /* log(P_max/(qth_max-qth_min)) */
+        unsigned char   Scell_log;      /* cell size for idle damping */
+        unsigned char   flags;
+#define TC_RED_ECN              1
+#define TC_RED_HARDDROP         2
+#define TC_RED_ADAPTATIVE       4
+};
+
+struct tc_red_xstats {
+        __u32           early;          /* Early drops */
+        __u32           pdrop;          /* Drops due to queue limits */
+        __u32           other;          /* Drops due to drop() calls */
+        __u32           marked;         /* Marked packets */
+};
+
+/* GRED section */
+
+#define MAX_DPs 16
+
+enum {
+      TCA_GRED_UNSPEC,
+      TCA_GRED_PARMS,
+      TCA_GRED_STAB,
+      TCA_GRED_DPS,
+      TCA_GRED_MAX_P,
+      TCA_GRED_LIMIT,
+      TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */
+      __TCA_GRED_MAX,
+};
+
+#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
+
+enum {
+      TCA_GRED_VQ_ENTRY_UNSPEC,
+      TCA_GRED_VQ_ENTRY,        /* nested TCA_GRED_VQ_* */
+      __TCA_GRED_VQ_ENTRY_MAX,
+};
+#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1)
+
+enum {
+      TCA_GRED_VQ_UNSPEC,
+      TCA_GRED_VQ_PAD,
+      TCA_GRED_VQ_DP,                   /* u32 */
+      TCA_GRED_VQ_STAT_BYTES,           /* u64 */
+      TCA_GRED_VQ_STAT_PACKETS, /* u32 */
+      TCA_GRED_VQ_STAT_BACKLOG, /* u32 */
+      TCA_GRED_VQ_STAT_PROB_DROP,       /* u32 */
+      TCA_GRED_VQ_STAT_PROB_MARK,       /* u32 */
+      TCA_GRED_VQ_STAT_FORCED_DROP,     /* u32 */
+      TCA_GRED_VQ_STAT_FORCED_MARK,     /* u32 */
+      TCA_GRED_VQ_STAT_PDROP,           /* u32 */
+      TCA_GRED_VQ_STAT_OTHER,           /* u32 */
+      TCA_GRED_VQ_FLAGS,                /* u32 */
+      __TCA_GRED_VQ_MAX
+};
+
+#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1)
+
+struct tc_gred_qopt {
+        __u32           limit;        /* HARD maximal queue length (bytes)    */
+        __u32           qth_min;      /* Min average length threshold (bytes) */
+        __u32           qth_max;      /* Max average length threshold (bytes) */
+        __u32           DP;           /* up to 2^32 DPs */
+        __u32           backlog;
+        __u32           qave;
+        __u32           forced;
+        __u32           early;
+        __u32           other;
+        __u32           pdrop;
+        __u8            Wlog;         /* log(W)               */
+        __u8            Plog;         /* log(P_max/(qth_max-qth_min)) */
+        __u8            Scell_log;    /* cell size for idle damping */
+        __u8            prio;         /* prio of this VQ */
+        __u32           packets;
+        __u32           bytesin;
+};
+
+/* gred setup */
+struct tc_gred_sopt {
+        __u32           DPs;
+        __u32           def_DP;
+        __u8            grio;
+        __u8            flags;
+        __u16           pad1;
+};
+
+/* CHOKe section */
+
+enum {
+      TCA_CHOKE_UNSPEC,
+      TCA_CHOKE_PARMS,
+      TCA_CHOKE_STAB,
+      TCA_CHOKE_MAX_P,
+      __TCA_CHOKE_MAX,
+};
+
+#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
+
+struct tc_choke_qopt {
+        __u32           limit;          /* Hard queue length (packets)  */
+        __u32           qth_min;        /* Min average threshold (packets) */
+        __u32           qth_max;        /* Max average threshold (packets) */
+        unsigned char   Wlog;           /* log(W)               */
+        unsigned char   Plog;           /* log(P_max/(qth_max-qth_min)) */
+        unsigned char   Scell_log;      /* cell size for idle damping */
+        unsigned char   flags;          /* see RED flags */
+};
+
+struct tc_choke_xstats {
+        __u32           early;          /* Early drops */
+        __u32           pdrop;          /* Drops due to queue limits */
+        __u32           other;          /* Drops due to drop() calls */
+        __u32           marked;         /* Marked packets */
+        __u32           matched;        /* Drops due to flow match */
+};
+
+/* HTB section */
+#define TC_HTB_NUMPRIO          8
+#define TC_HTB_MAXDEPTH         8
+#define TC_HTB_PROTOVER         3 /* the same as HTB and TC's major */
+
+struct tc_htb_opt {
+        struct tc_ratespec      rate;
+        struct tc_ratespec      ceil;
+        __u32   buffer;
+        __u32   cbuffer;
+        __u32   quantum;
+        __u32   level;          /* out only */
+        __u32   prio;
+};
+struct tc_htb_glob {
+        __u32 version;          /* to match HTB/TC */
+        __u32 rate2quantum;     /* bps->quantum divisor */
+        __u32 defcls;           /* default class number */
+        __u32 debug;            /* debug flags */
+
+        /* stats */
+        __u32 direct_pkts; /* count of non shaped packets */
+};
+enum {
+      TCA_HTB_UNSPEC,
+      TCA_HTB_PARMS,
+      TCA_HTB_INIT,
+      TCA_HTB_CTAB,
+      TCA_HTB_RTAB,
+      TCA_HTB_DIRECT_QLEN,
+      TCA_HTB_RATE64,
+      TCA_HTB_CEIL64,
+      TCA_HTB_PAD,
+      __TCA_HTB_MAX,
+};
+
+#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
+
+struct tc_htb_xstats {
+        __u32 lends;
+        __u32 borrows;
+        __u32 giants;   /* unused since 'Make HTB scheduler work with TSO.' */
+        __s32 tokens;
+        __s32 ctokens;
+};
+
+/* HFSC section */
+
+struct tc_hfsc_qopt {
+        __u16   defcls;         /* default class */
+};
+
+struct tc_service_curve {
+        __u32   m1;             /* slope of the first segment in bps */
+        __u32   d;              /* x-projection of the first segment in us */
+        __u32   m2;             /* slope of the second segment in bps */
+};
+
+struct tc_hfsc_stats {
+        __u64   work;           /* total work done */
+        __u64   rtwork;         /* work done by real-time criteria */
+        __u32   period;         /* current period */
+        __u32   level;          /* class level in hierarchy */
+};
+
+enum {
+      TCA_HFSC_UNSPEC,
+      TCA_HFSC_RSC,
+      TCA_HFSC_FSC,
+      TCA_HFSC_USC,
+      __TCA_HFSC_MAX,
+};
+
+#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
+
+
+/* CBQ section */
+
+#define TC_CBQ_MAXPRIO          8
+#define TC_CBQ_MAXLEVEL         8
+#define TC_CBQ_DEF_EWMA         5
+
+struct tc_cbq_lssopt {
+        unsigned char   change;
+        unsigned char   flags;
+#define TCF_CBQ_LSS_BOUNDED     1
+#define TCF_CBQ_LSS_ISOLATED    2
+        unsigned char   ewma_log;
+        unsigned char   level;
+#define TCF_CBQ_LSS_FLAGS       1
+#define TCF_CBQ_LSS_EWMA        2
+#define TCF_CBQ_LSS_MAXIDLE     4
+#define TCF_CBQ_LSS_MINIDLE     8
+#define TCF_CBQ_LSS_OFFTIME     0x10
+#define TCF_CBQ_LSS_AVPKT       0x20
+        __u32           maxidle;
+        __u32           minidle;
+        __u32           offtime;
+        __u32           avpkt;
+};
+
+struct tc_cbq_wrropt {
+        unsigned char   flags;
+        unsigned char   priority;
+        unsigned char   cpriority;
+        unsigned char   __reserved;
+        __u32           allot;
+        __u32           weight;
+};
+
+struct tc_cbq_ovl {
+        unsigned char   strategy;
+#define TC_CBQ_OVL_CLASSIC      0
+#define TC_CBQ_OVL_DELAY        1
+#define TC_CBQ_OVL_LOWPRIO      2
+#define TC_CBQ_OVL_DROP         3
+#define TC_CBQ_OVL_RCLASSIC     4
+        unsigned char   priority2;
+        __u16           pad;
+        __u32           penalty;
+};
+
+struct tc_cbq_police {
+        unsigned char   police;
+        unsigned char   __res1;
+        unsigned short  __res2;
+};
+
+struct tc_cbq_fopt {
+        __u32           split;
+        __u32           defmap;
+        __u32           defchange;
+};
+
+struct tc_cbq_xstats {
+        __u32           borrows;
+        __u32           overactions;
+        __s32           avgidle;
+        __s32           undertime;
+};
+
+enum {
+      TCA_CBQ_UNSPEC,
+      TCA_CBQ_LSSOPT,
+      TCA_CBQ_WRROPT,
+      TCA_CBQ_FOPT,
+      TCA_CBQ_OVL_STRATEGY,
+      TCA_CBQ_RATE,
+      TCA_CBQ_RTAB,
+      TCA_CBQ_POLICE,
+      __TCA_CBQ_MAX,
+};
+
+#define TCA_CBQ_MAX     (__TCA_CBQ_MAX - 1)
+
+/* dsmark section */
+
+enum {
+      TCA_DSMARK_UNSPEC,
+      TCA_DSMARK_INDICES,
+      TCA_DSMARK_DEFAULT_INDEX,
+      TCA_DSMARK_SET_TC_INDEX,
+      TCA_DSMARK_MASK,
+      TCA_DSMARK_VALUE,
+      __TCA_DSMARK_MAX,
+};
+
+#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
+
+/* ATM  section */
+
+enum {
+      TCA_ATM_UNSPEC,
+      TCA_ATM_FD,               /* file/socket descriptor */
+      TCA_ATM_PTR,              /* pointer to descriptor - later */
+      TCA_ATM_HDR,              /* LL header */
+      TCA_ATM_EXCESS,           /* excess traffic class (0 for CLP)  */
+      TCA_ATM_ADDR,             /* PVC address (for output only) */
+      TCA_ATM_STATE,            /* VC state (ATM_VS_*; for output only) */
+      __TCA_ATM_MAX,
+};
+
+#define TCA_ATM_MAX     (__TCA_ATM_MAX - 1)
+
+/* Network emulator */
+
+enum {
+      TCA_NETEM_UNSPEC,
+      TCA_NETEM_CORR,
+      TCA_NETEM_DELAY_DIST,
+      TCA_NETEM_REORDER,
+      TCA_NETEM_CORRUPT,
+      TCA_NETEM_LOSS,
+      TCA_NETEM_RATE,
+      TCA_NETEM_ECN,
+      TCA_NETEM_RATE64,
+      TCA_NETEM_PAD,
+      TCA_NETEM_LATENCY64,
+      TCA_NETEM_JITTER64,
+      TCA_NETEM_SLOT,
+      TCA_NETEM_SLOT_DIST,
+      __TCA_NETEM_MAX,
+};
+
+#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
+
+struct tc_netem_qopt {
+        __u32   latency;        /* added delay (us) */
+        __u32   limit;          /* fifo limit (packets) */
+        __u32   loss;           /* random packet loss (0=none ~0=100%) */
+        __u32   gap;            /* re-ordering gap (0 for none) */
+        __u32   duplicate;      /* random packet dup  (0=none ~0=100%) */
+        __u32   jitter;         /* random jitter in latency (us) */
+};
+
+struct tc_netem_corr {
+        __u32   delay_corr;     /* delay correlation */
+        __u32   loss_corr;      /* packet loss correlation */
+        __u32   dup_corr;       /* duplicate correlation  */
+};
+
+struct tc_netem_reorder {
+        __u32   probability;
+        __u32   correlation;
+};
+
+struct tc_netem_corrupt {
+        __u32   probability;
+        __u32   correlation;
+};
+
+struct tc_netem_rate {
+        __u32   rate;   /* byte/s */
+        __s32   packet_overhead;
+        __u32   cell_size;
+        __s32   cell_overhead;
+};
+
+struct tc_netem_slot {
+        __s64   min_delay; /* nsec */
+        __s64   max_delay;
+        __s32   max_packets;
+        __s32   max_bytes;
+        __s64   dist_delay; /* nsec */
+        __s64   dist_jitter; /* nsec */
+};
+
+enum {
+      NETEM_LOSS_UNSPEC,
+      NETEM_LOSS_GI,            /* General Intuitive - 4 state model */
+      NETEM_LOSS_GE,            /* Gilbert Elliot models */
+      __NETEM_LOSS_MAX
+};
+#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
+
+/* State transition probabilities for 4 state model */
+struct tc_netem_gimodel {
+        __u32   p13;
+        __u32   p31;
+        __u32   p32;
+        __u32   p14;
+        __u32   p23;
+};
+
+/* Gilbert-Elliot models */
+struct tc_netem_gemodel {
+        __u32 p;
+        __u32 r;
+        __u32 h;
+        __u32 k1;
+};
+
+#define NETEM_DIST_SCALE        8192
+#define NETEM_DIST_MAX          16384
+
+/* DRR */
+
+enum {
+      TCA_DRR_UNSPEC,
+      TCA_DRR_QUANTUM,
+      __TCA_DRR_MAX
+};
+
+#define TCA_DRR_MAX     (__TCA_DRR_MAX - 1)
+
+struct tc_drr_stats {
+        __u32   deficit;
+};
+
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+enum {
+      TC_MQPRIO_HW_OFFLOAD_NONE,        /* no offload requested */
+      TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */
+      __TC_MQPRIO_HW_OFFLOAD_MAX
+};
+
+#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+
+enum {
+      TC_MQPRIO_MODE_DCB,
+      TC_MQPRIO_MODE_CHANNEL,
+      __TC_MQPRIO_MODE_MAX
+};
+/* NOTE(review): the macro below is self-referential. The preprocessor does
+ * not re-expand a macro's own name inside its replacement list, so after
+ * this point __TC_MQPRIO_MODE_MAX evaluates to the enumerator above minus
+ * one (the same value as TC_MQPRIO_MODE_CHANNEL). Neighbouring sections
+ * name the equivalent macro without the leading underscores (e.g.
+ * TC_MQPRIO_HW_OFFLOAD_MAX); presumably TC_MQPRIO_MODE_MAX was intended -
+ * confirm against the upstream kernel header before renaming.
+ */
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+      TC_MQPRIO_SHAPER_DCB,
+      TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
+      __TC_MQPRIO_SHAPER_MAX
+};
+/* NOTE(review): the macro below is self-referential. The preprocessor does
+ * not re-expand a macro's own name inside its replacement list, so after
+ * this point __TC_MQPRIO_SHAPER_MAX evaluates to the enumerator above minus
+ * one (the same value as TC_MQPRIO_SHAPER_BW_RATE). Neighbouring sections
+ * name the equivalent macro without the leading underscores (e.g.
+ * TC_MQPRIO_HW_OFFLOAD_MAX); presumably TC_MQPRIO_SHAPER_MAX was intended -
+ * confirm against the upstream kernel header before renaming.
+ */
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
+struct tc_mqprio_qopt {
+        __u8    num_tc;
+        __u8    prio_tc_map[TC_QOPT_BITMASK + 1];
+        __u8    hw;
+        __u16   count[TC_QOPT_MAX_QUEUE];
+        __u16   offset[TC_QOPT_MAX_QUEUE];
+};
+
+#define TC_MQPRIO_F_MODE                0x1
+#define TC_MQPRIO_F_SHAPER              0x2
+#define TC_MQPRIO_F_MIN_RATE            0x4
+#define TC_MQPRIO_F_MAX_RATE            0x8
+
+enum {
+      TCA_MQPRIO_UNSPEC,
+      TCA_MQPRIO_MODE,
+      TCA_MQPRIO_SHAPER,
+      TCA_MQPRIO_MIN_RATE64,
+      TCA_MQPRIO_MAX_RATE64,
+      __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
+/* SFB */
+
+enum {
+      TCA_SFB_UNSPEC,
+      TCA_SFB_PARMS,
+      __TCA_SFB_MAX,
+};
+
+#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)
+
+/*
+ * Note: increment, decrement are Q0.16 fixed-point values.
+ */
+struct tc_sfb_qopt {
+        __u32 rehash_interval;  /* delay between hash move, in ms */
+        __u32 warmup_time;      /* double buffering warmup time in ms (warmup_time < rehash_interval) */
+        __u32 max;              /* max len of qlen_min */
+        __u32 bin_size;         /* maximum queue length per bin */
+        __u32 increment;        /* probability increment, (d1 in Blue) */
+        __u32 decrement;        /* probability decrement, (d2 in Blue) */
+        __u32 limit;            /* max SFB queue length */
+        __u32 penalty_rate;     /* inelastic flows are rate limited to 'rate' pps */
+        __u32 penalty_burst;
+};
+
+struct tc_sfb_xstats {
+        __u32 earlydrop;
+        __u32 penaltydrop;
+        __u32 bucketdrop;
+        __u32 queuedrop;
+        __u32 childdrop; /* drops in child qdisc */
+        __u32 marked;
+        __u32 maxqlen;
+        __u32 maxprob;
+        __u32 avgprob;
+};
+
+#define SFB_MAX_PROB 0xFFFF
+
+/* QFQ */
+enum {
+      TCA_QFQ_UNSPEC,
+      TCA_QFQ_WEIGHT,
+      TCA_QFQ_LMAX,
+      __TCA_QFQ_MAX
+};
+
+#define TCA_QFQ_MAX     (__TCA_QFQ_MAX - 1)
+
+struct tc_qfq_stats {
+        __u32 weight;
+        __u32 lmax;
+};
+
+/* CODEL */
+
+enum {
+      TCA_CODEL_UNSPEC,
+      TCA_CODEL_TARGET,
+      TCA_CODEL_LIMIT,
+      TCA_CODEL_INTERVAL,
+      TCA_CODEL_ECN,
+      TCA_CODEL_CE_THRESHOLD,
+      __TCA_CODEL_MAX
+};
+
+#define TCA_CODEL_MAX   (__TCA_CODEL_MAX - 1)
+
+struct tc_codel_xstats {
+        __u32   maxpacket; /* largest packet we've seen so far */
+        __u32   count;     /* how many drops we've done since the last time we
+                            * entered dropping state
+                            */
+        __u32   lastcount; /* count at entry to dropping state */
+        __u32   ldelay;    /* in-queue delay seen by most recently dequeued packet */
+        __s32   drop_next; /* time to drop next packet */
+        __u32   drop_overlimit; /* number of times max qdisc packet limit was hit */
+        __u32   ecn_mark;  /* number of packets we ECN marked instead of dropped */
+        __u32   dropping;  /* are we in dropping state ? */
+        __u32   ce_mark;   /* number of CE marked packets because of ce_threshold */
+};
+
+/* FQ_CODEL */
+
+enum {
+      TCA_FQ_CODEL_UNSPEC,
+      TCA_FQ_CODEL_TARGET,
+      TCA_FQ_CODEL_LIMIT,
+      TCA_FQ_CODEL_INTERVAL,
+      TCA_FQ_CODEL_ECN,
+      TCA_FQ_CODEL_FLOWS,
+      TCA_FQ_CODEL_QUANTUM,
+      TCA_FQ_CODEL_CE_THRESHOLD,
+      TCA_FQ_CODEL_DROP_BATCH_SIZE,
+      TCA_FQ_CODEL_MEMORY_LIMIT,
+      __TCA_FQ_CODEL_MAX
+};
+
+#define TCA_FQ_CODEL_MAX        (__TCA_FQ_CODEL_MAX - 1)
+
+enum {
+      TCA_FQ_CODEL_XSTATS_QDISC,
+      TCA_FQ_CODEL_XSTATS_CLASS,
+};
+
+struct tc_fq_codel_qd_stats {
+        __u32   maxpacket;      /* largest packet we've seen so far */
+        __u32   drop_overlimit; /* number of times max qdisc
+                                 * packet limit was hit
+                                 */
+        __u32   ecn_mark;       /* number of packets we ECN marked
+                                 * instead of being dropped
+                                 */
+        __u32   new_flow_count; /* number of times packets
+                                 * created a 'new flow'
+                                 */
+        __u32   new_flows_len;  /* count of flows in new list */
+        __u32   old_flows_len;  /* count of flows in old list */
+        __u32   ce_mark;        /* packets above ce_threshold */
+        __u32   memory_usage;   /* in bytes */
+        __u32   drop_overmemory;
+};
+
+struct tc_fq_codel_cl_stats {
+        __s32   deficit;
+        __u32   ldelay;         /* in-queue delay seen by most recently
+                                 * dequeued packet
+                                 */
+        __u32   count;
+        __u32   lastcount;
+        __u32   dropping;
+        __s32   drop_next;
+};
+
+struct tc_fq_codel_xstats {
+        __u32   type; /* presumably TCA_FQ_CODEL_XSTATS_QDISC or TCA_FQ_CODEL_XSTATS_CLASS, telling which union member is valid - TODO confirm */
+        union {
+                struct tc_fq_codel_qd_stats qdisc_stats;
+                struct tc_fq_codel_cl_stats class_stats;
+        };
+};
+
+/* FQ */
+
+enum {
+      TCA_FQ_UNSPEC,
+
+      TCA_FQ_PLIMIT,            /* limit of total number of packets in queue */
+
+      TCA_FQ_FLOW_PLIMIT,       /* limit of packets per flow */
+
+      TCA_FQ_QUANTUM,           /* RR quantum */
+
+      TCA_FQ_INITIAL_QUANTUM,           /* RR quantum for new flow */
+
+      TCA_FQ_RATE_ENABLE,       /* enable/disable rate limiting */
+
+      TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */
+
+      TCA_FQ_FLOW_MAX_RATE,     /* per flow max rate */
+
+      TCA_FQ_BUCKETS_LOG,       /* log2(number of buckets) */
+
+      TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */
+
+      TCA_FQ_ORPHAN_MASK,       /* mask applied to orphaned skb hashes */
+
+      TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
+
+      TCA_FQ_CE_THRESHOLD,      /* DCTCP-like CE-marking threshold */
+
+      __TCA_FQ_MAX
+};
+
+#define TCA_FQ_MAX      (__TCA_FQ_MAX - 1)
+
+struct tc_fq_qd_stats {
+        __u64   gc_flows;
+        __u64   highprio_packets;
+        __u64   tcp_retrans;
+        __u64   throttled;
+        __u64   flows_plimit;
+        __u64   pkts_too_long;
+        __u64   allocation_errors;
+        __s64   time_next_delayed_flow;
+        __u32   flows;
+        __u32   inactive_flows;
+        __u32   throttled_flows;
+        __u32   unthrottle_latency_ns;
+        __u64   ce_mark;                /* packets above ce_threshold */
+};
+
+/* Heavy-Hitter Filter */
+
+enum {
+      TCA_HHF_UNSPEC,
+      TCA_HHF_BACKLOG_LIMIT,
+      TCA_HHF_QUANTUM,
+      TCA_HHF_HH_FLOWS_LIMIT,
+      TCA_HHF_RESET_TIMEOUT,
+      TCA_HHF_ADMIT_BYTES,
+      TCA_HHF_EVICT_TIMEOUT,
+      TCA_HHF_NON_HH_WEIGHT,
+      __TCA_HHF_MAX
+};
+
+#define TCA_HHF_MAX     (__TCA_HHF_MAX - 1)
+
+struct tc_hhf_xstats {
+        __u32   drop_overlimit; /* number of times max qdisc packet limit
+                                 * was hit
+                                 */
+        __u32   hh_overlimit;   /* number of times max heavy-hitters was hit */
+        __u32   hh_tot_count;   /* number of captured heavy-hitters so far */
+        __u32   hh_cur_count;   /* number of current heavy-hitters */
+};
+
+/* PIE */
+enum {
+      TCA_PIE_UNSPEC,
+      TCA_PIE_TARGET,
+      TCA_PIE_LIMIT,
+      TCA_PIE_TUPDATE,
+      TCA_PIE_ALPHA,
+      TCA_PIE_BETA,
+      TCA_PIE_ECN,
+      TCA_PIE_BYTEMODE,
+      __TCA_PIE_MAX
+};
+#define TCA_PIE_MAX   (__TCA_PIE_MAX - 1)
+
+struct tc_pie_xstats {
+        __u64 prob;             /* current probability */
+        __u32 delay;            /* current delay in ms */
+        __u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
+        __u32 packets_in;       /* total number of packets enqueued */
+        __u32 dropped;          /* packets dropped due to pie_action */
+        __u32 overlimit;        /* dropped due to lack of space in queue */
+        __u32 maxq;             /* maximum queue size */
+        __u32 ecn_mark;         /* packets marked with ECN */
+};
+
+/* CBS */
+/* CBS option block. The three _pad bytes follow the single-byte offload member, so the
+ * __s32 members start at offset 4 (presumably explicit ABI padding; confirm upstream). */
+struct tc_cbs_qopt {
+        __u8 offload;
+        __u8 _pad[3];
+        __s32 hicredit;
+        __s32 locredit;
+        __s32 idleslope;
+        __s32 sendslope;
+};
+
+/* TCA_CBS_* identifiers; __TCA_CBS_MAX is a sentinel, so TCA_CBS_MAX equals TCA_CBS_PARMS. */
+enum {
+      TCA_CBS_UNSPEC,
+      TCA_CBS_PARMS,
+      __TCA_CBS_MAX,
+};
+
+#define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
+
+
+/* ETF */
+/* ETF option block. The TC_ETF_* macros are defined inside the struct body right after
+ * 'flags'; presumably they are the bit values for that member (confirm upstream). */
+struct tc_etf_qopt {
+        __s32 delta;
+        __s32 clockid;
+        __u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON _BITUL(0)
+#define TC_ETF_OFFLOAD_ON       _BITUL(1)
+#define TC_ETF_SKIP_SOCK_CHECK  _BITUL(2)
+};
+
+/* TCA_ETF_* identifiers; __TCA_ETF_MAX is a sentinel, so TCA_ETF_MAX equals TCA_ETF_PARMS. */
+enum {
+      TCA_ETF_UNSPEC,
+      TCA_ETF_PARMS,
+      __TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
+
+/* CAKE */
+/* TCA_CAKE_* identifiers, starting at TCA_CAKE_UNSPEC (0). __TCA_CAKE_MAX is a sentinel;
+ * TCA_CAKE_MAX is the last real identifier (TCA_CAKE_FWMARK). */
+enum {
+      TCA_CAKE_UNSPEC,
+      TCA_CAKE_PAD,
+      TCA_CAKE_BASE_RATE64,
+      TCA_CAKE_DIFFSERV_MODE,
+      TCA_CAKE_ATM,
+      TCA_CAKE_FLOW_MODE,
+      TCA_CAKE_OVERHEAD,
+      TCA_CAKE_RTT,
+      TCA_CAKE_TARGET,
+      TCA_CAKE_AUTORATE,
+      TCA_CAKE_MEMORY,
+      TCA_CAKE_NAT,
+      TCA_CAKE_RAW,
+      TCA_CAKE_WASH,
+      TCA_CAKE_MPU,
+      TCA_CAKE_INGRESS,
+      TCA_CAKE_ACK_FILTER,
+      TCA_CAKE_SPLIT_GSO,
+      TCA_CAKE_FWMARK,
+      __TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX    (__TCA_CAKE_MAX - 1)
+
+/* TCA_CAKE_STATS_* identifiers. Value 0 is the placeholder __TCA_CAKE_STATS_INVALID and
+ * __TCA_CAKE_STATS_MAX is a trailing sentinel, so TCA_CAKE_STATS_MAX is the last real one. */
+enum {
+      __TCA_CAKE_STATS_INVALID,
+      TCA_CAKE_STATS_PAD,
+      TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+      TCA_CAKE_STATS_MEMORY_LIMIT,
+      TCA_CAKE_STATS_MEMORY_USED,
+      TCA_CAKE_STATS_AVG_NETOFF,
+      TCA_CAKE_STATS_MIN_NETLEN,
+      TCA_CAKE_STATS_MAX_NETLEN,
+      TCA_CAKE_STATS_MIN_ADJLEN,
+      TCA_CAKE_STATS_MAX_ADJLEN,
+      TCA_CAKE_STATS_TIN_STATS,
+      TCA_CAKE_STATS_DEFICIT,
+      TCA_CAKE_STATS_COBALT_COUNT,
+      TCA_CAKE_STATS_DROPPING,
+      TCA_CAKE_STATS_DROP_NEXT_US,
+      TCA_CAKE_STATS_P_DROP,
+      TCA_CAKE_STATS_BLUE_TIMER_US,
+      __TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+/* TCA_CAKE_TIN_STATS_* identifiers. Value 0 is the placeholder __TCA_CAKE_TIN_STATS_INVALID
+ * and __TCA_CAKE_TIN_STATS_MAX is a trailing sentinel used to derive TCA_CAKE_TIN_STATS_MAX. */
+enum {
+      __TCA_CAKE_TIN_STATS_INVALID,
+      TCA_CAKE_TIN_STATS_PAD,
+      TCA_CAKE_TIN_STATS_SENT_PACKETS,
+      TCA_CAKE_TIN_STATS_SENT_BYTES64,
+      TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+      TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+      TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+      TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+      TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+      TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+      TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+      TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+      TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+      TCA_CAKE_TIN_STATS_TARGET_US,
+      TCA_CAKE_TIN_STATS_INTERVAL_US,
+      TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+      TCA_CAKE_TIN_STATS_WAY_MISSES,
+      TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+      TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+      TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+      TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+      TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+      TCA_CAKE_TIN_STATS_BULK_FLOWS,
+      TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+      TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+      TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+      __TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+/* CAKE flow modes. The implicit numbering (SRC_IP = 1, DST_IP = 2, FLOWS = 4) is what makes
+ * the bitwise identities in the comments below hold, so the order must not be changed.
+ * CAKE_FLOW_MAX (8) is one past the last mode. */
+enum {
+      CAKE_FLOW_NONE = 0,
+      CAKE_FLOW_SRC_IP,
+      CAKE_FLOW_DST_IP,
+      CAKE_FLOW_HOSTS,    /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+      CAKE_FLOW_FLOWS,
+      CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+      CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+      CAKE_FLOW_TRIPLE,   /* = CAKE_FLOW_HOSTS  | CAKE_FLOW_FLOWS */
+      CAKE_FLOW_MAX,
+};
+
+/* CAKE diffserv modes, numbered from 0; CAKE_DIFFSERV_MAX is one past the last mode. */
+enum {
+      CAKE_DIFFSERV_DIFFSERV3 = 0,
+      CAKE_DIFFSERV_DIFFSERV4,
+      CAKE_DIFFSERV_DIFFSERV8,
+      CAKE_DIFFSERV_BESTEFFORT,
+      CAKE_DIFFSERV_PRECEDENCE,
+      CAKE_DIFFSERV_MAX
+};
+
+/* CAKE ACK-filter modes, numbered from 0; CAKE_ACK_MAX is one past the last mode. */
+enum {
+      CAKE_ACK_NONE = 0,
+      CAKE_ACK_FILTER,
+      CAKE_ACK_AGGRESSIVE,
+      CAKE_ACK_MAX
+};
+
+/* CAKE ATM modes, numbered from 0; CAKE_ATM_MAX is one past the last mode. */
+enum {
+      CAKE_ATM_NONE = 0,
+      CAKE_ATM_ATM,
+      CAKE_ATM_PTM,
+      CAKE_ATM_MAX
+};
+
+
+/* TAPRIO */
+/* TAPRIO command codes with explicitly assigned values; presumably these are the values
+ * carried in TCA_TAPRIO_SCHED_ENTRY_CMD (a u8 per its comment) - confirm upstream. */
+enum {
+      TC_TAPRIO_CMD_SET_GATES = 0x00,
+      TC_TAPRIO_CMD_SET_AND_HOLD = 0x01,
+      TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02,
+};
+
+/* Identifiers of the members of one TAPRIO schedule entry; the trailing comments give each
+ * member's payload type. __TCA_TAPRIO_SCHED_ENTRY_MAX is a sentinel used to derive
+ * TCA_TAPRIO_SCHED_ENTRY_MAX. */
+enum {
+      TCA_TAPRIO_SCHED_ENTRY_UNSPEC,
+      TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */
+      TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */
+      TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */
+      TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */
+      __TCA_TAPRIO_SCHED_ENTRY_MAX,
+};
+#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1)
+
+/* The format for schedule entry list is:
+ * [TCA_TAPRIO_SCHED_ENTRY_LIST]
+ *   [TCA_TAPRIO_SCHED_ENTRY]
+ *     [TCA_TAPRIO_SCHED_ENTRY_CMD]
+ *     [TCA_TAPRIO_SCHED_ENTRY_GATES]
+ *     [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]
+ */
+/* TCA_TAPRIO_SCHED_* identifiers; __TCA_TAPRIO_SCHED_MAX is a sentinel, so
+ * TCA_TAPRIO_SCHED_MAX equals TCA_TAPRIO_SCHED_ENTRY. */
+enum {
+      TCA_TAPRIO_SCHED_UNSPEC,
+      TCA_TAPRIO_SCHED_ENTRY,
+      __TCA_TAPRIO_SCHED_MAX,
+};
+
+#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1)
+
+/* The format for the admin sched (dump only):
+ * [TCA_TAPRIO_SCHED_ADMIN_SCHED]
+ *   [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]
+ *   [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]
+ *     [TCA_TAPRIO_ATTR_SCHED_ENTRY]
+ *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD]
+ *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES]
+ *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
+ */
+
+/* Flag bits, spelled with _BITUL() like the TC_ETF_* flags earlier in this header; BIT() is
+ * a kernel-internal helper that is not available to userspace users of this header. */
+#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST      _BITUL(0)
+#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD       _BITUL(1)
+
+/* Top-level TAPRIO identifiers; the trailing comments give each one's payload.
+ * __TCA_TAPRIO_ATTR_MAX is a sentinel used to derive TCA_TAPRIO_ATTR_MAX. */
+enum {
+      TCA_TAPRIO_ATTR_UNSPEC,
+      TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
+      TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */
+      TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */
+      TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */
+      TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */
+      TCA_TAPRIO_PAD,
+      TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
+      TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
+      TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
+      TCA_TAPRIO_ATTR_FLAGS, /* u32 */
+      TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */
+      __TCA_TAPRIO_ATTR_MAX,
+};
+
+#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1)
+
+#endif
index ac85b8b18c1176bf6a9f0170a65dea31780eb431..cd6e5c87d5837d2ba12aafb18052bb89f1fb7753 100644 (file)
 #include <linux/if_link.h>
 #include <linux/if_macsec.h>
 #include <linux/if_tunnel.h>
-#include <linux/nexthop.h>
 #include <linux/l2tp.h>
+#include <linux/nexthop.h>
 #include <linux/nl80211.h>
+#include <linux/pkt_sched.h>
 #include <linux/veth.h>
 #include <linux/wireguard.h>
 
@@ -733,6 +734,18 @@ static const NLTypeSystem rtnl_nexthop_type_system = {
        .types = rtnl_nexthop_types,
 };
 
+/* Attribute table for the qdisc messages (RTM_NEWQDISC, RTM_DELQDISC, RTM_GETQDISC),
+ * indexed by TCA_* attribute type. */
+static const NLType rtnl_qdisc_types[] = {
+        [TCA_KIND]           = { .type = NETLINK_TYPE_STRING },
+        /* Only a payload size is declared here, and it is the size of the netem option struct. */
+        [TCA_OPTIONS]        = { .size = sizeof(struct tc_netem_qopt) },
+        [TCA_INGRESS_BLOCK]  = { .type = NETLINK_TYPE_U32 },
+        [TCA_EGRESS_BLOCK]   = { .type = NETLINK_TYPE_U32 },
+};
+
+/* Type system wrapping rtnl_qdisc_types; referenced from the RTM_*QDISC entries of rtnl_types. */
+static const NLTypeSystem rtnl_qdisc_type_system = {
+        .count = ELEMENTSOF(rtnl_qdisc_types),
+        .types = rtnl_qdisc_types,
+};
+
 static const NLType rtnl_types[] = {
         [NLMSG_DONE]       = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = 0 },
         [NLMSG_ERROR]      = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = sizeof(struct nlmsgerr) },
@@ -758,6 +771,9 @@ static const NLType rtnl_types[] = {
         [RTM_NEWNEXTHOP]   = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
         [RTM_DELNEXTHOP]   = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
         [RTM_GETNEXTHOP]   = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
+        [RTM_NEWQDISC]     = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_qdisc_type_system, .size = sizeof(struct tcmsg) },
+        [RTM_DELQDISC]     = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_qdisc_type_system, .size = sizeof(struct tcmsg) },
+        [RTM_GETQDISC]     = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_qdisc_type_system, .size = sizeof(struct tcmsg) },
 };
 
 const NLTypeSystem rtnl_type_system_root = {
index 923accb7e304d4575128147d24f992d9b29d4e25..94410e520a55da40b8d17f0ac4dd3196a7da2d43 100644 (file)
@@ -41,6 +41,10 @@ static inline bool rtnl_message_type_is_routing_policy_rule(uint16_t type) {
         return IN_SET(type, RTM_NEWRULE, RTM_DELRULE, RTM_GETRULE);
 }
 
+/* Returns true when 'type' is one of the three qdisc message types (new, delete, get). */
+static inline bool rtnl_message_type_is_qdisc(uint16_t type) {
+        switch (type) {
+        case RTM_NEWQDISC:
+        case RTM_DELQDISC:
+        case RTM_GETQDISC:
+                return true;
+        default:
+                return false;
+        }
+}
+
 int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name);
 int rtnl_set_link_properties(sd_netlink **rtnl, int ifindex, const char *alias, const struct ether_addr *mac, uint32_t mtu);
 
index 429b21b149744091fd8658ca13b63ada311e2ab7..194676a6e5813daa8bc8ecb41cf82f1d69f761f3 100644 (file)
@@ -1033,3 +1033,46 @@ int sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(const sd_netlink_m
 
         return 0;
 }
+
+/* Allocates a qdisc rtnetlink message of type 'nlmsg_type' and stores it in *ret, with the
+ * address family and interface index written into its tcmsg header. RTM_NEWQDISC messages
+ * additionally get NLM_F_CREATE | NLM_F_EXCL.
+ * Returns 0 on success, -EINVAL for a non-qdisc type or NULL 'ret', or message_new()'s error. */
+int sd_rtnl_message_new_qdisc(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex) {
+        sd_netlink_message *msg;
+        struct tcmsg *tc_header;
+        int err;
+
+        assert_return(rtnl_message_type_is_qdisc(nlmsg_type), -EINVAL);
+        assert_return(ret, -EINVAL);
+
+        err = message_new(rtnl, ret, nlmsg_type);
+        if (err < 0)
+                return err;
+
+        msg = *ret;
+
+        /* Only creation requests carry the create/exclusive flags. */
+        if (nlmsg_type == RTM_NEWQDISC)
+                msg->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+        tc_header = NLMSG_DATA(msg->hdr);
+        tc_header->tcm_family = tcm_family;
+        tc_header->tcm_ifindex = tcm_ifindex;
+
+        return 0;
+}
+
+/* Writes 'parent' into the tcm_parent field of the tcmsg header of a qdisc message.
+ * Returns 0 on success, or -EINVAL if 'm' is NULL, has no header, or is not a qdisc message. */
+int sd_rtnl_message_set_qdisc_parent(sd_netlink_message *m, uint32_t parent) {
+        struct tcmsg *tcm;
+
+        /* Validate the pointers before the type check dereferences them. */
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_qdisc(m->hdr->nlmsg_type), -EINVAL);
+
+        tcm = NLMSG_DATA(m->hdr);
+        tcm->tcm_parent = parent;
+
+        return 0;
+}
+
+/* Writes 'handle' into the tcm_handle field of the tcmsg header of a qdisc message.
+ * Returns 0 on success, or -EINVAL if 'm' is NULL, has no header, or is not a qdisc message. */
+int sd_rtnl_message_set_qdisc_handle(sd_netlink_message *m, uint32_t handle) {
+        struct tcmsg *tcm;
+
+        /* Validate the pointers before the type check dereferences them. */
+        assert_return(m, -EINVAL);
+        assert_return(m->hdr, -EINVAL);
+        assert_return(rtnl_message_type_is_qdisc(m->hdr->nlmsg_type), -EINVAL);
+
+        tcm = NLMSG_DATA(m->hdr);
+        tcm->tcm_handle = handle;
+
+        return 0;
+}
index fd21008d10fe9ff828b6ddaa76418dc6d5f50196..06b6ec64a27ba87ce927f9fbc49cd4accf0acb1d 100644 (file)
@@ -105,6 +105,12 @@ sources = files('''
         networkd-util.h
         networkd-wifi.c
         networkd-wifi.h
+        tc/netem.c
+        tc/netem.h
+        tc/qdisc.c
+        tc/qdisc.h
+        tc/tc-util.c
+        tc/tc-util.h
 '''.split())
 
 systemd_networkd_sources = files('networkd.c')
index 906267764e1313b18b9d9c55b6cf412466e714d8..ccac6004ba5f338995aba6ba0b74bef9cd8666b2 100644 (file)
@@ -43,6 +43,7 @@
 #include "tmpfile-util.h"
 #include "udev-util.h"
 #include "util.h"
+#include "tc/qdisc.h"
 #include "virt.h"
 
 uint32_t link_get_vrf_table(Link *link) {
@@ -2581,6 +2582,8 @@ static int link_drop_config(Link *link) {
 }
 
 static int link_configure(Link *link) {
+        QDiscs *qdisc;
+        Iterator i;
         int r;
 
         assert(link);
@@ -2590,6 +2593,9 @@ static int link_configure(Link *link) {
         if (link->iftype == ARPHRD_CAN)
                 return link_configure_can(link);
 
+        ORDERED_HASHMAP_FOREACH(qdisc, link->network->qdiscs_by_section, i)
+                (void) qdisc_configure(link, qdisc);
+
         /* Drop foreign config, but ignore loopback or critical devices.
          * We do not want to remove loopback address or addresses used for root NFS. */
         if (!(link->flags & IFF_LOOPBACK) &&
index 1e4510b277813e39a1e9ffa5132c85d3bbd65bcb..e6a9c41ca55cb2dd4d713cbd27a10482114a11bf 100644 (file)
@@ -78,6 +78,7 @@ typedef struct Link {
         unsigned nexthop_messages;
         unsigned routing_policy_rule_messages;
         unsigned routing_policy_rule_remove_messages;
+        unsigned qdisc_messages;
         unsigned enslaving;
 
         Set *addresses;
index 68199ac45f55bc254747cd516bcc5205670c76ac..1375626e8655a38f81df7fb57ee4925eca4734c9 100644 (file)
@@ -13,6 +13,8 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
 #include "networkd-ndisc.h"
 #include "networkd-network.h"
 #include "vlan-util.h"
+#include "tc/qdisc.h"
+#include "tc/netem.h"
 %}
 struct ConfigPerfItem;
 %null_strings
@@ -241,6 +243,11 @@ CAN.BitRate,                            config_parse_si_size,
 CAN.SamplePoint,                        config_parse_permille,                           0,                             offsetof(Network, can_sample_point)
 CAN.RestartSec,                         config_parse_sec,                                0,                             offsetof(Network, can_restart_us)
 CAN.TripleSampling,                     config_parse_tristate,                           0,                             offsetof(Network, can_triple_sampling)
+TrafficControlQueueingDiscipline.Parent,                             config_parse_tc_qdiscs_parent,                     0,                             0
+TrafficControlQueueingDiscipline.NetworkEmulatorDelaySec,            config_parse_tc_network_emulator_delay,            0,                             0
+TrafficControlQueueingDiscipline.NetworkEmulatorDelayJitterSec,      config_parse_tc_network_emulator_delay,            0,                             0
+TrafficControlQueueingDiscipline.NetworkEmulatorLossRate,            config_parse_tc_network_emulator_loss_rate,        0,                             0
+TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit,         config_parse_tc_network_emulator_packet_limit,     0,                             0
 /* backwards compatibility: do not add new entries to this section */
 Network.IPv4LL,                         config_parse_ipv4ll,                             0,                             offsetof(Network, link_local)
 DHCP.ClientIdentifier,                  config_parse_dhcp_client_identifier,             0,                             offsetof(Network, dhcp_client_identifier)
index 8cdcb5730641a7f694d8bdabcc571539fd6edd9c..0956d2a9b797227bd203f23f81889b6517aadf6e 100644 (file)
@@ -471,6 +471,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi
                               "IPv6PrefixDelegation\0"
                               "IPv6Prefix\0"
                               "IPv6RoutePrefix\0"
+                              "TrafficControlQueueingDiscipline\0"
                               "CAN\0",
                               config_item_perf_lookup, network_network_gperf_lookup,
                               CONFIG_PARSE_WARN, network);
@@ -663,6 +664,7 @@ static Network *network_free(Network *network) {
         hashmap_free(network->address_labels_by_section);
         hashmap_free(network->prefixes_by_section);
         hashmap_free(network->rules_by_section);
+        ordered_hashmap_free_with_destructor(network->qdiscs_by_section, qdisc_free);
 
         if (network->manager &&
             network->manager->duids_requesting_uuid)
index 8b4b5d042d22037eb7a61ef34ea62fc103b3da3c..9820d6af2997aa98ef7b56aaf622e0459862d21b 100644 (file)
@@ -28,6 +28,7 @@
 #include "networkd-util.h"
 #include "ordered-set.h"
 #include "resolve-util.h"
+#include "tc/qdisc.h"
 
 typedef enum IPv6PrivacyExtensions {
         /* The values map to the kernel's /proc/sys/net/ipv6/conf/xxx/use_tempaddr values */
@@ -265,6 +266,7 @@ struct Network {
         Hashmap *prefixes_by_section;
         Hashmap *route_prefixes_by_section;
         Hashmap *rules_by_section;
+        OrderedHashmap *qdiscs_by_section;
 
         /* All kinds of DNS configuration */
         struct in_addr_data *dns;
diff --git a/src/network/tc/netem.c b/src/network/tc/netem.c
new file mode 100644 (file)
index 0000000..e0e3e9a
--- /dev/null
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+#include <math.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "netem.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+#include "tc-util.h"
+#include "util.h"
+
+/* Allocates a NetworkEmulator whose delay and jitter are both USEC_INFINITY and whose other
+ * members are zero, and hands it to the caller through *ret.
+ * Returns 0 on success or -ENOMEM if the allocation fails. */
+int network_emulator_new(NetworkEmulator **ret) {
+        NetworkEmulator *emulator = new(NetworkEmulator, 1);
+
+        if (!emulator)
+                return -ENOMEM;
+
+        *emulator = (NetworkEmulator) {
+                .jitter = USEC_INFINITY,
+                .delay = USEC_INFINITY,
+        };
+
+        *ret = emulator;
+        return 0;
+}
+
+/* Builds a struct tc_netem_qopt from the netem settings stored in 'qdisc' and appends it
+ * to 'req' as the TCA_OPTIONS attribute.
+ *
+ * The packet limit is 1000 unless a non-zero limit is configured. Delay and jitter are
+ * converted with tc_time_to_tick() and only when they are not USEC_INFINITY.
+ * Returns 0 on success or a negative errno (already logged against 'link') on failure. */
+int network_emulator_fill_message(Link *link, QDiscs *qdisc, sd_netlink_message *req) {
+        struct tc_netem_qopt opt = {
+               .limit = 1000,
+        };
+        const NetworkEmulator *ne;
+        int r;
+
+        assert(link);
+        assert(qdisc);
+        assert(req);
+
+        ne = &qdisc->ne;
+
+        if (ne->limit != 0)
+                opt.limit = ne->limit;
+
+        /* opt.loss starts out as 0, so copying an unset (zero) loss leaves it unchanged. */
+        opt.loss = ne->loss;
+
+        if (ne->delay != USEC_INFINITY) {
+                r = tc_time_to_tick(ne->delay, &opt.latency);
+                if (r < 0)
+                        return log_link_error_errno(link, r, "Failed to calculate latency in TCA_OPTION: %m");
+        }
+
+        if (ne->jitter != USEC_INFINITY) {
+                r = tc_time_to_tick(ne->jitter, &opt.jitter);
+                if (r < 0)
+                        return log_link_error_errno(link, r, "Failed to calculate jitter in TCA_OPTION: %m");
+        }
+
+        r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(opt));
+        if (r < 0)
+                return log_link_error_errno(link, r, "Could not append TCA_OPTION attribute: %m");
+
+        return 0;
+}
+
+/* Config parser shared by NetworkEmulatorDelaySec= and NetworkEmulatorDelayJitterSec=.
+ *
+ * 'data' is the Network being loaded; the qdisc for the current section is looked up or
+ * created through qdisc_new_static(). An empty value resets the selected setting to
+ * USEC_INFINITY. A value accepted by parse_sec() is stored and marks the qdisc as using
+ * the network emulator. A parse failure is logged and 0 is returned without detaching
+ * 'qdisc' from its cleanup handler. */
+int config_parse_tc_network_emulator_delay(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+        Network *network = data;
+        usec_t *target = NULL;
+        usec_t parsed;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        r = qdisc_new_static(network, filename, section_line, &qdisc);
+        if (r < 0)
+                return r;
+
+        /* Pick the member this directive writes to; any other lvalue leaves both untouched. */
+        if (streq(lvalue, "NetworkEmulatorDelaySec"))
+                target = &qdisc->ne.delay;
+        else if (streq(lvalue, "NetworkEmulatorDelayJitterSec"))
+                target = &qdisc->ne.jitter;
+
+        if (isempty(rvalue)) {
+                if (target)
+                        *target = USEC_INFINITY;
+
+                qdisc = NULL;
+                return 0;
+        }
+
+        r = parse_sec(rvalue, &parsed);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r,
+                           "Failed to parse '%s=', ignoring assignment: %s",
+                           lvalue, rvalue);
+                return 0;
+        }
+
+        if (target)
+                *target = parsed;
+
+        qdisc->has_network_emulator = true;
+        qdisc = NULL;
+
+        return 0;
+}
+
+/* Config parser for NetworkEmulatorLossRate=.
+ *
+ * 'data' is the Network being loaded; the qdisc for the current section is looked up or
+ * created through qdisc_new_static(). An empty value resets the loss rate to 0. A value
+ * accepted by parse_tc_percent() is stored and marks the qdisc as using the network
+ * emulator. A parse failure is logged and 0 is returned without detaching 'qdisc' from
+ * its cleanup handler. */
+int config_parse_tc_network_emulator_loss_rate(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+        Network *network = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        r = qdisc_new_static(network, filename, section_line, &qdisc);
+        if (r < 0)
+                return r;
+
+        if (isempty(rvalue)) {
+                qdisc->ne.loss = 0;
+
+                qdisc = NULL;
+                return 0;
+        }
+
+        r = parse_tc_percent(rvalue, &qdisc->ne.loss);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r,
+                           "Failed to parse 'NetworkEmulatorLossRate=', ignoring assignment: %s",
+                           rvalue);
+                return 0;
+        }
+
+        /* A loss rate on its own must select netem too: qdisc_configure() only adds the
+         * netem kind and options to the request when this flag is set. */
+        qdisc->has_network_emulator = true;
+
+        qdisc = NULL;
+        return 0;
+}
+
+/* Config parser for NetworkEmulatorPacketLimit=.
+ *
+ * 'data' is the Network being loaded; the qdisc for the current section is looked up or
+ * created through qdisc_new_static(). An empty value resets the limit to 0, which makes
+ * network_emulator_fill_message() fall back to its built-in limit. A value accepted by
+ * safe_atou() is stored and marks the qdisc as using the network emulator. A parse
+ * failure is logged and 0 is returned without detaching 'qdisc' from its cleanup handler. */
+int config_parse_tc_network_emulator_packet_limit(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+        Network *network = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        r = qdisc_new_static(network, filename, section_line, &qdisc);
+        if (r < 0)
+                return r;
+
+        if (isempty(rvalue)) {
+                qdisc->ne.limit = 0;
+                qdisc = NULL;
+
+                return 0;
+        }
+
+        r = safe_atou(rvalue, &qdisc->ne.limit);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r,
+                           "Failed to parse 'NetworkEmulatorPacketLimit=', ignoring assignment: %s",
+                           rvalue);
+                return 0;
+        }
+
+        /* A packet limit on its own must select netem too: qdisc_configure() only adds the
+         * netem kind and options to the request when this flag is set. */
+        qdisc->has_network_emulator = true;
+
+        qdisc = NULL;
+        return 0;
+}
diff --git a/src/network/tc/netem.h b/src/network/tc/netem.h
new file mode 100644 (file)
index 0000000..33dfdd8
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "macro.h"
+#include "../networkd-link.h"
+#include "time-util.h"
+
+typedef struct NetworkEmulator NetworkEmulator;
+typedef struct QDiscs QDiscs;
+
+/* netem settings collected from a [TrafficControlQueueingDiscipline] section. */
+struct NetworkEmulator {
+        /* USEC_INFINITY means "not set"; network_emulator_fill_message() skips such values. */
+        usec_t delay;
+        usec_t jitter;
+
+        /* 0 means "not set"; network_emulator_fill_message() then uses a limit of 1000. */
+        uint32_t limit;
+        /* Loss rate as produced by parse_tc_percent(), i.e. scaled so that 100% is UINT32_MAX. */
+        uint32_t loss;
+};
+
+int network_emulator_new(NetworkEmulator **ret);
+int network_emulator_fill_message(Link *link, QDiscs *qdisc, sd_netlink_message *req);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_delay);
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_loss_rate);
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_packet_limit);
diff --git a/src/network/tc/qdisc.c b/src/network/tc/qdisc.c
new file mode 100644 (file)
index 0000000..2a4724e
--- /dev/null
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "set.h"
+#include "string-util.h"
+#include "util.h"
+
+/* Allocates a QDiscs with family AF_UNSPEC and parent TC_H_ROOT (everything else zeroed)
+ * and returns it through *ret. Returns 0 on success or -ENOMEM. */
+static int qdisc_new(QDiscs **ret) {
+        QDiscs *fresh = new(QDiscs, 1);
+
+        if (!fresh)
+                return -ENOMEM;
+
+        *fresh = (QDiscs) {
+                .parent = TC_H_ROOT,
+                .family = AF_UNSPEC,
+        };
+
+        *ret = fresh;
+        return 0;
+}
+
+/* Returns, through *ret, the QDiscs object that belongs to the config section identified by
+ * (filename, section_line), creating and registering it in network->qdiscs_by_section if it
+ * does not exist yet. 'filename' and 'section_line' must be given together or not at all;
+ * without them a fresh, unregistered object is returned.
+ * Returns 0 on success or a negative errno from one of the helpers. */
+int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDiscs **ret) {
+        _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+        _cleanup_(qdisc_freep) QDiscs *qdisc = NULL;
+        int r;
+
+        assert(network);
+        assert(ret);
+        assert(!!filename == (section_line > 0));
+
+        if (filename) {
+                r = network_config_section_new(filename, section_line, &n);
+                if (r < 0)
+                        return r;
+
+                /* Reuse the object already registered for this section, if any. TAKE_PTR()
+                 * clears 'qdisc' so the cleanup handler does not free the shared object. */
+                qdisc = ordered_hashmap_get(network->qdiscs_by_section, n);
+                if (qdisc) {
+                        *ret = TAKE_PTR(qdisc);
+
+                        return 0;
+                }
+        }
+
+        r = qdisc_new(&qdisc);
+        if (r < 0)
+                return r;
+
+        qdisc->network = network;
+
+        if (filename) {
+                /* The section key is handed over to the qdisc before it is used as hashmap key. */
+                qdisc->section = TAKE_PTR(n);
+
+                r = ordered_hashmap_ensure_allocated(&network->qdiscs_by_section, &network_config_hash_ops);
+                if (r < 0)
+                        return r;
+
+                r = ordered_hashmap_put(network->qdiscs_by_section, qdisc->section, qdisc);
+                if (r < 0)
+                        return r;
+        }
+
+        *ret = TAKE_PTR(qdisc);
+
+        return 0;
+}
+
+/* Releases a QDiscs object; NULL is accepted and ignored. When the object has both a
+ * network and a section it is first removed from that network's qdiscs_by_section map. */
+void qdisc_free(QDiscs *qdisc) {
+        if (qdisc) {
+                if (qdisc->network && qdisc->section)
+                        ordered_hashmap_remove(qdisc->network->qdiscs_by_section, qdisc->section);
+
+                network_config_section_free(qdisc->section);
+                free(qdisc);
+        }
+}
+
+/* Reply callback for the qdisc request sent by qdisc_configure(). It decrements the link's
+ * count of outstanding qdisc messages and logs any error other than -EEXIST, unless the
+ * link is in the FAILED or LINGER state. Always returns 1. */
+static int qdisc_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+        int err;
+
+        assert(link);
+        assert(link->qdisc_messages > 0);
+        link->qdisc_messages--;
+
+        if (link->state == LINK_STATE_FAILED || link->state == LINK_STATE_LINGER)
+                return 1;
+
+        err = sd_netlink_message_get_errno(m);
+        if (err < 0 && err != -EEXIST)
+                log_link_error_errno(link, err, "Could not set QDisc: %m");
+
+        return 1;
+}
+
+/* Builds an RTM_NEWQDISC request for 'link' from the settings in 'qdisc' and sends it
+ * asynchronously; the reply is handled by qdisc_handler().
+ * Returns 0 once the request is queued, or a negative errno (already logged) on failure. */
+int qdisc_configure(Link *link, QDiscs *qdisc) {
+        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+        int r;
+
+        assert(link);
+        assert(link->manager);
+        assert(link->manager->rtnl);
+        assert(link->ifindex > 0);
+
+        r = sd_rtnl_message_new_qdisc(link->manager->rtnl, &req, RTM_NEWQDISC, qdisc->family, link->ifindex);
+        if (r < 0)
+                return log_link_error_errno(link, r, "Could not create RTM_NEWQDISC message: %m");
+
+        r = sd_rtnl_message_set_qdisc_parent(req, qdisc->parent);
+        if (r < 0)
+                return log_link_error_errno(link, r, "Could not create tcm_parent message: %m");
+
+        /* A clsact parent gets a fixed handle and the "clsact" kind. */
+        if (qdisc->parent == TC_H_CLSACT) {
+                r = sd_rtnl_message_set_qdisc_handle(req, TC_H_MAKE(TC_H_CLSACT, 0));
+                if (r < 0)
+                        return log_link_error_errno(link, r, "Could not set tcm_handle message: %m");
+
+                r = sd_netlink_message_append_string(req, TCA_KIND, "clsact");
+                if (r < 0)
+                        return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+        }
+
+        /* NOTE(review): if the parent is clsact and netem is enabled as well, TCA_KIND is
+         * appended a second time here - confirm whether that combination is meant to work. */
+        if (qdisc->has_network_emulator) {
+                r = sd_netlink_message_append_string(req, TCA_KIND, "netem");
+                if (r < 0)
+                        return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+
+                r = network_emulator_fill_message(link, qdisc, req);
+                if (r < 0)
+                        return r;
+        }
+
+        r = netlink_call_async(link->manager->rtnl, NULL, req, qdisc_handler, link_netlink_destroy_callback, link);
+        if (r < 0)
+                return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+        /* Take a link reference for the pending request and count it; qdisc_handler()
+         * decrements the counter again when the reply arrives. */
+        link_ref(link);
+        link->qdisc_messages++;
+
+        return 0;
+}
+
+/* Config parser for Parent= in the [TrafficControlQueueingDiscipline] section.
+ *
+ * 'data' is the Network being loaded; the qdisc for the current section is looked up or
+ * created through qdisc_new_static(). "root" selects TC_H_ROOT and "clsact" selects
+ * TC_H_CLSACT. An empty value resets the parent to TC_H_ROOT, the value qdisc_new() starts
+ * with. Any other value is logged and 0 is returned without detaching 'qdisc' from its
+ * cleanup handler. */
+int config_parse_tc_qdiscs_parent(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+        Network *network = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        r = qdisc_new_static(network, filename, section_line, &qdisc);
+        if (r < 0)
+                return r;
+
+        if (isempty(rvalue) || streq(rvalue, "root"))
+                qdisc->parent = TC_H_ROOT;
+        else if (streq(rvalue, "clsact"))
+                qdisc->parent = TC_H_CLSACT;
+        else {
+                /* No errno is associated with an unknown keyword, hence 0. */
+                log_syntax(unit, LOG_ERR, filename, line, 0,
+                           "Failed to parse [TrafficControlQueueingDiscipline] 'Parent=', ignoring assignment: %s",
+                           rvalue);
+                return 0;
+        }
+
+        qdisc = NULL;
+
+        return 0;
+}
diff --git a/src/network/tc/qdisc.h b/src/network/tc/qdisc.h
new file mode 100644 (file)
index 0000000..80b893b
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "macro.h"
+#include "netem.h"
+#include "../networkd-util.h"
+
+typedef struct QDiscs QDiscs;
+
+/* One queueing discipline configured by a [TrafficControlQueueingDiscipline] section. */
+struct QDiscs {
+        /* Key under which the object is registered in network->qdiscs_by_section. */
+        NetworkConfigSection *section;
+        Network *network;
+
+        /* NOTE(review): not assigned by qdisc_new_static() or qdisc_configure(). */
+        Link *link;
+
+        /* Passed as tcm_family when the request is built; qdisc_new() sets AF_UNSPEC. */
+        int family;
+
+        /* NOTE(review): 'handle' is not read by qdisc_configure(), which derives the handle
+         * from the parent instead. */
+        uint32_t handle;
+        /* TC_H_ROOT (default) or TC_H_CLSACT, as set by config_parse_tc_qdiscs_parent(). */
+        uint32_t parent;
+
+        /* When set, qdisc_configure() adds the "netem" kind and the options built from 'ne'. */
+        bool has_network_emulator:1;
+
+        NetworkEmulator ne;
+};
+
+void qdisc_free(QDiscs *qdisc);
+int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDiscs **ret);
+
+int qdisc_configure(Link *link, QDiscs *qdisc);
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(QDiscs, qdisc_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_qdiscs_parent);
diff --git a/src/network/tc/tc-util.c b/src/network/tc/tc-util.c
new file mode 100644 (file)
index 0000000..7e1cf53
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "tc-util.h"
+#include "time-util.h"
+
+/* Reads the first three hexadecimal fields of /proc/net/psched and derives from them the
+ * factor used to convert microseconds into scheduler ticks, stored in *ticks_in_usec.
+ * Returns 0 on success, the error from read_one_line_file(), or -EIO if the line cannot
+ * be parsed or would require a division by zero. */
+static int tc_init(double *ticks_in_usec) {
+        uint32_t clock_resolution, ticks_to_usec, usec_to_ticks;
+        _cleanup_free_ char *line = NULL;
+        double clock_factor;
+        int r;
+
+        r = read_one_line_file("/proc/net/psched", &line);
+        if (r < 0)
+                return r;
+
+        r = sscanf(line, "%08x%08x%08x", &ticks_to_usec, &usec_to_ticks, &clock_resolution);
+        if (r < 3)
+                return -EIO;
+
+        /* A zero divisor would turn the factor into inf or NaN; treat it as unusable data. */
+        if (usec_to_ticks == 0)
+                return -EIO;
+
+        clock_factor =  (double) clock_resolution / USEC_PER_SEC;
+        *ticks_in_usec = (double) ticks_to_usec / usec_to_ticks * clock_factor;
+
+        return 0;
+}
+
+/* Converts the time span 't' (microseconds) into scheduler ticks and stores the result,
+ * truncated towards zero, in *ret. The conversion factor comes from tc_init() and is cached
+ * after the first successful call.
+ * Returns 0 on success, the error from tc_init(), or -ERANGE if the result does not fit
+ * into 32 bits. */
+int tc_time_to_tick(usec_t t, uint32_t *ret) {
+        static double ticks_in_usec = -1;
+        double ticks;
+        int r;
+
+        assert(ret);
+
+        if (ticks_in_usec < 0) {
+                r = tc_init(&ticks_in_usec);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Range-check while the value is still a double: converting an out-of-range double
+         * to an integer type is undefined. The negated comparison also rejects NaN. */
+        ticks = (double) t * ticks_in_usec;
+        if (!(ticks < (double) UINT32_MAX + 1))
+                return -ERANGE;
+
+        *ret = (uint32_t) ticks;
+        return 0;
+}
+
+/* Parses 's' with parse_permille() and stores the result in *percent, rescaled from the
+ * 0..1000 permille range so that 1000 maps to UINT32_MAX.
+ * Returns 0 on success or the negative error from parse_permille(); *percent is left
+ * untouched on failure. */
+int parse_tc_percent(const char *s, uint32_t *percent)  {
+        int permille;
+
+        assert(s);
+        assert(percent);
+
+        permille = parse_permille(s);
+        if (permille < 0)
+                return permille;
+
+        *percent = (double) permille / 1000 * UINT32_MAX;
+        return 0;
+}
diff --git a/src/network/tc/tc-util.h b/src/network/tc/tc-util.h
new file mode 100644 (file)
index 0000000..ce7ab40
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "time-util.h"
+
+int tc_time_to_tick(usec_t t, uint32_t *ret);
+int parse_tc_percent(const char *s, uint32_t *percent);
index 099d76d3c7e0d96010849d783bdee5f9b90e4fc9..b34befebc8fb29594b03bf844b5bd071dd4e91f7 100644 (file)
@@ -202,6 +202,10 @@ int sd_rtnl_message_routing_policy_rule_get_rtm_type(const sd_netlink_message *m
 int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, unsigned flags);
 int sd_rtnl_message_routing_policy_rule_get_flags(const sd_netlink_message *m, unsigned *flags);
 
+int sd_rtnl_message_new_qdisc(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex);
+int sd_rtnl_message_set_qdisc_parent(sd_netlink_message *m, uint32_t parent);
+int sd_rtnl_message_set_qdisc_handle(sd_netlink_message *m, uint32_t handle);
+
 /* genl */
 int sd_genl_socket_open(sd_netlink **nl);
 int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **m);
index 7a2556133666253a0b838f03d8820cd6130468ec..0d0892fd3afa7179c8f6eb4815a2d362b6d86374 100644 (file)
@@ -262,3 +262,9 @@ DNS=
 [NextHop]
 Id=
 Gateway=
+[TrafficControlQueueingDiscipline]
+Parent=
+NetworkEmulatorDelaySec=
+NetworkEmulatorDelayJitterSec=
+NetworkEmulatorLossRate=
+NetworkEmulatorPacketLimit=