uapi: update headers

author Stephen Hemminger <stephen@networkplumber.org>

Fri, 5 Jun 2020 15:36:54 +0000 (08:36 -0700)

committer Stephen Hemminger <stephen@networkplumber.org>

Fri, 5 Jun 2020 15:36:54 +0000 (08:36 -0700)
author Stephen Hemminger <stephen@networkplumber.org>
Fri, 5 Jun 2020 15:36:54 +0000 (08:36 -0700)
committer Stephen Hemminger <stephen@networkplumber.org>
Fri, 5 Jun 2020 15:36:54 +0000 (08:36 -0700)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index dc5314dd8113f3f943868b6f61f5b9f278f38771..6cad1444527aa9b11d3359f0f18ba8f128ab4414 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
         BPF_LINK_GET_FD_BY_ID,
         BPF_LINK_GET_NEXT_ID,
         BPF_ENABLE_STATS,
+       BPF_ITER_CREATE,
  };
  
  enum bpf_map_type {
@@ -146,6 +147,7 @@ enum bpf_map_type {
         BPF_MAP_TYPE_SK_STORAGE,
         BPF_MAP_TYPE_DEVMAP_HASH,
         BPF_MAP_TYPE_STRUCT_OPS,
+       BPF_MAP_TYPE_RINGBUF,
  };
  
  /* Note that tracing related programs such as
@@ -218,6 +220,12 @@ enum bpf_attach_type {
         BPF_TRACE_FEXIT,
         BPF_MODIFY_RETURN,
         BPF_LSM_MAC,
+       BPF_TRACE_ITER,
+       BPF_CGROUP_INET4_GETPEERNAME,
+       BPF_CGROUP_INET6_GETPEERNAME,
+       BPF_CGROUP_INET4_GETSOCKNAME,
+       BPF_CGROUP_INET6_GETSOCKNAME,
+       BPF_XDP_DEVMAP,
         __MAX_BPF_ATTACH_TYPE
  };
  
@@ -228,6 +236,8 @@ enum bpf_link_type {
         BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
         BPF_LINK_TYPE_TRACING = 2,
         BPF_LINK_TYPE_CGROUP = 3,
+       BPF_LINK_TYPE_ITER = 4,
+       BPF_LINK_TYPE_NETNS = 5,
  
         MAX_BPF_LINK_TYPE,
  };
@@ -612,6 +622,11 @@ union bpf_attr {
                 __u32           type;
         } enable_stats;
  
+       struct { /* struct used by BPF_ITER_CREATE command */
+               __u32           link_fd;
+               __u32           flags;
+       } iter_create;
+
  } __attribute__((aligned(8)));
  
  /* The description below is an attempt at providing documentation to eBPF
@@ -667,8 +682,8 @@ union bpf_attr {
   *             For tracing programs, safely attempt to read *size* bytes from
   *             kernel space address *unsafe_ptr* and store the data in *dst*.
   *
- *             Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
- *             instead.
+ *             Generally, use **bpf_probe_read_user**\ () or
+ *             **bpf_probe_read_kernel**\ () instead.
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
@@ -676,7 +691,7 @@ union bpf_attr {
   *     Description
   *             Return the time elapsed since system boot, in nanoseconds.
   *             Does not include time the system was suspended.
- *             See: clock_gettime(CLOCK_MONOTONIC)
+ *             See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
   *     Return
   *             Current *ktime*.
   *
@@ -1535,11 +1550,11 @@ union bpf_attr {
   * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
   *     Description
   *             Copy a NUL terminated string from an unsafe kernel address
- *             *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ *             *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
   *             more details.
   *
- *             Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
- *             instead.
+ *             Generally, use **bpf_probe_read_user_str**\ () or
+ *             **bpf_probe_read_kernel_str**\ () instead.
   *     Return
   *             On success, the strictly positive length of the string,
   *             including the trailing NUL character. On error, a negative
@@ -1567,7 +1582,7 @@ union bpf_attr {
   *
   * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
   *     Description
- *             Equivalent to bpf_get_socket_cookie() helper that accepts
+ *             Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
   *             *skb*, but gets socket from **struct bpf_sock_ops** context.
   *     Return
   *             A 8-byte long non-decreasing number.
@@ -1596,6 +1611,7 @@ union bpf_attr {
   *             The option value of length *optlen* is pointed by *optval*.
   *
   *             *bpf_socket* should be one of the following:
+ *
   *             * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
   *             * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
   *               and **BPF_CGROUP_INET6_CONNECT**.
@@ -1619,6 +1635,13 @@ union bpf_attr {
   *             Grow or shrink the room for data in the packet associated to
   *             *skb* by *len_diff*, and according to the selected *mode*.
   *
+ *             By default, the helper will reset any offloaded checksum
+ *             indicator of the skb to CHECKSUM_NONE. This can be avoided
+ *             by the following flag:
+ *
+ *             * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ *               checksum data of the skb to CHECKSUM_NONE.
+ *
   *             There are two supported modes at this time:
   *
   *             * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -1664,12 +1687,12 @@ union bpf_attr {
   *
   *             The lower two bits of *flags* are used as the return code if
   *             the map lookup fails. This is so that the return value can be
- *             one of the XDP program return codes up to XDP_TX, as chosen by
- *             the caller. Any higher bits in the *flags* argument must be
+ *             one of the XDP program return codes up to **XDP_TX**, as chosen
+ *             by the caller. Any higher bits in the *flags* argument must be
   *             unset.
   *
- *             See also bpf_redirect(), which only supports redirecting to an
- *             ifindex, but doesn't require a map to do so.
+ *             See also **bpf_redirect**\ (), which only supports redirecting
+ *             to an ifindex, but doesn't require a map to do so.
   *     Return
   *             **XDP_REDIRECT** on success, or the value of the two lower bits
   *             of the *flags* argument on error.
@@ -1777,7 +1800,7 @@ union bpf_attr {
   *             the time running for event since last normalization. The
   *             enabled and running times are accumulated since the perf event
   *             open. To achieve scaling factor between two invocations of an
- *             eBPF program, users can can use CPU id as the key (which is
+ *             eBPF program, users can use CPU id as the key (which is
   *             typical for perf array usage model) to remember the previous
   *             value and do the calculation inside the eBPF program.
   *     Return
@@ -1804,6 +1827,7 @@ union bpf_attr {
   *             *opval* and of length *optlen*.
   *
   *             *bpf_socket* should be one of the following:
+ *
   *             * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
   *             * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
   *               and **BPF_CGROUP_INET6_CONNECT**.
@@ -1825,7 +1849,7 @@ union bpf_attr {
   *             The first argument is the context *regs* on which the kprobe
   *             works.
   *
- *             This helper works by setting setting the PC (program counter)
+ *             This helper works by setting the PC (program counter)
   *             to an override function which is run in place of the original
   *             probed function. This means the probed function is not run at
   *             all. The replacement function just returns with the required
@@ -1994,18 +2018,19 @@ union bpf_attr {
   *
   *             This helper works for IPv4 and IPv6, TCP and UDP sockets. The
   *             domain (*addr*\ **->sa_family**) must be **AF_INET** (or
- *             **AF_INET6**). Looking for a free port to bind to can be
- *             expensive, therefore binding to port is not permitted by the
- *             helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
- *             must be set to zero.
+ *             **AF_INET6**). It's advised to pass zero port (**sin_port**
+ *             or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
+ *             behavior and lets the kernel efficiently pick up an unused
+ *             port as long as 4-tuple is unique. Passing non-zero port might
+ *             lead to degraded performance.
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
   * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
   *     Description
   *             Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
- *             only possible to shrink the packet as of this writing,
- *             therefore *delta* must be a negative integer.
+ *             possible to both shrink and grow the packet tail.
+ *             Shrinking is done by passing a negative *delta*.
   *
   *             A call to this helper is susceptible to change the underlying
   *             packet buffer. Therefore, at load time, all checks on pointers
@@ -2291,7 +2316,7 @@ union bpf_attr {
   *             **bpf_rc_keydown**\ () again with the same values, or calling
   *             **bpf_rc_repeat**\ ().
   *
- *             Some protocols include a toggle bit, in case the button was
+ *             Some protocols include a toggle bit, in case the button was
   *             released and pressed again between consecutive scancodes.
   *
   *             The *ctx* should point to the lirc sample as passed into
@@ -2637,7 +2662,6 @@ union bpf_attr {
   *
   *             *th* points to the start of the TCP header, while *th_len*
   *             contains **sizeof**\ (**struct tcphdr**).
- *
   *     Return
   *             0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
   *             error otherwise.
@@ -2820,7 +2844,6 @@ union bpf_attr {
   *
   *             *th* points to the start of the TCP header, while *th_len*
   *             contains the length of the TCP header.
- *
   *     Return
   *             On success, lower 32 bits hold the generated SYN cookie in
   *             followed by 16 bits which hold the MSS value for that cookie,
@@ -2903,7 +2926,7 @@ union bpf_attr {
   *                             // size, after checking its boundaries.
   *                     }
   *
- *             In comparison, using **bpf_probe_read_user()** helper here
+ *             In comparison, using **bpf_probe_read_user**\ () helper here
   *             instead to read the string would require to estimate the length
   *             at compile time, and would often result in copying more memory
   *             than necessary.
@@ -2921,14 +2944,14 @@ union bpf_attr {
   * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
   *     Description
   *             Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
- *             to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ *             to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
   *     Return
- *             On success, the strictly positive length of the string, including
+ *             On success, the strictly positive length of the string, including
   *             the trailing NUL character. On error, a negative value.
   *
   * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
   *     Description
- *             Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ *             Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
   *             *rcv_nxt* is the ack_seq to be sent out.
   *     Return
   *             0 on success, or a negative error in case of failure.
@@ -2956,19 +2979,19 @@ union bpf_attr {
   * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
   *     Description
   *             For an eBPF program attached to a perf event, retrieve the
- *             branch records (struct perf_branch_entry) associated to *ctx*
- *             and store it in the buffer pointed by *buf* up to size
+ *             branch records (**struct perf_branch_entry**) associated to *ctx*
+ *             and store it in the buffer pointed by *buf* up to size
   *             *size* bytes.
   *     Return
   *             On success, number of bytes written to *buf*. On error, a
   *             negative value.
   *
   *             The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
- *             instead return the number of bytes required to store all the
+ *             instead return the number of bytes required to store all the
   *             branch entries. If this flag is set, *buf* may be NULL.
   *
   *             **-EINVAL** if arguments invalid or **size** not a multiple
- *             of sizeof(struct perf_branch_entry).
+ *             of **sizeof**\ (**struct perf_branch_entry**\ ).
   *
   *             **-ENOENT** if architecture does not support branch records.
   *
@@ -2976,8 +2999,8 @@ union bpf_attr {
   *     Description
   *             Returns 0 on success, values for *pid* and *tgid* as seen from the current
   *             *namespace* will be returned in *nsdata*.
- *
- *             On failure, the returned value is one of the following:
+ *     Return
+ *             0 on success, or one of the following in case of failure:
   *
   *             **-EINVAL** if dev and inum supplied don't match dev_t and inode number
   *              with nsfs of current task, or if dev conversion to dev_t lost high bits.
@@ -3016,8 +3039,8 @@ union bpf_attr {
   *             a global identifier that can be assumed unique. If *ctx* is
   *             NULL, then the helper returns the cookie for the initial
   *             network namespace. The cookie itself is very similar to that
- *             of bpf_get_socket_cookie() helper, but for network namespaces
- *             instead of sockets.
+ *             of **bpf_get_socket_cookie**\ () helper, but for network
+ *             namespaces instead of sockets.
   *     Return
   *             A 8-byte long opaque number.
   *
@@ -3052,22 +3075,183 @@ union bpf_attr {
   *
   *             The *flags* argument must be zero.
   *     Return
- *             0 on success, or a negative errno in case of failure.
+ *             0 on success, or a negative error in case of failure:
+ *
+ *             **-EINVAL** if specified *flags* are not supported.
+ *
+ *             **-ENOENT** if the socket is unavailable for assignment.
+ *
+ *             **-ENETUNREACH** if the socket is unreachable (wrong netns).
+ *
+ *             **-EOPNOTSUPP** if the operation is not supported, for example
+ *             a call from outside of TC ingress.
   *
- *             * **-EINVAL**           Unsupported flags specified.
- *             * **-ENOENT**           Socket is unavailable for assignment.
- *             * **-ENETUNREACH**      Socket is unreachable (wrong netns).
- *             * **-EOPNOTSUPP**       Unsupported operation, for example a
- *                                     call from outside of TC ingress.
- *             * **-ESOCKTNOSUPPORT**  Socket type not supported (reuseport).
+ *             **-ESOCKTNOSUPPORT** if the socket type is not supported
+ *             (reuseport).
   *
   * u64 bpf_ktime_get_boot_ns(void)
   *     Description
   *             Return the time elapsed since system boot, in nanoseconds.
   *             Does include the time the system was suspended.
- *             See: clock_gettime(CLOCK_BOOTTIME)
+ *             See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
   *     Return
   *             Current *ktime*.
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *     Description
+ *             **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
+ *             out the format string.
+ *             The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ *             the format string itself. The *data* and *data_len* are format string
+ *             arguments. The *data* are a **u64** array and corresponding format string
+ *             values are stored in the array. For strings and pointers where pointees
+ *             are accessed, only the pointer values are stored in the *data* array.
+ *             The *data_len* is the size of *data* in bytes.
+ *
+ *             Formats **%s** and **%p{i,I}{4,6}** require reading kernel memory.
+ *             Reading kernel memory may fail due to either invalid address or
+ *             valid address but requiring a major memory fault. If reading kernel memory
+ *             fails, the string for **%s** will be an empty string, and the ip
+ *             address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ *             bpf program is consistent with what **bpf_trace_printk**\ () does for now.
+ *     Return
+ *             0 on success, or a negative error in case of failure:
+ *
+ *             **-EBUSY** if per-CPU memory copy buffer is busy, can try again
+ *             by returning 1 from bpf program.
+ *
+ *             **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
+ *
+ *             **-E2BIG** if *fmt* contains too many format specifiers.
+ *
+ *             **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ *     Description
+ *             **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
+ *             The *m* represents the seq_file. The *data* and *len* represent the
+ *             data to write in bytes.
+ *     Return
+ *             0 on success, or a negative error in case of failure:
+ *
+ *             **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ *     Description
+ *             Return the cgroup v2 id of the socket *sk*.
+ *
+ *             *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *             returned from **bpf_sk_lookup_xxx**\ (),
+ *             **bpf_sk_fullsock**\ (), etc. The format of returned id is
+ *             same as in **bpf_skb_cgroup_id**\ ().
+ *
+ *             This helper is available only if the kernel was compiled with
+ *             the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ *     Return
+ *             The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ *     Description
+ *             Return id of cgroup v2 that is ancestor of cgroup associated
+ *             with the *sk* at the *ancestor_level*.  The root cgroup is at
+ *             *ancestor_level* zero and each step down the hierarchy
+ *             increments the level. If *ancestor_level* == level of cgroup
+ *             associated with *sk*, then return value will be same as that
+ *             of **bpf_sk_cgroup_id**\ ().
+ *
+ *             The helper is useful to implement policies based on cgroups
+ *             that are upper in hierarchy than immediate cgroup associated
+ *             with *sk*.
+ *
+ *             The format of returned id and helper limitations are same as in
+ *             **bpf_sk_cgroup_id**\ ().
+ *     Return
+ *             The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ *     Description
+ *             Copy *size* bytes from *data* into a ring buffer *ringbuf*.
+ *             If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ *             new data availability is sent.
+ *             If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ *             new data availability is sent unconditionally.
+ *     Return
+ *             0, on success;
+ *             < 0, on error.
+ *
+ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
+ *     Description
+ *             Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ *     Return
+ *             Valid pointer with *size* bytes of memory available; NULL,
+ *             otherwise.
+ *
+ * void bpf_ringbuf_submit(void *data, u64 flags)
+ *     Description
+ *             Submit reserved ring buffer sample, pointed to by *data*.
+ *             If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ *             new data availability is sent.
+ *             If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ *             new data availability is sent unconditionally.
+ *     Return
+ *             Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard(void *data, u64 flags)
+ *     Description
+ *             Discard reserved ring buffer sample, pointed to by *data*.
+ *             If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ *             new data availability is sent.
+ *             If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ *             new data availability is sent unconditionally.
+ *     Return
+ *             Nothing. Always succeeds.
+ *
+ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
+ *     Description
+ *             Query various characteristics of provided ring buffer. What
+ *             exactly is queried is determined by *flags*:
+ *               - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
+ *               - BPF_RB_RING_SIZE - the size of ring buffer;
+ *               - BPF_RB_CONS_POS - consumer position (can wrap around);
+ *               - BPF_RB_PROD_POS - producer(s) position (can wrap around);
+ *             Data returned is just a momentary snapshot of actual values
+ *             and could be inaccurate, so this facility should be used to
+ *             power heuristics and for reporting, not to make 100% correct
+ *             calculation.
+ *     Return
+ *             Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ *     Description
+ *             Change the skb's checksum level by one layer up or down, or
+ *             reset it entirely to none in order to have the stack perform
+ *             checksum validation. The level is applicable to the following
+ *             protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ *             | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ *             through **bpf_skb_adjust_room**\ () helper with passing in
+ *             **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ *             to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ *             the UDP header is removed. Similarly, an encap of the latter
+ *             into the former could be accompanied by a helper call to
+ *             **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ *             skb is still intended to be processed in higher layers of the
+ *             stack instead of just egressing at tc.
+ *
+ *             There are four supported level settings at this time:
+ *
+ *             * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ *               with CHECKSUM_UNNECESSARY.
+ *             * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ *               with CHECKSUM_UNNECESSARY.
+ *             * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ *               sets CHECKSUM_NONE to force checksum validation by the stack.
+ *             * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ *               skb->csum_level.
+ *     Return
+ *             0 on success, or a negative error in case of failure. In the
+ *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ *             is returned or the error code -EACCES in case the skb is not
+ *             subject to CHECKSUM_UNNECESSARY.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -3195,7 +3379,17 @@ union bpf_attr {
         FN(get_netns_cookie),           \
         FN(get_current_ancestor_cgroup_id),     \
         FN(sk_assign),                  \
-       FN(ktime_get_boot_ns),
+       FN(ktime_get_boot_ns),          \
+       FN(seq_printf),                 \
+       FN(seq_write),                  \
+       FN(sk_cgroup_id),               \
+       FN(sk_ancestor_cgroup_id),      \
+       FN(ringbuf_output),             \
+       FN(ringbuf_reserve),            \
+       FN(ringbuf_submit),             \
+       FN(ringbuf_discard),            \
+       FN(ringbuf_query),              \
+       FN(csum_level),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -3272,6 +3466,14 @@ enum {
         BPF_F_CURRENT_NETNS             = (-1L),
  };
  
+/* BPF_FUNC_csum_level level values. */
+enum {
+       BPF_CSUM_LEVEL_QUERY,
+       BPF_CSUM_LEVEL_INC,
+       BPF_CSUM_LEVEL_DEC,
+       BPF_CSUM_LEVEL_RESET,
+};
+
  /* BPF_FUNC_skb_adjust_room flags. */
  enum {
         BPF_F_ADJ_ROOM_FIXED_GSO        = (1ULL << 0),
@@ -3279,6 +3481,7 @@ enum {
         BPF_F_ADJ_ROOM_ENCAP_L3_IPV6    = (1ULL << 2),
         BPF_F_ADJ_ROOM_ENCAP_L4_GRE     = (1ULL << 3),
         BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
+       BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
  };
  
  enum {
@@ -3305,6 +3508,29 @@ enum {
         BPF_F_GET_BRANCH_RECORDS_SIZE   = (1ULL << 0),
  };
  
+/* BPF_FUNC_ringbuf_submit, BPF_FUNC_ringbuf_discard, and
+ * BPF_FUNC_ringbuf_output flags.
+ */
+enum {
+       BPF_RB_NO_WAKEUP                = (1ULL << 0),
+       BPF_RB_FORCE_WAKEUP             = (1ULL << 1),
+};
+
+/* BPF_FUNC_ringbuf_query flags */
+enum {
+       BPF_RB_AVAIL_DATA = 0,
+       BPF_RB_RING_SIZE = 1,
+       BPF_RB_CONS_POS = 2,
+       BPF_RB_PROD_POS = 3,
+};
+
+/* BPF ring buffer constants */
+enum {
+       BPF_RINGBUF_BUSY_BIT            = (1U << 31),
+       BPF_RINGBUF_DISCARD_BIT         = (1U << 30),
+       BPF_RINGBUF_HDR_SZ              = 8,
+};
+
  /* Mode for BPF_FUNC_skb_adjust_room helper. */
  enum bpf_adj_room_mode {
         BPF_ADJ_ROOM_NET,
@@ -3437,6 +3663,7 @@ struct bpf_sock {
         __u32 dst_ip4;
         __u32 dst_ip6[4];
         __u32 state;
+       __s32 rx_queue_mapping;
  };
  
  struct bpf_tcp_sock {
@@ -3530,6 +3757,8 @@ struct xdp_md {
         /* Below access go through struct xdp_rxq_info */
         __u32 ingress_ifindex; /* rxq->dev->ifindex */
         __u32 rx_queue_index;  /* rxq->queue_index  */
+
+       __u32 egress_ifindex;  /* txq->dev->ifindex */
  };
  
  enum sk_action {
@@ -3552,6 +3781,8 @@ struct sk_msg_md {
         __u32 remote_port;      /* Stored in network byte order */
         __u32 local_port;       /* stored in host byte order */
         __u32 size;             /* Total size of sk_msg */
+
+       __bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
  };
  
  struct sk_reuseport_md {
@@ -3658,6 +3889,10 @@ struct bpf_link_info {
                         __u64 cgroup_id;
                         __u32 attach_type;
                 } cgroup;
+               struct  {
+                       __u32 netns_ino;
+                       __u32 attach_type;
+               } netns;
         };
  } __attribute__((aligned(8)));
  
@@ -3673,7 +3908,7 @@ struct bpf_sock_addr {
         __u32 user_ip6[4];      /* Allows 1,2,4,8-byte read and 4,8-byte write.
                                  * Stored in network byte order.
                                  */
-       __u32 user_port;        /* Allows 4-byte read and write.
+       __u32 user_port;        /* Allows 1,2,4-byte read and 4-byte write.
                                  * Stored in network byte order
                                  */
         __u32 family;           /* Allows 4-byte read, but no write */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h

index 80e33d2503fc30b1dcbb38a4c9cc46be40afb794..7b600bf1a251477519336a7a603d77d0bf576377 100644 (file)
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -233,10 +233,13 @@ enum {
   * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
   *                            sent to the CPU.
   * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_MIRROR: Packet is forwarded by the device and a copy is
+ *                              sent to the CPU.
   */
  enum devlink_trap_action {
         DEVLINK_TRAP_ACTION_DROP,
         DEVLINK_TRAP_ACTION_TRAP,
+       DEVLINK_TRAP_ACTION_MIRROR,
  };
  
  /**
@@ -250,10 +253,16 @@ enum devlink_trap_action {
   *                               control plane for resolution. Trapped packets
   *                               are processed by devlink and injected to
   *                               the kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_CONTROL: Packet was trapped because it is required for
+ *                             the correct functioning of the control plane.
+ *                             For example, an ARP request packet. Trapped
+ *                             packets are injected to the kernel's Rx path,
+ *                             but not reported to drop monitor.
   */
  enum devlink_trap_type {
         DEVLINK_TRAP_TYPE_DROP,
         DEVLINK_TRAP_TYPE_EXCEPTION,
+       DEVLINK_TRAP_TYPE_CONTROL,
  };
  
  enum {
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h

index a4ac9f5563dd8ba75416586df436cbd5000b7aa0..73e2d8623f34777199b4ff8a5cec5922a49d9c31 100644 (file)
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -169,15 +169,70 @@ enum {
         __IFLA_BRIDGE_MRP_MAX,
  };
  
+#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)
+
+enum {
+       IFLA_BRIDGE_MRP_INSTANCE_UNSPEC,
+       IFLA_BRIDGE_MRP_INSTANCE_RING_ID,
+       IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX,
+       IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX,
+       IFLA_BRIDGE_MRP_INSTANCE_PRIO,
+       __IFLA_BRIDGE_MRP_INSTANCE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_INSTANCE_MAX (__IFLA_BRIDGE_MRP_INSTANCE_MAX - 1)
+
+enum {
+       IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC,
+       IFLA_BRIDGE_MRP_PORT_STATE_STATE,
+       __IFLA_BRIDGE_MRP_PORT_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_PORT_STATE_MAX (__IFLA_BRIDGE_MRP_PORT_STATE_MAX - 1)
+
+enum {
+       IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC,
+       IFLA_BRIDGE_MRP_PORT_ROLE_ROLE,
+       __IFLA_BRIDGE_MRP_PORT_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_PORT_ROLE_MAX (__IFLA_BRIDGE_MRP_PORT_ROLE_MAX - 1)
+
+enum {
+       IFLA_BRIDGE_MRP_RING_STATE_UNSPEC,
+       IFLA_BRIDGE_MRP_RING_STATE_RING_ID,
+       IFLA_BRIDGE_MRP_RING_STATE_STATE,
+       __IFLA_BRIDGE_MRP_RING_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_RING_STATE_MAX (__IFLA_BRIDGE_MRP_RING_STATE_MAX - 1)
+
+enum {
+       IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC,
+       IFLA_BRIDGE_MRP_RING_ROLE_RING_ID,
+       IFLA_BRIDGE_MRP_RING_ROLE_ROLE,
+       __IFLA_BRIDGE_MRP_RING_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_RING_ROLE_MAX (__IFLA_BRIDGE_MRP_RING_ROLE_MAX - 1)
+
+enum {
+       IFLA_BRIDGE_MRP_START_TEST_UNSPEC,
+       IFLA_BRIDGE_MRP_START_TEST_RING_ID,
+       IFLA_BRIDGE_MRP_START_TEST_INTERVAL,
+       IFLA_BRIDGE_MRP_START_TEST_MAX_MISS,
+       IFLA_BRIDGE_MRP_START_TEST_PERIOD,
+       IFLA_BRIDGE_MRP_START_TEST_MONITOR,
+       __IFLA_BRIDGE_MRP_START_TEST_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1)
+
  struct br_mrp_instance {
         __u32 ring_id;
         __u32 p_ifindex;
         __u32 s_ifindex;
-};
-
-struct br_mrp_port_role {
-       __u32 ring_id;
-       __u32 role;
+       __u16 prio;
  };
  
  struct br_mrp_ring_state {
@@ -195,10 +250,9 @@ struct br_mrp_start_test {
         __u32 interval;
         __u32 max_miss;
         __u32 period;
+       __u32 monitor;
  };
  
-#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)
-
  struct bridge_stp_xstats {
         __u64 transition_blk;
         __u64 transition_fwd;
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h

index cd144e3099a3c78768ac61abb2a7aa850a891b02..eefcda8ca44e9c780017d45b6bc7e81af3029453 100644 (file)
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -29,6 +29,7 @@ enum {
         NDA_LINK_NETNSID,
         NDA_SRC_VNI,
         NDA_PROTOCOL,  /* Originator of entry */
+       NDA_NH_ID,
         __NDA_MAX
  };
  
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h

index b56c5b8954768791dd9efd879d392ed2de3dd70c..b0a5613905eff8fcbd0a590175764295b5f29a75 100644 (file)
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -49,6 +49,9 @@ enum {
         NHA_GROUPS,     /* flag; only return nexthop groups in dump */
         NHA_MASTER,     /* u32;  only return nexthops with given master dev */
  
+       NHA_FDB,        /* flag; nexthop belongs to a bridge fdb */
+       /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
+
         __NHA_MAX,
  };
  
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h

index fc672b232437dcc4578aa90be14ae51caa69e099..7576209d96f9f6a8e8977d69a6a0a72a4040b58b 100644 (file)
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -576,6 +576,8 @@ enum {
         TCA_FLOWER_KEY_CT_LABELS,       /* u128 */
         TCA_FLOWER_KEY_CT_LABELS_MASK,  /* u128 */
  
+       TCA_FLOWER_KEY_MPLS_OPTS,
+
         __TCA_FLOWER_MAX,
  };
  
@@ -640,6 +642,27 @@ enum {
  #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \
                 (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1)
  
+enum {
+       TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC,
+       TCA_FLOWER_KEY_MPLS_OPTS_LSE,
+       __TCA_FLOWER_KEY_MPLS_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_MPLS_OPTS_MAX (__TCA_FLOWER_KEY_MPLS_OPTS_MAX - 1)
+
+enum {
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_UNSPEC,
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+       __TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX,
+};
+
+#define TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX \
+               (__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX - 1)
+
  enum {
         TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
         TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h

index 9d802cd7f695b39fdbbaffc7b6665b59b935a278..bcb1ba4d0146ef21856bc9206a9d6878e81126ee 100644 (file)
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -609,11 +609,17 @@ enum {
         TCA_HW_OFFLOAD,
         TCA_INGRESS_BLOCK,
         TCA_EGRESS_BLOCK,
+       TCA_DUMP_FLAGS,
         __TCA_MAX
  };
  
  #define TCA_MAX (__TCA_MAX - 1)
  
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+                                      * data necessary to identify the objects
+                                      * (handle, cookie, etc.) and stats.
+                                      */
+
  #define TCA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
  #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
author	Stephen Hemminger <stephen@networkplumber.org>
	Fri, 5 Jun 2020 15:36:54 +0000 (08:36 -0700)
committer	Stephen Hemminger <stephen@networkplumber.org>
	Fri, 5 Jun 2020 15:36:54 +0000 (08:36 -0700)
include/uapi/linux/bpf.h		patch \| blob \| blame \| history
include/uapi/linux/devlink.h		patch \| blob \| blame \| history
include/uapi/linux/if_bridge.h		patch \| blob \| blame \| history
include/uapi/linux/neighbour.h		patch \| blob \| blame \| history
include/uapi/linux/nexthop.h		patch \| blob \| blame \| history
include/uapi/linux/pkt_cls.h		patch \| blob \| blame \| history
include/uapi/linux/rtnetlink.h		patch \| blob \| blame \| history