shared/linux: update linux headers from v5.16-rc6

author Yu Watanabe <watanabe.yu+github@gmail.com>

Fri, 24 Dec 2021 02:23:11 +0000 (11:23 +0900)

committer Yu Watanabe <watanabe.yu+github@gmail.com>

Sat, 25 Dec 2021 06:34:02 +0000 (15:34 +0900)
author Yu Watanabe <watanabe.yu+github@gmail.com>
Fri, 24 Dec 2021 02:23:11 +0000 (11:23 +0900)
committer Yu Watanabe <watanabe.yu+github@gmail.com>
Sat, 25 Dec 2021 06:34:02 +0000 (15:34 +0900)
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h

index e6ceac3f7d624ec45f4828e76a77cce43cc3aa9b..ba5af15e25f5c108388429a8e86b2bae12f1f237 100644 (file)
--- a/src/shared/linux/bpf.h
+++ b/src/shared/linux/bpf.h
@@ -19,7 +19,8 @@
  
  /* ld/ldx fields */
  #define BPF_DW         0x18    /* double word (64-bit) */
-#define BPF_XADD       0xc0    /* exclusive add */
+#define BPF_ATOMIC     0xc0    /* atomic memory ops - op type in immediate */
+#define BPF_XADD       0xc0    /* exclusive add - legacy name */
  
  /* alu/jmp fields */
  #define BPF_MOV                0xb0    /* mov reg to reg */
@@ -43,6 +44,11 @@
  #define BPF_CALL       0x80    /* function call */
  #define BPF_EXIT       0x90    /* function return */
  
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH      0x01    /* not an opcode on its own, used to build others */
+#define BPF_XCHG       (0xe0 | BPF_FETCH)      /* atomic exchange */
+#define BPF_CMPXCHG    (0xf0 | BPF_FETCH)      /* atomic compare-and-write */
+
  /* Register numbers */
  enum {
         BPF_REG_0 = 0,
@@ -78,7 +84,7 @@ struct bpf_lpm_trie_key {
  
  struct bpf_cgroup_storage_key {
         __u64   cgroup_inode_id;        /* cgroup inode id */
-       __u32   attach_type;            /* program attach type */
+       __u32   attach_type;            /* program attach type (enum bpf_attach_type) */
  };
  
  union bpf_iter_link_info {
@@ -87,7 +93,748 @@ union bpf_iter_link_info {
         } map;
  };
  
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ *     Description
+ *             Create a map and return a file descriptor that refers to the
+ *             map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ *             is automatically enabled for the new file descriptor.
+ *
+ *             Applying **close**\ (2) to the file descriptor returned by
+ *             **BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ *     Description
+ *             Look up an element with a given *key* in the map referred to
+ *             by the file descriptor *map_fd*.
+ *
+ *             The *flags* argument may be specified as one of the
+ *             following:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up the value of a spin-locked map without
+ *                     returning the lock. This must be specified if the
+ *                     elements contain a spinlock.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ *     Description
+ *             Create or update an element (key/value pair) in a specified map.
+ *
+ *             The *flags* argument should be specified as one of the
+ *             following:
+ *
+ *             **BPF_ANY**
+ *                     Create a new element or update an existing element.
+ *             **BPF_NOEXIST**
+ *                     Create a new element only if it did not exist.
+ *             **BPF_EXIST**
+ *                     Update an existing element.
+ *             **BPF_F_LOCK**
+ *                     Update a spin_lock-ed map element.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ *             May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ *             **E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ *             **E2BIG**
+ *                     The number of elements in the map reached the
+ *                     *max_entries* limit specified at map creation time.
+ *             **EEXIST**
+ *                     If *flags* specifies **BPF_NOEXIST** and the element
+ *                     with *key* already exists in the map.
+ *             **ENOENT**
+ *                     If *flags* specifies **BPF_EXIST** and the element with
+ *                     *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ *     Description
+ *             Look up and delete an element by key in a specified map.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ *     Description
+ *             Look up an element by key in a specified map and return the key
+ *             of the next element. Can be used to iterate over all elements
+ *             in the map.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ *             The following cases can be used to iterate over all elements of
+ *             the map:
+ *
+ *             * If *key* is not found, the operation returns zero and sets
+ *               the *next_key* pointer to the key of the first element.
+ *             * If *key* is found, the operation returns zero and sets the
+ *               *next_key* pointer to the key of the next element.
+ *             * If *key* is the last element, returns -1 and *errno* is set
+ *               to **ENOENT**.
+ *
+ *             May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ *             **EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ *     Description
+ *             Verify and load an eBPF program, returning a new file
+ *             descriptor associated with the program.
+ *
+ *             Applying **close**\ (2) to the file descriptor returned by
+ *             **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ *             The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ *             automatically enabled for the new file descriptor.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ *     Description
+ *             Pin an eBPF program or map referred by the specified *bpf_fd*
+ *             to the provided *pathname* on the filesystem.
+ *
+ *             The *pathname* argument must not contain a dot (".").
+ *
+ *             On success, *pathname* retains a reference to the eBPF object,
+ *             preventing deallocation of the object when the original
+ *             *bpf_fd* is closed. This allows the eBPF object to live beyond
+ *             **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ *             process.
+ *
+ *             Applying **unlink**\ (2) or similar calls to the *pathname*
+ *             unpins the object from the filesystem, removing the reference.
+ *             If no other file descriptors or filesystem nodes refer to the
+ *             same object, it will be deallocated (see NOTES).
+ *
+ *             The filesystem type for the parent directory of *pathname* must
+ *             be **BPF_FS_MAGIC**.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_OBJ_GET
+ *     Description
+ *             Open a file descriptor for the eBPF object pinned to the
+ *             specified *pathname*.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ *     Description
+ *             Attach an eBPF program to a *target_fd* at the specified
+ *             *attach_type* hook.
+ *
+ *             The *attach_type* specifies the eBPF attachment point to
+ *             attach the program to, and must be one of *bpf_attach_type*
+ *             (see below).
+ *
+ *             The *attach_bpf_fd* must be a valid file descriptor for a
+ *             loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ *             or sock_ops type corresponding to the specified *attach_type*.
+ *
+ *             The *target_fd* must be a valid file descriptor for a kernel
+ *             object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *             **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *             **BPF_PROG_TYPE_CGROUP_SKB**,
+ *             **BPF_PROG_TYPE_CGROUP_SOCK**,
+ *             **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *             **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *             **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *             **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *                     Control Group v2 hierarchy with the eBPF controller
+ *                     enabled. Requires the kernel to be compiled with
+ *                     **CONFIG_CGROUP_BPF**.
+ *
+ *             **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *                     Network namespace (eg /proc/self/ns/net).
+ *
+ *             **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *                     LIRC device path (eg /dev/lircN). Requires the kernel
+ *                     to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *             **BPF_PROG_TYPE_SK_SKB**,
+ *             **BPF_PROG_TYPE_SK_MSG**
+ *
+ *                     eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ *     Description
+ *             Detach the eBPF program associated with the *target_fd* at the
+ *             hook specified by *attach_type*. The program must have been
+ *             previously attached using **BPF_PROG_ATTACH**.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ *     Description
+ *             Run the eBPF program associated with the *prog_fd* a *repeat*
+ *             number of times against a provided program context *ctx_in* and
+ *             data *data_in*, and return the modified program context
+ *             *ctx_out*, *data_out* (for example, packet data), result of the
+ *             execution *retval*, and *duration* of the test run.
+ *
+ *             The sizes of the buffers provided as input and output
+ *             parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ *             be provided in the corresponding variables *ctx_size_in*,
+ *             *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ *             of these parameters are not provided (ie set to NULL), the
+ *             corresponding size field must be zero.
+ *
+ *             Some program types have particular requirements:
+ *
+ *             **BPF_PROG_TYPE_SK_LOOKUP**
+ *                     *data_in* and *data_out* must be NULL.
+ *
+ *             **BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ *             **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ *                     *ctx_out*, *data_in* and *data_out* must be NULL.
+ *                     *repeat* must be zero.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ *             **ENOSPC**
+ *                     Either *data_size_out* or *ctx_size_out* is too small.
+ *             **ENOTSUPP**
+ *                     This command is not supported by the program type of
+ *                     the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ *     Description
+ *             Fetch the next eBPF program currently loaded into the kernel.
+ *
+ *             Looks for the eBPF program with an id greater than *start_id*
+ *             and updates *next_id* on success. If no other eBPF programs
+ *             remain with ids higher than *start_id*, returns -1 and sets
+ *             *errno* to **ENOENT**.
+ *
+ *     Return
+ *             Returns zero on success. On error, or when no id remains, -1
+ *             is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ *     Description
+ *             Fetch the next eBPF map currently loaded into the kernel.
+ *
+ *             Looks for the eBPF map with an id greater than *start_id*
+ *             and updates *next_id* on success. If no other eBPF maps
+ *             remain with ids higher than *start_id*, returns -1 and sets
+ *             *errno* to **ENOENT**.
+ *
+ *     Return
+ *             Returns zero on success. On error, or when no id remains, -1
+ *             is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ *     Description
+ *             Open a file descriptor for the eBPF program corresponding to
+ *             *prog_id*.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ *     Description
+ *             Open a file descriptor for the eBPF map corresponding to
+ *             *map_id*.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ *     Description
+ *             Obtain information about the eBPF object corresponding to
+ *             *bpf_fd*.
+ *
+ *             Populates up to *info_len* bytes of *info*, which will be in
+ *             one of the following formats depending on the eBPF object type
+ *             of *bpf_fd*:
+ *
+ *             * **struct bpf_prog_info**
+ *             * **struct bpf_map_info**
+ *             * **struct bpf_btf_info**
+ *             * **struct bpf_link_info**
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ *     Description
+ *             Obtain information about eBPF programs associated with the
+ *             specified *attach_type* hook.
+ *
+ *             The *target_fd* must be a valid file descriptor for a kernel
+ *             object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *             **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *             **BPF_PROG_TYPE_CGROUP_SKB**,
+ *             **BPF_PROG_TYPE_CGROUP_SOCK**,
+ *             **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *             **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *             **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *             **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *                     Control Group v2 hierarchy with the eBPF controller
+ *                     enabled. Requires the kernel to be compiled with
+ *                     **CONFIG_CGROUP_BPF**.
+ *
+ *             **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *                     Network namespace (eg /proc/self/ns/net).
+ *
+ *             **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *                     LIRC device path (eg /dev/lircN). Requires the kernel
+ *                     to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *             **BPF_PROG_QUERY** always fetches the number of programs
+ *             attached and the *attach_flags* which were used to attach those
+ *             programs. Additionally, if *prog_ids* is nonzero and the number
+ *             of attached programs is less than *prog_cnt*, populates
+ *             *prog_ids* with the eBPF program ids of the programs attached
+ *             at *target_fd*.
+ *
+ *             The following flags may alter the result:
+ *
+ *             **BPF_F_QUERY_EFFECTIVE**
+ *                     Only return information regarding programs which are
+ *                     currently effective at the specified *target_fd*.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ *     Description
+ *             Attach an eBPF program to a tracepoint *name* to access kernel
+ *             internal arguments of the tracepoint in their raw form.
+ *
+ *             The *prog_fd* must be a valid file descriptor associated with
+ *             a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ *             No ABI guarantees are made about the content of tracepoint
+ *             arguments exposed to the corresponding eBPF program.
+ *
+ *             Applying **close**\ (2) to the file descriptor returned by
+ *             **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ *     Description
+ *             Verify and load BPF Type Format (BTF) metadata into the kernel,
+ *             returning a new file descriptor associated with the metadata.
+ *             BTF is described in more detail at
+ *             https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ *             The *btf* parameter must point to valid memory providing
+ *             *btf_size* bytes of BTF binary metadata.
+ *
+ *             The returned file descriptor can be passed to other **bpf**\ ()
+ *             subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ *             associate the BTF with those objects.
+ *
+ *             Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ *             parameters to specify a *btf_log_buf*, *btf_log_size* and
+ *             *btf_log_level* which allow the kernel to return freeform log
+ *             output regarding the BTF verification process.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ *     Description
+ *             Open a file descriptor for the BPF Type Format (BTF)
+ *             corresponding to *btf_id*.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ *     Description
+ *             Obtain information about eBPF programs associated with the
+ *             target process identified by *pid* and *fd*.
+ *
+ *             If the *pid* and *fd* are associated with a tracepoint, kprobe
+ *             or uprobe perf event, then the *prog_id* and *fd_type* will
+ *             be populated with the eBPF program id and file descriptor type
+ *             of type **bpf_task_fd_type**. If associated with a kprobe or
+ *             uprobe, the *probe_offset* and *probe_addr* will also be
+ *             populated. Optionally, if *buf* is provided, then up to
+ *             *buf_len* bytes of *buf* will be populated with the name of
+ *             the tracepoint, kprobe or uprobe.
+ *
+ *             The resulting *prog_id* may be introspected in deeper detail
+ *             using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ *     Description
+ *             Look up an element with the given *key* in the map referred to
+ *             by the file descriptor *fd*, and if found, delete the element.
+ *
+ *             For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *             types, the *flags* argument needs to be set to 0, but for other
+ *             map types, it may be specified as:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up and delete the value of a spin-locked map
+ *                     without returning the lock. This must be specified if
+ *                     the elements contain a spinlock.
+ *
+ *             The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ *             implement this command as a "pop" operation, deleting the top
+ *             element rather than one corresponding to *key*.
+ *             The *key* and *key_len* parameters should be zeroed when
+ *             issuing this operation for these map types.
+ *
+ *             This command is only valid for the following map types:
+ *             * **BPF_MAP_TYPE_QUEUE**
+ *             * **BPF_MAP_TYPE_STACK**
+ *             * **BPF_MAP_TYPE_HASH**
+ *             * **BPF_MAP_TYPE_PERCPU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ *     Description
+ *             Freeze the permissions of the specified map.
+ *
+ *             Write permissions may be frozen by passing zero *flags*.
+ *             Upon success, no future syscall invocations may alter the
+ *             map state of *map_fd*. Write operations from eBPF programs
+ *             are still possible for a frozen map.
+ *
+ *             Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ *     Description
+ *             Fetch the next BPF Type Format (BTF) object currently loaded
+ *             into the kernel.
+ *
+ *             Looks for the BTF object with an id greater than *start_id*
+ *             and updates *next_id* on success. If no other BTF objects
+ *             remain with ids higher than *start_id*, returns -1 and sets
+ *             *errno* to **ENOENT**.
+ *
+ *     Return
+ *             Returns zero on success. On error, or when no id remains, -1
+ *             is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ *     Description
+ *             Iterate and fetch multiple elements in a map.
+ *
+ *             Two opaque values are used to manage batch operations,
+ *             *in_batch* and *out_batch*. Initially, *in_batch* must be set
+ *             to NULL to begin the batched operation. After each subsequent
+ *             **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ *             *out_batch* as the *in_batch* for the next operation to
+ *             continue iteration from the current point.
+ *
+ *             The *keys* and *values* are output parameters which must point
+ *             to memory large enough to hold *count* items based on the key
+ *             and value size of the map *map_fd*. The *keys* buffer must be
+ *             of *key_size* * *count*. The *values* buffer must be of
+ *             *value_size* * *count*.
+ *
+ *             The *elem_flags* argument may be specified as one of the
+ *             following:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up the value of a spin-locked map without
+ *                     returning the lock. This must be specified if the
+ *                     elements contain a spinlock.
+ *
+ *             On success, *count* elements from the map are copied into the
+ *             user buffer, with the keys copied into *keys* and the values
+ *             copied into the corresponding indices in *values*.
+ *
+ *             If an error is returned and *errno* is not **EFAULT**, *count*
+ *             is set to the number of successfully processed elements.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ *             May set *errno* to **ENOSPC** to indicate that *keys* or
+ *             *values* is too small to dump an entire bucket during
+ *             iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ *     Description
+ *             Iterate and delete all elements in a map.
+ *
+ *             This operation has the same behavior as
+ *             **BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ *             * Every element that is successfully returned is also deleted
+ *               from the map. This is at least *count* elements. Note that
+ *               *count* is both an input and an output parameter.
+ *             * Upon returning with *errno* set to **EFAULT**, up to
+ *               *count* elements may be deleted without returning the keys
+ *               and values of the deleted elements.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ *     Description
+ *             Update multiple elements in a map by *key*.
+ *
+ *             The *keys* and *values* are input parameters which must point
+ *             to memory large enough to hold *count* items based on the key
+ *             and value size of the map *map_fd*. The *keys* buffer must be
+ *             of *key_size* * *count*. The *values* buffer must be of
+ *             *value_size* * *count*.
+ *
+ *             Each element specified in *keys* is sequentially updated to the
+ *             value in the corresponding index in *values*. The *in_batch*
+ *             and *out_batch* parameters are ignored and should be zeroed.
+ *
+ *             The *elem_flags* argument should be specified as one of the
+ *             following:
+ *
+ *             **BPF_ANY**
+ *                     Create new elements or update existing elements.
+ *             **BPF_NOEXIST**
+ *                     Create new elements only if they do not exist.
+ *             **BPF_EXIST**
+ *                     Update existing elements.
+ *             **BPF_F_LOCK**
+ *                     Update spin_lock-ed map elements. This must be
+ *                     specified if the map value contains a spinlock.
+ *
+ *             On success, *count* elements from the map are updated.
+ *
+ *             If an error is returned and *errno* is not **EFAULT**, *count*
+ *             is set to the number of successfully processed elements.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ *             May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ *             **E2BIG**. **E2BIG** indicates that the number of elements in
+ *             the map reached the *max_entries* limit specified at map
+ *             creation time.
+ *
+ *             May set *errno* to one of the following error codes under
+ *             specific circumstances:
+ *
+ *             **EEXIST**
+ *                     If *flags* specifies **BPF_NOEXIST** and the element
+ *                     with *key* already exists in the map.
+ *             **ENOENT**
+ *                     If *flags* specifies **BPF_EXIST** and the element with
+ *                     *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ *     Description
+ *             Delete multiple elements in a map by *key*.
+ *
+ *             The *keys* parameter is an input parameter which must point
+ *             to memory large enough to hold *count* items based on the key
+ *             size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ *             Each element specified in *keys* is sequentially deleted. The
+ *             *in_batch*, *out_batch*, and *values* parameters are ignored
+ *             and should be zeroed.
+ *
+ *             The *elem_flags* argument may be specified as one of the
+ *             following:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up the value of a spin-locked map without
+ *                     returning the lock. This must be specified if the
+ *                     elements contain a spinlock.
+ *
+ *             On success, *count* elements from the map are deleted.
+ *
+ *             If an error is returned and *errno* is not **EFAULT**, *count*
+ *             is set to the number of successfully processed elements. If
+ *             *errno* is **EFAULT**, up to *count* elements may have been
+ *             deleted.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ *     Description
+ *             Attach an eBPF program to a *target_fd* at the specified
+ *             *attach_type* hook and return a file descriptor handle for
+ *             managing the link.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ *     Description
+ *             Update the eBPF program in the specified *link_fd* to
+ *             *new_prog_fd*.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ *     Description
+ *             Open a file descriptor for the eBPF Link corresponding to
+ *             *link_id*.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ *     Description
+ *             Fetch the next eBPF link currently loaded into the kernel.
+ *
+ *             Looks for the eBPF link with an id greater than *start_id*
+ *             and updates *next_id* on success. If no other eBPF links
+ *             remain with ids higher than *start_id*, returns -1 and sets
+ *             *errno* to **ENOENT**.
+ *
+ *     Return
+ *             Returns zero on success. On error, or when no id remains, -1
+ *             is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ *     Description
+ *             Enable eBPF runtime statistics gathering.
+ *
+ *             Runtime statistics gathering for the eBPF runtime is disabled
+ *             by default to minimize the corresponding performance overhead.
+ *             This command enables statistics globally.
+ *
+ *             Multiple programs may independently enable statistics.
+ *             After gathering the desired statistics, eBPF runtime statistics
+ *             may be disabled again by calling **close**\ (2) for the file
+ *             descriptor returned by this function. Statistics will only be
+ *             disabled system-wide when all outstanding file descriptors
+ *             returned by prior calls for this subcommand are closed.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ *     Description
+ *             Create an iterator on top of the specified *link_fd* (as
+ *             previously created using **BPF_LINK_CREATE**) and return a
+ *             file descriptor that can be used to trigger the iteration.
+ *
+ *             If the resulting file descriptor is pinned to the filesystem
+ *             using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ *             for that path will trigger the iterator to read kernel state
+ *             using the eBPF program attached to *link_fd*.
+ *
+ *     Return
+ *             A new file descriptor (a nonnegative integer), or -1 if an
+ *             error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ *     Description
+ *             Forcefully detach the specified *link_fd* from its
+ *             corresponding attachment point.
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ *     Description
+ *             Bind a map to the lifetime of an eBPF program.
+ *
+ *             The map identified by *map_fd* is bound to the program
+ *             identified by *prog_fd* and only released when *prog_fd* is
+ *             released. This may be used in cases where metadata should be
+ *             associated with a program which otherwise does not contain any
+ *             references to the map (for example, embedded in the eBPF
+ *             program instructions).
+ *
+ *     Return
+ *             Returns zero on success. On error, -1 is returned and *errno*
+ *             is set appropriately.
+ *
+ * NOTES
+ *     eBPF objects (maps and programs) can be shared between processes.
+ *
+ *     * After **fork**\ (2), the child inherits file descriptors
+ *       referring to the same eBPF objects.
+ *     * File descriptors referring to eBPF objects can be transferred over
+ *       **unix**\ (7) domain sockets.
+ *     * File descriptors referring to eBPF objects can be duplicated in the
+ *       usual way, using **dup**\ (2) and similar calls.
+ *     * File descriptors referring to eBPF objects can be pinned to the
+ *       filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ *     An eBPF object is deallocated only after all file descriptors referring
+ *     to the object have been closed and no references remain pinned to the
+ *     filesystem or attached (for example, bound to a program or device).
+ */
  enum bpf_cmd {
         BPF_MAP_CREATE,
         BPF_MAP_LOOKUP_ELEM,
@@ -100,6 +847,7 @@ enum bpf_cmd {
         BPF_PROG_ATTACH,
         BPF_PROG_DETACH,
         BPF_PROG_TEST_RUN,
+       BPF_PROG_RUN = BPF_PROG_TEST_RUN,
         BPF_PROG_GET_NEXT_ID,
         BPF_MAP_GET_NEXT_ID,
         BPF_PROG_GET_FD_BY_ID,
@@ -157,6 +905,8 @@ enum bpf_map_type {
         BPF_MAP_TYPE_STRUCT_OPS,
         BPF_MAP_TYPE_RINGBUF,
         BPF_MAP_TYPE_INODE_STORAGE,
+       BPF_MAP_TYPE_TASK_STORAGE,
+       BPF_MAP_TYPE_BLOOM_FILTER,
  };
  
  /* Note that tracing related programs such as
@@ -199,6 +949,7 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_EXT,
         BPF_PROG_TYPE_LSM,
         BPF_PROG_TYPE_SK_LOOKUP,
+       BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
  };
  
  enum bpf_attach_type {
@@ -240,6 +991,10 @@ enum bpf_attach_type {
         BPF_XDP_CPUMAP,
         BPF_SK_LOOKUP,
         BPF_XDP,
+       BPF_SK_SKB_VERDICT,
+       BPF_SK_REUSEPORT_SELECT,
+       BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+       BPF_PERF_EVENT,
         __MAX_BPF_ATTACH_TYPE
  };
  
@@ -253,6 +1008,7 @@ enum bpf_link_type {
         BPF_LINK_TYPE_ITER = 4,
         BPF_LINK_TYPE_NETNS = 5,
         BPF_LINK_TYPE_XDP = 6,
+       BPF_LINK_TYPE_PERF_EVENT = 7,
  
         MAX_BPF_LINK_TYPE,
  };
@@ -358,8 +1114,8 @@ enum bpf_link_type {
  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
   * the following extensions:
   *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
- * insn[0].imm:      map fd
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_[FD|IDX]
+ * insn[0].imm:      map fd or fd_idx
   * insn[1].imm:      0
   * insn[0].off:      0
   * insn[1].off:      0
@@ -367,15 +1123,19 @@ enum bpf_link_type {
   * verifier type:    CONST_PTR_TO_MAP
   */
  #define BPF_PSEUDO_MAP_FD      1
-/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd
+#define BPF_PSEUDO_MAP_IDX     5
+
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_[IDX_]VALUE
+ * insn[0].imm:      map fd or fd_idx
   * insn[1].imm:      offset into value
   * insn[0].off:      0
   * insn[1].off:      0
   * ldimm64 rewrite:  address of map[0]+offset
   * verifier type:    PTR_TO_MAP_VALUE
   */
-#define BPF_PSEUDO_MAP_VALUE   2
+#define BPF_PSEUDO_MAP_VALUE           2
+#define BPF_PSEUDO_MAP_IDX_VALUE       6
+
  /* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
   * insn[0].imm:      kernel btf id of VAR
   * insn[1].imm:      0
@@ -386,11 +1146,24 @@ enum bpf_link_type {
   *                   is struct/union.
   */
  #define BPF_PSEUDO_BTF_ID      3
+/* insn[0].src_reg:  BPF_PSEUDO_FUNC
+ * insn[0].imm:      insn offset to the func
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the function
+ * verifier type:    PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC                4
  
  /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
   * offset to another bpf function
   */
  #define BPF_PSEUDO_CALL                1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL  2
  
  /* flags for BPF_MAP_UPDATE_ELEM command */
  enum {
@@ -502,6 +1275,13 @@ union bpf_attr {
                                                    * struct stored as the
                                                    * map value
                                                    */
+               /* Any per-map-type extra fields
+                *
+                * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
+                * number of hash functions (if 0, the bloom filter will default
+                * to using 5 hash functions).
+                */
+               __u64   map_extra;
         };
  
         struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -556,7 +1336,14 @@ union bpf_attr {
                 __aligned_u64   line_info;      /* line info */
                 __u32           line_info_cnt;  /* number of bpf_line_info records */
                 __u32           attach_btf_id;  /* in-kernel BTF type id to attach to */
-               __u32           attach_prog_fd; /* 0 to attach to vmlinux */
+               union {
+                       /* valid prog_fd to attach to bpf prog */
+                       __u32           attach_prog_fd;
+                       /* or valid module BTF object fd or 0 to attach to vmlinux */
+                       __u32           attach_btf_obj_fd;
+               };
+               __u32           :32;            /* pad */
+               __aligned_u64   fd_array;       /* array of FDs */
         };
  
         struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -669,6 +1456,13 @@ union bpf_attr {
                                 __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
                                 __u32           iter_info_len;  /* iter_info length */
                         };
+                       struct {
+                               /* black box user-provided value passed through
+                                * to BPF program at the execution time and
+                                * accessible through bpf_get_attach_cookie() BPF helper
+                                */
+                               __u64           bpf_cookie;
+                       } perf_event;
                 };
         } link_create;
  
@@ -708,7 +1502,7 @@ union bpf_attr {
   * parsed and used to produce a manual page. The workflow is the following,
   * and requires the rst2man utility:
   *
- *     $ ./scripts/bpf_helpers_doc.py \
+ *     $ ./scripts/bpf_doc.py \
   *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
   *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
   *     $ man /tmp/bpf-helpers.7
@@ -843,7 +1637,7 @@ union bpf_attr {
   * u32 bpf_get_smp_processor_id(void)
   *     Description
   *             Get the SMP (symmetric multiprocessing) processor id. Note that
- *             all programs run with preemption disabled, which means that the
+ *             all programs run with migration disabled, which means that the
   *             SMP processor id is stable during all the execution of the
   *             program.
   *     Return
@@ -1644,22 +2438,30 @@ union bpf_attr {
   *             networking traffic statistics as it provides a global socket
   *             identifier that can be assumed unique.
   *     Return
- *             A 8-byte long non-decreasing number on success, or 0 if the
- *             socket field is missing inside *skb*.
+ *             An 8-byte long unique number on success, or 0 if the socket
+ *             field is missing inside *skb*.
   *
   * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
   *     Description
   *             Equivalent to bpf_get_socket_cookie() helper that accepts
   *             *skb*, but gets socket from **struct bpf_sock_addr** context.
   *     Return
- *             A 8-byte long non-decreasing number.
+ *             An 8-byte long unique number.
   *
   * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
   *     Description
   *             Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
   *             *skb*, but gets socket from **struct bpf_sock_ops** context.
   *     Return
- *             A 8-byte long non-decreasing number.
+ *             An 8-byte long unique number.
+ *
+ * u64 bpf_get_socket_cookie(struct sock *sk)
+ *     Description
+ *             Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
+ *             *sk*, but gets socket from a BTF **struct sock**. This helper
+ *             also works for sleepable programs.
+ *     Return
+ *             An 8-byte long unique number or 0 if *sk* is NULL.
   *
   * u32 bpf_get_socket_uid(struct sk_buff *skb)
   *     Return
@@ -1745,6 +2547,10 @@ union bpf_attr {
   *               Use with ENCAP_L3/L4 flags to further specify the tunnel
   *               type; *len* is the length of the inner MAC header.
   *
+ *             * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *               L2 type as Ethernet.
+ *
   *             A call to this helper is susceptible to change the underlying
   *             packet buffer. Therefore, at load time, all checks on pointers
   *             previously done by the verifier are invalidated and must be
@@ -1765,8 +2571,12 @@ union bpf_attr {
   *             The lower two bits of *flags* are used as the return code if
   *             the map lookup fails. This is so that the return value can be
   *             one of the XDP program return codes up to **XDP_TX**, as chosen
- *             by the caller. Any higher bits in the *flags* argument must be
- *             unset.
+ *             by the caller. The higher bits of *flags* can be set to
+ *             BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ *             With BPF_F_BROADCAST the packet will be broadcasted to all the
+ *             interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
+ *             interface will be excluded when broadcasting.
   *
   *             See also **bpf_redirect**\ (), which only supports redirecting
   *             to an ifindex, but doesn't require a map to do so.
@@ -2219,6 +3029,9 @@ union bpf_attr {
   *             * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
   *               packet is not forwarded or needs assist from full stack
   *
+ *             If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU
+ *             was exceeded and output params->mtu_result contains the MTU.
+ *
   * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
   *     Description
   *             Add an entry to, or update a sockhash *map* referencing sockets.
@@ -2442,7 +3255,7 @@ union bpf_attr {
   *             running simultaneously.
   *
   *             A user should care about the synchronization by himself.
- *             For example, by using the **BPF_STX_XADD** instruction to alter
+ *             For example, by using the **BPF_ATOMIC** instructions to alter
   *             the shared data.
   *     Return
   *             A pointer to the local storage area.
@@ -2450,7 +3263,7 @@ union bpf_attr {
   * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
   *     Description
   *             Select a **SO_REUSEPORT** socket from a
- *             **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ *             **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
   *             It checks the selected socket is matching the incoming
   *             request in the socket buffer.
   *     Return
@@ -2987,10 +3800,10 @@ union bpf_attr {
   *             string length is larger than *size*, just *size*-1 bytes are
   *             copied and the last byte is set to NUL.
   *
- *             On success, the length of the copied string is returned. This
- *             makes this helper useful in tracing programs for reading
- *             strings, and more importantly to get its length at runtime. See
- *             the following snippet:
+ *             On success, returns the number of bytes that were written,
+ *             including the terminal NUL. This makes this helper useful in
+ *             tracing programs for reading strings, and more importantly to
+ *             get its length at runtime. See the following snippet:
   *
   *             ::
   *
@@ -3018,7 +3831,7 @@ union bpf_attr {
   *             **->mm->env_start**: using this helper and the return value,
   *             one can quickly iterate at the right offset of the memory area.
   *     Return
- *             On success, the strictly positive length of the string,
+ *             On success, the strictly positive length of the output string,
   *             including the trailing NUL character. On error, a negative
   *             value.
   *
@@ -3241,7 +4054,7 @@ union bpf_attr {
   *             arguments. The *data* are a **u64** array and corresponding format string
   *             values are stored in the array. For strings and pointers where pointees
   *             are accessed, only the pointer values are stored in the *data* array.
- *             The *data_len* is the size of *data* in bytes.
+ *             The *data_len* is the size of *data* in bytes - must be a multiple of 8.
   *
   *             Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
   *             Reading kernel memory may fail due to either invalid address or
@@ -3310,12 +4123,20 @@ union bpf_attr {
   *             of new data availability is sent.
   *             If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
   *             of new data availability is sent unconditionally.
+ *             If **0** is specified in *flags*, an adaptive notification
+ *             of new data availability is sent.
+ *
+ *             An adaptive notification is a notification sent whenever the user-space
+ *             process has caught up and consumed all available payloads. In case the user-space
+ *             process is still processing a previous payload, then no notification is needed
+ *             as it will process the newly added payload automatically.
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
   * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
   *     Description
   *             Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ *             *flags* must be 0.
   *     Return
   *             Valid pointer with *size* bytes of memory available; NULL,
   *             otherwise.
@@ -3327,6 +4148,10 @@ union bpf_attr {
   *             of new data availability is sent.
   *             If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
   *             of new data availability is sent unconditionally.
+ *             If **0** is specified in *flags*, an adaptive notification
+ *             of new data availability is sent.
+ *
+ *             See 'bpf_ringbuf_output()' for the definition of adaptive notification.
   *     Return
   *             Nothing. Always succeeds.
   *
@@ -3337,6 +4162,10 @@ union bpf_attr {
   *             of new data availability is sent.
   *             If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
   *             of new data availability is sent unconditionally.
+ *             If **0** is specified in *flags*, an adaptive notification
+ *             of new data availability is sent.
+ *
+ *             See 'bpf_ringbuf_output()' for the definition of adaptive notification.
   *     Return
   *             Nothing. Always succeeds.
   *
@@ -3742,6 +4571,373 @@ union bpf_attr {
   *     Return
   *             The helper returns **TC_ACT_REDIRECT** on success or
   *             **TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *     Description
+ *             Get a bpf_local_storage from the *task*.
+ *
+ *             Logically, it could be thought of as getting the value from
+ *             a *map* with *task* as the **key**.  From this
+ *             perspective,  the usage is not much different from
+ *             **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *             helper enforces the key must be a task_struct and the map must also
+ *             be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *             Underneath, the value is stored locally at *task* instead of
+ *             the *map*.  The *map* is used as the bpf-local-storage
+ *             "type". The bpf-local-storage "type" (i.e. the *map*) is
+ *             searched against all bpf_local_storage residing at *task*.
+ *
+ *             An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *             used such that a new bpf_local_storage will be
+ *             created if one does not exist.  *value* can be used
+ *             together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *             the initial value of a bpf_local_storage.  If *value* is
+ *             **NULL**, the new bpf_local_storage will be zero initialized.
+ *     Return
+ *             A bpf_local_storage pointer is returned on success.
+ *
+ *             **NULL** if not found or there was an error in adding
+ *             a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *     Description
+ *             Delete a bpf_local_storage from a *task*.
+ *     Return
+ *             0 on success.
+ *
+ *             **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *     Description
+ *             Return a BTF pointer to the "current" task.
+ *             This pointer can also be used in helpers that accept an
+ *             *ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *     Return
+ *             Pointer to the current task.
+ *
+ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags)
+ *     Description
+ *             Set or clear certain options on *bprm*:
+ *
+ *             **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit
+ *             which sets the **AT_SECURE** auxv for glibc. The bit
+ *             is cleared if the flag is not specified.
+ *     Return
+ *             **-EINVAL** if invalid *flags* are passed, zero otherwise.
+ *
+ * u64 bpf_ktime_get_coarse_ns(void)
+ *     Description
+ *             Return a coarse-grained version of the time elapsed since
+ *             system boot, in nanoseconds. Does not include time the system
+ *             was suspended.
+ *
+ *             See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
+ *     Return
+ *             Current *ktime*.
+ *
+ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
+ *     Description
+ *             Returns the stored IMA hash of the *inode* (if it's available).
+ *             If the hash is larger than *size*, then only *size*
+ *             bytes will be copied to *dst*
+ *     Return
+ *             The **hash_algo** is returned on success,
+ *             **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if
+ *             invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ *     Description
+ *             If the given file represents a socket, returns the associated
+ *             socket.
+ *     Return
+ *             A pointer to a struct socket on success or NULL if the file is
+ *             not a socket.
+ *
+ * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
+ *     Description
+ *             Check packet size against exceeding MTU of net device (based
+ *             on *ifindex*).  This helper will likely be used in combination
+ *             with helpers that adjust/change the packet size.
+ *
+ *             The argument *len_diff* can be used for querying with a planned
+ *             size change. This allows to check MTU prior to changing packet
+ *             ctx. Providing a *len_diff* adjustment that is larger than the
+ *             actual packet size (resulting in negative packet size) will in
+ *             principle not exceed the MTU, which is why it is not considered
+ *             a failure.  Other BPF-helpers are needed for performing the
+ *             planned size change, so the responsibility for catching a
+ *             negative packet size belongs in those helpers.
+ *
+ *             Specifying *ifindex* zero means the MTU check is performed
+ *             against the current net device.  This is practical if this isn't
+ *             used prior to redirect.
+ *
+ *             On input *mtu_len* must be a valid pointer, else verifier will
+ *             reject BPF program.  If the value *mtu_len* is initialized to
+ *             zero then the ctx packet size is used.  When value *mtu_len* is
+ *             provided as input this specifies the L3 length that the MTU check
+ *             is done against. Remember XDP and TC length operate at L2, but
+ *             this value is L3 as this correlates to MTU and IP-header tot_len
+ *             values which are L3 (similar behavior as bpf_fib_lookup).
+ *
+ *             The Linux kernel route table can configure MTUs on a more
+ *             specific per route level, which is not provided by this helper.
+ *             For route level MTU checks use the **bpf_fib_lookup**\ ()
+ *             helper.
+ *
+ *             *ctx* is either **struct xdp_md** for XDP programs or
+ *             **struct sk_buff** for tc cls_act programs.
+ *
+ *             The *flags* argument can be a combination of one or more of the
+ *             following values:
+ *
+ *             **BPF_MTU_CHK_SEGS**
+ *                     This flag will only work for *ctx* **struct sk_buff**.
+ *                     If packet context contains extra packet segment buffers
+ *                     (often known as GSO skb), then MTU check is harder to
+ *                     check at this point, because in transmit path it is
+ *                     possible for the skb packet to get re-segmented
+ *                     (depending on net device features).  This could still be
+ *                     a MTU violation, so this flag enables performing MTU
+ *                     check against segments, with a different violation
+ *                     return code to tell it apart. Check cannot use len_diff.
+ *
+ *             On return *mtu_len* pointer contains the MTU value of the net
+ *             device.  Remember the net device configured MTU is the L3 size,
+ *             which is returned here and XDP and TC length operate at L2.
+ *             The helper takes this into account for you, but remember it
+ *             when using the MTU value in your BPF-code.
+ *
+ *     Return
+ *             * 0 on success, and populate MTU value in *mtu_len* pointer.
+ *
+ *             * < 0 if any input argument is invalid (*mtu_len* not updated)
+ *
+ *             MTU violations return positive values, but also populate MTU
+ *             value in *mtu_len* pointer, as this can be needed for
+ *             implementing PMTU handling:
+ *
+ *             * **BPF_MTU_CHK_RET_FRAG_NEEDED**
+ *             * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
+ *
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ *     Description
+ *             For each element in **map**, call **callback_fn** function with
+ *             **map**, **callback_ctx** and other map-specific parameters.
+ *             The **callback_fn** should be a static function and
+ *             the **callback_ctx** should be a pointer to the stack.
+ *             The **flags** is used to control certain aspects of the helper.
+ *             Currently, the **flags** must be 0.
+ *
+ *             The following are a list of supported map types and their
+ *             respective expected callback signatures:
+ *
+ *             BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ *             BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ *             BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ *             long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ *             For per_cpu maps, the map_value is the value on the cpu where the
+ *             bpf_prog is running.
+ *
+ *             If **callback_fn** returns 0, the helper will continue to the next
+ *             element. If return value is 1, the helper will skip the rest of
+ *             elements and return. Other return values are not used now.
+ *
+ *     Return
+ *             The number of traversed map elements for success, **-EINVAL** for
+ *             invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ *     Description
+ *             Outputs a string into the **str** buffer of size **str_size**
+ *             based on a format string stored in a read-only map pointed by
+ *             **fmt**.
+ *
+ *             Each format specifier in **fmt** corresponds to one u64 element
+ *             in the **data** array. For strings and pointers where pointees
+ *             are accessed, only the pointer values are stored in the *data*
+ *             array. The *data_len* is the size of *data* in bytes - must be
+ *             a multiple of 8.
+ *
+ *             Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ *             memory. Reading kernel memory may fail due to either invalid
+ *             address or valid address but requiring a major memory fault. If
+ *             reading kernel memory fails, the string for **%s** will be an
+ *             empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ *             Not returning error to bpf program is consistent with what
+ *             **bpf_trace_printk**\ () does for now.
+ *
+ *     Return
+ *             The strictly positive length of the formatted string, including
+ *             the trailing zero character. If the return value is greater than
+ *             **str_size**, **str** contains a truncated string, guaranteed to
+ *             be zero-terminated except when **str_size** is 0.
+ *
+ *             Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ *     Description
+ *             Execute bpf syscall with given arguments.
+ *     Return
+ *             A syscall result.
+ *
+ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
+ *     Description
+ *             Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
+ *     Return
+ *             Returns btf_id and btf_obj_fd in lower and upper 32 bits.
+ *
+ * long bpf_sys_close(u32 fd)
+ *     Description
+ *             Execute close syscall for given FD.
+ *     Return
+ *             A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ *     Description
+ *             Initialize the timer.
+ *             First 4 bits of *flags* specify clockid.
+ *             Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ *             All other bits of *flags* are reserved.
+ *             The verifier will reject the program if *timer* is not from
+ *             the same *map*.
+ *     Return
+ *             0 on success.
+ *             **-EBUSY** if *timer* is already initialized.
+ *             **-EINVAL** if invalid *flags* are passed.
+ *             **-EPERM** if *timer* is in a map that doesn't have any user references.
+ *             The user space should either hold a file descriptor to a map with timers
+ *             or pin such map in bpffs. When map is unpinned or file descriptor is
+ *             closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ *     Description
+ *             Configure the timer to call *callback_fn* static function.
+ *     Return
+ *             0 on success.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ *             **-EPERM** if *timer* is in a map that doesn't have any user references.
+ *             The user space should either hold a file descriptor to a map with timers
+ *             or pin such map in bpffs. When map is unpinned or file descriptor is
+ *             closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ *     Description
+ *             Set timer expiration N nanoseconds from the current time. The
+ *             configured callback will be invoked in soft irq context on some cpu
+ *             and will not repeat unless another bpf_timer_start() is made.
+ *             In such case the next invocation can migrate to a different cpu.
+ *             Since struct bpf_timer is a field inside map element the map
+ *             owns the timer. The bpf_timer_set_callback() will increment refcnt
+ *             of BPF program to make sure that callback_fn code stays valid.
+ *             When user space reference to a map reaches zero all timers
+ *             in a map are cancelled and corresponding program's refcnts are
+ *             decremented. This is done to make sure that Ctrl-C of a user
+ *             process doesn't leave any timers running. If map is pinned in
+ *             bpffs the callback_fn can re-arm itself indefinitely.
+ *             bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ *             cancel and free the timer in the given map element.
+ *             The map can contain timers that invoke callback_fn-s from different
+ *             programs. The same callback_fn can serve different timers from
+ *             different maps if key/value layout matches across maps.
+ *             Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ *     Return
+ *             0 on success.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ *             or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ *     Description
+ *             Cancel the timer and wait for callback_fn to finish if it was running.
+ *     Return
+ *             0 if the timer was not active.
+ *             1 if the timer was active.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ *             **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ *             own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ *     Description
+ *             Get address of the traced function (for tracing and kprobe programs).
+ *     Return
+ *             Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ *     Description
+ *             Get bpf_cookie value provided (optionally) during the program
+ *             attachment. It might be different for each individual
+ *             attachment, even if BPF program itself is the same.
+ *             Expects BPF program context *ctx* as a first argument.
+ *
+ *             Supported for the following program types:
+ *                     - kprobe/uprobe;
+ *                     - tracepoint;
+ *                     - perf_event.
+ *     Return
+ *             Value specified by user at BPF link creation/attachment time
+ *             or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ *     Description
+ *             Get the struct pt_regs associated with **task**.
+ *     Return
+ *             A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *     Description
+ *             Get branch trace from hardware engines like Intel LBR. The
+ *             hardware engine is stopped shortly after the helper is
+ *             called. Therefore, the user needs to filter branch entries
+ *             based on the actual use case. To capture branch trace
+ *             before the trigger point of the BPF program, the helper
+ *             should be called at the beginning of the BPF program.
+ *
+ *             The data is stored as struct perf_branch_entry into output
+ *             buffer *entries*. *size* is the size of *entries* in bytes.
+ *             *flags* is reserved for now and must be zero.
+ *
+ *     Return
+ *             On success, number of bytes written to *entries*. On error, a
+ *             negative value.
+ *
+ *             **-EINVAL** if *flags* is not zero.
+ *
+ *             **-ENOENT** if architecture does not support branch records.
+ *
+ * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *     Description
+ *             Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *             to format and can handle more format args as a result.
+ *
+ *             Arguments are to be used as in **bpf_seq_printf**\ () helper.
+ *     Return
+ *             The number of bytes written to the buffer, or a negative error
+ *             in case of failure.
+ *
+ * struct unix_sock *bpf_skc_to_unix_sock(void *sk)
+ *     Description
+ *             Dynamically cast a *sk* pointer to a *unix_sock* pointer.
+ *     Return
+ *             *sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res)
+ *     Description
+ *             Get the address of a kernel symbol, returned in *res*. *res* is
+ *             set to 0 if the symbol is not found.
+ *     Return
+ *             On success, zero. On error, a negative value.
+ *
+ *             **-EINVAL** if *flags* is not zero.
+ *
+ *             **-EINVAL** if string *name* is not the same size as *name_sz*.
+ *
+ *             **-ENOENT** if symbol is not found.
+ *
+ *             **-EPERM** if caller does not have permission to obtain kernel address.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -3897,9 +5093,33 @@ union bpf_attr {
         FN(seq_printf_btf),             \
         FN(skb_cgroup_classid),         \
         FN(redirect_neigh),             \
-       FN(bpf_per_cpu_ptr),            \
-       FN(bpf_this_cpu_ptr),           \
+       FN(per_cpu_ptr),                \
+       FN(this_cpu_ptr),               \
         FN(redirect_peer),              \
+       FN(task_storage_get),           \
+       FN(task_storage_delete),        \
+       FN(get_current_task_btf),       \
+       FN(bprm_opts_set),              \
+       FN(ktime_get_coarse_ns),        \
+       FN(ima_inode_hash),             \
+       FN(sock_from_file),             \
+       FN(check_mtu),                  \
+       FN(for_each_map_elem),          \
+       FN(snprintf),                   \
+       FN(sys_bpf),                    \
+       FN(btf_find_by_name_kind),      \
+       FN(sys_close),                  \
+       FN(timer_init),                 \
+       FN(timer_set_callback),         \
+       FN(timer_start),                \
+       FN(timer_cancel),               \
+       FN(get_func_ip),                \
+       FN(get_attach_cookie),          \
+       FN(task_pt_regs),               \
+       FN(get_branch_snapshot),        \
+       FN(trace_vprintk),              \
+       FN(skc_to_unix_sock),           \
+       FN(kallsyms_lookup_name),       \
         /* */
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -3993,6 +5213,7 @@ enum {
         BPF_F_ADJ_ROOM_ENCAP_L4_GRE     = (1ULL << 3),
         BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
         BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
+       BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
  };
  
  enum {
@@ -4071,6 +5292,17 @@ enum bpf_lwt_encap_mode {
         BPF_LWT_ENCAP_IP,
  };
  
+/* Flags for bpf_bprm_opts_set helper */
+enum {
+       BPF_F_BPRM_SECUREEXEC   = (1ULL << 0),
+};
+
+/* Flags for bpf_redirect_map helper */
+enum {
+       BPF_F_BROADCAST         = (1ULL << 3),
+       BPF_F_EXCLUDE_INGRESS   = (1ULL << 4),
+};
+
  #define __bpf_md_ptr(type, name)       \
  union {                                        \
         type name;                      \
@@ -4117,6 +5349,8 @@ struct __sk_buff {
         __u32 gso_segs;
         __bpf_md_ptr(struct bpf_sock *, sk);
         __u32 gso_size;
+       __u32 :32;              /* Padding, future use. */
+       __u64 hwtstamp;
  };
  
  struct bpf_tunnel_key {
@@ -4355,6 +5589,20 @@ struct sk_reuseport_md {
         __u32 ip_protocol;      /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
         __u32 bind_inany;       /* Is sock bound to an INANY address? */
         __u32 hash;             /* A hash of the packet 4 tuples */
+       /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+        * new incoming connection request (e.g. selecting a listen sk for
+        * the received SYN in the TCP case).  reuse->sk is one of the sk
+        * in the reuseport group. The bpf prog can use reuse->sk to learn
+        * the local listening ip/port without looking into the skb.
+        *
+        * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+        * reuse->migrating_sk is the socket that needs to be migrated
+        * to another listening socket.  migrating_sk could be a fullsock
+        * sk that is fully established or a reqsk that is in-the-middle
+        * of 3-way handshake.
+        */
+       __bpf_md_ptr(struct bpf_sock *, sk);
+       __bpf_md_ptr(struct bpf_sock *, migrating_sk);
  };
  
  #define BPF_TAG_SIZE   8
@@ -4395,6 +5643,8 @@ struct bpf_prog_info {
         __aligned_u64 prog_tags;
         __u64 run_time_ns;
         __u64 run_cnt;
+       __u64 recursion_misses;
+       __u32 verified_insns;
  } __attribute__((aligned(8)));
  
  struct bpf_map_info {
@@ -4412,12 +5662,17 @@ struct bpf_map_info {
         __u32 btf_id;
         __u32 btf_key_type_id;
         __u32 btf_value_type_id;
+       __u32 :32;      /* alignment pad */
+       __u64 map_extra;
  } __attribute__((aligned(8)));
  
  struct bpf_btf_info {
         __aligned_u64 btf;
         __u32 btf_size;
         __u32 id;
+       __aligned_u64 name;
+       __u32 name_len;
+       __u32 kernel_btf;
  } __attribute__((aligned(8)));
  
  struct bpf_link_info {
@@ -4431,6 +5686,8 @@ struct bpf_link_info {
                 } raw_tracepoint;
                 struct {
                         __u32 attach_type;
+                       __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+                       __u32 target_btf_id; /* BTF type id inside the object */
                 } tracing;
                 struct {
                         __u64 cgroup_id;
@@ -4872,9 +6129,13 @@ struct bpf_fib_lookup {
         __be16  sport;
         __be16  dport;
  
-       /* total length of packet from network header - used for MTU check */
-       __u16   tot_len;
+       union { /* used for MTU check */
+               /* input to lookup */
+               __u16   tot_len; /* L3 length from network hdr (iph->tot_len) */
  
+               /* output: MTU value */
+               __u16   mtu_result;
+       };
         /* input: L3 device index for lookup
          * output: device index from FIB lookup
          */
@@ -4920,6 +6181,17 @@ struct bpf_redir_neigh {
         };
  };
  
+/* bpf_check_mtu flags */
+enum  bpf_check_mtu_flags {
+       BPF_MTU_CHK_SEGS  = (1U << 0),
+};
+
+enum bpf_check_mtu_ret {
+       BPF_MTU_CHK_RET_SUCCESS,      /* check and lookup successful */
+       BPF_MTU_CHK_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+       BPF_MTU_CHK_RET_SEGS_TOOBIG,  /* GSO re-segmentation needed to fwd */
+};
+
  enum bpf_task_fd_type {
         BPF_FD_TYPE_RAW_TRACEPOINT,     /* tp name */
         BPF_FD_TYPE_TRACEPOINT,         /* tp name */
@@ -4979,6 +6251,11 @@ struct bpf_spin_lock {
         __u32   val;
  };
  
+struct bpf_timer {
+       __u64 :64;
+       __u64 :64;
+} __attribute__((aligned(8)));
+
  struct bpf_sysctl {
         __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
                                  * Allows 1,2,4-byte read, but no write.
@@ -5006,7 +6283,10 @@ struct bpf_pidns_info {
  
  /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
  struct bpf_sk_lookup {
-       __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+       union {
+               __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+               __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+       };
  
         __u32 family;           /* Protocol family (AF_INET, AF_INET6) */
         __u32 protocol;         /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/src/shared/linux/bpf_insn.h b/src/shared/linux/bpf_insn.h

index c459c03c519dacea609a5f09e6d483ddc6eb036b..92ec06b0e6c197a603baa281ad91ad9e5f6573cd 100644 (file)
--- a/src/shared/linux/bpf_insn.h
+++ b/src/shared/linux/bpf_insn.h
@@ -134,15 +134,31 @@ struct bpf_insn;
                 .off   = OFF,                                   \
                 .imm   = 0 })
  
-/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
-
-#define BPF_STX_XADD(SIZE, DST, SRC, OFF)                      \
-       ((struct bpf_insn) {                                    \
-               .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,   \
+/*
+ * Atomic operations:
+ *
+ *   BPF_ADD                  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_AND                  *(uint *) (dst_reg + off16) &= src_reg
+ *   BPF_OR                   *(uint *) (dst_reg + off16) |= src_reg
+ *   BPF_XOR                  *(uint *) (dst_reg + off16) ^= src_reg
+ *   BPF_ADD | BPF_FETCH      src_reg = atomic_fetch_add(dst_reg + off16, src_reg);
+ *   BPF_AND | BPF_FETCH      src_reg = atomic_fetch_and(dst_reg + off16, src_reg);
+ *   BPF_OR | BPF_FETCH       src_reg = atomic_fetch_or(dst_reg + off16, src_reg);
+ *   BPF_XOR | BPF_FETCH      src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
+ *   BPF_XCHG                 src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ *   BPF_CMPXCHG              r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ */
+
+#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF)                 \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
                 .dst_reg = DST,                                 \
                 .src_reg = SRC,                                 \
                 .off   = OFF,                                   \
-               .imm   = 0 })
+               .imm   = OP })
+
+/* Legacy alias */
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF)
  
  /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
  
diff --git a/src/shared/linux/dm-ioctl.h b/src/shared/linux/dm-ioctl.h

index ab312e13fbcb9cc8afe6913905f3a1edbc02430a..a6cc1cc40f7944a9d0f08ab7c143c321df25d194 100644 (file)
--- a/src/shared/linux/dm-ioctl.h
+++ b/src/shared/linux/dm-ioctl.h
@@ -193,8 +193,22 @@ struct dm_name_list {
         __u32 next;             /* offset to the next record from
                                    the _start_ of this */
         char name[0];
+
+       /*
+        * The following members can be accessed by taking a pointer that
+        * points immediately after the terminating zero character in "name"
+        * and aligning this pointer to next 8-byte boundary.
+        * Uuid is present if the flag DM_NAME_LIST_FLAG_HAS_UUID is set.
+        *
+        * __u32 event_nr;
+        * __u32 flags;
+        * char uuid[0];
+        */
  };
  
+#define DM_NAME_LIST_FLAG_HAS_UUID             1
+#define DM_NAME_LIST_FLAG_DOESNT_HAVE_UUID     2
+
  /*
   * Used to retrieve the target versions
   */
@@ -274,7 +288,7 @@ enum {
  #define DM_VERSION_MAJOR       4
  #define DM_VERSION_MINOR       27
  #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2020-10-01)"
+#define DM_VERSION_EXTRA       "-ioctl (2021-03-22)"
  
  /* Status bits */
  #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
@@ -362,4 +376,10 @@ enum {
   */
  #define DM_INTERNAL_SUSPEND_FLAG       (1 << 18) /* Out */
  
+/*
+ * If set, returns, in the input buffer passed by userspace, the raw table
+ * information that would be measured by the IMA subsystem on device state change.
+ */
+#define DM_IMA_MEASUREMENT_FLAG        (1 << 19) /* In */
+
  #endif                         /* _LINUX_DM_IOCTL_H */
diff --git a/src/shared/linux/ethtool.h b/src/shared/linux/ethtool.h

index 974d4292e7d91ebdef2c594d8812e6aeb83d7b12..cf20b6dba5373ff6553dc63b311e1b77e089d439 100644 (file)
--- a/src/shared/linux/ethtool.h
+++ b/src/shared/linux/ethtool.h
@@ -14,7 +14,7 @@
  #ifndef _UAPI_LINUX_ETHTOOL_H
  #define _UAPI_LINUX_ETHTOOL_H
  
-#include <linux/kernel.h>
+#include <linux/const.h>
  #include <linux/types.h>
  #include <linux/if_ether.h>
  
@@ -30,6 +30,14 @@
   * have the same layout for 32-bit and 64-bit userland.
   */
  
+/* Note on reserved space.
+ * Reserved fields must not be accessed directly by user space because
+ * they may be replaced by a different field in the future. They must
+ * be initialized to zero before making the request, e.g. via memset
+ * of the entire structure or implicitly by not being set in a structure
+ * initializer.
+ */
+
  /**
   * struct ethtool_cmd - DEPRECATED, link control and status
   * This structure is DEPRECATED, please use struct ethtool_link_settings.
@@ -71,6 +79,7 @@
   *     and other link features that the link partner advertised
   *     through autonegotiation; 0 if unknown or not applicable.
   *     Read-only.
+ * @reserved: Reserved for future use; see the note on reserved space.
   *
   * The link speed in Mbps is split between @speed and @speed_hi.  Use
   * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to
@@ -159,6 +168,7 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep)
   * @bus_info: Device bus address.  This should match the dev_name()
   *     string for the underlying bus device, if there is one.  May be
   *     an empty string.
+ * @reserved2: Reserved for future use; see the note on reserved space.
   * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
   *     %ETHTOOL_SPFLAGS commands; also the number of strings in the
   *     %ETH_SS_PRIV_FLAGS set
@@ -227,7 +237,7 @@ enum tunable_id {
         ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
         /*
          * Add your fresh new tunable attribute above and remember to update
-        * tunable_strings[] in net/core/ethtool.c
+        * tunable_strings[] in net/ethtool/common.c
          */
         __ETHTOOL_TUNABLE_COUNT,
  };
@@ -291,7 +301,7 @@ enum phy_tunable_id {
         ETHTOOL_PHY_EDPD,
         /*
          * Add your fresh new phy tunable attribute above and remember to update
-        * phy_tunable_strings[] in net/core/ethtool.c
+        * phy_tunable_strings[] in net/ethtool/common.c
          */
         __ETHTOOL_PHY_TUNABLE_COUNT,
  };
@@ -360,6 +370,7 @@ struct ethtool_eeprom {
   * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting
   *     its tx lpi (after reaching 'idle' state). Effective only when eee
   *     was negotiated and tx_lpi_enabled was set.
+ * @reserved: Reserved for future use; see the note on reserved space.
   */
  struct ethtool_eee {
         __u32   cmd;
@@ -378,6 +389,7 @@ struct ethtool_eee {
   * @cmd: %ETHTOOL_GMODULEINFO
   * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx
   * @eeprom_len: Length of the eeprom
+ * @reserved: Reserved for future use; see the note on reserved space.
   *
   * This structure is used to return the information to
   * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM.
@@ -583,9 +595,7 @@ struct ethtool_pauseparam {
         __u32   tx_pause;
  };
  
-/**
- * enum ethtool_link_ext_state - link extended state
- */
+/* Link extended state */
  enum ethtool_link_ext_state {
         ETHTOOL_LINK_EXT_STATE_AUTONEG,
         ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
@@ -597,12 +607,10 @@ enum ethtool_link_ext_state {
         ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
         ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
         ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+       ETHTOOL_LINK_EXT_STATE_MODULE,
  };
  
-/**
- * enum ethtool_link_ext_substate_autoneg - more information in addition to
- * ETHTOOL_LINK_EXT_STATE_AUTONEG.
- */
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */
  enum ethtool_link_ext_substate_autoneg {
         ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1,
         ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED,
@@ -612,9 +620,7 @@ enum ethtool_link_ext_substate_autoneg {
         ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD,
  };
  
-/**
- * enum ethtool_link_ext_substate_link_training - more information in addition to
- * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
   */
  enum ethtool_link_ext_substate_link_training {
         ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1,
@@ -623,9 +629,7 @@ enum ethtool_link_ext_substate_link_training {
         ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT,
  };
  
-/**
- * enum ethtool_link_ext_substate_logical_mismatch - more information in addition
- * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
   */
  enum ethtool_link_ext_substate_link_logical_mismatch {
         ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1,
@@ -635,24 +639,26 @@ enum ethtool_link_ext_substate_link_logical_mismatch {
         ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED,
  };
  
-/**
- * enum ethtool_link_ext_substate_bad_signal_integrity - more information in
- * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
   */
  enum ethtool_link_ext_substate_bad_signal_integrity {
         ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
         ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+       ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST,
+       ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS,
  };
  
-/**
- * enum ethtool_link_ext_substate_cable_issue - more information in
- * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE.
- */
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */
  enum ethtool_link_ext_substate_cable_issue {
         ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1,
         ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
  };
  
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */
+enum ethtool_link_ext_substate_module {
+       ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1,
+};
+
  #define ETH_GSTRING_LEN                32
  
  /**
@@ -665,6 +671,7 @@ enum ethtool_link_ext_substate_cable_issue {
   *     now deprecated
   * @ETH_SS_FEATURES: Device feature names
   * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
+ * @ETH_SS_TUNABLES: tunable names
   * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
   * @ETH_SS_PHY_TUNABLES: PHY tunable names
   * @ETH_SS_LINK_MODES: link mode names
@@ -674,6 +681,13 @@ enum ethtool_link_ext_substate_cable_issue {
   * @ETH_SS_TS_TX_TYPES: timestamping Tx types
   * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters
   * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
+ * @ETH_SS_STATS_STD: standardized stats
+ * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics
+ * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
+ * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
+ * @ETH_SS_STATS_RMON: names of RMON statistics
+ *
+ * @ETH_SS_COUNT: number of defined string sets
   */
  enum ethtool_stringset {
         ETH_SS_TEST             = 0,
@@ -692,11 +706,39 @@ enum ethtool_stringset {
         ETH_SS_TS_TX_TYPES,
         ETH_SS_TS_RX_FILTERS,
         ETH_SS_UDP_TUNNEL_TYPES,
+       ETH_SS_STATS_STD,
+       ETH_SS_STATS_ETH_PHY,
+       ETH_SS_STATS_ETH_MAC,
+       ETH_SS_STATS_ETH_CTRL,
+       ETH_SS_STATS_RMON,
  
         /* add new constants above here */
         ETH_SS_COUNT
  };
  
+/**
+ * enum ethtool_module_power_mode_policy - plug-in module power mode policy
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host
+ *     to high power mode when the first port using it is put administratively
+ *     up and to low power mode when the last port using it is put
+ *     administratively down.
+ */
+enum ethtool_module_power_mode_policy {
+       ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1,
+       ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO,
+};
+
+/**
+ * enum ethtool_module_power_mode - plug-in module power mode
+ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode.
+ */
+enum ethtool_module_power_mode {
+       ETHTOOL_MODULE_POWER_MODE_LOW = 1,
+       ETHTOOL_MODULE_POWER_MODE_HIGH,
+};
+
  /**
   * struct ethtool_gstrings - string set for data tagging
   * @cmd: Command number = %ETHTOOL_GSTRINGS
@@ -719,6 +761,7 @@ struct ethtool_gstrings {
  /**
   * struct ethtool_sset_info - string set information
   * @cmd: Command number = %ETHTOOL_GSSET_INFO
+ * @reserved: Reserved for future use; see the note on reserved space.
   * @sset_mask: On entry, a bitmask of string sets to query, with bits
   *     numbered according to &enum ethtool_stringset.  On return, a
   *     bitmask of those string sets queried that are supported.
@@ -763,6 +806,7 @@ enum ethtool_test_flags {
   * @flags: A bitmask of flags from &enum ethtool_test_flags.  Some
   *     flags may be set by the user on entry; others may be set by
   *     the driver on return.
+ * @reserved: Reserved for future use; see the note on reserved space.
   * @len: On return, the number of test results
   * @data: Array of test results
   *
@@ -963,6 +1007,7 @@ union ethtool_flow_union {
   * @vlan_etype: VLAN EtherType
   * @vlan_tci: VLAN tag control information
   * @data: user defined data
+ * @padding: Reserved for future use; see the note on reserved space.
   *
   * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT
   * is set in &struct ethtool_rx_flow_spec @flow_type.
@@ -1138,7 +1183,8 @@ struct ethtool_rxfh_indir {
   *     hardware hash key.
   * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
   *     Valid values are one of the %ETH_RSS_HASH_*.
- * @rsvd:      Reserved for future extensions.
+ * @rsvd8: Reserved for future use; see the note on reserved space.
+ * @rsvd32: Reserved for future use; see the note on reserved space.
   * @rss_config: RX ring/queue index for each hash value i.e., indirection table
   *     of @indir_size __u32 elements, followed by hash key of @key_size
   *     bytes.
@@ -1306,7 +1352,9 @@ struct ethtool_sfeatures {
   * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
   * @phc_index: device index of the associated PHC, or -1 if there is none
   * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
+ * @tx_reserved: Reserved for future use; see the note on reserved space.
   * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
+ * @rx_reserved: Reserved for future use; see the note on reserved space.
   *
   * The bits in the 'tx_types' and 'rx_filters' fields correspond to
   * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values,
@@ -1380,15 +1428,33 @@ struct ethtool_per_queue_op {
  };
  
  /**
- * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * struct ethtool_fecparam - Ethernet Forward Error Correction parameters
   * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
- * @active_fec: FEC mode which is active on porte
- * @fec: Bitmask of supported/configured FEC modes
- * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ * @active_fec: FEC mode which is active on the port, single bit set, GET only.
+ * @fec: Bitmask of configured FEC modes.
+ * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
   *
- * Drivers should reject a non-zero setting of @autoneg when
- * autoneogotiation is disabled (or not supported) for the link.
+ * Note that @reserved was never validated on input and ethtool user space
+ * left it uninitialized when calling SET. Hence going forward it can only be
+ * used to return a value to userspace with GET.
+ *
+ * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS.
+ * FEC settings are configured by link autonegotiation whenever it's enabled.
+ * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode.
+ *
+ * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings.
+ * It is recommended that drivers only accept a single bit set in @fec.
+ * When multiple bits are set in @fec drivers may pick mode in an implementation
+ * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other
+ * FEC modes, because it's unclear whether in this case other modes constrain
+ * AUTO or are independent choices.
+ * Drivers must reject SET requests if they support none of the requested modes.
+ *
+ * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead
+ * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM.
   *
+ * See enum ethtool_fec_config_bits for definition of valid bits for both
+ * @fec and @active_fec.
   */
  struct ethtool_fecparam {
         __u32   cmd;
@@ -1400,11 +1466,16 @@ struct ethtool_fecparam {
  
  /**
   * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
- * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
- * @ETHTOOL_FEC_OFF: No FEC Mode
- * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not
+ *                     be used together with other bits. GET only.
+ * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually
+ *                     based on link mode and SFP parameters read from module's
+ *                     EEPROM. This bit does _not_ mean autonegotiation.
+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode
+ * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
+ *                     Consortium)
   */
  enum ethtool_fec_config_bits {
         ETHTOOL_FEC_NONE_BIT,
@@ -1962,6 +2033,11 @@ enum ethtool_reset_flags {
   *     autonegotiation; 0 if unknown or not applicable.  Read-only.
   * @transceiver: Used to distinguish different possible PHY types,
   *     reported consistently by PHYLIB.  Read-only.
+ * @master_slave_cfg: Master/slave port mode.
+ * @master_slave_state: Master/slave port state.
+ * @reserved: Reserved for future use; see the note on reserved space.
+ * @reserved1: Reserved for future use; see the note on reserved space.
+ * @link_mode_masks: Variable length bitmaps.
   *
   * If autonegotiation is disabled, the speed and @duplex represent the
   * fixed link mode and are writable if the driver supports multiple
author	Yu Watanabe <watanabe.yu+github@gmail.com>
	Fri, 24 Dec 2021 02:23:11 +0000 (11:23 +0900)
committer	Yu Watanabe <watanabe.yu+github@gmail.com>
	Sat, 25 Dec 2021 06:34:02 +0000 (15:34 +0900)
src/shared/linux/bpf.h		patch \| blob \| blame \| history
src/shared/linux/bpf_insn.h		patch \| blob \| blame \| history
src/shared/linux/dm-ioctl.h		patch \| blob \| blame \| history
src/shared/linux/ethtool.h		patch \| blob \| blame \| history