bpf-helpers.7: Refresh page

[thirdparty/man-pages.git] / man7 / bpf-helpers.7
diff --git a/man7/bpf-helpers.7 b/man7/bpf-helpers.7

index 26ddf83692d7a44758f02e3e1cf6af7dcf52478d..b4236f177b4d38f4c63bfcd90ee07e8e5681850c 100644 (file)
--- a/man7/bpf-helpers.7
+++ b/man7/bpf-helpers.7
@@ -27,18 +27,18 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
  .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
  .in \\n[rst2man-indent\\n[rst2man-indent-level]]u
  ..
-.TH "BPF-HELPERS" 7 "2023-04-11" "Linux v6.2"
+.TH "BPF-HELPERS" 7 "2023-11-10" "Linux v6.8"
  .SH NAME
  BPF-HELPERS \- list of eBPF helper functions
  .\" Copyright (C) All BPF authors and contributors from 2014 to present.
  .
  .\" See git log include/uapi/linux/bpf.h in kernel tree for details.
  .
-.\"
+.\" 
  .
-.\" SPDX-License-Identifier:  Linux-man-pages-copyleft
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
  .
-.\"
+.\" 
  .
  .\" Please do not edit this file. It was generated from the documentation
  .
@@ -156,27 +156,25 @@ Current \fIktime\fP\&.
  .B Description
  This helper is a \(dqprintk()\-like\(dq facility for debugging. It
  prints a message defined by format \fIfmt\fP (of size \fIfmt_size\fP)
-to file \fI/sys/kernel/debug/tracing/trace\fP from DebugFS, if
+to file \fI/sys/kernel/tracing/trace\fP from TraceFS, if
  available. It can take up to three additional \fBu64\fP
  arguments (as with any eBPF helper, the total number of arguments is
  limited to five).
  .sp
  Each time the helper is called, it appends a line to the trace.
-Lines are discarded while \fI/sys/kernel/debug/tracing/trace\fP is
-open, use \fI/sys/kernel/debug/tracing/trace_pipe\fP to avoid this.
+Lines are discarded while \fI/sys/kernel/tracing/trace\fP is
+open, use \fI/sys/kernel/tracing/trace_pipe\fP to avoid this.
  The format of the trace is customizable, and the exact output
  one will get depends on the options set in
-\fI/sys/kernel/debug/tracing/trace_options\fP (see also the
+\fI/sys/kernel/tracing/trace_options\fP (see also the
  \fIREADME\fP file under the same directory). However, it usually
  defaults to something like:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
-telnet\-470   [001] .N.. 419421.045894: 0x00000001: <fmt>
-.ft P
-.fi
+.EX
+telnet\-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+.EE
  .UNINDENT
  .UNINDENT
  .sp
@@ -204,7 +202,8 @@ are set.
  \fB0x00000001\fP is a fake value used by BPF for the
  instruction pointer register.
  .IP \(bu 2
-\fB<fmt>\fP is the message formatted with \fIfmt\fP\&.
+\fB<formatted msg>\fP is the message formatted with
+\fIfmt\fP\&.
  .UNINDENT
  .UNINDENT
  .UNINDENT
@@ -404,7 +403,9 @@ performed again, if the helper is used in combination with
  direct packet access.
  .TP
  .B Return
-0 on success, or a negative error in case of failure.
+0 on success, or a negative error in case of failure. A positive
+error indicates a potential drop or congestion in the target
+device. The particular positive error codes are not defined.
  .UNINDENT
  .TP
  .B \fBu64 bpf_get_current_pid_tgid(void)\fP
@@ -541,8 +542,7 @@ remote ends with IPv4 address other than 10.0.0.1:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  int ret;
  struct bpf_tunnel_key key = {};
  
@@ -554,8 +554,7 @@ if (key.remote_ipv4 != 0x0a000001)
          return TC_ACT_SHOT;     // drop packet
  
  return TC_ACT_OK;               // accept packet
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .sp
@@ -600,20 +599,22 @@ sequence number should be added to tunnel header before
  sending the packet. This flag was added for GRE
  encapsulation, but might be used with other protocols
  as well in the future.
+.TP
+.B \fBBPF_F_NO_TUNNEL_KEY\fP
+Add a flag to tunnel metadata indicating that no tunnel
+key should be set in the resulting tunnel header.
  .UNINDENT
  .sp
  Here is a typical usage on the transmit path:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  struct bpf_tunnel_key key;
       populate key ...
  bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
  bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .sp
@@ -827,11 +828,9 @@ user stacks (such as stacks for Java programs). To do so, use:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  # sysctl kernel.perf_event_max_stack=<new value>
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .TP
@@ -1334,8 +1333,8 @@ The option value of length \fIoptlen\fP is pointed by \fIoptval\fP\&.
  .IP \(bu 2
  \fBstruct bpf_sock_ops\fP for \fBBPF_PROG_TYPE_SOCK_OPS\fP\&.
  .IP \(bu 2
-\fBstruct bpf_sock_addr\fP for \fBBPF_CGROUP_INET4_CONNECT\fP
-and \fBBPF_CGROUP_INET6_CONNECT\fP\&.
+\fBstruct bpf_sock_addr\fP for \fBBPF_CGROUP_INET4_CONNECT\fP,
+\fBBPF_CGROUP_INET6_CONNECT\fP and \fBBPF_CGROUP_UNIX_CONNECT\fP\&.
  .UNINDENT
  .sp
  This helper actually implements a subset of \fBsetsockopt()\fP\&.
@@ -1417,6 +1416,11 @@ type; \fIlen\fP is the length of the inner MAC header.
  \fBBPF_F_ADJ_ROOM_ENCAP_L2_ETH\fP:
  Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
  L2 type as Ethernet.
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_DECAP_L3_IPV4\fP,
+\fBBPF_F_ADJ_ROOM_DECAP_L3_IPV6\fP:
+Indicate the new IP header version after decapsulating the outer
+IP header. Used when the inner and outer IP versions are different.
  .UNINDENT
  .sp
  A call to this helper is susceptible to change the underlying
@@ -1572,11 +1576,9 @@ as follows.
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  normalized_counter = counter * t_enabled / t_running
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .sp
@@ -1596,7 +1598,7 @@ value and do the calculation inside the eBPF program.
  .INDENT 7.0
  .TP
  .B Description
-For en eBPF program attached to a perf event, retrieve the
+For an eBPF program attached to a perf event, retrieve the
  value of the event counter associated to \fIctx\fP and store it in
  the structure pointed by \fIbuf\fP and of size \fIbuf_size\fP\&. Enabled
  and running times are also stored in the structure (see
@@ -1623,8 +1625,8 @@ The retrieved value is stored in the structure pointed by
  .IP \(bu 2
  \fBstruct bpf_sock_ops\fP for \fBBPF_PROG_TYPE_SOCK_OPS\fP\&.
  .IP \(bu 2
-\fBstruct bpf_sock_addr\fP for \fBBPF_CGROUP_INET4_CONNECT\fP
-and \fBBPF_CGROUP_INET6_CONNECT\fP\&.
+\fBstruct bpf_sock_addr\fP for \fBBPF_CGROUP_INET4_CONNECT\fP,
+\fBBPF_CGROUP_INET6_CONNECT\fP and \fBBPF_CGROUP_UNIX_CONNECT\fP\&.
  .UNINDENT
  .sp
  This helper actually implements a subset of \fBgetsockopt()\fP\&.
@@ -1945,11 +1947,9 @@ user stacks (such as stacks for Java programs). To do so, use:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  # sysctl kernel.perf_event_max_stack=<new value>
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .TP
@@ -2010,9 +2010,26 @@ following values:
  Do a direct table lookup vs full lookup using FIB
  rules.
  .TP
+.B \fBBPF_FIB_LOOKUP_TBID\fP
+Used with BPF_FIB_LOOKUP_DIRECT.
+Use the routing table ID present in \fIparams\fP\->tbid
+for the fib lookup.
+.TP
  .B \fBBPF_FIB_LOOKUP_OUTPUT\fP
  Perform lookup from an egress perspective (default is
  ingress).
+.TP
+.B \fBBPF_FIB_LOOKUP_SKIP_NEIGH\fP
+Skip the neighbour table lookup. \fIparams\fP\->dmac
+and \fIparams\fP\->smac will not be set as output. A common
+use case is to call \fBbpf_redirect_neigh\fP() after
+doing \fBbpf_fib_lookup\fP().
+.TP
+.B \fBBPF_FIB_LOOKUP_SRC\fP
+Derive and set source IP addr in \fIparams\fP\->ipv{4,6}_src
+for the nexthop. If the src addr cannot be derived,
+\fBBPF_FIB_LKUP_RET_NO_SRC_ADDR\fP is returned. In this
+case, \fIparams\fP\->dmac and \fIparams\fP\->smac are not set either.
  .UNINDENT
  .sp
  \fIctx\fP is either \fBstruct xdp_md\fP for XDP programs or
@@ -3029,24 +3046,20 @@ get its length at runtime. See the following snippet:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  SEC(\(dqkprobe/sys_open\(dq)
  void bpf_sys_open(struct pt_regs *ctx)
  {
          char buf[PATHLEN]; // PATHLEN is defined to 256
-        int res;
-
-        res = bpf_probe_read_user_str(buf, sizeof(buf),
-                                      ctx\->di);
+        int res = bpf_probe_read_user_str(buf, sizeof(buf),
+                                          ctx\->di);
  
          // Consume buf, for example push it to
          // userspace via bpf_perf_event_output(); we
          // can use res (the string length) as event
          // size, after checking its boundaries.
  }
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .sp
@@ -3253,9 +3266,6 @@ The \fIflags\fP argument must be zero.
  .sp
  \fB\-EOPNOTSUPP\fP if the operation is not supported, for example
  a call from outside of TC ingress.
-.sp
-\fB\-ESOCKTNOSUPPORT\fP if the socket type is not supported
-(reuseport).
  .UNINDENT
  .TP
  .B \fBlong bpf_sk_assign(struct bpf_sk_lookup *\fP\fIctx\fP\fB, struct bpf_sock *\fP\fIsk\fP\fB, u64\fP \fIflags\fP\fB)\fP
@@ -3605,6 +3615,8 @@ Dynamically cast a \fIsk\fP pointer to a \fIudp6_sock\fP pointer.
  .TP
  .B Description
  Return a user or a kernel stack in bpf program provided buffer.
+Note: the user stack will only be populated if the \fItask\fP is
+the current task; for any other task the helper returns \-EOPNOTSUPP.
  To achieve this, the helper needs \fItask\fP, which is a valid
  pointer to \fBstruct task_struct\fP\&. To store the stacktrace, the
  bpf program provides \fIbuf\fP with a nonnegative \fIsize\fP\&.
@@ -3617,6 +3629,7 @@ the following flags:
  .TP
  .B \fBBPF_F_USER_STACK\fP
  Collect a user space stack instead of a kernel stack.
+The \fItask\fP must be the current task.
  .TP
  .B \fBBPF_F_USER_BUILD_ID\fP
  Collect buildid+offset instead of ips for user stack,
@@ -3632,11 +3645,9 @@ user stacks (such as stacks for Java programs). To do so, use:
  .INDENT 7.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  # sysctl kernel.perf_event_max_stack=<new value>
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .TP
@@ -4334,6 +4345,17 @@ The map can contain timers that invoke callback_fn\-s from different
  programs. The same callback_fn can serve different timers from
  different maps if key/value layout matches across maps.
  Every bpf_timer_set_callback() can have different callback_fn.
+.sp
+\fIflags\fP can be one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_F_TIMER_ABS\fP
+Start the timer with an absolute expiry value instead of the
+default relative one.
+.TP
+.B \fBBPF_F_TIMER_CPU_PIN\fP
+Timer will be pinned to the CPU of the caller.
+.UNINDENT
  .TP
  .B Return
  0 on success.
@@ -4360,10 +4382,14 @@ own timer which would have led to a deadlock otherwise.
  .TP
  .B Description
  Get address of the traced function (for tracing and kprobe programs).
+.sp
+When called for a kprobe program attached as a uprobe, it returns
+the probe address for both entry and return uprobes.
  .TP
  .B Return
-Address of the traced function.
+Address of the traced function for kprobe.
  0 for kprobes placed within the function (not at the entry).
+Address of the probe for uprobe and return uprobe.
  .UNINDENT
  .TP
  .B \fBu64 bpf_get_attach_cookie(void *\fP\fIctx\fP\fB)\fP
@@ -4817,12 +4843,28 @@ of \fIsrc\fP\(aqs data, \-EINVAL if \fIsrc\fP is an invalid dynptr or if
  .B Description
  Write \fIlen\fP bytes from \fIsrc\fP into \fIdst\fP, starting from \fIoffset\fP
  into \fIdst\fP\&.
-\fIflags\fP is currently unused.
+.sp
+\fIflags\fP must be 0 except for skb\-type dynptrs.
+.INDENT 7.0
+.TP
+.B For skb\-type dynptrs:
+.INDENT 7.0
+.IP \(bu 2
+All data slices of the dynptr are automatically
+invalidated after \fBbpf_dynptr_write\fP(). This is
+because writing may pull the skb and change the
+underlying packet buffer.
+.IP \(bu 2
+For \fIflags\fP, please see the flags accepted by
+\fBbpf_skb_store_bytes\fP().
+.UNINDENT
+.UNINDENT
  .TP
  .B Return
  0 on success, \-E2BIG if \fIoffset\fP + \fIlen\fP exceeds the length
  of \fIdst\fP\(aqs data, \-EINVAL if \fIdst\fP is an invalid dynptr or if \fIdst\fP
-is a read\-only dynptr or if \fIflags\fP is not 0.
+is a read\-only dynptr or if \fIflags\fP is not correct. For skb\-type dynptrs,
+other errors correspond to errors returned by \fBbpf_skb_store_bytes\fP().
  .UNINDENT
  .TP
  .B \fBvoid *bpf_dynptr_data(const struct bpf_dynptr *\fP\fIptr\fP\fB, u32\fP \fIoffset\fP\fB, u32\fP \fIlen\fP\fB)\fP
@@ -4833,6 +4875,9 @@ Get a pointer to the underlying dynptr data.
  .sp
  \fIlen\fP must be a statically known value. The returned data slice
  is invalidated whenever the dynptr is invalidated.
+.sp
+skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
  .TP
  .B Return
  Pointer to the underlying dynptr data, NULL if the dynptr is
@@ -5049,7 +5094,7 @@ eBPF programs can have an associated license, passed along with the bytecode
  instructions to the kernel when the programs are loaded. The format for that
  string is identical to the one in use for kernel modules (Dual licenses, such
  as \(dqDual BSD/GPL\(dq, may be used). Some helper functions are only accessible to
-programs that are compatible with the GNU Privacy License (GPL).
+programs that are compatible with the GNU General Public License (GNU GPL).
  .sp
  In order to use such helpers, the eBPF program must be loaded with the correct
  license string passed (via \fBattr\fP) to the \fBbpf\fP() system call, and this
@@ -5058,11 +5103,9 @@ similar to the following:
  .INDENT 0.0
  .INDENT 3.5
  .sp
-.nf
-.ft C
+.EX
  char ____license[] __attribute__((section(\(dqlicense\(dq), used)) = \(dqGPL\(dq;
-.ft P
-.fi
+.EE
  .UNINDENT
  .UNINDENT
  .SH IMPLEMENTATION