BGP: Add option to enforce first AS in AS_PATH

[thirdparty/bird.git] / doc / bird.sgml
diff --git a/doc/bird.sgml b/doc/bird.sgml

index c725081adc080b2991d08f3614d2b1bc79b170c1..aeea613fd4107a30eee51dcb26c38e4d111741d0 100644 (file)
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -25,7 +25,7 @@ configuration - something in config which is not keyword.
  Ondrej Filip <it/&lt;feela@network.cz&gt;/,
  Pavel Machek <it/&lt;pavel@ucw.cz&gt;/,
  Martin Mares <it/&lt;mj@ucw.cz&gt;/,
-Jan Matejka <it/&lt;mq@jmq.cz&gt;/,
+Maria Matejka <it/&lt;mq@jmq.cz&gt;/,
  Ondrej Zajicek <it/&lt;santiago@crfreenet.org&gt;/
  </author>
  
@@ -149,10 +149,10 @@ BIRD executable by configuring out routing protocols you don't use, and
         use given configuration file instead of <it/prefix/<file>/etc/bird.conf</file>.
  
         <tag><label id="argv-debug">-d</tag>
-       enable debug messages and run bird in foreground.
+       enable debug messages to stderr, and run bird in foreground.
  
-       <tag><label id="argv-log-file">-D <m/filename of debug log/</tag>
-       log debugging information to given file instead of stderr.
+       <tag><label id="argv-debug-file">-D <m/filename of debug log/</tag>
+       enable debug messages to given file.
  
         <tag><label id="argv-foreground">-f</tag>
         run bird in foreground.
@@ -293,6 +293,20 @@ routes are:
         <item>Route next hops (see below)
  </itemize>
  
+<sect1>IPv6 source-specific routes
+<label id="ip-sadr-routes">
+
+<p>The IPv6 routes containing both destination and source prefix. They are used
+for source-specific routing (SSR), also called source-address dependent routing
+(SADR), see <rfc id="8043">. Currently limited mostly to the Babel protocol.
+Configuration keyword is <cf/ipv6 sadr/.
+
+<itemize>
+       <item>(PK) Route destination (IP prefix together with its length)
+       <item>(PK) Route source (IP prefix together with its length)
+       <item>Route next hops (see below)
+</itemize>
+
  <sect1>VPN IPv4 and IPv6 routes
  <label id="vpn-routes">
  
@@ -389,6 +403,14 @@ configured for all relevant protocols and requires protocol-specific support
  (currently implemented for Kernel and BGP protocols), it is activated for
  particular boot by option <cf/-R/.
  
+<p>Some protocols (e.g. BGP) could be restarted gracefully after both
+intentional outage and crash, while others (e.g. OSPF) after intentional outage
+only. For planned graceful restart, BIRD must be shut down by
+<ref id="cli-graceful-restart" name="graceful restart"> command instead of
+regular <ref id="cli-down" name="down"> command. In this way routing neighbors
+are notified about planned graceful restart and routes are kept in kernel table
+after shutdown.
+
  
  <chapt>Configuration
  <label id="config">
@@ -407,31 +429,33 @@ extensive way.
  a comment, whitespace characters are treated as a single space. If there's a
  variable number of options, they are grouped using the <cf/{ }/ brackets. Each
  option is terminated by a <cf/;/. Configuration is case sensitive. There are two
-ways how to name symbols (like protocol names, filter names, constants etc.). You
-can either use a simple string starting with a letter followed by any
-combination of letters and numbers (e.g. <cf/R123/, <cf/myfilter/, <cf/bgp5/) or you can
-enclose the name into apostrophes (<cf/'/) and than you can use any combination
-of numbers, letters. hyphens, dots and colons (e.g. <cf/'1:strange-name'/,
-<cf/'-NAME-'/, <cf/'cool::name'/).
+ways how to name symbols (like protocol names, filter names, constants etc.).
+You can either use a simple string starting with a letter (or underscore)
+followed by any combination of letters, numbers and underscores (e.g. <cf/R123/,
+<cf/my_filter/, <cf/bgp5/) or you can enclose the name into apostrophes (<cf/'/)
+and then you can use any combination of numbers, letters, underscores, hyphens,
+dots and colons (e.g.  <cf/'1:strange-name'/, <cf/'-NAME-'/, <cf/'cool::name'/).
  
  <p>Here is an example of a simple config file. It enables synchronization of
-routing tables with OS kernel, scans for new network interfaces every 10 seconds
-and runs RIP on all network interfaces found.
+routing tables with OS kernel, learns network interfaces and runs RIP on all
+network interfaces found.
  
  <code>
  protocol kernel {
+       ipv4 {
+               export all;     # Default is export none
+       };
         persist;                # Don't remove routes on BIRD shutdown
-       scan time 20;           # Scan kernel routing table every 20 seconds
-       export all;             # Default is export none
  }
  
  protocol device {
-       scan time 10;           # Scan interfaces every 10 seconds
  }
  
  protocol rip {
-       export all;
-       import all;
+       ipv4 {
+               import all;
+               export all;
+       };
         interface "*";
  }
  </code>
@@ -455,19 +479,29 @@ ipv6 table
  include "tablename.conf";;
  </code>
  
-       <tag><label id="opt-log">log "<m/filename/"|syslog [name <m/name/]|stderr all|{ <m/list of classes/ }</tag>
-       Set logging of messages having the given class (either <cf/all/ or
-       <cf/{ error|trace [, <m/.../] }/ etc.) into selected destination (a file specified
-       as a filename string, syslog with optional name argument, or the stderr
-       output). Classes are:
+       <tag><label id="opt-log">log "<m/filename/" [<m/limit/ "<m/backup/"] | syslog [name <m/name/] | stderr all|{ <m/list of classes/ }</tag>
+       Set logging of messages having the given class (either <cf/all/ or <cf>{
+       error|trace [, <m/.../] }</cf> etc.) into selected destination - a file
+       specified as a filename string (with optional log rotation information),
+       syslog (with optional name argument), or the stderr output.
+
+       Classes are:
         <cf/info/, <cf/warning/, <cf/error/ and <cf/fatal/ for messages about local problems,
         <cf/debug/ for debugging messages,
         <cf/trace/ when you want to know what happens in the network,
         <cf/remote/ for messages about misbehavior of remote machines,
         <cf/auth/ about authentication failures,
         <cf/bug/ for internal BIRD bugs.
+
+       Logging directly to file supports basic log rotation -- there is an
+       optional log file limit and a backup filename. When the log file reaches
+       the limit, the current log file is renamed to the backup filename and a
+       new log file is created.
+
         You may specify more than one <cf/log/ line to establish logging to
-       multiple destinations. Default: log everything to the system log.
+       multiple destinations. Default: log everything to the system log, or
+       to the debug output if debugging is enabled by <cf/-d//<cf/-D/
+       command-line option.
  
         <tag><label id="opt-debug-protocols">debug protocols all|off|{ states|routes|filters|interfaces|events|packets [, <m/.../] }</tag>
         Set global defaults of protocol debugging options. See <cf/debug/ in the
@@ -534,6 +568,12 @@ include "tablename.conf";;
         constants based on /etc/iproute2/rt_* files. A list of defined constants
         can be seen (together with other symbols) using 'show symbols' command.
  
+       <tag><label id="opt-attribute">attribute <m/type/ <m/name/</tag>
+       Declare a custom route attribute. You can set and get it in filters like
+       any other route attribute. This feature is intended for marking routes
+       in import filters for export filtering purposes instead of locally
+       assigned BGP communities which have to be deleted in export filters.
+
         <tag><label id="opt-router-id">router id <m/IPv4 address/</tag>
         Set BIRD's router ID. It's a world-wide unique identification of your
         router, usually one of router's IPv4 addresses. Default: the lowest
@@ -637,16 +677,26 @@ agreement").
  
         <tag><label id="proto-description">description "<m/text/"</tag>
         This is an optional description of the protocol. It is displayed as a
-       part of the output of 'show route all' command.
+       part of the output of 'show protocols all' command.
  
-       <tag><label id="proto-vrf">vrf "<m/text/"</tag>
+       <tag><label id="proto-vrf">vrf "<m/text/"|default</tag>
         Associate the protocol with specific VRF. The protocol will be
         restricted to interfaces assigned to the VRF and will use sockets bound
-       to the VRF. Appropriate VRF interface must exist on OS level. For kernel
-       protocol, an appropriate table still must be explicitly selected by
-       <cf/table/ option. Note that the VRF support in BIRD and Linux kernel
-       (4.11) is still in development and is currently problematic outside of
-       multihop BGP.
+       to the VRF. A corresponding VRF interface must exist on OS level. For
+       kernel protocol, an appropriate table still must be explicitly selected
+       by <cf/table/ option.
+
+       By selecting <cf/default/, the protocol is associated with the default
+       VRF; i.e., it will be restricted to interfaces not assigned to any
+       regular VRF. That is different from not specifying <cf/vrf/ at all, in
+       which case the protocol may use any interface regardless of its VRF
+       status.
+
+       Note that for proper VRF support it is necessary to use Linux kernel
+       version at least 4.14, older versions have limited VRF implementation.
+       Before Linux kernel 5.0, a socket bound to a port in default VRF collides
+       with others in regular VRFs. In BGP, this can be avoided by using
+       <ref id="bgp-strict-bind" name="strict bind"> option.
  
         <tag><label id="proto-channel"><m/channel name/ [{<m/channel config/}]</tag>
         Every channel must be explicitly stated. See the protocol-specific
@@ -698,10 +748,6 @@ agreement").
         on all interfaces that have address from 192.168.0.0/16, but not from
         192.168.1.0/24.
  
-       <cf>interface -192.168.1.0/24, 192.168.0.0/16;</cf> - start the protocol
-       on all interfaces that have address from 192.168.0.0/16, but not from
-       192.168.1.0/24.
-
         <cf>interface "eth*" 192.168.1.0/24;</cf> - start the protocol on all
         ethernet interfaces that have address from 192.168.1.0/24.
  
@@ -775,8 +821,10 @@ agreement").
  <label id="channel-opts">
  
  <p>Every channel belongs to a protocol and is configured inside its block. The
-minimal channel config is empty, then it uses the default values. The name of
-the channel implies its nettype.
+minimal channel config is empty, then it uses default values. The name of the
+channel implies its nettype. Channel definitions can be inherited from protocol
+templates. Multiple definitions of the same channel are forbidden, but channels
+inherited from templates can be updated by new definitions.
  
  <descrip>
         <tag><label id="proto-table">table <m/name/</tag>
@@ -790,11 +838,13 @@ the channel implies its nettype.
         <tag><label id="proto-import">import all | none | filter <m/name/ | filter { <m/filter commands/ } | where <m/boolean filter expression/</tag>
         Specify a filter to be used for filtering routes coming from the
         protocol to the routing table. <cf/all/ is for keeping all routes,
-       <cf/none/ is for dropping all routes. Default: <cf/all/.
+       <cf/none/ is for dropping all routes. Default: <cf/all/ (except for
+       EBGP).
  
         <tag><label id="proto-export">export <m/filter/</tag>
         This is similar to the <cf>import</cf> keyword, except that it works in
-       the direction from the routing table to the protocol. Default: <cf/none/.
+       the direction from the routing table to the protocol. Default: <cf/none/
+       (except for EBGP).
  
         <tag><label id="proto-import-keep-filtered">import keep filtered <m/switch/</tag>
         Usually, if an import filter rejects a route, the route is forgotten.
@@ -841,7 +891,7 @@ protocol rip ng {
  }
  </code>
  
-<p>And this is a non-trivial example.
+<p>This is a non-trivial example.
  <code>
  protocol rip ng {
         ipv6 {
@@ -854,6 +904,35 @@ protocol rip ng {
  }
  </code>
  
+<p>And this is an even more complicated example using templates.
+<code>
+template bgp mytemplate {
+       local 198.51.100.14 as 65000;
+
+       ipv4 {
+               table mytable4;
+               import filter { ... };
+               export none;
+       };
+       ipv6 {
+               table mytable6;
+               import filter { ... };
+               export none;
+       };
+}
+
+protocol bgp from mytemplate {
+       neighbor 198.51.100.130 as 64496;
+
+       # IPv4 channel is inherited as-is, while IPv6
+       # channel is adjusted by export filter option
+       ipv6 {
+               export filter { ... };
+       };
+}
+</code>
+
+
  <chapt>Remote control
  <label id="remote-control">
  
@@ -978,6 +1057,10 @@ This argument can be omitted if there exists only a single instance.
         number of networks, number of routes before and after filtering). If
         you use <cf/count/ instead, only the statistics will be printed.
  
+       <tag><label id="cli-mrt-dump">mrt dump table <m/name/|"<m/pattern/" to "<m/filename/" [filter <m/f/|where <m/c/]</tag>
+       Dump content of a routing table to a specified file in MRT table dump
+       format. See <ref id="mrt" name="MRT protocol"> for details.
+
         <tag><label id="cli-configure">configure [soft] ["<m/config file/"] [timeout [<m/num/]]</tag>
         Reload configuration from a given file. BIRD will smoothly switch itself
         to the new configuration, protocols are reconfigured if possible,
@@ -1037,6 +1120,10 @@ This argument can be omitted if there exists only a single instance.
         <tag><label id="cli-down">down</tag>
         Shut BIRD down.
  
+       <tag><label id="cli-graceful-restart">graceful restart</tag>
+       Shut BIRD down for graceful restart. See <ref id="graceful-restart"
+       name="graceful restart"> section for details.
+
         <tag><label id="cli-debug">debug <m/protocol/|<m/pattern/|all all|off|{ states|routes|filters|events|packets [, <m/.../] }</tag>
         Control protocol debugging.
  
@@ -1051,6 +1138,7 @@ This argument can be omitted if there exists only a single instance.
         Evaluate given expression.
  </descrip>
  
+
  <chapt>Filters
  <label id="filters">
  
@@ -1089,7 +1177,7 @@ int var;
  <p>As you can see, a filter has a header, a list of local variables, and a body.
  The header consists of the <cf/filter/ keyword followed by a (unique) name of
  filter. The list of local variables consists of <cf><M>type name</M>;</cf>
-pairs where each pair defines one local variable. The body consists of <cf>
+pairs where each pair declares one local variable. The body consists of <cf>
  { <M>statements</M> }</cf>. Each <m/statement/ is terminated by a <cf/;/. You
  can group several statements to a single compound statement by using braces
  (<cf>{ <M>statements</M> }</cf>) which is useful if you want to make a bigger
@@ -1118,7 +1206,7 @@ called like in C: <cf>name(); with_parameters(5);</cf>. Function may return
  values using the <cf>return <m/[expr]/</cf> command. Returning a value exits
  from current function (this is similar to C).
  
-<p>Filters are declared in a way similar to functions except they can't have
+<p>Filters are defined in a way similar to functions except they can't have
  explicit parameters. They get a route table entry as an implicit parameter, it
  is also passed automatically to any functions called. The filter must terminate
  with either <cf/accept/ or <cf/reject/ statement. If there's a runtime error in
@@ -1146,8 +1234,8 @@ bird>
  <label id="data-types">
  
  <p>Each variable and each value has certain type. Booleans, integers and enums
-are incompatible with each other (that is to prevent you from shooting in the
-foot).
+are incompatible with each other (that is to prevent you from shooting yourself
+in the foot).
  
  <descrip>
         <tag><label id="type-bool">bool</tag>
@@ -1184,7 +1272,7 @@ foot).
         This type can hold a single IP address. The IPv4 addresses are stored as
         IPv4-Mapped IPv6 addresses so one data type for both of them is used.
         Whether the address is IPv4 or not may be checked by <cf>.is_ip4</cf>
-       which returns <cf/bool/. IP addresses are written in the standard
+       which returns a <cf/bool/. IP addresses are written in the standard
         notation (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special
         operator <cf>.mask(<M>num</M>)</cf> on values of type ip. It masks out
         all but first <cf><M>num</M></cf> bits from the IP address. So
@@ -1198,12 +1286,17 @@ foot).
         operator <cf/.type/. The type may be:
  
         <cf/NET_IP4/ and <cf/NET_IP6/ prefixes hold an IP prefix. The literals
-       are written as <cf><m/ipaddress//<m/pxlen/</cf>,
-       or <cf><m>ipaddress</m>/<m>netmask</m></cf>. There are two special
+       are written as <cf><m/ipaddress//<m/pxlen/</cf>. There are two special
         operators on these: <cf/.ip/ which extracts the IP address from the
         pair, and <cf/.len/, which separates prefix length from the pair.
         So <cf>1.2.0.0/16.len = 16</cf> is true.
  
+       <cf/NET_IP6_SADR/ nettype holds both destination and source IPv6
+       prefix. The literals are written as <cf><m/ipaddress//<m/pxlen/ from
+       <m/ipaddress//<m/pxlen/</cf>, where the first part is the destination
+       prefix and the second part is the source prefix. They support the same
+       operators as IP prefixes, but just for the destination part.
+
         <cf/NET_VPN4/ and <cf/NET_VPN6/ prefixes hold an IP prefix with VPN
         Route Distinguisher (<rfc id="4364">). They support the same special
         operators as IP prefixes, and also <cf/.rd/ which extracts the Route
@@ -1376,7 +1469,8 @@ foot).
         but <tt>bgp_path &tilde; [= * 4 5 * =]</tt> is false. BGP mask
         expressions can also contain integer expressions enclosed in parenthesis
         and integer variables, for example <tt>[= * 4 (1+2) a =]</tt>. You can
-        also use ranges, for example <tt>[= * 3..5 2 100..200 * =]</tt>.
+       also use ranges (e.g. <tt>[= * 3..5 2 100..200 * =]</tt>) and sets
+       (e.g. <tt>[= 1 2 [3, 5, 7] * =]</tt>).
  
         <tag><label id="type-clist">clist</tag>
         Clist is similar to a set, except that unlike other sets, it can be
@@ -1416,7 +1510,7 @@ foot).
         <cf/!&tilde;/ membership operators) can be used to modify or test
         eclists, with ECs instead of pairs as arguments.
  
-       <tag><label id="type-lclist">lclist/</tag>
+       <tag><label id="type-lclist">lclist</tag>
         Lclist is a data type used for BGP large community lists. Like eclists,
         lclists are very similar to clists, but they are sets of LCs instead of
         pairs. The same operations (like <cf/add/, <cf/delete/ or <cf/&tilde;/
@@ -1424,6 +1518,7 @@ foot).
         lclists, with LCs instead of pairs as arguments.
  </descrip>
  
+
  <sect>Operators
  <label id="operators">
  
@@ -1447,8 +1542,8 @@ the clist that is also a member of the pair/quad set).
  <p>There is one operator related to ROA infrastructure - <cf/roa_check()/. It
  examines a ROA table and does <rfc id="6483"> route origin validation for a
  given network prefix. The basic usage is <cf>roa_check(<m/table/)</cf>, which
-checks current route (which should be from BGP to have AS_PATH argument) in the
-specified ROA table and returns ROA_UNKNOWN if there is no relevant ROA,
+checks the current route (which should be from BGP to have AS_PATH argument) in
+the specified ROA table and returns ROA_UNKNOWN if there is no relevant ROA,
  ROA_VALID if there is a matching ROA, or ROA_INVALID if there are some relevant
  ROAs but none of them match. There is also an extended variant
  <cf>roa_check(<m/table/, <m/prefix/, <m/asn/)</cf>, which allows to specify a
@@ -1495,11 +1590,20 @@ if 1234 = i then printn "."; else {
  <label id="route-attributes">
  
  <p>A filter is implicitly passed a route, and it can access its attributes just
-like it accesses variables. Attempts to access undefined attribute result in a
-runtime error; you can check if an attribute is defined by using the
-<cf>defined( <m>attribute</m> )</cf> operator. One notable exception to this
-rule are attributes of clist type, where undefined value is regarded as empty
-clist for most purposes.
+like it accesses variables. There are common route attributes, protocol-specific
+route attributes and custom route attributes. Most common attributes are
+mandatory (always defined), while the remaining ones are optional. Attempts to access
+undefined attribute result in a runtime error; you can check if an attribute is
+defined by using the <cf>defined( <m>attribute</m> )</cf> operator. One notable
+exception to this rule are attributes of bgppath and *clist types, where
+undefined value is regarded as empty bgppath/*clist for most purposes.
+
+Attributes can be defined by just setting them in filters. Custom attributes
+have to be first declared by <ref id="opt-attribute" name="attribute"> global
+option. You can also undefine optional attribute back to non-existence by using
+the <cf>unset( <m/attribute/ )</cf> operator.
+
+Common route attributes are:
  
  <descrip>
         <tag><label id="rta-net"><m/prefix/ net</tag>
@@ -1550,7 +1654,8 @@ clist for most purposes.
         <tag><label id="rta-ifname"><m/string/ ifname</tag>
         Name of the outgoing interface. Sink routes (like blackhole, unreachable
         or prohibit) and multipath routes have no interface associated with
-       them, so <cf/ifname/ returns an empty string for such routes. Read-only.
+       them, so <cf/ifname/ returns an empty string for such routes. Setting it
+       would also change route to a direct one (remove gateway).
  
         <tag><label id="rta-ifindex"><m/int/ ifindex</tag>
         Index of the outgoing interface. System wide index of the interface. May
@@ -1562,13 +1667,11 @@ clist for most purposes.
         The optional attribute that can be used to specify a distance to the
         network for routes that do not have a native protocol metric attribute
         (like <cf/ospf_metric1/ for OSPF routes). It is used mainly by BGP to
-       compare internal distances to boundary routers (see below). It is also
-       used when the route is exported to OSPF as a default value for OSPF type
-       1 metric.
+       compare internal distances to boundary routers (see below).
  </descrip>
  
-<p>There also exist protocol-specific attributes which are described in the
-corresponding protocol sections.
+<p>Protocol-specific route attributes are described in the corresponding
+protocol sections.
  
  
  <sect>Other statements
@@ -1578,7 +1681,7 @@ corresponding protocol sections.
  
  <descrip>
         <tag><label id="assignment"><m/variable/ = <m/expr/</tag>
-       Set variable to a given value.
+       Set variable (or route attribute) to a given value.
  
         <tag><label id="filter-accept-reject">accept|reject [ <m/expr/ ]</tag>
         Accept or reject the route, possibly printing <cf><m>expr</m></cf>.
@@ -1616,19 +1719,26 @@ cases desirable.
  routes over the same IPv6 transport. For sending and receiving Babel packets,
  only a link-local IPv6 address is needed.
  
-<p>BIRD does not implement any Babel extensions, but will coexist with
-implementations using extensions (and will just ignore extension messages).
+<p>BIRD implements an extension for IPv6 source-specific routing (SSR or SADR),
+but must be configured accordingly to use it. SADR-enabled Babel router can
+interoperate with non-SADR Babel router, but the latter would ignore routes
+with specific (non-zero) source prefix.
  
  <sect1>Configuration
  <label id="babel-config">
  
-<p>Babel supports no global configuration options apart from those common to all
-other protocols, but supports the following per-interface configuration options:
+<p>The Babel protocol supports both IPv4 and IPv6 channels; both can be
+configured simultaneously. It can also be configured with <ref
+id="ip-sadr-routes" name="IPv6 SADR"> channel instead of regular IPv6
+channel, in such case SADR support is enabled. Babel supports no global
+configuration options apart from those common to all other protocols, but
+supports the following per-interface configuration options:
  
  <code>
  protocol babel [<name>] {
         ipv4 { <channel config> };
-       ipv6 { <channel config> };
+       ipv6 [sadr] { <channel config> };
+        randomize router id <switch>;
         interface <interface pattern> {
                 type <wired|wireless>;
                 rxcost <number>;
@@ -1648,8 +1758,17 @@ protocol babel [<name>] {
  </code>
  
  <descrip>
-      <tag><label id="babel-channel">ipv4|ipv6 <m/channel config/</tag>
-      The supported channels are IPv4 and IPv6.
+      <tag><label id="babel-channel">ipv4 | ipv6 [sadr] <m/channel config/</tag>
+      The supported channels are IPv4, IPv6, and IPv6 SADR.
+
+      <tag><label id="babel-random-router-id">randomize router id <m/switch/</tag>
+      If enabled, BIRD will randomize the top 32 bits of its router ID whenever
+      the protocol instance starts up. If a Babel node restarts, it loses its
+      sequence number, which can cause its routes to be rejected by peers until
+      the state is cleared out by other nodes in the network (which can take on
+      the order of minutes). Enabling this option causes BIRD to pick a random
+      router ID every time it starts up, which avoids this problem at the cost
+      of not having stable router IDs in the network. Default: no.
  
        <tag><label id="babel-type">type wired|wireless </tag>
        This option specifies the interface type: Wired or wireless. On wired
@@ -1796,12 +1915,11 @@ the BFD session went down).
  advanced features like the echo mode or authentication are not implemented), IP
  transport for BFD as defined in <rfc id="5881"> and <rfc id="5883"> and
  interaction with client protocols as defined in <rfc id="5882">.
-We currently support at most one protocol instance.
  
  <p>BFD packets are sent with a dynamic source port number. Linux systems use by
  default a bit different dynamic port range than the IANA approved one
  (49152-65535). If you experience problems with compatibility, please adjust
-<cf>/proc/sys/net/ipv4/ip_local_port_range</cf>
+<cf>/proc/sys/net/ipv4/ip_local_port_range</cf>.
  
  <sect1>Configuration
  <label id="bfd-config">
@@ -1818,6 +1936,14 @@ configuration is often sufficient.
  <p>Note that to use BFD for other protocols like OSPF or BGP, these protocols
  also have to be configured to request BFD sessions, usually by <cf/bfd/ option.
  
+<p>A BFD instance not associated with any VRF handles session requests from all
+other protocols, even ones associated with a VRF. Such setup would work for
+single-hop BFD sessions if <cf/net.ipv4.udp_l3mdev_accept/ sysctl is enabled,
+but does not currently work for multihop sessions. Another approach is to
+configure multiple BFD instances, one for each VRF (including the default VRF).
+Each BFD instance associated with a VRF (regular or default) only handles
+session requests from protocols in the same VRF.
+
  <p>Some of BFD session options require <m/time/ value, which has to be specified
  with the appropriate unit: <m/num/ <cf/s/|<cf/ms/|<cf/us/. Although microseconds
  are allowed as units, practical minimum values are usually in order of tens of
@@ -2035,12 +2161,14 @@ avoid routing loops.
  <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
  <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
  <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
+<item> <rfc id="7311"> - Accumulated IGP Metric Attribute for BGP
  <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
  <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
  <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
  <item> <rfc id="7947"> - Internet Exchange BGP Route Server
  <item> <rfc id="8092"> - BGP Large Communities Attribute
  <item> <rfc id="8203"> - BGP Administrative Shutdown Communication
+<item> <rfc id="8212"> - Default EBGP Route Propagation Behavior without Policies
  </itemize>
  
  <sect1>Route selection rules
@@ -2082,22 +2210,35 @@ to set routing policy and all the other parameters differently for each neighbor
  using the following configuration parameters:
  
  <descrip>
-       <tag><label id="bgp-local">local [<m/ip/] as <m/number/</tag>
+       <tag><label id="bgp-local">local [<m/ip/] [port <m/number/] [as <m/number/]</tag>
         Define which AS we are part of. (Note that contrary to other IP routers,
         BIRD is able to act as a router located in multiple AS'es simultaneously,
         but in such cases you need to tweak the BGP paths manually in the filters
         to get consistent behavior.) Optional <cf/ip/ argument specifies a source
-       address, equivalent to the <cf/source address/ option (see below). This
-       parameter is mandatory.
+       address, equivalent to the <cf/source address/ option (see below).
+       Optional <cf/port/ argument specifies the local BGP port instead of
+       standard port 179. The parameter may be used multiple times with
+       different sub-options (e.g., both <cf/local 10.0.0.1 as 65000;/ and
+       <cf/local 10.0.0.1; local as 65000;/ are valid). This parameter is
+       mandatory.
  
-       <tag><label id="bgp-neighbor">neighbor [<m/ip/] [port <m/number/] [as <m/number/]</tag>
+       <tag><label id="bgp-neighbor">neighbor [<m/ip/ | range <m/prefix/] [port <m/number/] [as <m/number/] [internal|external]</tag>
         Define neighboring router this instance will be talking to and what AS
         it is located in. In case the neighbor is in the same AS as we are, we
-       automatically switch to iBGP. Optionally, the remote port may also be
-       specified. The parameter may be used multiple times with different
-       sub-options (e.g., both <cf/neighbor 10.0.0.1 as 65000;/ and
-       <cf/neighbor 10.0.0.1; neighbor as 65000;/ are valid). This parameter is
-       mandatory.
+       automatically switch to IBGP. Alternatively, it is possible to specify
+       just <cf/internal/ or <cf/external/ instead of AS number, in that case
+       either local AS number, or any external AS number is accepted.
+       Optionally, the remote port may also be specified. Like <cf/local/
+       parameter, this parameter may also be used multiple times with different
+       sub-options. This parameter is mandatory.
+
+       It is possible to specify network prefix (with <cf/range/ keyword)
+       instead of explicit neighbor IP address. This enables dynamic BGP
+       behavior, where the BGP instance listens on BGP port, but new BGP
+       instances are spawned for incoming BGP connections (if source address
+       matches the network prefix). It is possible to mix regular BGP instances
+       with dynamic BGP instances and have multiple dynamic BGP instances with
+       different ranges.
  
         <tag><label id="bgp-iface">interface <m/string/</tag>
         Define interface we should use for link-local BGP IPv6 sessions.
@@ -2126,9 +2267,19 @@ using the following configuration parameters:
         the number of hops is 2. Default: enabled for iBGP.
  
         <tag><label id="bgp-source-address">source address <m/ip/</tag>
-       Define local address we should use for next hop calculation and as a
-       source address for the BGP session. Default: the address of the local
-       end of the interface our neighbor is connected to.
+       Define local address we should use as a source address for the BGP
+       session. Default: the address of the local end of the interface our
+       neighbor is connected to.
+
+       <tag><label id="bgp-dynamic-name">dynamic name "<m/text/"</tag>
+       Define common prefix of names used for new BGP instances spawned when
+       dynamic BGP behavior is active. Actual names also contain numeric
+       index to distinguish individual instances.  Default: "dynbgp".
+
+       <tag><label id="bgp-dynamic-name-digits">dynamic name digits <m/number/</tag>
+       Define minimum number of digits for index in names of spawned dynamic
+       BGP instances. E.g., if set to 2, then the first name would be
+       "dynbgp01". Default: 0.
  
         <tag><label id="bgp-strict-bind">strict bind <m/switch/</tag>
         Specify whether BGP listening socket should be bound to a specific local
@@ -2147,13 +2298,16 @@ using the following configuration parameters:
         immediately shut down. Note that this option cannot be used with
         multihop BGP. Default: enabled for direct BGP, disabled otherwise.
  
-       <tag><label id="bgp-bfd">bfd <M>switch</M></tag>
+       <tag><label id="bgp-bfd">bfd <M>switch</M>|graceful</tag>
         BGP could use BFD protocol as an advisory mechanism for neighbor
         liveness and failure detection. If enabled, BIRD setups a BFD session
         for the BGP neighbor and tracks its liveness by it. This has an
         advantage of an order of magnitude lower detection times in case of
-       failure. Note that BFD protocol also has to be configured, see
-       <ref id="bfd" name="BFD"> section for details. Default: disabled.
+       failure. When a neighbor failure is detected, the BGP session is
+       restarted. Optionally, it can be configured (by <cf/graceful/ argument)
+       to trigger graceful restart instead of regular restart. Note that BFD
+       protocol also has to be configured, see <ref id="bfd" name="BFD">
+       section for details. Default: disabled.
  
         <tag><label id="bgp-ttl-security">ttl security <m/switch/</tag>
         Use GTSM (<rfc id="5082"> - the generalized TTL security mechanism). GTSM
@@ -2241,6 +2395,25 @@ using the following configuration parameters:
         completely disabled and you should ensure loop-free behavior by some
         other means. Default: 0 (no local AS number allowed).
  
+       <tag><label id="bgp-allow-as-sets">allow as sets [<m/switch/]</tag>
+       AS path attribute received with BGP routes may contain not only
+       sequences of AS numbers, but also sets of AS numbers. These rarely used
+       artifacts are results of inter-AS route aggregation. AS sets are
+       deprecated (<rfc id="6472">), and likely to be rejected in the future,
+       as they complicate security features like RPKI validation. When this
+       option is disabled, then received AS paths with AS sets are rejected as
+       malformed and corresponding BGP updates are treated as withdraws.
+       Default: on.
+
+       <tag><label id="bgp-enforce-first-as">enforce first as [<m/switch/]</tag>
+       Routes received from an EBGP neighbor are generally expected to have the
+       first (leftmost) AS number in their AS path equal to the neighbor AS
+       number. This is not enforced by default as there are legitimate cases
+       where it is not true, e.g. connections to route servers. When this
+       option is enabled, routes with non-matching first AS number are rejected
+       and corresponding updates are treated as withdraws. The option is valid
+       on EBGP sessions only. Default: off.
+
         <tag><label id="bgp-enable-route-refresh">enable route refresh <m/switch/</tag>
         After the initial route exchange, BGP protocol uses incremental updates
         to keep BGP speakers synchronized. Sometimes (e.g., if BGP speaker
@@ -2275,6 +2448,25 @@ using the following configuration parameters:
         re-establish after a restart before deleting stale routes. Default:
         120 seconds.
  
+       <tag><label id="bgp-long-lived-graceful-restart">long lived graceful restart <m/switch/|aware</tag>
+       The long-lived graceful restart is an extension of the traditional
+       <ref id="bgp-graceful-restart" name="BGP graceful restart">, where stale
+       routes are kept even after the <ref id="bgp-graceful-restart-time"
+       name="restart time"> expires for additional long-lived stale time, but
+       they are marked with the LLGR_STALE community, depreferenced, and
+       withdrawn from routers not supporting LLGR. Like traditional BGP
+       graceful restart, it has three states: disabled, aware (receiving-only),
+       and enabled. Note that long-lived graceful restart requires at least
+       aware level of traditional BGP graceful restart. Default: aware, unless
+       graceful restart is disabled.
+
+       <tag><label id="bgp-long-lived-stale-time">long lived stale time <m/number/</tag>
+       The long-lived stale time is announced in the BGP long-lived graceful
+       restart capability and specifies how long the neighbor would keep stale
+       routes depreferenced during long-lived graceful restart until either the
+       session is re-established and synchronized or the stale time expires and
+       routes are removed. Default: 3600 seconds.
+
         <tag><label id="bgp-interpret-communities">interpret communities <m/switch/</tag>
         <rfc id="1997"> demands that BGP speaker should process well-known
         communities like no-export (65535, 65281) or no-advertise (65535,
@@ -2296,8 +2488,8 @@ using the following configuration parameters:
  
         <tag><label id="bgp-enable-extended-messages">enable extended messages <m/switch/</tag>
         The BGP protocol uses maximum message length of 4096 bytes. This option
-       provides an extension to allow extended messages with length up
-       to 65535 bytes. Default: off.
+       provides an extension (<rfc id="8654">) to allow extended messages with
+       length up to 65535 bytes. Default: off.
  
         <tag><label id="bgp-capabilities">capabilities <m/switch/</tag>
         Use capability advertisement to advertise optional capabilities. This is
@@ -2319,6 +2511,19 @@ using the following configuration parameters:
         disable the instance automatically and wait for an administrator to fix
         the problem manually. Default: off.
  
+       <tag><label id="bgp-disable-after-cease">disable after cease <m/switch/|<m/set-of-flags/</tag>
+       When a Cease notification is received, disable the instance
+       automatically and wait for an administrator to fix the problem manually.
+       When used with <m/switch/ argument, it means handle every Cease subtype
+       with the exception of <cf/connection collision/. Default: off.
+
+       The <m/set-of-flags/ allows to narrow down relevant Cease subtypes. The
+       syntax is <cf>{<m/flag/ [, <m/.../] }</cf>, where flags are: <cf/cease/,
+       <cf/prefix limit hit/, <cf/administrative shutdown/,
+       <cf/peer deconfigured/, <cf/administrative reset/,
+       <cf/connection rejected/, <cf/configuration change/,
+       <cf/connection collision/, <cf/out of resources/.
+
         <tag><label id="bgp-hold-time">hold time <m/number/</tag>
         Time in seconds to wait for a Keepalive message from the other side
         before considering the connection stale. Default: depends on agreement
@@ -2427,22 +2632,47 @@ together with their appropriate channels follows.
  </tabular>
  </table>
  
-<p>BGP's channels have additional config options (together with the common ones):
+<p>Due to <rfc id="8212">, external BGP protocol requires explicit configuration
+of import and export policies (in contrast to other protocols, where default
+policies of <cf/import all/ and <cf/export none/ are used in absence of explicit
+configuration). Note that blanket policies like <cf/all/ or <cf/none/ can still
+be used in explicit configuration.
  
-<descrip>
-       <tag><label id="bgp-next-hop-keep">next hop keep</tag>
-       Forward the received Next Hop attribute even in situations where the
-       local address should be used instead, like when the route is sent to an
-       interface with a different subnet. Default: disabled.
+<p>BGP channels have additional config options (together with the common ones):
  
-       <tag><label id="bgp-next-hop-self">next hop self</tag>
-       Avoid calculation of the Next Hop attribute and always advertise our own
-       source address as a next hop. This needs to be used only occasionally to
-       circumvent misconfigurations of other routers. Default: disabled.
+<descrip>
+       <tag><label id="bgp-mandatory">mandatory <m/switch/</tag>
+       When local and neighbor sets of configured AFI/SAFI pairs differ,
+       capability negotiation ensures that a common subset is used. For
+       mandatory channels their associated AFI/SAFI must be negotiated
+       (i.e., also announced by the neighbor), otherwise BGP session
+       negotiation fails with <it/'Required capability missing'/ error.
+       Regardless, at least one AFI/SAFI must be negotiated in order for the
+       BGP session to be successfully established. Default: off.
+
+       <tag><label id="bgp-next-hop-keep">next hop keep <m/switch/|ibgp|ebgp</tag>
+       Do not modify the Next Hop attribute and advertise the current one
+       unchanged even in cases where our own local address should be used
+       instead. This is necessary when the BGP speaker does not forward network
+       traffic (route servers and some route reflectors) and also can be useful
+       in some other cases (e.g. multihop EBGP sessions). Can be enabled for
+       all routes, or just for routes received from IBGP / EBGP neighbors.
+       Default: disabled for regular BGP, enabled for route servers,
+       <cf/ibgp/ for route reflectors.
+
+       <tag><label id="bgp-next-hop-self">next hop self <m/switch/|ibgp|ebgp</tag>
+       Always advertise our own local address as a next hop, even in cases
+       where the current Next Hop attribute should be used unchanged. This is
+       sometimes used for routes propagated from EBGP to IBGP when IGP routing
+       does not cover inter-AS links, therefore IP addresses of EBGP neighbors
+       are not resolvable through IGP. Can be enabled for all routes, or just
+       for routes received from IBGP / EBGP neighbors. Default: disabled.
  
         <tag><label id="bgp-next-hop-address">next hop address <m/ip/</tag>
-       Avoid calculation of the Next Hop attribute and always advertise this address
-       as a next hop.
+       Specify which address to use when our own local address should be
+       announced in the Next Hop attribute. Default: the source address of the
+       BGP session (if acceptable), or the preferred address of an associated
+       interface.
  
         <tag><label id="bgp-missing-lladdr">missing lladdr self|drop|ignore</tag>
         Next Hop attribute in BGP-IPv6 sometimes contains just the global IPv6
@@ -2482,6 +2712,26 @@ together with their appropriate channels follows.
         for every allowed table type. Default: the same as the main table
         the channel is connected to (if eligible).
  
+       <tag><label id="bgp-import-table">import table <m/switch/</tag>
+       A BGP import table contains all received routes from given BGP neighbor,
+       before application of import filters. It is also called <em/Adj-RIB-In/
+       in BGP terminology. BIRD BGP by default operates without import tables,
+       in which case received routes are just processed by import filters,
+       accepted ones are stored in the master table, and the rest is forgotten.
+       Enabling <cf/import table/ allows to store unprocessed routes, which can
+       be examined later by <cf/show route/, and can be used to reconfigure
+       import filters without full route refresh. Default: off.
+
+       <tag><label id="bgp-export-table">export table <m/switch/</tag>
+       A BGP export table contains all routes sent to given BGP neighbor, after
+       application of export filters. It is also called <em/Adj-RIB-Out/ in BGP
+       terminology. BIRD BGP by default operates without export tables, in
+       which case routes from master table are just processed by export filters
+       and then announced by BGP. Enabling <cf/export table/ allows to store
+       routes after export filter processing, so they can be examined later by
+       <cf/show route/, and can be used to eliminate unnecessary updates or
+       withdraws. Default: off.
+
         <tag><label id="bgp-secondary">secondary <m/switch/</tag>
         Usually, if an export filter rejects a selected route, no other route is
         propagated for that network. This option allows to try the next route in
@@ -2491,6 +2741,15 @@ together with their appropriate channels follows.
         explicitly (to conserve memory). This option requires that the connected
         routing table is <ref id="dsc-table-sorted" name="sorted">. Default: off.
  
+       <tag><label id="bgp-extended-next-hop">extended next hop <m/switch/</tag>
+       BGP expects that announced next hops have the same address family as
+       associated network prefixes. This option provides an extension to use
+       IPv4 next hops with IPv6 prefixes and vice versa. For IPv4 / VPNv4
+       channels, the behavior is controlled by the Extended Next Hop Encoding
+       capability, as described in <rfc id="5549">. For IPv6 / VPNv6 channels,
+       just IPv4-mapped IPv6 addresses are used, as described in
+       <rfc id="4798"> and <rfc id="4659">. Default: off.
+
         <tag><label id="bgp-add-paths">add paths <m/switch/|rx|tx</tag>
         Standard BGP can propagate only one path (route) per destination network
         (usually the selected one). This option controls the add-path protocol
@@ -2500,12 +2759,55 @@ together with their appropriate channels follows.
         TX direction. When active, all available routes accepted by the export
         filter are advertised to the neighbor. Default: off.
  
+       <tag><label id="bgp-aigp">aigp <m/switch/|originate</tag>
+       The BGP protocol does not use a common metric like other routing
+       protocols, instead it uses a set of criteria for route selection
+       consisting of both overall AS path length and a distance to the nearest AS
+       boundary router. Assuming that metrics of different autonomous systems
+       are incomparable, once a route is propagated from an AS to a next one,
+       the distance in the old AS does not matter.
+
+       The AIGP extension (<rfc id="7311">) allows to propagate accumulated
+       IGP metric (in the AIGP attribute) through both IBGP and EBGP links,
+       computing total distance through multiple autonomous systems (assuming
+       they use comparable IGP metric). The total AIGP metric is compared in
+       the route selection process just after Local Preference comparison (and
+       before AS path length comparison).
+
+       This option controls whether AIGP attribute propagation is allowed on
+       the session. Optionally, it can be set to <cf/originate/, which not only
+       allows AIGP attribute propagation, but also new AIGP attributes are
+       automatically attached to non-BGP routes with valid IGP metric (e.g.
+       <cf/ospf_metric1/) as they are exported to the BGP session. Default:
+       enabled for IBGP (and intra-confederation EBGP), disabled for regular
+       EBGP.
+
+       <tag><label id="bgp-cost">cost <m/number/</tag>
+       When BGP <ref id="bgp-gateway" name="gateway mode"> is <cf/recursive/
+       (mainly multihop IBGP sessions), then the distance to BGP next hop is
+       based on underlying IGP metric. This option specifies the distance to
+       BGP next hop for BGP sessions in direct gateway mode (mainly direct
+       EBGP sessions).
+
         <tag><label id="bgp-graceful-restart-c">graceful restart <m/switch/</tag>
         Although BGP graceful restart is configured mainly by protocol-wide
         <ref id="bgp-graceful-restart" name="options">, it is possible to
         configure restarting role per AFI/SAFI pair by this channel option.
         The option is ignored if graceful restart is disabled by protocol-wide
         option. Default: off in aware mode, on in full mode.
+
+       <tag><label id="bgp-long-lived-graceful-restart-c">long lived graceful restart <m/switch/</tag>
+       BGP long-lived graceful restart is configured mainly by protocol-wide
+       <ref id="bgp-long-lived-graceful-restart" name="options">, but the
+       restarting role can be set per AFI/SAFI pair by this channel option.
+       The option is ignored if long-lived graceful restart is disabled by
+       protocol-wide option. Default: off in aware mode, on in full mode.
+
+       <tag><label id="bgp-long-lived-stale-time-c">long lived stale time <m/number/</tag>
+       Like previous graceful restart channel options, this option allows to
+       set <ref id="bgp-long-lived-stale-time" name="long lived stale time">
+       per AFI/SAFI pair instead of per protocol. Default: set by protocol-wide
+       option.
  </descrip>
  
  <sect1>Attributes
@@ -2516,17 +2818,17 @@ together with their appropriate channels follows.
  some of them (marked with `<tt/O/') are optional.
  
  <descrip>
-       <tag><label id="rta-bgp-path">bgppath bgp_path/</tag>
+       <tag><label id="rta-bgp-path">bgppath bgp_path</tag>
         Sequence of AS numbers describing the AS path the packet will travel
         through when forwarded according to the particular route. In case of
         internal BGP it doesn't contain the number of the local AS.
  
-       <tag><label id="rta-bgp-local-pref">int bgp_local_pref/ [I]</tag>
+       <tag><label id="rta-bgp-local-pref">int bgp_local_pref [I]</tag>
         Local preference value used for selection among multiple BGP routes (see
         the selection rules above). It's used as an additional metric which is
         propagated through the whole local AS.
  
-       <tag><label id="rta-bgp-med">int bgp_med/ [O]</tag>
+       <tag><label id="rta-bgp-med">int bgp_med [O]</tag>
         The Multiple Exit Discriminator of the route is an optional attribute
         which is used on external (inter-AS) links to convey to an adjacent AS
         the optimal entry point into the local AS. The received attribute is
@@ -2537,28 +2839,30 @@ some of them (marked with `<tt/O/') are optional.
         external BGP instance. See <rfc id="4451"> for further discussion of
         BGP MED attribute.
  
-       <tag><label id="rta-bgp-origin">enum bgp_origin/</tag>
+       <tag><label id="rta-bgp-origin">enum bgp_origin</tag>
         Origin of the route: either <cf/ORIGIN_IGP/ if the route has originated
         in an interior routing protocol or <cf/ORIGIN_EGP/ if it's been imported
         from the <tt>EGP</tt> protocol (nowadays it seems to be obsolete) or
         <cf/ORIGIN_INCOMPLETE/ if the origin is unknown.
  
-       <tag><label id="rta-bgp-next-hop">ip bgp_next_hop/</tag>
+       <tag><label id="rta-bgp-next-hop">ip bgp_next_hop</tag>
         Next hop to be used for forwarding of packets to this destination. On
         internal BGP connections, it's an address of the originating router if
         it's inside the local AS or a boundary router the packet will leave the
         AS through if it's an exterior route, so each BGP speaker within the AS
         has a chance to use the shortest interior path possible to this point.
  
-       <tag><label id="rta-bgp-atomic-aggr">void bgp_atomic_aggr/ [O]</tag>
+       <tag><label id="rta-bgp-atomic-aggr">void bgp_atomic_aggr [O]</tag>
         This is an optional attribute which carries no value, but the sole
         presence of which indicates that the route has been aggregated from
         multiple routes by some router on the path from the originator.
  
-<!-- we don't handle aggregators right since they are of a very obscure type
-       <tag>bgp_aggregator</tag>
--->
-       <tag><label id="rta-bgp-community">clist bgp_community/ [O]</tag>
+       <tag><label id="rta-bgp-aggregator">void bgp_aggregator [O]</tag>
+       This is an optional attribute specifying AS number and IP address of the
+       BGP router that created the route by aggregating multiple BGP routes.
+       Currently, the attribute is not accessible from filters.
+
+       <tag><label id="rta-bgp-community">clist bgp_community [O]</tag>
         List of community values associated with the route. Each such value is a
         pair (represented as a <cf/pair/ data type inside the filters) of 16-bit
         integers, the first of them containing the number of the AS which
@@ -2569,14 +2873,14 @@ some of them (marked with `<tt/O/') are optional.
         freedom about which community attributes it defines and what will their
         semantics be.
  
-       <tag><label id="rta-bgp-ext-community">eclist bgp_ext_community/ [O]</tag>
+       <tag><label id="rta-bgp-ext-community">eclist bgp_ext_community [O]</tag>
         List of extended community values associated with the route. Extended
         communities have similar usage as plain communities, but they have an
         extended range (to allow 4B ASNs) and a nontrivial structure with a type
         field. Individual community values are represented using an <cf/ec/ data
         type inside the filters.
  
-       <tag><label id="rta-bgp-large-community">lclist <cf/bgp_large_community/ [O]</tag>
+       <tag><label id="rta-bgp-large-community">lclist bgp_large_community [O]</tag>
         List of large community values associated with the route. Large BGP
         communities is another variant of communities, but contrary to extended
         communities they behave very much the same way as regular communities,
@@ -2584,14 +2888,19 @@ some of them (marked with `<tt/O/') are optional.
         Individual community values are represented using an <cf/lc/ data type
         inside the filters.
  
-       <tag><label id="rta-bgp-originator-id">quad bgp_originator_id/ [I, O]</tag>
+       <tag><label id="rta-bgp-originator-id">quad bgp_originator_id [I, O]</tag>
         This attribute is created by the route reflector when reflecting the
         route and contains the router ID of the originator of the route in the
         local AS.
  
-       <tag><label id="rta-bgp-cluster-list">clist bgp_cluster_list/ [I, O]</tag>
+       <tag><label id="rta-bgp-cluster-list">clist bgp_cluster_list [I, O]</tag>
         This attribute contains a list of cluster IDs of route reflectors. Each
         route reflector prepends its cluster ID when reflecting the route.
+
+       <tag><label id="rta-bgp-aigp">void bgp_aigp [O]</tag>
+       This attribute contains accumulated IGP metric, which is a total
+       distance to the destination through multiple autonomous systems.
+       Currently, the attribute is not accessible from filters.
  </descrip>
  
  <sect1>Example
@@ -2660,7 +2969,6 @@ interfaces to be defined for them to work with.
         so the default time is set to a large value.
  
         <tag><label id="device-iface">interface <m/pattern/ [, <m/.../]</tag>
-
         By default, the Device protocol handles all interfaces without any
         configuration. Interface definitions allow to specify optional
         parameters for specific interfaces. See <ref id="proto-iface"
@@ -2701,24 +3009,17 @@ protocol device {
  <p>The Direct protocol is a simple generator of device routes for all the
  directly connected networks according to the list of interfaces provided by the
  kernel via the Device protocol. The Direct protocol supports both IPv4 and IPv6
-channels.
+channels; both can be configured simultaneously. It can also be configured with
+<ref id="ip-sadr-routes" name="IPv6 SADR"> channel instead of regular IPv6
+channel in order to be used together with SADR-enabled Babel protocol.
  
  <p>The question is whether it is a good idea to have such device routes in BIRD
  routing table. OS kernel usually handles device routes for directly connected
  networks by itself so we don't need (and don't want) to export these routes to
  the kernel protocol. OSPF protocol creates device routes for its interfaces
-itself and BGP protocol is usually used for exporting aggregate routes. Although
-there are some use cases that use the direct protocol (like abusing eBGP as an
-IGP routing protocol), in most cases it is not needed to have these device
-routes in BIRD routing table and to use the direct protocol.
-
-<p>There is one notable case when you definitely want to use the direct protocol
--- running BIRD on BSD systems. Having high priority device routes for directly
-connected networks from the direct protocol protects kernel device routes from
-being overwritten or removed by IGP routes during some transient network
-conditions, because a lower priority IGP route for the same network is not
-exported to the kernel routing table. This is an issue on BSD systems only, as
-on Linux systems BIRD cannot change non-BIRD route in the kernel routing table.
+itself and BGP protocol is usually used for exporting aggregate routes. But the
+Direct protocol is necessary for distance-vector protocols like RIP or Babel to
+announce local networks.
  
  <p>There are just few configuration options for the Direct protocol:
  
@@ -2764,14 +3065,10 @@ interface) or whether an `alien' route has been added by someone else (depending
  on the <cf/learn/ switch, such routes are either ignored or accepted to our
  table).
  
-<p>Unfortunately, there is one thing that makes the routing table synchronization
-a bit more complicated. In the kernel routing table there are also device routes
-for directly connected networks. These routes are usually managed by OS itself
-(as a part of IP address configuration) and we don't want to touch that. They
-are completely ignored during the scan of the kernel tables and also the export
-of device routes from BIRD tables to kernel routing tables is restricted to
-prevent accidental interference. This restriction can be disabled using
-<cf/device routes/ switch.
+<p>Note that routes created by OS kernel itself, namely direct routes
+representing IP subnets of associated interfaces, are not imported even with
+<cf/learn/ enabled. You can use <ref id="direct" name="Direct protocol"> to
+generate these direct routes.
  
  <p>If your OS supports only a single routing table, you can configure only one
  instance of the Kernel protocol. If it supports multiple tables (in order to
@@ -2785,8 +3082,10 @@ kernel protocols to the same routing table and changing route destination
  (gateway) in an export filter of a kernel protocol does not work. Both
  limitations can be overcome using another routing table and the pipe protocol.
  
-<p>The Kernel protocol supports both IPv4 and IPv6 channels; only one of them
-can be configured in each protocol instance.
+<p>The Kernel protocol supports both IPv4 and IPv6 channels; only one channel
+can be configured in each protocol instance. On Linux, it also supports <ref
+id="ip-sadr-routes" name="IPv6 SADR"> and <ref id="mpls-routes" name="MPLS">
+channels.
  
  <sect1>Configuration
  <label id="krt-config">
@@ -2848,26 +3147,26 @@ translated to appropriate system (and OS-specific) route attributes. We support
  these attributes:
  
  <descrip>
-       <tag><label id="rta-krt-source">int krt_source/</tag>
+       <tag><label id="rta-krt-source">int krt_source</tag>
         The original source of the imported kernel route. The value is
         system-dependent. On Linux, it is a value of the protocol field of the
         route. See /etc/iproute2/rt_protos for common values. On BSD, it is
         based on STATIC and PROTOx flags. The attribute is read-only.
  
-       <tag><label id="rta-krt-metric">int krt_metric/</tag> (Linux)
+       <tag><label id="rta-krt-metric">int krt_metric</tag> (Linux)
         The kernel metric of the route. When multiple same routes are in a
         kernel routing table, the Linux kernel chooses one with lower metric.
         Note that preferred way to set kernel metric is to use protocol option
         <cf/metric/, unless per-route metric values are needed.
  
-       <tag><label id="rta-krt-prefsrc">ip krt_prefsrc/</tag> (Linux)
+       <tag><label id="rta-krt-prefsrc">ip krt_prefsrc</tag> (Linux)
         The preferred source address. Used in source address selection for
         outgoing packets. Has to be one of the IP addresses of the router.
  
-       <tag><label id="rta-krt-realm">int krt_realm/</tag> (Linux)
+       <tag><label id="rta-krt-realm">int krt_realm</tag> (Linux)
         The realm of the route. Can be used for traffic classification.
  
-       <tag><label id="rta-krt-scope">int krt_scope/</tag> (Linux IPv4)
+       <tag><label id="rta-krt-scope">int krt_scope</tag> (Linux IPv4)
         The scope of the route. Valid values are 0-254, although Linux kernel
         may reject some values depending on route type and nexthop. It is
         supposed to represent `indirectness' of the route, where nexthops of
@@ -2925,6 +3224,83 @@ protocol kernel {                # Secondary routing table
  </code>
  
  
+<sect>MRT
+<label id="mrt">
+
+<sect1>Introduction
+<label id="mrt-intro">
+
+<p>The MRT protocol is a component responsible for handling the Multi-Threaded
+Routing Toolkit (MRT) routing information export format, which is mainly used
+for collecting and analyzing routing information from BGP routers. The MRT
+protocol can be configured to do periodic dumps of routing tables; created MRT
+files can be analyzed later by other tools. Independent MRT table dumps can also
+be requested from BIRD client. There is also a feature to save incoming BGP
+messages in MRT files, but it is controlled by <ref id="proto-mrtdump"
+name="mrtdump"> options independently of MRT protocol, although that might
+change in the future.
+
+BIRD implements the main MRT format specification as defined in <rfc id="6396">
+and the ADD_PATH extension (<rfc id="8050">).
+
+<sect1>Configuration
+<label id="mrt-config">
+
+<p>MRT configuration consists of several statements describing routing table
+dumps. Multiple independent periodic dumps can be done as multiple MRT protocol
+instances. The MRT protocol does not use channels. There are two mandatory
+statements: <cf/filename/ and <cf/period/.
+
+The behavior can be modified by following configuration parameters:
+
+<descrip>
+       <tag><label id="mrt-table">table <m/name/ | "<m/pattern/"</tag>
+       Specify a routing table (or a set of routing tables described by a
+       wildcard pattern) that are to be dumped by the MRT protocol instance.
+       Default: the master table.
+
+       <tag><label id="mrt-filter">filter { <m/filter commands/ }</tag>
+       The MRT protocol allows to specify a filter that is applied to routes as
+       they are dumped. Rejected routes are ignored and not saved to the MRT
+       dump file. Default: no filter.
+
+       <tag><label id="mrt-where">where <m/filter expression/</tag>
+       An alternative way to specify a filter for the MRT protocol.
+
+       <tag><label id="mrt-filename">filename "<m/filename/"</tag>
+       Specify a filename for MRT dump files. The filename may contain time
+       format sequences with <it/strftime(3)/ notation (see <it/man strftime/
+       for details), there is also a sequence "%N" that is expanded to the name
+       of dumped table. Therefore, each periodic dump of each table can be
+       saved to a different file. Mandatory, see example below.
+
+       <tag><label id="mrt-period">period <m/number/</tag>
+       Specify the time interval (in seconds) between periodic dumps.
+       Mandatory.
+
+       <tag><label id="mrt-always-add-path">always add path <m/switch/</tag>
+       The MRT format uses special records (specified in <rfc id="8050">) for
+       routes received using BGP ADD_PATH extension to keep Path ID, while
+       other routes use regular records. This has the advantage of better
+       compatibility with tools that do not know special records, but it loses
+       information about which route is the best route. When this option is
+       enabled, both ADD_PATH and non-ADD_PATH routes are stored in ADD_PATH
+       records and order of routes for network is preserved. Default: disabled.
+</descrip>
+
+<sect1>Example
+<label id="mrt-exam">
+
+<p><code>
+protocol mrt {
+       table "tab*";
+       where source = RTS_BGP;
+       filename "/var/log/bird/%N_%F_%T.mrt";
+       period 300;
+}
+</code>
+
+
  <sect>OSPF
  <label id="ospf">
  
@@ -2986,6 +3362,8 @@ protocol ospf [v2|v3] &lt;name&gt; {
         tick &lt;num&gt;;
         ecmp &lt;switch&gt; [limit &lt;num&gt;];
         merge external &lt;switch&gt;;
+       graceful restart &lt;switch&gt;|aware;
+       graceful restart time &lt;num&gt;;
         area &lt;id&gt; {
                 stub;
                 nssa;
@@ -3129,6 +3507,31 @@ protocol ospf [v2|v3] &lt;name&gt; {
         from different LSAs are treated as separate even if they represent the
         same destination. Default value is no.
  
+       <tag><label id="ospf-graceful-restart">graceful restart <m/switch/|aware</tag>
+       When an OSPF instance is restarted, neighbors break adjacencies and
+       recalculate their routing tables, which disrupts packet forwarding even
+       when the forwarding plane of the restarting router remains intact.
+       <rfc id="3623"> specifies a graceful restart mechanism to alleviate this
+       issue. For OSPF graceful restart, the restarting router originates
+       Grace-LSAs, announcing its intent to do a graceful restart. Neighbors
+       receiving these LSAs enter helper mode, in which they ignore breakdown
+       of adjacencies, behave as if nothing is happening and keep old routes.
+       When adjacencies are reestablished, the restarting router flushes
+       Grace-LSAs and graceful restart is ended.
+
+       This option controls the graceful restart mechanism. It has three
+       states: Disabled, when no support is provided. Aware, when graceful
+       restart helper mode is supported, but no local graceful restart is
+       allowed (i.e. helper-only role). Enabled, when the full graceful restart
+       support is provided (i.e. both restarting and helper role). Note that
+       proper support for local graceful restart requires also configuration of
+       other protocols. Default: aware.
+
+       <tag><label id="ospf-graceful-restart-time">graceful restart time <m/num/</tag>
+       The restart time is announced in the Grace-LSA and specifies how long
+       neighbors should wait for proper end of the graceful restart before
+       exiting helper mode prematurely. Default: 120 seconds.
+
         <tag><label id="ospf-area">area <M>id</M></tag>
         This defines an OSPF area with given area ID (an integer or an IPv4
         address, similarly to a router ID). The most important area is the
@@ -3247,6 +3650,11 @@ protocol ospf [v2|v3] &lt;name&gt; {
         Specifies interval in seconds between retransmissions of unacknowledged
         updates. Default value is 5.
  
+       <tag><label id="ospf-transmit-delay">transmit delay <M>num</M></tag>
+       Specifies estimated transmission delay of link state updates sent over
+       the interface. The value is added to LSA age of LSAs propagated through
+       it. Default value is 1.
+
         <tag><label id="ospf-priority">priority <M>num</M></tag>
         On every multiple access network (e.g., the Ethernet) Designated Router
         and Backup Designated Router are elected. These routers have some special
@@ -3267,16 +3675,6 @@ protocol ospf [v2|v3] &lt;name&gt; {
         <m/dead/ seconds, it will consider the neighbor down. If both directives
         <cf/dead count/ and <cf/dead/ are used, <cf/dead/ has precedence.
  
-       <tag><label id="ospf-secondary">secondary <M>switch</M></tag>
-       On BSD systems, older versions of BIRD supported OSPFv2 only for the
-       primary IP address of an interface, other IP ranges on the interface
-       were handled as stub networks. Since v1.4.1, regular operation on
-       secondary IP addresses is supported, but disabled by default for
-       compatibility. This option allows to enable it. The option is a
-       transitional measure, will be removed in the next major release as the
-       behavior will be changed. On Linux systems, the option is irrelevant, as
-       operation on non-primary addresses is already the regular behavior.
-
         <tag><label id="ospf-rx-buffer">rx buffer <M>num</M></tag>
         This option allows specifying the size of buffers used for packet
         processing. The buffer size should be bigger than maximal size of any
@@ -3430,8 +3828,15 @@ protocol ospf [v2|v3] &lt;name&gt; {
  with internal <cf/metric/, a <cf/metric of type 2/ is always longer than any
  <cf/metric of type 1/ or any <cf/internal metric/. <cf/Internal metric/ or
  <cf/metric of type 1/ is stored in attribute <cf/ospf_metric1/, <cf/metric type
-2/ is stored in attribute <cf/ospf_metric2/. If you specify both metrics only
-metric1 is used.
+2/ is stored in attribute <cf/ospf_metric2/.
+
+When both metrics are specified then <cf/metric of type 2/ is used. This is
+relevant e.g. when a type 2 external route is propagated from one OSPF domain to
+another and <cf/ospf_metric1/ is an internal distance to the original ASBR,
+while <cf/ospf_metric2/ stores the type 2 metric. Note that in such cases if
+<cf/ospf_metric1/ is non-zero then <cf/ospf_metric2/ is increased by one to
+ensure monotonicity of metric, as internal distance is reset to zero when an
+external route is announced.
  
  <p>Each external route can also carry attribute <cf/ospf_tag/ which is a 32-bit
  integer which is used when exporting routes to other protocols; otherwise, it
@@ -3508,6 +3913,54 @@ protocol ospf MyOSPF {
  }
  </code>
  
+<sect>Perf
+<label id="perf">
+
+<sect1>Introduction
+<label id="perf-intro">
+
+<p>The Perf protocol is a generator of fake routes together with a time measurement
+framework. Its purpose is to check BIRD performance and to benchmark filters.
+
+<p>Import mode of this protocol runs in several steps. In each step, it generates 2^x routes,
+imports them into the appropriate table and withdraws them. The exponent x is configurable.
+It runs the benchmark several times for the same x, then it increases x by one
+until it gets too high, then it stops.
+
+<p>Export mode of this protocol repeats route refresh from table and measures how long it takes.
+
+<p>Output data is logged on info level. There is a Perl script <cf>proto/perf/parse.pl</cf>
+which may be handy to parse the data and draw some plots.
+
+<p>Implementation of this protocol is experimental. Use with caution and do not keep
+any instance of Perf in production configs for a long time. The config interface is also unstable
+and may change in future versions without warning.
+
+<sect1>Configuration
+<label id="perf-config">
+
+<p><descrip>
+       <tag><label id="perf-mode">mode import|export</tag>
+       Set perf mode. Default: import
+
+       <tag><label id="perf-repeat">repeat <m/number/</tag>
+       Run this number of iterations of the benchmark for every exponent step. Default: 4
+
+       <tag><label id="perf-from">exp from <m/number/</tag>
+       Begin benchmarking on this exponent for number of generated routes in one step.
+       Default: 10
+
+       <tag><label id="perf-to">exp to <m/number/</tag>
+       Stop benchmarking on this exponent. Default: 20
+
+       <tag><label id="perf-threshold-min">threshold min <m/time/</tag>
+       If a run for the given exponent took less than this time for route import,
+       increase the exponent immediately. Default: 1 ms
+
+       <tag><label id="perf-threshold-max">threshold max <m/time/</tag>
+       If every run for the given exponent took at least this time for route import,
+       stop benchmarking. Default: 500 ms
+</descrip>
  
  <sect>Pipe
  <label id="pipe">
@@ -3523,18 +3976,9 @@ the filters. Export filters control export of routes from the primary table to
  the secondary one, import filters control the opposite direction. Both tables
  must be of the same nettype.
  
-<p>The Pipe protocol may work in the transparent mode mode or in the opaque
-mode. In the transparent mode, the Pipe protocol retransmits all routes from
-one table to the other table, retaining their original source and attributes.
-If import and export filters are set to accept, then both tables would have
-the same content. The transparent mode is the default mode.
-
-<p>In the opaque mode, the Pipe protocol retransmits optimal route from one
-table to the other table in a similar way like other protocols send and receive
-routes. Retransmitted route will have the source set to the Pipe protocol, which
-may limit access to protocol specific route attributes. This mode is mainly for
-compatibility, it is not suggested for new configs. The mode can be changed by
-<tt/mode/ option.
+<p>The Pipe protocol retransmits all routes from one table to the other table,
+retaining their original source and attributes. If import and export filters
+are set to accept, then both tables would have the same content.
  
  <p>The primary use of multiple routing tables and the Pipe protocol is for
  policy routing, where handling of a single packet doesn't depend only on its
@@ -3599,13 +4043,13 @@ protocol kernel kern2 {
  }
  
  protocol bgp bgp1 {                    # The outside connections
-       ipv4 { table as1; export all; };
+       ipv4 { table as1; import all; export all; };
         local as 1;
         neighbor 192.168.0.1 as 1001;
  }
  
  protocol bgp bgp2 {
-       ipv4 { table as2; export all; };
+       ipv4 { table as2; import all; export all; };
         local as 2;
         neighbor 10.0.0.1 as 1002;
  }
@@ -3697,7 +4141,7 @@ definitions, prefix definitions and DNS definitions:
         RAdv protocol could be configured to change its behavior based on
         availability of routes. When this option is used, the protocol waits in
         suppressed state until a <it/trigger route/ (for the specified network)
-       is exported to the protocol, the protocol also returnsd to suppressed
+       is exported to the protocol, the protocol also returns to suppressed
         state if the <it/trigger route/ disappears. Note that route export
         depends on specified export filter, as usual. This option could be used,
         e.g., for handling failover in multihoming scenarios.
@@ -3741,6 +4185,12 @@ definitions, prefix definitions and DNS definitions:
         The minimum delay between two consecutive router advertisements, in
         seconds. Default: 3
  
+       <tag><label id="radv-solicited-ra-unicast">solicited ra unicast <m/switch/</tag>
+       Solicited router advertisements are usually sent to all-nodes multicast
+       group like unsolicited ones, but the router can be configured to send
+       them as unicast directly to soliciting nodes instead. This is especially
+       useful on wireless networks (see <rfc id="7772">). Default: no
+
         <tag><label id="radv-iface-managed">managed <m/switch/</tag>
         This option specifies whether hosts should use DHCPv6 for IP address
         configuration. Default: no
@@ -3889,13 +4339,13 @@ definitions, prefix definitions and DNS definitions:
  <p>RAdv defines two route attributes:
  
  <descrip>
-       <tag><label id="rta-ra-preference">enum ra_preference/</tag>
+       <tag><label id="rta-ra-preference">enum ra_preference</tag>
         The preference of the route. The value can be <it/RA_PREF_LOW/,
         <it/RA_PREF_MEDIUM/ or <it/RA_PREF_HIGH/. If the attribute is not set,
         the <ref id="radv-iface-route-preference" name="route preference">
         option is used.
  
-       <tag><label id="rta-ra-lifetime">int ra_lifetime/</tag>
+       <tag><label id="rta-ra-lifetime">int ra_lifetime</tag>
         The advertised lifetime of the route, in seconds. The special value of
         0xffffffff represents infinity. If the attribute is not set, the
         <ref id="radv-iface-route-lifetime" name="route lifetime">
@@ -4201,13 +4651,13 @@ protocol rip [ng] [&lt;name&gt;] {
  <p>RIP defines two route attributes:
  
  <descrip>
-       <tag>int <cf/rip_metric/</tag>
+       <tag><label id="rta-rip-metric">int rip_metric</tag>
         RIP metric of the route (ranging from 0 to <cf/infinity/). When routes
         from different RIP instances are available and all of them have the same
         preference, BIRD prefers the route with lowest <cf/rip_metric/. When a
         non-RIP route is exported to RIP, the default metric is 1.
  
-       <tag><label id="rta-rip-tag">int rip_tag/</tag>
+       <tag><label id="rta-rip-tag">int rip_tag</tag>
         RIP route tag: a 16-bit number which can be used to carry additional
         information with the route (for example, an originating AS number in
         case of external routes). When a non-RIP route is exported to RIP, the
@@ -4237,6 +4687,7 @@ protocol rip {
  
  
  <sect>RPKI
+<label id="rpki">
  
  <sect1>Introduction
  
@@ -4377,7 +4828,7 @@ protocol rpki {
  filter peer_in_v4 {
         if (roa_check(r4, net, bgp_path.last) = ROA_INVALID) then
         {
-               print "Ignore invalid ROA ", net, " for ASN ", bgp_path.last;
+               print "Ignore RPKI invalid ", net, " for ASN ", bgp_path.last;
                 reject;
         }
         accept;
@@ -4387,7 +4838,10 @@ protocol bgp {
         debug all;
         local as 65000;
         neighbor 192.168.2.1 as 65001;
-       ipv4 { import filter peer_in_v4; };
+       ipv4 {
+               import filter peer_in_v4;
+               export none;
+       };
  }
  </code>