]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man7/ip.7
getrlimit.2, signalfd.2, statfs.2, tee.2, dlopen.3, duplocale.3, ftw.3, get_nprocs...
[thirdparty/man-pages.git] / man7 / ip.7
1 '\" t
2 .\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
3 .\"
4 .\" %%%LICENSE_START(VERBATIM_ONE_PARA)
5 .\" Permission is granted to distribute possibly modified copies
6 .\" of this page provided the header is included verbatim,
7 .\" and in case of nontrivial modification author and date
8 .\" of the modification is added to the header.
9 .\" %%%LICENSE_END
10 .\"
11 .\" $Id: ip.7,v 1.19 2000/12/20 18:10:31 ak Exp $
12 .\"
13 .\" FIXME The following socket options are yet to be documented
14 .\"
15 .\" IP_XFRM_POLICY (2.5.48)
16 .\" Needs CAP_NET_ADMIN
17 .\"
18 .\" IP_IPSEC_POLICY (2.5.47)
19 .\" Needs CAP_NET_ADMIN
20 .\"
21 .\" IP_PASSSEC (2.6.17)
22 .\" Boolean
23 .\" commit 2c7946a7bf45ae86736ab3b43d0085e43947945c
24 .\" Author: Catherine Zhang <cxzhang@watson.ibm.com>
25 .\"
26 .\" IP_MINTTL (2.6.34)
27 .\" commit d218d11133d888f9745802146a50255a4781d37a
28 .\" Author: Stephen Hemminger <shemminger@vyatta.com>
29 .\"
30 .\" MCAST_JOIN_GROUP (2.4.22 / 2.6)
31 .\"
32 .\" MCAST_BLOCK_SOURCE (2.4.22 / 2.6)
33 .\"
34 .\" MCAST_UNBLOCK_SOURCE (2.4.22 / 2.6)
35 .\"
36 .\" MCAST_LEAVE_GROUP (2.4.22 / 2.6)
37 .\"
38 .\" MCAST_JOIN_SOURCE_GROUP (2.4.22 / 2.6)
39 .\"
40 .\" MCAST_LEAVE_SOURCE_GROUP (2.4.22 / 2.6)
41 .\"
42 .\" MCAST_MSFILTER (2.4.22 / 2.6)
43 .\"
44 .\" IP_UNICAST_IF (3.4)
45 .\" commit 76e21053b5bf33a07c76f99d27a74238310e3c71
46 .\" Author: Erich E. Hoover <ehoover@mines.edu>
47 .\"
48 .TH IP 7 2017-03-13 "Linux" "Linux Programmer's Manual"
49 .SH NAME
50 ip \- Linux IPv4 protocol implementation
51 .SH SYNOPSIS
52 .B #include <sys/socket.h>
53 .br
54 .\" .B #include <net/netinet.h> -- does not exist anymore
55 .\" .B #include <linux/errqueue.h> -- never include <linux/foo.h>
56 .B #include <netinet/in.h>
57 .br
58 .B #include <netinet/ip.h> \fR/* superset of previous */
59 .PP
60 .IB tcp_socket " = socket(AF_INET, SOCK_STREAM, 0);"
61 .br
62 .IB udp_socket " = socket(AF_INET, SOCK_DGRAM, 0);"
63 .br
64 .IB raw_socket " = socket(AF_INET, SOCK_RAW, " protocol ");"
65 .SH DESCRIPTION
66 Linux implements the Internet Protocol, version 4,
67 described in RFC\ 791 and RFC\ 1122.
68 .B ip
69 contains a level 2 multicasting implementation conforming to RFC\ 1112.
70 It also contains an IP router including a packet filter.
71 .PP
72 The programming interface is BSD-sockets compatible.
73 For more information on sockets, see
74 .BR socket (7).
75 .PP
76 An IP socket is created using
77 .BR socket (2):
78 .PP
79 socket(AF_INET, socket_type, protocol);
80 .PP
81 Valid socket types are
82 .B SOCK_STREAM
83 to open a
84 .BR tcp (7)
85 socket,
86 .B SOCK_DGRAM
87 to open a
88 .BR udp (7)
89 socket, or
90 .B SOCK_RAW
91 to open a
92 .BR raw (7)
93 socket to access the IP protocol directly.
94 .I protocol
95 is the IP protocol in the IP header to be received or sent.
96 The only valid values for
97 .I protocol
98 are 0 and
99 .B IPPROTO_TCP
100 for TCP sockets, and 0 and
101 .B IPPROTO_UDP
102 for UDP sockets.
103 For
104 .B SOCK_RAW
105 you may specify a valid IANA IP protocol defined in
106 RFC\ 1700 assigned numbers.
107 .PP
108 When a process wants to receive new incoming packets or connections, it
109 should bind a socket to a local interface address using
110 .BR bind (2).
111 In this case, only one IP socket may be bound to any given local
112 (address, port) pair.
113 When
114 .B INADDR_ANY
115 is specified in the bind call, the socket will be bound to
116 .I all
117 local interfaces.
118 When
119 .BR listen (2)
120 is called on an unbound socket, the socket is automatically bound
121 to a random free port with the local address set to
122 .BR INADDR_ANY .
123 When
124 .BR connect (2)
125 is called on an unbound socket, the socket is automatically bound
126 to a random free port or to a usable shared port with the local address
127 set to
128 .BR INADDR_ANY .
129 .PP
130 A TCP local socket address that has been bound is unavailable for
131 some time after closing, unless the
132 .B SO_REUSEADDR
133 flag has been set.
134 Care should be taken when using this flag as it makes TCP less reliable.
135 .SS Address format
136 An IP socket address is defined as a combination of an IP interface
137 address and a 16-bit port number.
138 The basic IP protocol does not supply port numbers; they
139 are implemented by higher level protocols like
140 .BR udp (7)
141 and
142 .BR tcp (7).
143 On raw sockets
144 .I sin_port
145 is set to the IP protocol.
146 .PP
147 .in +4n
148 .EX
149 struct sockaddr_in {
150 sa_family_t sin_family; /* address family: AF_INET */
151 in_port_t sin_port; /* port in network byte order */
152 struct in_addr sin_addr; /* internet address */
153 };
154
155 /* Internet address. */
156 struct in_addr {
157 uint32_t s_addr; /* address in network byte order */
158 };
159 .EE
160 .in
161 .PP
162 .I sin_family
163 is always set to
164 .BR AF_INET .
165 This is required; in Linux 2.2 most networking functions return
166 .B EINVAL
167 when this setting is missing.
168 .I sin_port
169 contains the port in network byte order.
170 The port numbers below 1024 are called
171 .IR "privileged ports"
172 (or sometimes:
173 .IR "reserved ports" ).
174 Only a privileged process
175 (on Linux: a process that has the
176 .B CAP_NET_BIND_SERVICE
177 capability in the user namespace governing its network namespace) may
178 .BR bind (2)
179 to these sockets.
180 Note that the raw IPv4 protocol as such has no concept of a
181 port; ports are implemented only by higher protocols like
182 .BR tcp (7)
183 and
184 .BR udp (7).
185 .PP
186 .I sin_addr
187 is the IP host address.
188 The
189 .I s_addr
190 member of
191 .I struct in_addr
192 contains the host interface address in network byte order.
193 .I in_addr
194 should be assigned one of the
195 .BR INADDR_*
196 values (e.g.,
197 .BR INADDR_ANY )
198 or set using the
199 .BR inet_aton (3),
200 .BR inet_addr (3),
201 .BR inet_makeaddr (3)
202 library functions or directly with the name resolver (see
203 .BR gethostbyname (3)).
204 .PP
205 IPv4 addresses are divided into unicast, broadcast,
206 and multicast addresses.
207 Unicast addresses specify a single interface of a host,
208 broadcast addresses specify all hosts on a network, and multicast
209 addresses address all hosts in a multicast group.
210 Datagrams to broadcast addresses can be sent or received only when the
211 .B SO_BROADCAST
212 socket flag is set.
213 In the current implementation, connection-oriented sockets are allowed
214 to use only unicast addresses.
215 .\" Leave a loophole for XTP @)
216 .PP
217 Note that the address and the port are always stored in
218 network byte order.
219 In particular, this means that you need to call
220 .BR htons (3)
221 on the number that is assigned to a port.
222 All address/port manipulation
223 functions in the standard library work in network byte order.
224 .PP
225 There are several special addresses:
226 .B INADDR_LOOPBACK
227 (127.0.0.1)
228 always refers to the local host via the loopback device;
229 .B INADDR_ANY
230 (0.0.0.0)
231 means any address for binding;
232 .B INADDR_BROADCAST
233 (255.255.255.255)
234 means any host and has the same effect on bind as
235 .B INADDR_ANY
236 for historical reasons.
237 .SS Socket options
238 IP supports some protocol-specific socket options that can be set with
239 .BR setsockopt (2)
240 and read with
241 .BR getsockopt (2).
242 The socket option level for IP is
243 .BR IPPROTO_IP .
244 .\" or SOL_IP on Linux
245 A boolean integer flag is zero when it is false, otherwise true.
246 .PP
247 When an invalid socket option is specified,
248 .BR getsockopt (2)
249 and
250 .BR setsockopt (2)
251 fail with the error
252 .BR ENOPROTOOPT .
253 .TP
254 .BR IP_ADD_MEMBERSHIP " (since Linux 1.2)"
255 Join a multicast group.
256 Argument is an
257 .I ip_mreqn
258 structure.
259 .PP
260 .in +4n
261 .EX
262 struct ip_mreqn {
263 struct in_addr imr_multiaddr; /* IP multicast group
264 address */
265 struct in_addr imr_address; /* IP address of local
266 interface */
267 int imr_ifindex; /* interface index */
268 };
269 .EE
270 .in
271 .PP
272 .I imr_multiaddr
273 contains the address of the multicast group the application
274 wants to join or leave.
275 It must be a valid multicast address
276 .\" (i.e., within the 224.0.0.0-239.255.255.255 range)
277 (or
278 .BR setsockopt (2)
279 fails with the error
280 .BR EINVAL ).
281 .I imr_address
282 is the address of the local interface with which the system
283 should join the multicast group; if it is equal to
284 .BR INADDR_ANY ,
285 an appropriate interface is chosen by the system.
286 .I imr_ifindex
287 is the interface index of the interface that should join/leave the
288 .I imr_multiaddr
289 group, or 0 to indicate any interface.
290 .IP
291 The
292 .I ip_mreqn
293 structure is available only since Linux 2.2.
294 For compatibility, the old
295 .I ip_mreq
296 structure (present since Linux 1.2) is still supported;
297 it differs from
298 .I ip_mreqn
299 only by not including the
300 .I imr_ifindex
301 field.
302 (The kernel determines which structure is being passed based
303 on the size passed in
304 .IR optlen .)
305 .IP
306 .B IP_ADD_MEMBERSHIP
307 is valid only for
308 .BR setsockopt (2).
309 .\"
310 .TP
311 .BR IP_ADD_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
312 Join a multicast group and allow receiving data only
313 from a specified source.
314 Argument is an
315 .I ip_mreq_source
316 structure.
317 .PP
318 .in +4n
319 .EX
320 struct ip_mreq_source {
321 struct in_addr imr_multiaddr; /* IP multicast group
322 address */
323 struct in_addr imr_interface; /* IP address of local
324 interface */
325 struct in_addr imr_sourceaddr; /* IP address of
326 multicast source */
327 };
328 .EE
329 .in
330 .PP
331 The
332 .I ip_mreq_source
333 structure is similar to
334 .I ip_mreqn
335 described under
336 .BR IP_ADD_MEMBERSHIP .
337 The
338 .I imr_multiaddr
339 field contains the address of the multicast group the application
340 wants to join or leave.
341 The
342 .I imr_interface
343 field is the address of the local interface with which
344 the system should join the multicast group.
345 Finally, the
346 .I imr_sourceaddr
347 field contains the address of the source the
348 application wants to receive data from.
349 .IP
350 This option can be used multiple times to allow
351 receiving data from more than one source.
352 .TP
353 .BR IP_BIND_ADDRESS_NO_PORT " (since Linux 4.2)"
354 .\" commit 90c337da1524863838658078ec34241f45d8394d
355 Inform the kernel to not reserve an ephemeral port when using
356 .BR bind (2)
357 with a port number of 0.
358 The port will later be automatically chosen at
359 .BR connect (2)
360 time,
361 in a way that allows sharing a source port as long as the 4-tuple is unique.
362 .TP
363 .BR IP_BLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
364 Stop receiving multicast data from a specific source in a given group.
365 This is valid only after the application has subscribed
366 to the multicast group using either
367 .BR IP_ADD_MEMBERSHIP
368 or
369 .BR IP_ADD_SOURCE_MEMBERSHIP .
370 .IP
371 Argument is an
372 .I ip_mreq_source
373 structure as described under
374 .BR IP_ADD_SOURCE_MEMBERSHIP .
375 .TP
376 .BR IP_DROP_MEMBERSHIP " (since Linux 1.2)"
377 Leave a multicast group.
378 Argument is an
379 .I ip_mreqn
380 or
381 .I ip_mreq
382 structure similar to
383 .BR IP_ADD_MEMBERSHIP .
384 .TP
385 .BR IP_DROP_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
386 Leave a source-specific group\(emthat is, stop receiving data from
387 a given multicast group that comes from a given source.
388 If the application has subscribed to multiple sources within
389 the same group, data from the remaining sources will still be delivered.
390 To stop receiving data from all sources at once, use
391 .BR IP_DROP_MEMBERSHIP .
392 .IP
393 Argument is an
394 .I ip_mreq_source
395 structure as described under
396 .BR IP_ADD_SOURCE_MEMBERSHIP .
397 .TP
398 .BR IP_FREEBIND " (since Linux 2.4)"
399 .\" Precisely: 2.4.0-test10
400 If enabled, this boolean option allows binding to an IP address
401 that is nonlocal or does not (yet) exist.
402 This permits listening on a socket,
403 without requiring the underlying network interface or the
404 specified dynamic IP address to be up at the time that
405 the application is trying to bind to it.
406 This option is the per-socket equivalent of the
407 .IR ip_nonlocal_bind
408 .I /proc
409 interface described below.
410 .TP
411 .BR IP_HDRINCL " (since Linux 2.0)"
412 If enabled,
413 the user supplies an IP header in front of the user data.
414 Valid only for
415 .B SOCK_RAW
416 sockets; see
417 .BR raw (7)
418 for more information.
419 When this flag is enabled, the values set by
420 .BR IP_OPTIONS ,
421 .BR IP_TTL ,
422 and
423 .B IP_TOS
424 are ignored.
425 .TP
426 .BR IP_MSFILTER " (since Linux 2.4.22 / 2.5.68)"
427 This option provides access to the advanced full-state filtering API.
428 Argument is an
429 .I ip_msfilter
430 structure.
431 .PP
432 .in +4n
433 .EX
434 struct ip_msfilter {
435 struct in_addr imsf_multiaddr; /* IP multicast group
436 address */
437 struct in_addr imsf_interface; /* IP address of local
438 interface */
439 uint32_t imsf_fmode; /* Filter-mode */
440
441 uint32_t imsf_numsrc; /* Number of sources in
442 the following array */
443 struct in_addr imsf_slist[1]; /* Array of source
444 addresses */
445 };
446 .EE
447 .in
448 .PP
449 There are two macros,
450 .BR MCAST_INCLUDE
451 and
452 .BR MCAST_EXCLUDE ,
453 which can be used to specify the filtering mode.
454 Additionally, the
455 .BR IP_MSFILTER_SIZE (n)
456 macro exists to determine how much memory is needed to store an
457 .I ip_msfilter
458 structure with
459 .I n
460 sources in the source list.
461 .IP
462 For the full description of multicast source filtering
463 refer to RFC 3376.
464 .TP
465 .BR IP_MTU " (since Linux 2.2)"
466 .\" Precisely: 2.1.124
467 Retrieve the current known path MTU of the current socket.
468 Returns an integer.
469 .IP
470 .B IP_MTU
471 is valid only for
472 .BR getsockopt (2)
473 and can be employed only when the socket has been connected.
474 .TP
475 .BR IP_MTU_DISCOVER " (since Linux 2.2)"
476 .\" Precisely: 2.1.124
477 Set or retrieve the Path MTU Discovery setting for a socket.
478 When enabled, Linux will perform Path MTU Discovery
479 as defined in RFC\ 1191 on
480 .B SOCK_STREAM
481 sockets.
482 For
483 .RB non- SOCK_STREAM
484 sockets,
485 .B IP_PMTUDISC_DO
486 forces the don't-fragment flag to be set on all outgoing packets.
487 It is the user's responsibility to packetize the data
488 in MTU-sized chunks and to do the retransmits if necessary.
489 The kernel will reject (with
490 .BR EMSGSIZE )
491 datagrams that are bigger than the known path MTU.
492 .B IP_PMTUDISC_WANT
493 will fragment a datagram if needed according to the path MTU,
494 or will set the don't-fragment flag otherwise.
495 .IP
496 The system-wide default can be toggled between
497 .B IP_PMTUDISC_WANT
498 and
499 .B IP_PMTUDISC_DONT
500 by writing (respectively, zero and nonzero values) to the
501 .I /proc/sys/net/ipv4/ip_no_pmtu_disc
502 file.
503 .TS
504 tab(:);
505 c l
506 l l.
507 Path MTU discovery value:Meaning
508 IP_PMTUDISC_WANT:Use per-route settings.
509 IP_PMTUDISC_DONT:Never do Path MTU Discovery.
510 IP_PMTUDISC_DO:Always do Path MTU Discovery.
511 IP_PMTUDISC_PROBE:Set DF but ignore Path MTU.
512 .TE
513 .sp 1
514 When PMTU discovery is enabled, the kernel automatically keeps track of
515 the path MTU per destination host.
516 When it is connected to a specific peer with
517 .BR connect (2),
518 the currently known path MTU can be retrieved conveniently using the
519 .B IP_MTU
520 socket option (e.g., after an
521 .B EMSGSIZE
522 error occurred).
523 The path MTU may change over time.
524 For connectionless sockets with many destinations,
525 the new MTU for a given destination can also be accessed using the
526 error queue (see
527 .BR IP_RECVERR ).
528 A new error will be queued for every incoming MTU update.
529 .IP
530 While MTU discovery is in progress, initial packets from datagram sockets
531 may be dropped.
532 Applications using UDP should be aware of this and not
533 take it into account for their packet retransmit strategy.
534 .IP
535 To bootstrap the path MTU discovery process on unconnected sockets, it
536 is possible to start with a big datagram size
537 (headers up to 64 kilobytes long) and let it shrink by updates of the path MTU.
538 .IP
539 To get an initial estimate of the
540 path MTU, connect a datagram socket to the destination address using
541 .BR connect (2)
542 and retrieve the MTU by calling
543 .BR getsockopt (2)
544 with the
545 .B IP_MTU
546 option.
547 .IP
548 It is possible to implement RFC 4821 MTU probing with
549 .B SOCK_DGRAM
550 or
551 .B SOCK_RAW
552 sockets by setting a value of
553 .BR IP_PMTUDISC_PROBE
554 (available since Linux 2.6.22).
555 This is also particularly useful for diagnostic tools such as
556 .BR tracepath (8)
557 that wish to deliberately send probe packets larger than
558 the observed Path MTU.
559 .TP
560 .BR IP_MULTICAST_ALL " (since Linux 2.6.31)"
561 This option can be used to modify the delivery policy of multicast messages
562 to sockets bound to the wildcard
563 .B INADDR_ANY
564 address.
565 The argument is a boolean integer (defaults to 1).
566 If set to 1,
567 the socket will receive messages from all the groups that have been joined
568 globally on the whole system.
569 Otherwise, it will deliver messages only from
570 the groups that have been explicitly joined (for example via the
571 .B IP_ADD_MEMBERSHIP
572 option) on this particular socket.
573 .TP
574 .BR IP_MULTICAST_IF " (since Linux 1.2)"
575 Set the local device for a multicast socket.
576 The argument for
577 .BR setsockopt (2)
578 is an
579 .I ip_mreqn
580 or
581 .\" net: IP_MULTICAST_IF setsockopt now recognizes struct mreq
582 .\" Commit: 3a084ddb4bf299a6e898a9a07c89f3917f0713f7
583 (since Linux 3.5)
584 .I ip_mreq
585 structure similar to
586 .BR IP_ADD_MEMBERSHIP ,
587 or an
588 .I in_addr
589 structure.
590 (The kernel determines which structure is being passed based
591 on the size passed in
592 .IR optlen .)
593 For
594 .BR getsockopt (2),
595 the argument is an
596 .I in_addr
597 structure.
598 .TP
599 .BR IP_MULTICAST_LOOP " (since Linux 1.2)"
600 Set or read a boolean integer argument that determines whether
601 sent multicast packets should be looped back to the local sockets.
602 .TP
603 .BR IP_MULTICAST_TTL " (since Linux 1.2)"
604 Set or read the time-to-live value of outgoing multicast packets for this
605 socket.
606 It is very important for multicast packets to set the smallest TTL possible.
607 The default is 1 which means that multicast packets don't leave the local
608 network unless the user program explicitly requests it.
609 Argument is an integer.
610 .TP
611 .BR IP_NODEFRAG " (since Linux 2.6.36)"
612 If enabled (argument is nonzero),
613 the reassembly of outgoing packets is disabled in the netfilter layer.
614 The argument is an integer.
615 .IP
616 This option is valid only for
617 .B SOCK_RAW
618 sockets.
619 .TP
620 .BR IP_OPTIONS " (since Linux 2.0)"
621 .\" Precisely: 1.3.30
622 Set or get the IP options to be sent with every packet from this socket.
623 The arguments are a pointer to a memory buffer containing the options
624 and the option length.
625 The
626 .BR setsockopt (2)
627 call sets the IP options associated with a socket.
628 The maximum option size for IPv4 is 40 bytes.
629 See RFC\ 791 for the allowed options.
630 When the initial connection request packet for a
631 .B SOCK_STREAM
632 socket contains IP options, the IP options will be set automatically
633 to the options from the initial packet with routing headers reversed.
634 Incoming packets are not allowed to change options after the connection
635 is established.
636 The processing of all incoming source routing options
637 is disabled by default and can be enabled by using the
638 .I accept_source_route
639 .I /proc
640 interface.
641 Other options like timestamps are still handled.
642 For datagram sockets, IP options can be only set by the local user.
643 Calling
644 .BR getsockopt (2)
645 with
646 .B IP_OPTIONS
647 puts the current IP options used for sending into the supplied buffer.
648 .TP
649 .BR IP_PKTINFO " (since Linux 2.2)"
650 .\" Precisely: 2.1.68
651 Pass an
652 .B IP_PKTINFO
653 ancillary message that contains a
654 .I in_pktinfo
655 structure that supplies some information about the incoming packet.
656 This only works for datagram oriented sockets.
657 The argument is a flag that tells the socket whether the
658 .B IP_PKTINFO
659 message should be passed or not.
660 The message itself can only be sent/retrieved
661 as a control message with a packet using
662 .BR recvmsg (2)
663 or
664 .BR sendmsg (2).
665 .IP
666 .in +4n
667 .EX
668 struct in_pktinfo {
669 unsigned int ipi_ifindex; /* Interface index */
670 struct in_addr ipi_spec_dst; /* Local address */
671 struct in_addr ipi_addr; /* Header Destination
672 address */
673 };
674 .EE
675 .in
676 .IP
677 .I ipi_ifindex
678 is the unique index of the interface the packet was received on.
679 .I ipi_spec_dst
680 is the local address of the packet and
681 .I ipi_addr
682 is the destination address in the packet header.
683 If
684 .B IP_PKTINFO
685 is passed to
686 .BR sendmsg (2)
687 and
688 .\" This field is grossly misnamed
689 .I ipi_spec_dst
690 is not zero, then it is used as the local source address for the routing
691 table lookup and for setting up IP source route options.
692 When
693 .I ipi_ifindex
694 is not zero, the primary local address of the interface specified by the
695 index overwrites
696 .I ipi_spec_dst
697 for the routing table lookup.
698 .TP
699 .BR IP_RECVERR " (since Linux 2.2)"
700 .\" Precisely: 2.1.15
701 Enable extended reliable error message passing.
702 When enabled on a datagram socket, all
703 generated errors will be queued in a per-socket error queue.
704 When the user receives an error from a socket operation,
705 the errors can be received by calling
706 .BR recvmsg (2)
707 with the
708 .B MSG_ERRQUEUE
709 flag set.
710 The
711 .I sock_extended_err
712 structure describing the error will be passed in an ancillary message with
713 the type
714 .B IP_RECVERR
715 and the level
716 .BR IPPROTO_IP .
717 .\" or SOL_IP on Linux
718 This is useful for reliable error handling on unconnected sockets.
719 The received data portion of the error queue contains the error packet.
720 .IP
721 The
722 .B IP_RECVERR
723 control message contains a
724 .I sock_extended_err
725 structure:
726 .IP
727 .in +4n
728 .EX
729 #define SO_EE_ORIGIN_NONE 0
730 #define SO_EE_ORIGIN_LOCAL 1
731 #define SO_EE_ORIGIN_ICMP 2
732 #define SO_EE_ORIGIN_ICMP6 3
733
734 struct sock_extended_err {
735 uint32_t ee_errno; /* error number */
736 uint8_t ee_origin; /* where the error originated */
737 uint8_t ee_type; /* type */
738 uint8_t ee_code; /* code */
739 uint8_t ee_pad;
740 uint32_t ee_info; /* additional information */
741 uint32_t ee_data; /* other data */
742 /* More data may follow */
743 };
744
745 struct sockaddr *SO_EE_OFFENDER(struct sock_extended_err *);
746 .EE
747 .in
748 .IP
749 .I ee_errno
750 contains the
751 .I errno
752 number of the queued error.
753 .I ee_origin
754 is the origin code of where the error originated.
755 The other fields are protocol-specific.
756 The macro
757 .B SO_EE_OFFENDER
758 returns a pointer to the address of the network object
759 where the error originated from given a pointer to the ancillary message.
760 If this address is not known, the
761 .I sa_family
762 member of the
763 .I sockaddr
764 contains
765 .B AF_UNSPEC
766 and the other fields of the
767 .I sockaddr
768 are undefined.
769 .IP
770 IP uses the
771 .I sock_extended_err
772 structure as follows:
773 .I ee_origin
774 is set to
775 .B SO_EE_ORIGIN_ICMP
776 for errors received as an ICMP packet, or
777 .B SO_EE_ORIGIN_LOCAL
778 for locally generated errors.
779 Unknown values should be ignored.
780 .I ee_type
781 and
782 .I ee_code
783 are set from the type and code fields of the ICMP header.
784 .I ee_info
785 contains the discovered MTU for
786 .B EMSGSIZE
787 errors.
788 The message also contains the
789 .I sockaddr_in
790 of the node that caused the error, which can be accessed with the
791 .B SO_EE_OFFENDER
792 macro.
793 The
794 .I sin_family
795 field of the
796 .B SO_EE_OFFENDER
797 address is
798 .B AF_UNSPEC
799 when the source was unknown.
800 When the error originated from the network, all IP options
801 .RB ( IP_OPTIONS ", " IP_TTL ", "
802 etc.) enabled on the socket and contained in the
803 error packet are passed as control messages.
804 The payload of the packet causing the error is returned as normal payload.
805 .\" FIXME . Is it a good idea to document that? It is a dubious feature.
806 .\" On
807 .\" .B SOCK_STREAM
808 .\" sockets,
809 .\" .B IP_RECVERR
810 .\" has slightly different semantics. Instead of
811 .\" saving the errors for the next timeout, it passes all incoming
812 .\" errors immediately to the user.
813 .\" This might be useful for very short-lived TCP connections which
814 .\" need fast error handling. Use this option with care:
815 .\" it makes TCP unreliable
816 .\" by not allowing it to recover properly from routing
817 .\" shifts and other normal
818 .\" conditions and breaks the protocol specification.
819 Note that TCP has no error queue;
820 .B MSG_ERRQUEUE
821 is not permitted on
822 .B SOCK_STREAM
823 sockets.
824 .B IP_RECVERR
825 is valid for TCP, but all errors are returned by socket function return or
826 .B SO_ERROR
827 only.
828 .IP
829 For raw sockets,
830 .B IP_RECVERR
831 enables passing of all received ICMP errors to the
832 application, otherwise errors are only reported on connected sockets.
833 .IP
834 It sets or retrieves an integer boolean flag.
835 .B IP_RECVERR
836 defaults to off.
837 .TP
838 .BR IP_RECVOPTS " (since Linux 2.2)"
839 .\" Precisely: 2.1.15
840 Pass all incoming IP options to the user in an
841 .B IP_OPTIONS
842 control message.
843 The routing header and other options are already filled in
844 for the local host.
845 Not supported for
846 .B SOCK_STREAM
847 sockets.
848 .TP
849 .BR IP_RECVORIGDSTADDR " (since Linux 2.6.29)"
850 .\" commit e8b2dfe9b4501ed0047459b2756ba26e5a940a69
851 This boolean option enables the
852 .B IP_ORIGDSTADDR
853 ancillary message in
854 .BR recvmsg (2),
855 in which the kernel returns the original destination address
856 of the datagram being received.
857 The ancillary message contains a
858 .IR "struct sockaddr_in" .
859 .TP
860 .BR IP_RECVTOS " (since Linux 2.2)"
861 .\" Precisely: 2.1.68
862 If enabled, the
863 .B IP_TOS
864 ancillary message is passed with incoming packets.
865 It contains a byte which specifies the Type of Service/Precedence
866 field of the packet header.
867 Expects a boolean integer flag.
868 .TP
869 .BR IP_RECVTTL " (since Linux 2.2)"
870 .\" Precisely: 2.1.68
871 When this flag is set, pass an
872 .B IP_TTL
873 control message with the time-to-live
874 field of the received packet as a byte.
875 Not supported for
876 .B SOCK_STREAM
877 sockets.
878 .TP
879 .BR IP_RETOPTS " (since Linux 2.2)"
880 .\" Precisely: 2.1.15
881 Identical to
882 .BR IP_RECVOPTS ,
883 but returns raw unprocessed options with timestamp and route record
884 options not filled in for this hop.
885 .TP
886 .BR IP_ROUTER_ALERT " (since Linux 2.2)"
887 .\" Precisely: 2.1.68
888 Pass all to-be forwarded packets with the
889 IP Router Alert option set to this socket.
890 Valid only for raw sockets.
891 This is useful, for instance, for user-space RSVP daemons.
892 The tapped packets are not forwarded by the kernel; it is
893 the user's responsibility to send them out again.
894 Socket binding is ignored,
895 such packets are only filtered by protocol.
896 Expects an integer flag.
897 .TP
898 .BR IP_TOS " (since Linux 1.0)"
899 Set or retrieve the Type-Of-Service (TOS) field that is sent
900 with every IP packet originating from this socket.
901 It is used to prioritize packets on the network.
902 TOS is a byte.
903 There are some standard TOS flags defined:
904 .B IPTOS_LOWDELAY
905 to minimize delays for interactive traffic,
906 .B IPTOS_THROUGHPUT
907 to optimize throughput,
908 .B IPTOS_RELIABILITY
909 to optimize for reliability,
910 .B IPTOS_MINCOST
911 should be used for "filler data" where slow transmission doesn't matter.
912 At most one of these TOS values can be specified.
913 Other bits are invalid and shall be cleared.
914 Linux sends
915 .B IPTOS_LOWDELAY
916 datagrams first by default,
917 but the exact behavior depends on the configured queueing discipline.
918 .\" FIXME elaborate on this
919 Some high-priority levels may require superuser privileges (the
920 .B CAP_NET_ADMIN
921 capability).
922 .\" The priority can also be set in a protocol-independent way by the
923 .\" .RB ( SOL_SOCKET ", " SO_PRIORITY )
924 .\" socket option (see
925 .\" .BR socket (7)).
926 .TP
927 .BR IP_TRANSPARENT " (since Linux 2.6.24)"
928 .\" commit f5715aea4564f233767ea1d944b2637a5fd7cd2e
929 .\" This patch introduces the IP_TRANSPARENT socket option: enabling that
930 .\" will make the IPv4 routing omit the non-local source address check on
931 .\" output. Setting IP_TRANSPARENT requires NET_ADMIN capability.
932 .\" http://lwn.net/Articles/252545/
933 Setting this boolean option enables transparent proxying on this socket.
934 This socket option allows
935 the calling application to bind to a nonlocal IP address and operate
936 both as a client and a server with the foreign address as the local endpoint.
937 NOTE: this requires that routing be set up in a way that
938 packets going to the foreign address are routed through the TProxy box
939 (i.e., the system hosting the application that employs the
940 .B IP_TRANSPARENT
941 socket option).
942 Enabling this socket option requires superuser privileges
943 (the
944 .BR CAP_NET_ADMIN
945 capability).
946 .IP
947 TProxy redirection with the iptables TPROXY target also requires that
948 this option be set on the redirected socket.
949 .TP
950 .BR IP_TTL " (since Linux 1.0)"
951 Set or retrieve the current time-to-live field that is used in every packet
952 sent from this socket.
953 .TP
954 .BR IP_UNBLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
955 Unblock a previously blocked multicast source.
956 Returns
957 .BR EADDRNOTAVAIL
958 when the given source is not being blocked.
959 .IP
960 Argument is an
961 .I ip_mreq_source
962 structure as described under
963 .BR IP_ADD_SOURCE_MEMBERSHIP .
964 .SS /proc interfaces
965 The IP protocol
966 supports a set of
967 .I /proc
968 interfaces to configure some global parameters.
969 The parameters can be accessed by reading or writing files in the directory
970 .IR /proc/sys/net/ipv4/ .
971 .\" FIXME As at 2.6.12, 14 Jun 2005, the following are undocumented:
972 .\" ip_queue_maxlen
973 .\" ip_conntrack_max
974 Interfaces described as
975 .I Boolean
976 take an integer value, with a nonzero value ("true") meaning that
977 the corresponding option is enabled, and a zero value ("false")
978 meaning that the option is disabled.
979 .\"
980 .TP
981 .IR ip_always_defrag " (Boolean; since Linux 2.2.13)"
982 [New with kernel 2.2.13; in earlier kernel versions this feature
983 was controlled at compile time by the
984 .B CONFIG_IP_ALWAYS_DEFRAG
985 option; this option is not present in 2.4.x and later]
986 .IP
987 When this boolean flag is enabled (not equal to 0), incoming fragments
988 (parts of IP packets
989 that arose when some host between origin and destination decided
990 that the packets were too large and cut them into pieces) will be
991 reassembled (defragmented) before being processed, even if they are
992 about to be forwarded.
993 .IP
994 Enable only if running either a firewall that is the sole link
995 to your network or a transparent proxy; never ever use it for a
996 normal router or host.
997 Otherwise, fragmented communication can be disturbed
998 if the fragments travel over different links.
999 Defragmentation also has a large memory and CPU time cost.
1000 .IP
1001 This is automagically turned on when masquerading or transparent
1002 proxying is configured.
1003 .\"
1004 .TP
1005 .IR ip_autoconfig " (Linux 2.2 to 2.6.17)"
1006 .\" Precisely: since 2.1.68
1007 .\" FIXME document ip_autoconfig
1008 Not documented.
1009 .\"
1010 .TP
1011 .IR ip_default_ttl " (integer; default: 64; since Linux 2.2)"
1012 .\" Precisely: 2.1.15
1013 Set the default time-to-live value of outgoing packets.
1014 This can be changed per socket with the
1015 .B IP_TTL
1016 option.
1017 .\"
1018 .TP
1019 .IR ip_dynaddr " (Boolean; default: disabled; since Linux 2.0.31)"
1020 Enable dynamic socket address and masquerading entry rewriting on interface
1021 address change.
1022 This is useful for dialup interfaces with changing IP addresses.
1023 0 means no rewriting, 1 turns it on and 2 enables verbose mode.
1024 .\"
1025 .TP
1026 .IR ip_forward " (Boolean; default: disabled; since Linux 1.2)"
1027 Enable IP forwarding with a boolean flag.
1028 IP forwarding can also be set on a per-interface basis.
1029 .\"
1030 .TP
1031 .IR ip_local_port_range " (since Linux 2.2)"
1032 .\" Precisely: since 2.1.68
1033 This file contains two integers that define the default local port range
1034 allocated to sockets that are not explicitly bound to a port number\(emthat
1035 is, the range used for
1036 .IR "ephemeral ports" .
1037 An ephemeral port is allocated to a socket in the following circumstances:
1038 .RS
1039 .IP * 3
1040 the port number in a socket address is specified as 0 when calling
1041 .BR bind (2);
1042 .IP *
1043 .BR listen (2)
1044 is called on a stream socket that was not previously bound;
1045 .IP *
1046 .BR connect (2)
1047 is called on a socket that was not previously bound;
1048 .IP *
1049 .BR sendto (2)
1050 is called on a datagram socket that was not previously bound.
1051 .RE
1052 .IP
1053 Allocation of ephemeral ports starts with the first number in
1054 .IR ip_local_port_range
1055 and ends with the second number.
1056 If the range of ephemeral ports is exhausted,
1057 then the relevant system call returns an error (but see BUGS).
1058 .IP
1059 Note that the port range in
1060 .IR ip_local_port_range
1061 should not conflict with the ports used by masquerading
1062 (although the case is handled).
1063 Also, arbitrary choices may cause problems with some firewall packet
1064 filters that make assumptions about the local ports in use.
1065 The first number should be at least greater than 1024,
1066 or better, greater than 4096, to avoid clashes
1067 with well known ports and to minimize firewall problems.
1068 .\"
1069 .TP
1070 .IR ip_no_pmtu_disc " (Boolean; default: disabled; since Linux 2.2)"
1071 .\" Precisely: 2.1.15
1072 If enabled, don't do Path MTU Discovery for TCP sockets by default.
1073 Path MTU discovery may fail if misconfigured firewalls (that drop
1074 all ICMP packets) or misconfigured interfaces (e.g., a point-to-point
1075 link where both ends don't agree on the MTU) are on the path.
1076 It is better to fix the broken routers on the path than to turn off
1077 Path MTU Discovery globally, because not doing it incurs a high cost
1078 to the network.
1079 .\"
1080 .\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
1081 .TP
1082 .IR ip_nonlocal_bind " (Boolean; default: disabled; since Linux 2.4)"
1083 .\" Precisely: patch-2.4.0-test10
1084 If set, allows processes to
1085 .BR bind (2)
1086 to nonlocal IP addresses,
1087 which can be quite useful, but may break some applications.
1088 .\"
1089 .\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
1090 .TP
1091 .IR ip6frag_time " (integer; default: 30)"
1092 Time in seconds to keep an IPv6 fragment in memory.
1093 .\"
1094 .\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
1095 .TP
1096 .IR ip6frag_secret_interval " (integer; default: 600)"
1097 Regeneration interval (in seconds) of the hash secret (or lifetime
1098 for the hash secret) for IPv6 fragments.
1099 .TP
1100 .IR ipfrag_high_thresh " (integer), " ipfrag_low_thresh " (integer)"
1101 If the amount of queued IP fragments reaches
1102 .IR ipfrag_high_thresh ,
1103 the queue is pruned down to
1104 .IR ipfrag_low_thresh .
1105 Each file contains an integer specifying a number of bytes.
1106 .TP
1107 .I neigh/*
1108 See
1109 .BR arp (7).
1110 .\" FIXME Document the conf/*/* interfaces
1111 .\"
1112 .\" FIXME Document the route/* interfaces
1113 .SS Ioctls
1114 All ioctls described in
1115 .BR socket (7)
1116 apply to
1117 .BR ip .
1118 .\" 2006-04-02, mtk
1119 .\" commented out the following because ipchains is obsolete
1120 .\" .PP
1121 .\" The ioctls to configure firewalling are documented in
1122 .\" .BR ipfw (4)
1123 .\" from the
1124 .\" .B ipchains
1125 .\" package.
1126 .PP
1127 Ioctls to configure generic device parameters are described in
1128 .BR netdevice (7).
1129 .\" FIXME Add a discussion of multicasting
1130 .SH ERRORS
1131 .\" FIXME document all errors.
1132 .\" We should really fix the kernels to give more uniform
1133 .\" error returns (ENOMEM vs ENOBUFS, EPERM vs EACCES etc.)
1134 .TP
1135 .B EACCES
1136 The user tried to execute an operation without the necessary permissions.
1137 These include:
1138 sending a packet to a broadcast address without having the
1139 .B SO_BROADCAST
1140 flag set;
1141 sending a packet via a
1142 .I prohibit
1143 route;
1144 modifying firewall settings without superuser privileges (the
1145 .B CAP_NET_ADMIN
1146 capability);
1147 binding to a privileged port without superuser privileges (the
1148 .B CAP_NET_BIND_SERVICE
1149 capability).
1150 .TP
1151 .B EADDRINUSE
1152 Tried to bind to an address already in use.
1153 .TP
1154 .B EADDRNOTAVAIL
1155 A nonexistent interface was requested or the requested source
1156 address was not local.
1157 .TP
1158 .B EAGAIN
1159 Operation on a nonblocking socket would block.
1160 .TP
1161 .B EALREADY
1162 A connection operation on a nonblocking socket is already in progress.
1163 .TP
1164 .B ECONNABORTED
1165 A connection was closed during an
1166 .BR accept (2).
1167 .TP
1168 .B EHOSTUNREACH
1169 No valid routing table entry matches the destination address.
1170 This error can be caused by an ICMP message from a remote router or
1171 by the local routing table.
1172 .TP
1173 .B EINVAL
1174 Invalid argument passed.
1175 For send operations this can be caused by sending to a
1176 .I blackhole
1177 route.
1178 .TP
1179 .B EISCONN
1180 .BR connect (2)
1181 was called on an already connected socket.
1182 .TP
1183 .B EMSGSIZE
1184 Datagram is bigger than an MTU on the path and it cannot be fragmented.
1185 .TP
1186 .BR ENOBUFS ", " ENOMEM
1187 Not enough free memory.
1188 This often means that the memory allocation is limited by the socket
1189 buffer limits, not by the system memory, but this is not 100% consistent.
1190 .TP
1191 .B ENOENT
1192 .B SIOCGSTAMP
1193 was called on a socket where no packet arrived.
1194 .TP
1195 .B ENOPKG
1196 A kernel subsystem was not configured.
1197 .TP
1198 .BR ENOPROTOOPT " and " EOPNOTSUPP
1199 Invalid socket option passed.
1200 .TP
1201 .B ENOTCONN
1202 The operation is defined only on a connected socket, but the socket wasn't
1203 connected.
1204 .TP
1205 .B EPERM
1206 User doesn't have permission to set high priority, change configuration,
1207 or send signals to the requested process or group.
1208 .TP
1209 .B EPIPE
1210 The connection was unexpectedly closed or shut down by the other end.
1211 .TP
1212 .B ESOCKTNOSUPPORT
1213 The socket is not configured or an unknown socket type was requested.
1214 .PP
1215 Other errors may be generated by the overlaying protocols; see
1216 .BR tcp (7),
1217 .BR raw (7),
1218 .BR udp (7),
1219 and
1220 .BR socket (7).
1221 .SH NOTES
1222 .BR IP_FREEBIND ,
1223 .BR IP_MSFILTER ,
1224 .BR IP_MTU ,
1225 .BR IP_MTU_DISCOVER ,
1226 .BR IP_RECVORIGDSTADDR ,
1227 .BR IP_PKTINFO ,
1228 .BR IP_RECVERR ,
1229 .BR IP_ROUTER_ALERT ,
1230 and
1231 .BR IP_TRANSPARENT
1232 are Linux-specific.
1233 .\" IP_PASSSEC is Linux-specific
1234 .\" IP_XFRM_POLICY is Linux-specific
1235 .\" IP_IPSEC_POLICY is a nonstandard extension, also present on some BSDs
1236 .PP
1237 Be very careful with the
1238 .B SO_BROADCAST
1239 option \- it is not privileged in Linux.
1240 It is easy to overload the network
1241 with careless broadcasts.
1242 For new application protocols
1243 it is better to use a multicast group instead of broadcasting.
1244 Broadcasting is discouraged.
1245 .PP
1246 Some other BSD sockets implementations provide
1247 .B IP_RCVDSTADDR
1248 and
1249 .B IP_RECVIF
1250 socket options to get the destination address and the interface of
1251 received datagrams.
1252 Linux has the more general
1253 .B IP_PKTINFO
1254 for the same task.
1255 .PP
1256 Some BSD sockets implementations also provide an
1257 .B IP_RECVTTL
1258 option, but an ancillary message with type
1259 .B IP_RECVTTL
1260 is passed with the incoming packet.
1261 This is different from the
1262 .B IP_TTL
1263 option used in Linux.
1264 .PP
1265 Using the
1266 .B SOL_IP
1267 socket options level isn't portable; BSD-based stacks use the
1268 .B IPPROTO_IP
1269 level.
1270 .SS Compatibility
1271 For compatibility with Linux 2.0, the obsolete
1272 .BI "socket(AF_INET, SOCK_PACKET, " protocol )
1273 syntax is still supported to open a
1274 .BR packet (7)
1275 socket.
1276 This is deprecated and should be replaced by
1277 .BI "socket(AF_PACKET, SOCK_RAW, " protocol )
1278 instead.
1279 The main difference is the new
1280 .I sockaddr_ll
1281 address structure for generic link layer information instead of the old
1282 .IR sockaddr_pkt .
1283 .SH BUGS
1284 There are too many inconsistent error values.
1285 .PP
1286 The error used to diagnose exhaustion of the ephemeral port range differs
1287 across the various system calls
1288 .RB ( connect (2),
1289 .BR bind (2),
1290 .BR listen (2),
1291 .BR sendto (2))
1292 that can assign ephemeral ports.
1293 .PP
1294 The ioctls to configure IP-specific interface options and ARP tables are
1295 not described.
1296 .\" .PP
1297 .\" Some versions of glibc forget to declare
1298 .\" .IR in_pktinfo .
1299 .\" Workaround currently is to copy it into your program from this man page.
1300 .PP
1301 Receiving the original destination address with
1302 .B MSG_ERRQUEUE
1303 in
1304 .I msg_name
1305 by
1306 .BR recvmsg (2)
1307 does not work in some 2.2 kernels.
1308 .\" .SH AUTHORS
1309 .\" This man page was written by Andi Kleen.
1310 .SH SEE ALSO
1311 .BR recvmsg (2),
1312 .BR sendmsg (2),
1313 .BR byteorder (3),
1314 .BR ipfw (4),
1315 .BR capabilities (7),
1316 .BR icmp (7),
1317 .BR ipv6 (7),
1318 .BR netlink (7),
1319 .BR raw (7),
1320 .BR socket (7),
1321 .BR tcp (7),
1322 .BR udp (7),
1323 .BR ip (8)
1324 .PP
1325 RFC\ 791 for the original IP specification.
1326 RFC\ 1122 for the IPv4 host requirements.
1327 RFC\ 1812 for the IPv4 router requirements.