]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man7/ip.7
memusage.1, memusagestat.1, pldd.1, accept.2, adjtimex.2, arch_prctl.2, bdflush.2...
[thirdparty/man-pages.git] / man7 / ip.7
1 '\" t
2 .\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
3 .\"
4 .\" %%%LICENSE_START(VERBATIM_ONE_PARA)
5 .\" Permission is granted to distribute possibly modified copies
6 .\" of this page provided the header is included verbatim,
7 .\" and in case of nontrivial modification author and date
8 .\" of the modification is added to the header.
9 .\" %%%LICENSE_END
10 .\"
11 .\" $Id: ip.7,v 1.19 2000/12/20 18:10:31 ak Exp $
12 .\"
13 .\" FIXME The following socket options are yet to be documented
14 .\" IP_XFRM_POLICY (2.5.48)
15 .\" Needs CAP_NET_ADMIN
16 .\" IP_IPSEC_POLICY (2.5.47)
17 .\" Needs CAP_NET_ADMIN
18 .\" IP_PASSSEC (2.6.17)
19 .\" Boolean
20 .\" commit 2c7946a7bf45ae86736ab3b43d0085e43947945c
21 .\" Author: Catherine Zhang <cxzhang@watson.ibm.com>
22 .\" IP_MINTTL (2.6.34)
23 .\" commit d218d11133d888f9745802146a50255a4781d37a
24 .\" Author: Stephen Hemminger <shemminger@vyatta.com>
25 .\" MCAST_JOIN_GROUP (2.4.22 / 2.6)
26 .\" MCAST_BLOCK_SOURCE (2.4.22 / 2.6)
27 .\" MCAST_UNBLOCK_SOURCE (2.4.22 / 2.6)
28 .\" MCAST_LEAVE_GROUP (2.4.22 / 2.6)
29 .\" MCAST_JOIN_SOURCE_GROUP (2.4.22 / 2.6)
30 .\" MCAST_LEAVE_SOURCE_GROUP (2.4.22 / 2.6)
31 .\" MCAST_MSFILTER (2.4.22 / 2.6)
32 .\" IP_UNICAST_IF (3.4)
33 .\" commit 76e21053b5bf33a07c76f99d27a74238310e3c71
34 .\" Author: Erich E. Hoover <ehoover@mines.edu>
35 .\"
36 .TH IP 7 2016-10-08 "Linux" "Linux Programmer's Manual"
37 .SH NAME
38 ip \- Linux IPv4 protocol implementation
39 .SH SYNOPSIS
40 .B #include <sys/socket.h>
41 .br
42 .\" .B #include <net/netinet.h> -- does not exist anymore
43 .\" .B #include <linux/errqueue.h> -- never include <linux/foo.h>
44 .B #include <netinet/in.h>
45 .br
46 .B #include <netinet/ip.h> \fR/* superset of previous */
47 .sp
48 .IB tcp_socket " = socket(AF_INET, SOCK_STREAM, 0);"
49 .br
50 .IB udp_socket " = socket(AF_INET, SOCK_DGRAM, 0);"
51 .br
52 .IB raw_socket " = socket(AF_INET, SOCK_RAW, " protocol ");"
53 .SH DESCRIPTION
54 Linux implements the Internet Protocol, version 4,
55 described in RFC\ 791 and RFC\ 1122.
56 .B ip
57 contains a level 2 multicasting implementation conforming to RFC\ 1112.
58 It also contains an IP router including a packet filter.
59 .\" FIXME . has someone verified that 2.1 is really 1812 compliant?
60 .PP
61 The programming interface is BSD-sockets compatible.
62 For more information on sockets, see
63 .BR socket (7).
64 .PP
65 An IP socket is created using
66 .BR socket (2):
67
68 socket(AF_INET, socket_type, protocol);
69
70 Valid socket types are
71 .B SOCK_STREAM
72 to open a
73 .BR tcp (7)
74 socket,
75 .B SOCK_DGRAM
76 to open a
77 .BR udp (7)
78 socket, or
79 .B SOCK_RAW
80 to open a
81 .BR raw (7)
82 socket to access the IP protocol directly.
83 .I protocol
84 is the IP protocol in the IP header to be received or sent.
85 The only valid values for
86 .I protocol
87 are 0 and
88 .B IPPROTO_TCP
89 for TCP sockets, and 0 and
90 .B IPPROTO_UDP
91 for UDP sockets.
92 For
93 .B SOCK_RAW
94 you may specify a valid IANA IP protocol defined in
95 RFC\ 1700 assigned numbers.
96 .PP
97 When a process wants to receive new incoming packets or connections, it
98 should bind a socket to a local interface address using
99 .BR bind (2).
100 In this case, only one IP socket may be bound to any given local
101 (address, port) pair.
102 When
103 .B INADDR_ANY
104 is specified in the bind call, the socket will be bound to
105 .I all
106 local interfaces.
107 When
108 .BR listen (2)
109 is called on an unbound socket, the socket is automatically bound
110 to a random free port with the local address set to
111 .BR INADDR_ANY .
112 When
113 .BR connect (2)
114 is called on an unbound socket, the socket is automatically bound
115 to a random free port or to a usable shared port with the local address
116 set to
117 .BR INADDR_ANY .
118
119 A TCP local socket address that has been bound is unavailable for
120 some time after closing, unless the
121 .B SO_REUSEADDR
122 flag has been set.
123 Care should be taken when using this flag as it makes TCP less reliable.
124 .SS Address format
125 An IP socket address is defined as a combination of an IP interface
126 address and a 16-bit port number.
127 The basic IP protocol does not supply port numbers, they
128 are implemented by higher level protocols like
129 .BR udp (7)
130 and
131 .BR tcp (7).
132 On raw sockets
133 .I sin_port
134 is set to the IP protocol.
135 .PP
136 .in +4n
137 .nf
138 struct sockaddr_in {
139 sa_family_t sin_family; /* address family: AF_INET */
140 in_port_t sin_port; /* port in network byte order */
141 struct in_addr sin_addr; /* internet address */
142 };
143
144 /* Internet address. */
145 struct in_addr {
146 uint32_t s_addr; /* address in network byte order */
147 };
148 .fi
149 .in
150 .PP
151 .I sin_family
152 is always set to
153 .BR AF_INET .
154 This is required; in Linux 2.2 most networking functions return
155 .B EINVAL
156 when this setting is missing.
157 .I sin_port
158 contains the port in network byte order.
159 The port numbers below 1024 are called
160 .IR "privileged ports"
161 (or sometimes:
162 .IR "reserved ports" ).
163 Only a privileged process
164 (on Linux: a process that has the
165 .B CAP_NET_BIND_SERVICE
166 capability in the user namespace governing its network namespace) may
167 .BR bind (2)
168 to these ports.
169 Note that the raw IPv4 protocol as such has no concept of a
170 port, they are implemented only by higher protocols like
171 .BR tcp (7)
172 and
173 .BR udp (7).
174 .PP
175 .I sin_addr
176 is the IP host address.
177 The
178 .I s_addr
179 member of
180 .I struct in_addr
181 contains the host interface address in network byte order.
182 .I in_addr
183 should be assigned one of the
184 .BR INADDR_*
185 values (e.g.,
186 .BR INADDR_ANY )
187 or set using the
188 .BR inet_aton (3),
189 .BR inet_addr (3),
190 .BR inet_makeaddr (3)
191 library functions or directly with the name resolver (see
192 .BR gethostbyname (3)).
193
194 IPv4 addresses are divided into unicast, broadcast,
195 and multicast addresses.
196 Unicast addresses specify a single interface of a host,
197 broadcast addresses specify all hosts on a network, and multicast
198 addresses address all hosts in a multicast group.
199 Datagrams to broadcast addresses can be sent or received only when the
200 .B SO_BROADCAST
201 socket flag is set.
202 In the current implementation, connection-oriented sockets are allowed
203 to use only unicast addresses.
204 .\" Leave a loophole for XTP @)
205
206 Note that the address and the port are always stored in
207 network byte order.
208 In particular, this means that you need to call
209 .BR htons (3)
210 on the number that is assigned to a port.
211 All address/port manipulation
212 functions in the standard library work in network byte order.
213
214 There are several special addresses:
215 .B INADDR_LOOPBACK
216 (127.0.0.1)
217 always refers to the local host via the loopback device;
218 .B INADDR_ANY
219 (0.0.0.0)
220 means any address for binding;
221 .B INADDR_BROADCAST
222 (255.255.255.255)
223 means any host and has the same effect on bind as
224 .B INADDR_ANY
225 for historical reasons.
226 .SS Socket options
227 IP supports some protocol-specific socket options that can be set with
228 .BR setsockopt (2)
229 and read with
230 .BR getsockopt (2).
231 The socket option level for IP is
232 .BR IPPROTO_IP .
233 .\" or SOL_IP on Linux
234 A boolean integer flag is zero when it is false, otherwise true.
235
236 When an invalid socket option is specified,
237 .BR getsockopt (2)
238 and
239 .BR setsockopt (2)
240 fail with the error
241 .BR ENOPROTOOPT .
242 .TP
243 .BR IP_ADD_MEMBERSHIP " (since Linux 1.2)"
244 Join a multicast group.
245 Argument is an
246 .I ip_mreqn
247 structure.
248 .sp
249 .in +4n
250 .nf
251 struct ip_mreqn {
252 struct in_addr imr_multiaddr; /* IP multicast group
253 address */
254 struct in_addr imr_address; /* IP address of local
255 interface */
256 int imr_ifindex; /* interface index */
257 };
258 .fi
259 .in
260 .sp
261 .I imr_multiaddr
262 contains the address of the multicast group the application
263 wants to join or leave.
264 It must be a valid multicast address
265 .\" (i.e., within the 224.0.0.0-239.255.255.255 range)
266 (or
267 .BR setsockopt (2)
268 fails with the error
269 .BR EINVAL ).
270 .I imr_address
271 is the address of the local interface with which the system
272 should join the multicast group; if it is equal to
273 .BR INADDR_ANY ,
274 an appropriate interface is chosen by the system.
275 .I imr_ifindex
276 is the interface index of the interface that should join/leave the
277 .I imr_multiaddr
278 group, or 0 to indicate any interface.
279 .IP
280 The
281 .I ip_mreqn
282 structure is available only since Linux 2.2.
283 For compatibility, the old
284 .I ip_mreq
285 structure (present since Linux 1.2) is still supported;
286 it differs from
287 .I ip_mreqn
288 only by not including the
289 .I imr_ifindex
290 field.
291 (The kernel determines which structure is being passed based
292 on the size passed in
293 .IR optlen .)
294
295 .B IP_ADD_MEMBERSHIP
296 is valid only for
297 .BR setsockopt (2).
298 .\"
299 .TP
300 .BR IP_ADD_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
301 Join a multicast group and allow receiving data only
302 from a specified source.
303 Argument is an
304 .I ip_mreq_source
305 structure.
306 .sp
307 .in +4n
308 .nf
309 struct ip_mreq_source {
310 struct in_addr imr_multiaddr; /* IP multicast group
311 address */
312 struct in_addr imr_interface; /* IP address of local
313 interface */
314 struct in_addr imr_sourceaddr; /* IP address of
315 multicast source */
316 };
317 .fi
318 .in
319 .sp
320 The
321 .I ip_mreq_source
322 structure is similar to
323 .I ip_mreqn
324 described under
325 .BR IP_ADD_MEMBERSHIP .
326 The
327 .I imr_multiaddr
328 field contains the address of the multicast group the application
329 wants to join or leave.
330 The
331 .I imr_interface
332 field is the address of the local interface with which
333 the system should join the multicast group.
334 Finally, the
335 .I imr_sourceaddr
336 field contains the address of the source the
337 application wants to receive data from.
338 .IP
339 This option can be used multiple times to allow
340 receiving data from more than one source.
341 .TP
342 .BR IP_BIND_ADDRESS_NO_PORT " (since Linux 4.2)"
343 .\" commit 90c337da1524863838658078ec34241f45d8394d
344 Inform the kernel to not reserve an ephemeral port when using
345 .BR bind (2)
346 with a port number of 0.
347 The port will later be automatically chosen at
348 .BR connect (2)
349 time,
350 in a way that allows sharing a source port as long as the 4-tuple is unique.
351 .TP
352 .BR IP_BLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
353 Stop receiving multicast data from a specific source in a given group.
354 This is valid only after the application has subscribed
355 to the multicast group using either
356 .BR IP_ADD_MEMBERSHIP
357 or
358 .BR IP_ADD_SOURCE_MEMBERSHIP .
359 .IP
360 Argument is an
361 .I ip_mreq_source
362 structure as described under
363 .BR IP_ADD_SOURCE_MEMBERSHIP .
364 .TP
365 .BR IP_DROP_MEMBERSHIP " (since Linux 1.2)"
366 Leave a multicast group.
367 Argument is an
368 .I ip_mreqn
369 or
370 .I ip_mreq
371 structure similar to
372 .BR IP_ADD_MEMBERSHIP .
373 .TP
374 .BR IP_DROP_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
375 Leave a source-specific group\(emthat is, stop receiving data from
376 a given multicast group that come from a given source.
377 If the application has subscribed to multiple sources within
378 the same group, data from the remaining sources will still be delivered.
379 To stop receiving data from all sources at once, use
380 .BR IP_DROP_MEMBERSHIP .
381 .IP
382 Argument is an
383 .I ip_mreq_source
384 structure as described under
385 .BR IP_ADD_SOURCE_MEMBERSHIP .
386 .TP
387 .BR IP_FREEBIND " (since Linux 2.4)"
388 .\" Precisely: 2.4.0-test10
389 If enabled, this boolean option allows binding to an IP address
390 that is nonlocal or does not (yet) exist.
391 This permits listening on a socket,
392 without requiring the underlying network interface or the
393 specified dynamic IP address to be up at the time that
394 the application is trying to bind to it.
395 This option is the per-socket equivalent of the
396 .IR ip_nonlocal_bind
397 .I /proc
398 interface described below.
399 .TP
400 .BR IP_HDRINCL " (since Linux 2.0)"
401 If enabled,
402 the user supplies an IP header in front of the user data.
403 Valid only for
404 .B SOCK_RAW
405 sockets; see
406 .BR raw (7)
407 for more information.
408 When this flag is enabled, the values set by
409 .BR IP_OPTIONS ,
410 .BR IP_TTL ,
411 and
412 .B IP_TOS
413 are ignored.
414 .TP
415 .BR IP_MSFILTER " (since Linux 2.4.22 / 2.5.68)"
416 This option provides access to the advanced full-state filtering API.
417 Argument is an
418 .I ip_msfilter
419 structure.
420 .sp
421 .in +4n
422 .nf
423 struct ip_msfilter {
424 struct in_addr imsf_multiaddr; /* IP multicast group
425 address */
426 struct in_addr imsf_interface; /* IP address of local
427 interface */
428 uint32_t imsf_fmode; /* Filter-mode */
429
430 uint32_t imsf_numsrc; /* Number of sources in
431 the following array */
432 struct in_addr imsf_slist[1]; /* Array of source
433 addresses */
434 };
435 .fi
436 .in
437 .sp
438 There are two macros,
439 .BR MCAST_INCLUDE
440 and
441 .BR MCAST_EXCLUDE ,
442 which can be used to specify the filtering mode.
443 Additionally, the
444 .BR IP_MSFILTER_SIZE (n)
445 macro exists to determine how much memory is needed to store an
446 .I ip_msfilter
447 structure with
448 .I n
449 sources in the source list.
450 .IP
451 For the full description of multicast source filtering
452 refer to RFC 3376.
453 .TP
454 .BR IP_MTU " (since Linux 2.2)"
455 .\" Precisely: 2.1.124
456 Retrieve the current known path MTU of the current socket.
457 Returns an integer.
458
459 .B IP_MTU
460 is valid only for
461 .BR getsockopt (2)
462 and can be employed only when the socket has been connected.
463 .TP
464 .BR IP_MTU_DISCOVER " (since Linux 2.2)"
465 .\" Precisely: 2.1.124
466 Set or retrieve the Path MTU Discovery setting for a socket.
467 When enabled, Linux will perform Path MTU Discovery
468 as defined in RFC\ 1191 on
469 .B SOCK_STREAM
470 sockets.
471 For
472 .RB non- SOCK_STREAM
473 sockets,
474 .B IP_PMTUDISC_DO
475 forces the don't-fragment flag to be set on all outgoing packets.
476 It is the user's responsibility to packetize the data
477 in MTU-sized chunks and to do the retransmits if necessary.
478 The kernel will reject (with
479 .BR EMSGSIZE )
480 datagrams that are bigger than the known path MTU.
481 .B IP_PMTUDISC_WANT
482 will fragment a datagram if needed according to the path MTU,
483 or will set the don't-fragment flag otherwise.
484
485 The system-wide default can be toggled between
486 .B IP_PMTUDISC_WANT
487 and
488 .B IP_PMTUDISC_DONT
489 by writing (respectively, zero and nonzero values) to the
490 .I /proc/sys/net/ipv4/ip_no_pmtu_disc
491 file.
492 .TS
493 tab(:);
494 c l
495 l l.
496 Path MTU discovery value:Meaning
497 IP_PMTUDISC_WANT:Use per-route settings.
498 IP_PMTUDISC_DONT:Never do Path MTU Discovery.
499 IP_PMTUDISC_DO:Always do Path MTU Discovery.
500 IP_PMTUDISC_PROBE:Set DF but ignore Path MTU.
501 .TE
502
503 When PMTU discovery is enabled, the kernel automatically keeps track of
504 the path MTU per destination host.
505 When it is connected to a specific peer with
506 .BR connect (2),
507 the currently known path MTU can be retrieved conveniently using the
508 .B IP_MTU
509 socket option (e.g., after an
510 .B EMSGSIZE
511 error occurred).
512 The path MTU may change over time.
513 For connectionless sockets with many destinations,
514 the new MTU for a given destination can also be accessed using the
515 error queue (see
516 .BR IP_RECVERR ).
517 A new error will be queued for every incoming MTU update.
518
519 While MTU discovery is in progress, initial packets from datagram sockets
520 may be dropped.
521 Applications using UDP should be aware of this and not
522 take it into account for their packet retransmit strategy.
523
524 To bootstrap the path MTU discovery process on unconnected sockets, it
525 is possible to start with a big datagram size
526 (up to 64K-headers bytes long) and let it shrink by updates of the path MTU.
527 .\" FIXME . this is an ugly hack
528
529 To get an initial estimate of the
530 path MTU, connect a datagram socket to the destination address using
531 .BR connect (2)
532 and retrieve the MTU by calling
533 .BR getsockopt (2)
534 with the
535 .B IP_MTU
536 option.
537
538 It is possible to implement RFC 4821 MTU probing with
539 .B SOCK_DGRAM
540 or
541 .B SOCK_RAW
542 sockets by setting a value of
543 .BR IP_PMTUDISC_PROBE
544 (available since Linux 2.6.22).
545 This is also particularly useful for diagnostic tools such as
546 .BR tracepath (8)
547 that wish to deliberately send probe packets larger than
548 the observed Path MTU.
549 .TP
550 .BR IP_MULTICAST_ALL " (since Linux 2.6.31)"
551 This option can be used to modify the delivery policy of multicast messages
552 to sockets bound to the wildcard
553 .B INADDR_ANY
554 address.
555 The argument is a boolean integer (defaults to 1).
556 If set to 1,
557 the socket will receive messages from all the groups that have been joined
558 globally on the whole system.
559 Otherwise, it will deliver messages only from
560 the groups that have been explicitly joined (for example via the
561 .B IP_ADD_MEMBERSHIP
562 option) on this particular socket.
563 .TP
564 .BR IP_MULTICAST_IF " (since Linux 1.2)"
565 Set the local device for a multicast socket.
566 The argument for
567 .BR setsockopt (2)
568 is an
569 .I ip_mreqn
570 or
571 .\" net: IP_MULTICAST_IF setsockopt now recognizes struct mreq
572 .\" Commit: 3a084ddb4bf299a6e898a9a07c89f3917f0713f7
573 (since Linux 3.5)
574 .I ip_mreq
575 structure similar to
576 .BR IP_ADD_MEMBERSHIP ,
577 or an
578 .I in_addr
579 structure.
580 (The kernel determines which structure is being passed based
581 on the size passed in
582 .IR optlen .)
583 For
584 .BR getsockopt (2),
585 the argument is an
586 .I in_addr
587 structure.
588 .TP
589 .BR IP_MULTICAST_LOOP " (since Linux 1.2)"
590 Set or read a boolean integer argument that determines whether
591 sent multicast packets should be looped back to the local sockets.
592 .TP
593 .BR IP_MULTICAST_TTL " (since Linux 1.2)"
594 Set or read the time-to-live value of outgoing multicast packets for this
595 socket.
596 It is very important for multicast packets to set the smallest TTL possible.
597 The default is 1 which means that multicast packets don't leave the local
598 network unless the user program explicitly requests it.
599 Argument is an integer.
600 .TP
601 .BR IP_NODEFRAG " (since Linux 2.6.36)"
602 If enabled (argument is nonzero),
603 the reassembly of outgoing packets is disabled in the netfilter layer.
604 The argument is an integer.
605
606 This option is valid only for
607 .B SOCK_RAW
608 sockets.
609 .TP
610 .BR IP_OPTIONS " (since Linux 2.0)"
611 .\" Precisely: 1.3.30
612 Set or get the IP options to be sent with every packet from this socket.
613 The arguments are a pointer to a memory buffer containing the options
614 and the option length.
615 The
616 .BR setsockopt (2)
617 call sets the IP options associated with a socket.
618 The maximum option size for IPv4 is 40 bytes.
619 See RFC\ 791 for the allowed options.
620 When the initial connection request packet for a
621 .B SOCK_STREAM
622 socket contains IP options, the IP options will be set automatically
623 to the options from the initial packet with routing headers reversed.
624 Incoming packets are not allowed to change options after the connection
625 is established.
626 The processing of all incoming source routing options
627 is disabled by default and can be enabled by using the
628 .I accept_source_route
629 .I /proc
630 interface.
631 Other options like timestamps are still handled.
632 For datagram sockets, IP options can be only set by the local user.
633 Calling
634 .BR getsockopt (2)
635 with
636 .B IP_OPTIONS
637 puts the current IP options used for sending into the supplied buffer.
638 .TP
639 .BR IP_PKTINFO " (since Linux 2.2)"
640 .\" Precisely: 2.1.68
641 Pass an
642 .B IP_PKTINFO
643 ancillary message that contains a
644 .I in_pktinfo
645 structure that supplies some information about the incoming packet.
646 This only works for datagram oriented sockets.
647 The argument is a flag that tells the socket whether the
648 .B IP_PKTINFO
649 message should be passed or not.
650 The message itself can only be sent/retrieved
651 as control message with a packet using
652 .BR recvmsg (2)
653 or
654 .BR sendmsg (2).
655 .IP
656 .in +4n
657 .nf
658 struct in_pktinfo {
659 unsigned int ipi_ifindex; /* Interface index */
660 struct in_addr ipi_spec_dst; /* Local address */
661 struct in_addr ipi_addr; /* Header Destination
662 address */
663 };
664 .fi
665 .in
666 .IP
667 .\" FIXME . elaborate on that.
668 .I ipi_ifindex
669 is the unique index of the interface the packet was received on.
670 .I ipi_spec_dst
671 is the local address of the packet and
672 .I ipi_addr
673 is the destination address in the packet header.
674 If
675 .B IP_PKTINFO
676 is passed to
677 .BR sendmsg (2)
678 and
679 .\" This field is grossly misnamed
680 .I ipi_spec_dst
681 is not zero, then it is used as the local source address for the routing
682 table lookup and for setting up IP source route options.
683 When
684 .I ipi_ifindex
685 is not zero, the primary local address of the interface specified by the
686 index overwrites
687 .I ipi_spec_dst
688 for the routing table lookup.
689 .TP
690 .BR IP_RECVERR " (since Linux 2.2)"
691 .\" Precisely: 2.1.15
692 Enable extended reliable error message passing.
693 When enabled on a datagram socket, all
694 generated errors will be queued in a per-socket error queue.
695 When the user receives an error from a socket operation,
696 the errors can be received by calling
697 .BR recvmsg (2)
698 with the
699 .B MSG_ERRQUEUE
700 flag set.
701 The
702 .I sock_extended_err
703 structure describing the error will be passed in an ancillary message with
704 the type
705 .B IP_RECVERR
706 and the level
707 .BR IPPROTO_IP .
708 .\" or SOL_IP on Linux
709 This is useful for reliable error handling on unconnected sockets.
710 The received data portion of the error queue contains the error packet.
711 .IP
712 The
713 .B IP_RECVERR
714 control message contains a
715 .I sock_extended_err
716 structure:
717 .IP
718 .in +4n
719 .ne 18
720 .nf
721 #define SO_EE_ORIGIN_NONE 0
722 #define SO_EE_ORIGIN_LOCAL 1
723 #define SO_EE_ORIGIN_ICMP 2
724 #define SO_EE_ORIGIN_ICMP6 3
725
726 struct sock_extended_err {
727 uint32_t ee_errno; /* error number */
728 uint8_t ee_origin; /* where the error originated */
729 uint8_t ee_type; /* type */
730 uint8_t ee_code; /* code */
731 uint8_t ee_pad;
732 uint32_t ee_info; /* additional information */
733 uint32_t ee_data; /* other data */
734 /* More data may follow */
735 };
736
737 struct sockaddr *SO_EE_OFFENDER(struct sock_extended_err *);
738 .fi
739 .in
740 .IP
741 .I ee_errno
742 contains the
743 .I errno
744 number of the queued error.
745 .I ee_origin
746 is the origin code of where the error originated.
747 The other fields are protocol-specific.
748 The macro
749 .B SO_EE_OFFENDER
750 returns a pointer to the address of the network object
751 where the error originated from given a pointer to the ancillary message.
752 If this address is not known, the
753 .I sa_family
754 member of the
755 .I sockaddr
756 contains
757 .B AF_UNSPEC
758 and the other fields of the
759 .I sockaddr
760 are undefined.
761 .IP
762 IP uses the
763 .I sock_extended_err
764 structure as follows:
765 .I ee_origin
766 is set to
767 .B SO_EE_ORIGIN_ICMP
768 for errors received as an ICMP packet, or
769 .B SO_EE_ORIGIN_LOCAL
770 for locally generated errors.
771 Unknown values should be ignored.
772 .I ee_type
773 and
774 .I ee_code
775 are set from the type and code fields of the ICMP header.
776 .I ee_info
777 contains the discovered MTU for
778 .B EMSGSIZE
779 errors.
780 The message also contains the
781 .I sockaddr_in
782 of the node that caused the error, which can be accessed with the
783 .B SO_EE_OFFENDER
784 macro.
785 The
786 .I sin_family
787 field of the
788 .B SO_EE_OFFENDER
789 address is
790 .B AF_UNSPEC
791 when the source was unknown.
792 When the error originated from the network, all IP options
793 .RB ( IP_OPTIONS ", " IP_TTL ", "
794 etc.) enabled on the socket and contained in the
795 error packet are passed as control messages.
796 The payload of the packet causing the error is returned as normal payload.
797 .\" FIXME . Is it a good idea to document that? It is a dubious feature.
798 .\" On
799 .\" .B SOCK_STREAM
800 .\" sockets,
801 .\" .B IP_RECVERR
802 .\" has slightly different semantics. Instead of
803 .\" saving the errors for the next timeout, it passes all incoming
804 .\" errors immediately to the user.
805 .\" This might be useful for very short-lived TCP connections which
806 .\" need fast error handling. Use this option with care:
807 .\" it makes TCP unreliable
808 .\" by not allowing it to recover properly from routing
809 .\" shifts and other normal
810 .\" conditions and breaks the protocol specification.
811 Note that TCP has no error queue;
812 .B MSG_ERRQUEUE
813 is not permitted on
814 .B SOCK_STREAM
815 sockets.
816 .B IP_RECVERR
817 is valid for TCP, but all errors are returned by socket function return or
818 .B SO_ERROR
819 only.
820 .IP
821 For raw sockets,
822 .B IP_RECVERR
823 enables passing of all received ICMP errors to the
824 application; otherwise, errors are reported only on connected sockets.
825 .IP
826 It sets or retrieves an integer boolean flag.
827 .B IP_RECVERR
828 defaults to off.
829 .TP
830 .BR IP_RECVOPTS " (since Linux 2.2)"
831 .\" Precisely: 2.1.15
832 Pass all incoming IP options to the user in an
833 .B IP_OPTIONS
834 control message.
835 The routing header and other options are already filled in
836 for the local host.
837 Not supported for
838 .B SOCK_STREAM
839 sockets.
840 .TP
841 .BR IP_RECVORIGDSTADDR " (since Linux 2.6.29)"
842 .\" commit e8b2dfe9b4501ed0047459b2756ba26e5a940a69
843 This boolean option enables the
844 .B IP_ORIGDSTADDR
845 ancillary message in
846 .BR recvmsg (2),
847 in which the kernel returns the original destination address
848 of the datagram being received.
849 The ancillary message contains a
850 .IR "struct sockaddr_in" .
851 .TP
852 .BR IP_RECVTOS " (since Linux 2.2)"
853 .\" Precisely: 2.1.68
854 If enabled, the
855 .B IP_TOS
856 ancillary message is passed with incoming packets.
857 It contains a byte which specifies the Type of Service/Precedence
858 field of the packet header.
859 Expects a boolean integer flag.
860 .TP
861 .BR IP_RECVTTL " (since Linux 2.2)"
862 .\" Precisely: 2.1.68
863 When this flag is set, pass an
864 .B IP_TTL
865 control message with the time-to-live
866 field of the received packet as a byte.
867 Not supported for
868 .B SOCK_STREAM
869 sockets.
870 .TP
871 .BR IP_RETOPTS " (since Linux 2.2)"
872 .\" Precisely: 2.1.15
873 Identical to
874 .BR IP_RECVOPTS ,
875 but returns raw unprocessed options with timestamp and route record
876 options not filled in for this hop.
877 .TP
878 .BR IP_ROUTER_ALERT " (since Linux 2.2)"
879 .\" Precisely: 2.1.68
880 Pass all to-be forwarded packets with the
881 IP Router Alert option set to this socket.
882 Valid only for raw sockets.
883 This is useful, for instance, for user-space RSVP daemons.
884 The tapped packets are not forwarded by the kernel; it is
885 the user's responsibility to send them out again.
886 Socket binding is ignored,
887 such packets are only filtered by protocol.
888 Expects an integer flag.
889 .TP
890 .BR IP_TOS " (since Linux 1.0)"
891 Set or retrieve the Type-Of-Service (TOS) field that is sent
892 with every IP packet originating from this socket.
893 It is used to prioritize packets on the network.
894 TOS is a byte.
895 There are some standard TOS flags defined:
896 .B IPTOS_LOWDELAY
897 to minimize delays for interactive traffic,
898 .B IPTOS_THROUGHPUT
899 to optimize throughput,
900 .B IPTOS_RELIABILITY
901 to optimize for reliability,
902 .B IPTOS_MINCOST
903 should be used for "filler data" where slow transmission doesn't matter.
904 At most one of these TOS values can be specified.
905 Other bits are invalid and shall be cleared.
906 Linux sends
907 .B IPTOS_LOWDELAY
908 datagrams first by default,
909 but the exact behavior depends on the configured queueing discipline.
910 .\" FIXME elaborate on this
911 Some high-priority levels may require superuser privileges (the
912 .B CAP_NET_ADMIN
913 capability).
914 .\" The priority can also be set in a protocol-independent way by the
915 .\" .RB ( SOL_SOCKET ", " SO_PRIORITY )
916 .\" socket option (see
917 .\" .BR socket (7)).
918 .TP
919 .BR IP_TRANSPARENT " (since Linux 2.6.24)"
920 .\" commit f5715aea4564f233767ea1d944b2637a5fd7cd2e
921 .\" This patch introduces the IP_TRANSPARENT socket option: enabling that
922 .\" will make the IPv4 routing omit the non-local source address check on
923 .\" output. Setting IP_TRANSPARENT requires NET_ADMIN capability.
924 .\" http://lwn.net/Articles/252545/
925 Setting this boolean option enables transparent proxying on this socket.
926 This socket option allows
927 the calling application to bind to a nonlocal IP address and operate
928 both as a client and a server with the foreign address as the local endpoint.
929 NOTE: this requires that routing be set up in a way that
930 packets going to the foreign address are routed through the TProxy box
931 (i.e., the system hosting the application that employs the
932 .B IP_TRANSPARENT
933 socket option).
934 Enabling this socket option requires superuser privileges
935 (the
936 .BR CAP_NET_ADMIN
937 capability).
938 .IP
939 TProxy redirection with the iptables TPROXY target also requires that
940 this option be set on the redirected socket.
941 .TP
942 .BR IP_TTL " (since Linux 1.0)"
943 Set or retrieve the current time-to-live field that is used in every packet
944 sent from this socket.
945 .TP
946 .BR IP_UNBLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
947 Unblock a previously blocked multicast source.
948 Returns
949 .BR EADDRNOTAVAIL
950 when the given source is not being blocked.
951 .IP
952 Argument is an
953 .I ip_mreq_source
954 structure as described under
955 .BR IP_ADD_SOURCE_MEMBERSHIP .
956 .SS /proc interfaces
957 The IP protocol
958 supports a set of
959 .I /proc
960 interfaces to configure some global parameters.
961 The parameters can be accessed by reading or writing files in the directory
962 .IR /proc/sys/net/ipv4/ .
963 .\" FIXME As at 2.6.12, 14 Jun 2005, the following are undocumented:
964 .\" ip_queue_maxlen
965 .\" ip_conntrack_max
966 Interfaces described as
967 .I Boolean
968 take an integer value, with a nonzero value ("true") meaning that
969 the corresponding option is enabled, and a zero value ("false")
970 meaning that the option is disabled.
971 .\"
972 .TP
973 .IR ip_always_defrag " (Boolean; since Linux 2.2.13)"
974 [New with kernel 2.2.13; in earlier kernel versions this feature
975 was controlled at compile time by the
976 .B CONFIG_IP_ALWAYS_DEFRAG
977 option; this option is not present in 2.4.x and later]
978
979 When this boolean flag is enabled (not equal to 0), incoming fragments
980 (parts of IP packets
981 that arose when some host between origin and destination decided
982 that the packets were too large and cut them into pieces) will be
983 reassembled (defragmented) before being processed, even if they are
984 about to be forwarded.
985
986 Only enable if running either a firewall that is the sole link
987 to your network or a transparent proxy; never ever use it for a
988 normal router or host.
989 Otherwise, fragmented communication can be disturbed
990 if the fragments travel over different links.
991 Defragmentation also has a large memory and CPU time cost.
992
993 This is automagically turned on when masquerading or transparent
994 proxying is configured.
995 .\"
996 .TP
997 .IR ip_autoconfig " (Linux 2.2 to 2.6.17)"
998 .\" Precisely: since 2.1.68
999 .\" FIXME document ip_autoconfig
1000 Not documented.
1001 .\"
1002 .TP
1003 .IR ip_default_ttl " (integer; default: 64; since Linux 2.2)"
1004 .\" Precisely: 2.1.15
1005 Set the default time-to-live value of outgoing packets.
1006 This can be changed per socket with the
1007 .B IP_TTL
1008 option.
1009 .\"
1010 .TP
1011 .IR ip_dynaddr " (Boolean; default: disabled; since Linux 2.0.31)"
1012 Enable dynamic socket address and masquerading entry rewriting on interface
1013 address change.
1014 This is useful for dialup interfaces with changing IP addresses.
1015 0 means no rewriting, 1 turns it on and 2 enables verbose mode.
1016 .\"
1017 .TP
1018 .IR ip_forward " (Boolean; default: disabled; since Linux 1.2)"
1019 Enable IP forwarding with a boolean flag.
1020 IP forwarding can also be set on a per-interface basis.
1021 .\"
1022 .TP
1023 .IR ip_local_port_range " (since Linux 2.2)"
1024 .\" Precisely: since 2.1.68
1025 This file contains two integers that define the default local port range
1026 allocated to sockets that are not explicitly bound to a port number\(emthat
1027 is, the range used for
1028 .IR "ephemeral ports" .
1029 An ephemeral port is allocated to a socket in the following circumstances:
1030 .RS
1031 .IP * 3
1032 the port number in a socket address is specified as 0 when calling
1033 .BR bind (2);
1034 .IP *
1035 .BR listen (2)
1036 is called on a stream socket that was not previously bound;
1037 .IP *
1038 .BR connect (2)
1039 was called on a socket that was not previously bound;
1040 .IP *
1041 .BR sendto (2)
1042 is called on a datagram socket that was not previously bound.
1043 .RE
1044 .IP
1045 Allocation of ephemeral ports starts with the first number in
1046 .IR ip_local_port_range
1047 and ends with the second number.
1048 If the range of ephemeral ports is exhausted,
1049 then the relevant system call returns an error (but see BUGS).
1050 .IP
1051 Note that the port range in
1052 .IR ip_local_port_range
1053 should not conflict with the ports used by masquerading
1054 (although the case is handled).
1055 Also, arbitrary choices may cause problems with some firewall packet
1056 filters that make assumptions about the local ports in use.
1057 The first number should be at least greater than 1024,
1058 or better, greater than 4096, to avoid clashes
1059 with well known ports and to minimize firewall problems.
1060 .\"
1061 .TP
1062 .IR ip_no_pmtu_disc " (Boolean; default: disabled; since Linux 2.2)"
1063 .\" Precisely: 2.1.15
1064 If enabled, don't do Path MTU Discovery for TCP sockets by default.
1065 Path MTU discovery may fail if misconfigured firewalls (that drop
1066 all ICMP packets) or misconfigured interfaces (e.g., a point-to-point
1067 link where both ends don't agree on the MTU) are on the path.
1068 It is better to fix the broken routers on the path than to turn off
1069 Path MTU Discovery globally, because not doing it incurs a high cost
1070 to the network.
1071 .\"
1072 .\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
1073 .TP
1074 .IR ip_nonlocal_bind " (Boolean; default: disabled; since Linux 2.4)"
1075 .\" Precisely: patch-2.4.0-test10
1076 If set, allows processes to
1077 .BR bind (2)
1078 to nonlocal IP addresses,
1079 which can be quite useful, but may break some applications.
1080 .\"
1081 .\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
1082 .TP
1083 .IR ip6frag_time " (integer; default: 30)"
1084 Time in seconds to keep an IPv6 fragment in memory.
1085 .\"
1086 .\" The following is from 2.6.12: Documentation/networking/ip-sysctl.txt
1087 .TP
1088 .IR ip6frag_secret_interval " (integer; default: 600)"
1089 Regeneration interval (in seconds) of the hash secret (or lifetime
1090 for the hash secret) for IPv6 fragments.
1091 .TP
1092 .IR ipfrag_high_thresh " (integer), " ipfrag_low_thresh " (integer)"
1093 If the amount of queued IP fragments reaches
1094 .IR ipfrag_high_thresh ,
1095 the queue is pruned down to
1096 .IR ipfrag_low_thresh .
1097 Contains an integer with the number of bytes.
1098 .TP
1099 .I neigh/*
1100 See
1101 .BR arp (7).
1102 .\" FIXME Document the conf/*/* interfaces
1103 .\"
1104 .\" FIXME Document the route/* interfaces
1105 .SS Ioctls
1106 All ioctls described in
1107 .BR socket (7)
1108 apply to
1109 .BR ip .
1110 .\" 2006-04-02, mtk
1111 .\" commented out the following because ipchains is obsolete
1112 .\" .PP
1113 .\" The ioctls to configure firewalling are documented in
1114 .\" .BR ipfw (4)
1115 .\" from the
1116 .\" .B ipchains
1117 .\" package.
1118 .PP
1119 Ioctls to configure generic device parameters are described in
1120 .BR netdevice (7).
1121 .\" FIXME Add a discussion of multicasting
1122 .SH ERRORS
1123 .\" FIXME document all errors.
1124 .\" We should really fix the kernels to give more uniform
1125 .\" error returns (ENOMEM vs ENOBUFS, EPERM vs EACCES etc.)
1126 .TP
1127 .B EACCES
1128 The user tried to execute an operation without the necessary permissions.
1129 These include:
1130 sending a packet to a broadcast address without having the
1131 .B SO_BROADCAST
1132 flag set;
1133 sending a packet via a
1134 .I prohibit
1135 route;
1136 modifying firewall settings without superuser privileges (the
1137 .B CAP_NET_ADMIN
1138 capability);
1139 binding to a privileged port without superuser privileges (the
1140 .B CAP_NET_BIND_SERVICE
1141 capability).
1142 .TP
1143 .B EADDRINUSE
1144 Tried to bind to an address already in use.
1145 .TP
1146 .B EADDRNOTAVAIL
1147 A nonexistent interface was requested or the requested source
1148 address was not local.
1149 .TP
1150 .B EAGAIN
1151 Operation on a nonblocking socket would block.
1152 .TP
1153 .B EALREADY
1154 A connection operation on a nonblocking socket is already in progress.
1155 .TP
1156 .B ECONNABORTED
1157 A connection was closed during an
1158 .BR accept (2).
1159 .TP
1160 .B EHOSTUNREACH
1161 No valid routing table entry matches the destination address.
1162 This error can be caused by an ICMP message from a remote router or
1163 by the local routing table.
1164 .TP
1165 .B EINVAL
1166 Invalid argument passed.
1167 For send operations this can be caused by sending to a
1168 .I blackhole
1169 route.
1170 .TP
1171 .B EISCONN
1172 .BR connect (2)
1173 was called on an already connected socket.
1174 .TP
1175 .B EMSGSIZE
1176 Datagram is bigger than an MTU on the path and it cannot be fragmented.
1177 .TP
1178 .BR ENOBUFS ", " ENOMEM
1179 Not enough free memory.
1180 This often means that the memory allocation is limited by the socket
1181 buffer limits, not by the system memory, but this is not 100% consistent.
1182 .TP
1183 .B ENOENT
1184 .B SIOCGSTAMP
1185 was called on a socket where no packet arrived.
1186 .TP
1187 .B ENOPKG
1188 A kernel subsystem was not configured.
1189 .TP
1190 .BR ENOPROTOOPT " and " EOPNOTSUPP
1191 Invalid socket option passed.
1192 .TP
1193 .B ENOTCONN
1194 The operation is defined only on a connected socket, but the socket wasn't
1195 connected.
1196 .TP
1197 .B EPERM
1198 User doesn't have permission to set high priority, change configuration,
1199 or send signals to the requested process or group.
1200 .TP
1201 .B EPIPE
1202 The connection was unexpectedly closed or shut down by the other end.
1203 .TP
1204 .B ESOCKTNOSUPPORT
1205 The socket is not configured or an unknown socket type was requested.
1206 .PP
1207 Other errors may be generated by the overlaying protocols; see
1208 .BR tcp (7),
1209 .BR raw (7),
1210 .BR udp (7),
1211 and
1212 .BR socket (7).
1213 .SH NOTES
1214 .BR IP_FREEBIND ,
1215 .BR IP_MSFILTER ,
1216 .BR IP_MTU ,
1217 .BR IP_MTU_DISCOVER ,
1218 .BR IP_RECVORIGDSTADDR ,
1219 .BR IP_PKTINFO ,
1220 .BR IP_RECVERR ,
1221 .BR IP_ROUTER_ALERT ,
1222 and
1223 .BR IP_TRANSPARENT
1224 are Linux-specific.
1225 .\" IP_PASSSEC is Linux-specific
1226 .\" IP_XFRM_POLICY is Linux-specific
1227 .\" IP_IPSEC_POLICY is a nonstandard extension, also present on some BSDs
1228
1229 Be very careful with the
1230 .B SO_BROADCAST
1231 option \- it is not privileged in Linux.
1232 It is easy to overload the network
1233 with careless broadcasts.
1234 For new application protocols
1235 it is better to use a multicast group instead of broadcasting.
1236 Broadcasting is discouraged.
1237 .PP
1238 Some other BSD sockets implementations provide
1239 .B IP_RCVDSTADDR
1240 and
1241 .B IP_RECVIF
1242 socket options to get the destination address and the interface of
1243 received datagrams.
1244 Linux has the more general
1245 .B IP_PKTINFO
1246 for the same task.
1247 .PP
1248 Some BSD sockets implementations also provide an
1249 .B IP_RECVTTL
1250 option, but an ancillary message with type
1251 .B IP_RECVTTL
1252 is passed with the incoming packet.
1253 This is different from the
1254 .B IP_TTL
1255 option used in Linux.
1256 .PP
1257 Using the
1258 .B SOL_IP
1259 socket options level isn't portable; BSD-based stacks use the
1260 .B IPPROTO_IP
1261 level.
1262 .SS Compatibility
1263 For compatibility with Linux 2.0, the obsolete
1264 .BI "socket(AF_INET, SOCK_PACKET, " protocol )
1265 syntax is still supported to open a
1266 .BR packet (7)
1267 socket.
1268 This is deprecated and should be replaced by
1269 .BI "socket(AF_PACKET, SOCK_RAW, " protocol )
1270 instead.
1271 The main difference is the new
1272 .I sockaddr_ll
1273 address structure for generic link layer information instead of the old
1274 .IR sockaddr_pkt .
1275 .SH BUGS
1276 There are too many inconsistent error values.
1277 .PP
1278 The error used to diagnose exhaustion of the ephemeral port range differs
1279 across the various system calls
1280 .RB ( connect (2),
1281 .BR bind (2),
1282 .BR listen (2),
1283 .BR sendto (2))
1284 that can assign ephemeral ports.
1285 .PP
1286 The ioctls to configure IP-specific interface options and ARP tables are
1287 not described.
1288 .\" .PP
1289 .\" Some versions of glibc forget to declare
1290 .\" .IR in_pktinfo .
1291 .\" Workaround currently is to copy it into your program from this man page.
1292 .PP
1293 Receiving the original destination address with
1294 .B MSG_ERRQUEUE
1295 in
1296 .I msg_name
1297 by
1298 .BR recvmsg (2)
1299 does not work in some 2.2 kernels.
1300 .\" .SH AUTHORS
1301 .\" This man page was written by Andi Kleen.
1302 .SH SEE ALSO
1303 .BR recvmsg (2),
1304 .BR sendmsg (2),
1305 .BR byteorder (3),
1306 .BR ipfw (4),
1307 .BR capabilities (7),
1308 .BR icmp (7),
1309 .BR ipv6 (7),
1310 .BR netlink (7),
1311 .BR raw (7),
1312 .BR socket (7),
1313 .BR tcp (7),
1314 .BR udp (7)
1315 .PP
1316 RFC\ 791 for the original IP specification.
1317 RFC\ 1122 for the IPv4 host requirements.
1318 RFC\ 1812 for the IPv4 router requirements.