]>
Commit | Line | Data |
---|---|---|
77117f4f MK |
1 | .\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>. |
2 | .\" and copyright (c) 1999 Matthew Wilcox. | |
2297bf0e | 3 | .\" |
00acdba1 | 4 | .\" %%%LICENSE_START(VERBATIM_ONE_PARA) |
77117f4f MK |
5 | .\" Permission is granted to distribute possibly modified copies |
6 | .\" of this page provided the header is included verbatim, | |
7 | .\" and in case of nontrivial modification author and date | |
8 | .\" of the modification is added to the header. | |
8ff7380d | 9 | .\" %%%LICENSE_END |
77117f4f MK |
10 | .\" |
11 | .\" 2002-10-30, Michael Kerrisk, <mtk.manpages@gmail.com> | |
12 | .\" Added description of SO_ACCEPTCONN | |
13 | .\" 2004-05-20, aeb, added SO_RCVTIMEO/SO_SNDTIMEO text. | |
14 | .\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com> | |
15 | .\" Added notes on capability requirements | |
16 | .\" A few small grammar fixes | |
dd2127e0 JE |
17 | .\" 2010-06-13 Jan Engelhardt <jengelh@medozas.de> |
18 | .\" Documented SO_DOMAIN and SO_PROTOCOL. | |
e57fe8ad | 19 | .\" |
8cf1b72a MK |
20 | .\" FIXME |
21 | .\" The following are not yet documented: | |
e57fe8ad MK |
22 | .\" |
23 | .\" SO_PEERNAME (2.4?) | |
24 | .\" get only | |
25 | .\" Seems to do something similar to getpeername(), but then | |
26 | .\" why is it necessary / how does it differ? | |
27 | .\" | |
e57fe8ad MK |
28 | .\" SO_TIMESTAMPING (2.6.30) |
29 | .\" Documentation/networking/timestamping.txt | |
30 | .\" commit cb9eff097831007afb30d64373f29d99825d0068 | |
31 | .\" Author: Patrick Ohly <patrick.ohly@intel.com> | |
32 | .\" | |
33 | .\" SO_WIFI_STATUS (3.3) | |
34 | .\" commit 6e3e939f3b1bf8534b32ad09ff199d88800835a0 | |
35 | .\" Author: Johannes Berg <johannes.berg@intel.com> | |
36 | .\" Also: SCM_WIFI_STATUS | |
37 | .\" | |
38 | .\" SO_NOFCS (3.4) | |
39 | .\" commit 3bdc0eba0b8b47797f4a76e377dd8360f317450f | |
40 | .\" Author: Ben Greear <greearb@candelatech.com> | |
41 | .\" | |
42 | .\" SO_GET_FILTER (3.8) | |
43 | .\" commit a8fc92778080c845eaadc369a0ecf5699a03bef0 | |
44 | .\" Author: Pavel Emelyanov <xemul@parallels.com> | |
45 | .\" | |
e57fe8ad MK |
46 | .\" SO_MAX_PACING_RATE (3.13) |
47 | .\" commit 62748f32d501f5d3712a7c372bbb92abc7c62bc7 | |
48 | .\" Author: Eric Dumazet <edumazet@google.com> | |
49 | .\" | |
50 | .\" SO_BPF_EXTENSIONS (3.14) | |
51 | .\" commit ea02f9411d9faa3553ed09ce0ec9f00ceae9885e | |
52 | .\" Author: Michal Sekletar <msekleta@redhat.com> | |
77117f4f | 53 | .\" |
45186a5d | 54 | .TH SOCKET 7 2021-03-22 "Linux man-pages (unreleased)" |
77117f4f MK |
55 | .SH NAME |
56 | socket \- Linux socket interface | |
57 | .SH SYNOPSIS | |
c7db92b9 | 58 | .nf |
77117f4f | 59 | .B #include <sys/socket.h> |
68e4db0a | 60 | .PP |
c4e7b714 | 61 | .IB sockfd " = socket(int " socket_family ", int " socket_type ", int " protocol ); |
c7db92b9 | 62 | .fi |
77117f4f MK |
63 | .SH DESCRIPTION |
64 | This manual page describes the Linux networking socket layer user | |
65 | interface. | |
66 | The BSD compatible sockets | |
67 | are the uniform interface | |
68 | between the user process and the network protocol stacks in the kernel. | |
69 | The protocol modules are grouped into | |
70 | .I protocol families | |
2c212ccd | 71 | such as |
5019071b | 72 | .BR AF_INET ", " AF_IPX ", and " AF_PACKET , |
77117f4f MK |
73 | and |
74 | .I socket types | |
2c212ccd | 75 | such as |
77117f4f MK |
76 | .B SOCK_STREAM |
77 | or | |
78 | .BR SOCK_DGRAM . | |
79 | See | |
80 | .BR socket (2) | |
81 | for more information on families and types. | |
c634028a | 82 | .SS Socket-layer functions |
77117f4f MK |
83 | These functions are used by the user process to send or receive packets |
84 | and to do other socket operations. | |
85 | For more information see their respective manual pages. | |
5711c04f | 86 | .PP |
77117f4f MK |
87 | .BR socket (2) |
88 | creates a socket, | |
89 | .BR connect (2) | |
90 | connects a socket to a remote socket address, | |
91 | the | |
92 | .BR bind (2) | |
93 | function binds a socket to a local socket address, | |
94 | .BR listen (2) | |
95 | tells the socket that new connections shall be accepted, and | |
96 | .BR accept (2) | |
97 | is used to get a new socket with a new incoming connection. | |
98 | .BR socketpair (2) | |
33a0ccb2 | 99 | returns two connected anonymous sockets (implemented only for a few |
77117f4f | 100 | local families like |
d4c8c97c | 101 | .BR AF_UNIX ). |
77117f4f MK |
102 | .PP |
103 | .BR send (2), | |
104 | .BR sendto (2), | |
105 | and | |
106 | .BR sendmsg (2) | |
107 | send data over a socket, and | |
108 | .BR recv (2), | |
109 | .BR recvfrom (2), | |
110 | .BR recvmsg (2) | |
111 | receive data from a socket. | |
112 | .BR poll (2) | |
113 | and | |
114 | .BR select (2) | |
115 | wait for arriving data or a readiness to send data. | |
116 | In addition, the standard I/O operations like | |
117 | .BR write (2), | |
118 | .BR writev (2), | |
119 | .BR sendfile (2), | |
120 | .BR read (2), | |
121 | and | |
122 | .BR readv (2) | |
123 | can be used to read and write data. | |
124 | .PP | |
125 | .BR getsockname (2) | |
126 | returns the local socket address and | |
127 | .BR getpeername (2) | |
128 | returns the remote socket address. | |
129 | .BR getsockopt (2) | |
130 | and | |
131 | .BR setsockopt (2) | |
132 | are used to set or get socket layer or protocol options. | |
133 | .BR ioctl (2) | |
134 | can be used to set or read some other options. | |
135 | .PP | |
136 | .BR close (2) | |
137 | is used to close a socket. | |
138 | .BR shutdown (2) | |
139 | closes parts of a full-duplex socket connection. | |
140 | .PP | |
141 | Seeking, or calling | |
142 | .BR pread (2) | |
143 | or | |
144 | .BR pwrite (2) | |
c7094399 | 145 | with a nonzero position is not supported on sockets. |
77117f4f | 146 | .PP |
ff40dbb3 | 147 | It is possible to do nonblocking I/O on sockets by setting the |
77117f4f MK |
148 | .B O_NONBLOCK |
149 | flag on a socket file descriptor using | |
150 | .BR fcntl (2). | |
151 | Then all operations that would block will (usually) | |
152 | return with | |
153 | .B EAGAIN | |
154 | (operation should be retried later); | |
155 | .BR connect (2) | |
156 | will return an | |
157 | .B EINPROGRESS | |
158 | error. | |
159 | The user can then wait for various events via | |
160 | .BR poll (2) | |
161 | or | |
162 | .BR select (2). | |
163 | .TS | |
164 | tab(:) allbox; | |
165 | c s s | |
0b174fe0 | 166 | l l lx. |
77117f4f MK |
167 | I/O events |
168 | Event:Poll flag:Occurrence | |
169 | Read:POLLIN:T{ | |
170 | New data arrived. | |
171 | T} | |
172 | Read:POLLIN:T{ | |
173 | A connection setup has been completed | |
174 | (for connection-oriented sockets). | |
175 | T} | |
176 | Read:POLLHUP:T{ | |
177 | A disconnection request has been initiated by the other end. | |
178 | T} | |
179 | Read:POLLHUP:T{ | |
180 | A connection is broken (only for connection-oriented protocols). | |
181 | When the socket is written to, | |
182 | .B SIGPIPE | |
183 | is also sent. | |
184 | T} | |
185 | Write:POLLOUT:T{ | |
186 | Socket has enough send buffer space for writing new data. | |
187 | T} | |
188 | Read/Write:T{ | |
bd8a7ca2 | 189 | POLLIN | |
77117f4f MK |
190 | .br |
191 | POLLOUT | |
192 | T}:T{ | |
193 | An outgoing | |
194 | .BR connect (2) | |
195 | finished. | |
196 | T} | |
0b174fe0 MK |
197 | Read/Write:POLLERR:T{ |
198 | An asynchronous error occurred. | |
199 | T} | |
200 | Read/Write:POLLHUP:T{ | |
201 | The other end has shut down one direction. | |
202 | T} | |
77117f4f MK |
203 | Exception:POLLPRI:T{ |
204 | Urgent data arrived. | |
205 | .B SIGURG | |
206 | is sent then. | |
207 | T} | |
208 | .\" FIXME . The following is not true currently: | |
209 | .\" It is no I/O event when the connection | |
210 | .\" is broken from the local end using | |
211 | .\" .BR shutdown (2) | |
212 | .\" or | |
213 | .\" .BR close (2). | |
214 | .TE | |
77117f4f MK |
215 | .PP |
216 | An alternative to | |
217 | .BR poll (2) | |
218 | and | |
219 | .BR select (2) | |
220 | is to let the kernel inform the application about events | |
221 | via a | |
222 | .B SIGIO | |
223 | signal. | |
224 | For that the | |
225 | .B O_ASYNC | |
226 | flag must be set on a socket file descriptor via | |
227 | .BR fcntl (2) | |
228 | and a valid signal handler for | |
229 | .B SIGIO | |
230 | must be installed via | |
231 | .BR sigaction (2). | |
232 | See the | |
233 | .I Signals | |
234 | discussion below. | |
b1b84b7a MK |
235 | .SS Socket address structures |
236 | Each socket domain has its own format for socket addresses, | |
237 | with a domain-specific address structure. | |
238 | Each of these structures begins with an | |
239 | integer "family" field (typed as | |
240 | .IR sa_family_t ) | |
241 | that indicates the type of the address structure. | |
242 | This allows | |
243 | the various system calls (e.g., | |
244 | .BR connect (2), | |
245 | .BR bind (2), | |
246 | .BR accept (2), | |
247 | .BR getsockname (2), | |
248 | .BR getpeername (2)), | |
249 | which are generic to all socket domains, | |
250 | to determine the domain of a particular socket address. | |
5711c04f | 251 | .PP |
b1b84b7a MK |
252 | To allow any type of socket address to be passed to |
253 | interfaces in the sockets API, | |
254 | the type | |
1ae6b2c7 | 255 | .I struct sockaddr |
b1b84b7a | 256 | is defined. |
e6d86b41 | 257 | The purpose of this type is purely to allow casting of |
b1b84b7a | 258 | domain-specific socket address types to a "generic" type, |
e6d86b41 | 259 | so as to avoid compiler warnings about type mismatches in |
b1b84b7a | 260 | calls to the sockets API. |
5711c04f | 261 | .PP |
b1b84b7a MK |
262 | In addition, the sockets API provides the data type |
263 | .IR "struct sockaddr_storage" . | |
264 | This type | |
265 | is suitable to accommodate all supported domain-specific socket | |
266 | address structures; it is large enough and is aligned properly. | |
e6d86b41 | 267 | (In particular, it is large enough to hold |
b1b84b7a MK |
268 | IPv6 socket addresses.) |
269 | The structure includes the following field, which can be used to identify | |
270 | the type of socket address actually stored in the structure: | |
5711c04f | 271 | .PP |
b1b84b7a | 272 | .in +4n |
b8302363 | 273 | .EX |
b1b84b7a | 274 | sa_family_t ss_family; |
b8302363 | 275 | .EE |
b1b84b7a | 276 | .in |
5711c04f | 277 | .PP |
e6d86b41 | 278 | The |
b1b84b7a MK |
279 | .I sockaddr_storage |
280 | structure is useful in programs that must handle socket addresses | |
281 | in a generic way | |
282 | (e.g., programs that must deal with both IPv4 and IPv6 socket addresses). | |
c634028a | 283 | .SS Socket options |
7d247ee8 | 284 | The socket options listed below can be set by using |
77117f4f MK |
285 | .BR setsockopt (2) |
286 | and read with | |
287 | .BR getsockopt (2) | |
288 | with the socket level set to | |
289 | .B SOL_SOCKET | |
7d247ee8 MK |
290 | for all sockets. |
291 | Unless otherwise noted, | |
292 | .I optval | |
293 | is a pointer to an | |
294 | .IR int . | |
bea08fec | 295 | .\" FIXME . |
e2ec4f17 MK |
296 | .\" In the list below, the text used to describe argument types |
297 | .\" for each socket option should be more consistent | |
298 | .\" | |
77117f4f MK |
299 | .\" SO_ACCEPTCONN is in POSIX.1-2001, and its origin is explained in |
300 | .\" W R Stevens, UNPv1 | |
301 | .TP | |
302 | .B SO_ACCEPTCONN | |
303 | Returns a value indicating whether or not this socket has been marked | |
304 | to accept connections with | |
305 | .BR listen (2). | |
306 | The value 0 indicates that this is not a listening socket, | |
307 | the value 1 indicates that this is a listening socket. | |
fa574567 | 308 | This socket option is read-only. |
77117f4f | 309 | .TP |
096da110 MK |
310 | .BR SO_ATTACH_FILTER " (since Linux 2.2), " SO_ATTACH_BPF " (since Linux 3.19)" |
311 | Attach a classic BPF | |
312 | .RB ( SO_ATTACH_FILTER ) | |
313 | or an extended BPF | |
314 | .RB ( SO_ATTACH_BPF ) | |
315 | program to the socket for use as a filter of incoming packets. | |
316 | A packet will be dropped if the filter program returns zero. | |
317 | If the filter program returns a | |
777411ae | 318 | nonzero value which is less than the packet's data length, |
096da110 MK |
319 | the packet will be truncated to the length returned. |
320 | If the value returned by the filter is greater than or equal to the | |
321 | packet's data length, the packet is allowed to proceed unmodified. | |
5711c04f | 322 | .IP |
1fa871f5 | 323 | The argument for |
1ae6b2c7 | 324 | .B SO_ATTACH_FILTER |
1fa871f5 CG |
325 | is a |
326 | .I sock_fprog | |
096da110 MK |
327 | structure, defined in |
328 | .IR <linux/filter.h> : | |
6545cc56 | 329 | .IP |
1fa871f5 | 330 | .in +4n |
6545cc56 | 331 | .EX |
1fa871f5 CG |
332 | struct sock_fprog { |
333 | unsigned short len; | |
334 | struct sock_filter *filter; | |
335 | }; | |
6545cc56 | 336 | .EE |
1fa871f5 CG |
337 | .in |
338 | .IP | |
339 | The argument for | |
1ae6b2c7 | 340 | .B SO_ATTACH_BPF |
1fa871f5 CG |
341 | is a file descriptor returned by the |
342 | .BR bpf (2) | |
343 | system call and must refer to a program of type | |
d8012462 | 344 | .BR BPF_PROG_TYPE_SOCKET_FILTER . |
5711c04f | 345 | .IP |
096da110 MK |
346 | These options may be set multiple times for a given socket, |
347 | each time replacing the previous filter program. | |
348 | The classic and extended versions may be called on the same socket, | |
349 | but the previous filter will always be replaced such that a socket | |
350 | never has more than one filter defined. | |
5711c04f | 351 | .IP |
096da110 | 352 | Both classic and extended BPF are explained in the kernel source file |
1fa871f5 CG |
353 | .IR Documentation/networking/filter.txt . | |
354 | .TP | |
096da110 | 355 | .BR SO_ATTACH_REUSEPORT_CBPF ", " SO_ATTACH_REUSEPORT_EBPF |
1fa871f5 | 356 | For use with the |
1ae6b2c7 | 357 | .B SO_REUSEPORT |
096da110 MK |
358 | option, these options allow the user to set a classic BPF |
359 | .RB ( SO_ATTACH_REUSEPORT_CBPF ) | |
360 | or an extended BPF | |
361 | .RB ( SO_ATTACH_REUSEPORT_EBPF ) | |
362 | program which defines how packets are assigned to | |
1fa871f5 | 363 | the sockets in the reuseport group (that is, all sockets which have |
1ae6b2c7 | 364 | .B SO_REUSEPORT |
096da110 | 365 | set and are using the same local address to receive packets). |
5711c04f | 366 | .IP |
096da110 MK |
367 | The BPF program must return an index between 0 and N\-1 representing |
368 | the socket which should receive the packet | |
369 | (where N is the number of sockets in the group). | |
370 | If the BPF program returns an invalid index, | |
371 | socket selection will fall back to the plain | |
1ae6b2c7 | 372 | .B SO_REUSEPORT |
1fa871f5 | 373 | mechanism. |
5711c04f | 374 | .IP |
1fa871f5 CG |
375 | Sockets are numbered in the order in which they are added to the group |
376 | (that is, the order of | |
377 | .BR bind (2) | |
378 | calls for UDP sockets or the order of | |
379 | .BR listen (2) | |
096da110 MK |
380 | calls for TCP sockets). |
381 | New sockets added to a reuseport group will inherit the BPF program. | |
382 | When a socket is removed from a reuseport group (via | |
383 | .BR close (2)), | |
1fa871f5 CG |
384 | the last socket in the group will be moved into the closed socket's |
385 | position. | |
5711c04f | 386 | .IP |
096da110 MK |
387 | These options may be set repeatedly at any time on any socket in the group |
388 | to replace the current BPF program used by all sockets in the group. | |
5711c04f | 389 | .IP |
1ae6b2c7 | 390 | .B SO_ATTACH_REUSEPORT_CBPF |
096da110 | 391 | takes the same argument type as |
1ae6b2c7 | 392 | .B SO_ATTACH_FILTER |
1fa871f5 | 393 | and |
1ae6b2c7 | 394 | .B SO_ATTACH_REUSEPORT_EBPF |
096da110 | 395 | takes the same argument type as |
d8012462 | 396 | .BR SO_ATTACH_BPF . |
5711c04f | 397 | .IP |
096da110 MK |
398 | UDP support for this feature is available since Linux 4.5; |
399 | TCP support is available since Linux 4.6. | |
1fa871f5 | 400 | .TP |
77117f4f MK |
401 | .B SO_BINDTODEVICE |
402 | Bind this socket to a particular device like \(lqeth0\(rq, | |
403 | as specified in the passed interface name. | |
404 | If the | |
405 | name is an empty string or the option length is zero, the socket device | |
406 | binding is removed. | |
d0cb7cc6 | 407 | The passed option is a variable-length null-terminated |
77117f4f MK |
408 | interface name string with the maximum size of |
409 | .BR IFNAMSIZ . | |
410 | If a socket is bound to an interface, | |
411 | only packets received from that particular interface are processed by the | |
412 | socket. | |
33a0ccb2 | 413 | Note that this works only for some socket types, particularly |
77117f4f MK |
414 | .B AF_INET |
415 | sockets. | |
416 | It is not supported for packet sockets (use normal | |
56bf2613 | 417 | .BR bind (2) |
77117f4f | 418 | there). |
5711c04f | 419 | .IP |
757716c7 MK |
420 | Before Linux 3.8, |
421 | this socket option could be set, but could not be retrieved with | |
422 | .BR getsockopt (2). | |
423 | Since Linux 3.8, it is readable. | |
424 | The | |
425 | .I optlen | |
b072a788 | 426 | argument should contain the buffer size available |
757716c7 | 427 | to receive the device name and is recommended to be |
1ae6b2c7 | 428 | .B IFNAMSIZ |
757716c7 MK |
429 | bytes. |
430 | The real device name length is reported back in the | |
431 | .I optlen | |
432 | argument. | |
77117f4f MK |
433 | .TP |
434 | .B SO_BROADCAST | |
435 | Set or get the broadcast flag. | |
42bd5b3d | 436 | When enabled, datagram sockets are allowed to send |
77117f4f MK |
437 | packets to a broadcast address. |
438 | This option has no effect on stream-oriented sockets. | |
439 | .TP | |
440 | .B SO_BSDCOMPAT | |
441 | Enable BSD bug-to-bug compatibility. | |
442 | This is used by the UDP protocol module in Linux 2.0 and 2.2. | |
eebf8c09 | 443 | If enabled, ICMP errors received for a UDP socket will not be passed |
77117f4f MK |
444 | to the user program. |
445 | In later kernel versions, support for this option has been phased out: | |
446 | Linux 2.4 silently ignores it, and Linux 2.6 generates a kernel warning | |
447 | (printk()) if a program uses this option. | |
448 | Linux 2.0 also enabled BSD bug-to-bug compatibility | |
449 | options (random header changing, skipping of the broadcast flag) for raw | |
450 | sockets with this option, but that was removed in Linux 2.2. | |
451 | .TP | |
452 | .B SO_DEBUG | |
453 | Enable socket debugging. | |
d7087783 | 454 | Allowed only for processes with the |
77117f4f MK |
455 | .B CAP_NET_ADMIN |
456 | capability or an effective user ID of 0. | |
457 | .TP | |
096da110 MK |
458 | .BR SO_DETACH_FILTER " (since Linux 2.2), " SO_DETACH_BPF " (since Linux 3.19)" |
459 | These two options, which are synonyms, | |
460 | may be used to remove the classic or extended BPF | |
461 | program attached to a socket with either | |
1ae6b2c7 | 462 | .B SO_ATTACH_FILTER |
1fa871f5 | 463 | or |
096da110 | 464 | .BR SO_ATTACH_BPF . |
1fa871f5 | 465 | The option value is ignored. |
1fa871f5 | 466 | .TP |
dd2127e0 JE |
467 | .BR SO_DOMAIN " (since Linux 2.6.32)" |
468 | Retrieves the socket domain as an integer, returning a value such as | |
469 | .BR AF_INET6 . | |
470 | See | |
471 | .BR socket (2) | |
472 | for details. | |
fa574567 | 473 | This socket option is read-only. |
dd2127e0 | 474 | .TP |
77117f4f MK |
475 | .B SO_ERROR |
476 | Get and clear the pending socket error. | |
fa574567 | 477 | This socket option is read-only. |
77117f4f MK |
478 | Expects an integer. |
479 | .TP | |
480 | .B SO_DONTROUTE | |
33a0ccb2 | 481 | Don't send via a gateway, send only to directly connected hosts. |
77117f4f MK |
482 | The same effect can be achieved by setting the |
483 | .B MSG_DONTROUTE | |
484 | flag on a socket | |
485 | .BR send (2) | |
486 | operation. | |
487 | Expects an integer boolean flag. | |
488 | .TP | |
b7f97e8e MK |
489 | .BR SO_INCOMING_CPU " (gettable since Linux 3.19, settable since Linux 4.4)" |
490 | .\" getsockopt 2c8c56e15df3d4c2af3d656e44feb18789f75837 | |
491 | .\" setsockopt 70da268b569d32a9fddeea85dc18043de9d89f89 | |
492 | Sets or gets the CPU affinity of a socket. | |
493 | Expects an integer flag. | |
5711c04f | 494 | .IP |
ca1969e9 | 495 | .in +4n |
b8302363 | 496 | .EX |
ca1969e9 | 497 | int cpu = 1; |
0b3f52d0 MK |
498 | setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, |
499 | sizeof(cpu)); | |
b8302363 | 500 | .EE |
ca1969e9 | 501 | .in |
5711c04f | 502 | .IP |
a99fa5fb MK |
503 | Because all of the packets for a single stream |
504 | (i.e., all packets for the same 4-tuple) | |
505 | arrive on the single RX queue that is associated with a particular CPU, | |
506 | the typical use case is to employ one listening process per RX queue, | |
507 | with the incoming flow being handled by a listener | |
508 | on the same CPU that is handling the RX queue. | |
b7f97e8e | 509 | This provides optimal NUMA behavior and keeps CPU caches hot. |
bb676145 MK |
510 | .\" |
511 | .\" From an email conversation with Eric Dumazet: | |
512 | .\" >> Note that setting the option is not supported if SO_REUSEPORT is used. | |
513 | .\" > | |
514 | .\" > Please define "not supported". Does this yield an API diagnostic? | |
515 | .\" > If so, what is it? | |
516 | .\" > | |
517 | .\" >> Socket will be selected from an array, either by a hash or BPF program | |
518 | .\" >> that has no access to this information. | |
519 | .\" > | |
520 | .\" > Sorry -- I'm lost here. How does this comment relate to the proposed | |
521 | .\" > man page text above? | |
99cf1681 | 522 | .\" |
bb676145 | 523 | .\" Simply that : |
99cf1681 | 524 | .\" |
bb676145 MK |
525 | .\" If an application uses both SO_INCOMING_CPU and SO_REUSEPORT, then |
526 | .\" SO_REUSEPORT logic, selecting the socket to receive the packet, ignores | |
527 | .\" SO_INCOMING_CPU setting. | |
ca1969e9 | 528 | .TP |
e8500ecc SS |
529 | .BR SO_INCOMING_NAPI_ID " (gettable since Linux 4.12)" |
530 | .\" getsockopt 6d4339028b350efbf87c61e6d9e113e5373545c9 | |
b5638e2e | 531 | Returns a system-level unique ID called NAPI ID that is associated |
820e13fb MK |
532 | with an RX queue on which the last packet associated with that |
533 | socket is received. | |
e8500ecc SS |
534 | .IP |
535 | This can be used by an application to split the incoming flows among worker | |
820e13fb MK |
536 | threads based on the RX queue on which the packets associated with the |
537 | flows are received. | |
538 | It allows each worker thread to be associated with | |
539 | a NIC HW receive queue and service all the connection | |
540 | requests received on that RX queue. | |
541 | This mapping between an app thread and | |
542 | a HW NIC queue streamlines the | |
e8500ecc SS |
543 | flow of data from the NIC to the application. |
544 | .TP | |
77117f4f MK |
545 | .B SO_KEEPALIVE |
546 | Enable sending of keep-alive messages on connection-oriented sockets. | |
547 | Expects an integer boolean flag. | |
548 | .TP | |
549 | .B SO_LINGER | |
550 | Sets or gets the | |
551 | .B SO_LINGER | |
552 | option. | |
553 | The argument is a | |
554 | .I linger | |
555 | structure. | |
6545cc56 | 556 | .IP |
77117f4f | 557 | .in +4n |
6545cc56 | 558 | .EX |
77117f4f MK |
559 | struct linger { |
560 | int l_onoff; /* linger active */ | |
561 | int l_linger; /* how many seconds to linger for */ | |
562 | }; | |
6545cc56 | 563 | .EE |
77117f4f MK |
564 | .in |
565 | .IP | |
566 | When enabled, a | |
567 | .BR close (2) | |
568 | or | |
569 | .BR shutdown (2) | |
570 | will not return until all queued messages for the socket have been | |
571 | successfully sent or the linger timeout has been reached. | |
572 | Otherwise, | |
573 | the call returns immediately and the closing is done in the background. | |
574 | When the socket is closed as part of | |
575 | .BR exit (2), | |
576 | it always lingers in the background. | |
577 | .TP | |
1fa871f5 | 578 | .B SO_LOCK_FILTER |
096da110 | 579 | .\" commit d59577b6ffd313d0ab3be39cb1ab47e29bdc9182 |
96d9edea | 580 | When set, this option will prevent |
096da110 MK |
581 | changing the filters associated with the socket. |
582 | These filters include any set using the socket options | |
f7111396 MK |
583 | .BR SO_ATTACH_FILTER , |
584 | .BR SO_ATTACH_BPF , | |
585 | .BR SO_ATTACH_REUSEPORT_CBPF , | |
096da110 | 586 | and |
335c2365 | 587 | .BR SO_ATTACH_REUSEPORT_EBPF . |
5711c04f | 588 | .IP |
59ac6f2f MK |
589 | The typical use case is for a privileged process to set up a raw socket |
590 | (an operation that requires the | |
1ae6b2c7 | 591 | .B CAP_NET_RAW |
59ac6f2f | 592 | capability), apply a restrictive filter, set the |
1ae6b2c7 | 593 | .B SO_LOCK_FILTER |
59ac6f2f | 594 | option, |
1fa871f5 | 595 | and then either drop its privileges or pass the socket file descriptor |
59ac6f2f | 596 | to an unprivileged process via a UNIX domain socket. |
5711c04f | 597 | .IP |
096da110 | 598 | Once the |
1ae6b2c7 | 599 | .B SO_LOCK_FILTER |
96d9edea | 600 | option has been enabled, attempts to change or remove the filter |
096da110 | 601 | attached to a socket, or to disable the |
1ae6b2c7 | 602 | .B SO_LOCK_FILTER |
096da110 MK |
603 | option will fail with the error |
604 | .BR EPERM . | |
1fa871f5 | 605 | .TP |
cf0a1f7c M |
606 | .BR SO_MARK " (since Linux 2.6.25)" |
607 | .\" commit 4a19ec5800fc3bb64e2d87c4d9fdd9e636086fe0 | |
608 | .\" and 914a9ab386a288d0f22252fc268ecbc048cdcbd5 | |
609 | Set the mark for each packet sent through this socket | |
610 | (similar to the netfilter MARK target but socket-based). | |
611 | Changing the mark can be used for mark-based | |
612 | routing without netfilter or for packet filtering. | |
613 | Setting this option requires the | |
614 | .B CAP_NET_ADMIN | |
615 | capability. | |
616 | .TP | |
77117f4f MK |
617 | .B SO_OOBINLINE |
618 | If this option is enabled, | |
619 | out-of-band data is directly placed into the receive data stream. | |
2b9b829d | 620 | Otherwise, out-of-band data is passed only when the |
77117f4f MK |
621 | .B MSG_OOB |
622 | flag is set during receiving. | |
623 | .\" don't document it because it can do too much harm. | |
624 | .\".B SO_NO_CHECK | |
5d75650a MK |
625 | .\" The kernel has support for the SO_NO_CHECK socket |
626 | .\" option (boolean: 0 == default, calculate checksum on xmit, | |
627 | .\" 1 == do not calculate checksum on xmit). | |
628 | .\" Additional note from Andi Kleen on SO_NO_CHECK (2010-08-30) | |
629 | .\" On Linux UDP checksums are essentially free and there's no reason | |
630 | .\" to turn them off and it would disable another safety line. | |
631 | .\" That is why I didn't document the option. | |
77117f4f MK |
632 | .TP |
633 | .B SO_PASSCRED | |
634 | Enable or disable the receiving of the | |
635 | .B SCM_CREDENTIALS | |
636 | control message. | |
637 | For more information see | |
638 | .BR unix (7). | |
2fc7c74c MK |
639 | .TP |
640 | .B SO_PASSSEC | |
641 | Enable or disable the receiving of the | |
642 | .B SCM_SECURITY | |
643 | control message. | |
644 | For more information see | |
645 | .BR unix (7). | |
77117f4f | 646 | .TP |
3f1e877d MK |
647 | .BR SO_PEEK_OFF " (since Linux 3.4)" |
648 | .\" commit ef64a54f6e558155b4f149bb10666b9e914b6c54 | |
649 | This option, which is currently supported only for | |
650 | .BR unix (7) | |
651 | sockets, sets the value of the "peek offset" for the | |
7f4cd55d | 652 | .BR recv (2) |
3f1e877d | 653 | system call when used with the |
1ae6b2c7 | 654 | .B MSG_PEEK |
3f1e877d | 655 | flag. |
5711c04f | 656 | .IP |
3f1e877d MK |
657 | When this option is set to a negative value |
658 | (it is set to \-1 for all new sockets), | |
659 | traditional behavior is provided: | |
7f4cd55d | 660 | .BR recv (2) |
3f1e877d | 661 | with the |
1ae6b2c7 | 662 | .B MSG_PEEK |
3f1e877d | 663 | flag will peek data from the front of the queue. |
5711c04f | 664 | .IP |
3f1e877d MK |
665 | When the option is set to a value greater than or equal to zero, |
666 | then the next peek at data queued in the socket will occur at | |
667 | the byte offset specified by the option value. | |
668 | At the same time, the "peek offset" will be | |
669 | incremented by the number of bytes that were peeked from the queue, | |
cac3a0c5 | 670 | so that a subsequent peek will return the next data in the queue. |
5711c04f | 671 | .IP |
3f1e877d MK |
672 | If data is removed from the front of the queue via a call to |
673 | .BR recv (2) | |
674 | (or similar) without the | |
1ae6b2c7 | 675 | .B MSG_PEEK |
3f1e877d MK |
676 | flag, the "peek offset" will be decreased by the number of bytes removed. |
677 | In other words, receiving data without the | |
678 | .B MSG_PEEK | |
679 | flag will cause the "peek offset" to be adjusted to maintain | |
680 | the correct relative position in the queued data, | |
681 | so that a subsequent peek will retrieve the data that would have been | |
682 | retrieved had the data not been removed. | |
5711c04f | 683 | .IP |
3f1e877d MK |
684 | For datagram sockets, if the "peek offset" points to the middle of a packet, |
685 | the data returned will be marked with the | |
1ae6b2c7 | 686 | .B MSG_TRUNC |
3f1e877d | 687 | flag. |
5711c04f | 688 | .IP |
3f1e877d MK |
689 | The following example serves to illustrate the use of |
690 | .BR SO_PEEK_OFF . | |
691 | Suppose a stream socket has the following queued input data: | |
5711c04f | 692 | .IP |
1ae6b2c7 AC |
693 | .in +4n |
694 | .EX | |
695 | aabbccddeeff | |
696 | .EE | |
697 | .in | |
3f1e877d MK |
698 | .IP |
699 | The following sequence of | |
700 | .BR recv (2) | |
701 | calls would have the effect noted in the comments: | |
5711c04f | 702 | .IP |
3f1e877d | 703 | .in +4n |
b8302363 | 704 | .EX |
3f1e877d MK |
705 | int ov = 4; // Set peek offset to 4 |
706 | setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &ov, sizeof(ov)); | |
707 | ||
708 | recv(fd, buf, 2, MSG_PEEK); // Peeks "cc"; offset set to 6 | |
709 | recv(fd, buf, 2, MSG_PEEK); // Peeks "dd"; offset set to 8 | |
710 | recv(fd, buf, 2, 0); // Reads "aa"; offset set to 6 | |
711 | recv(fd, buf, 2, MSG_PEEK); // Peeks "ee"; offset set to 8 | |
b8302363 | 712 | .EE |
3f1e877d MK |
713 | .in |
714 | .TP | |
77117f4f | 715 | .B SO_PEERCRED |
94950b9a MK |
716 | Return the credentials of the peer process connected to this socket. |
717 | For further details, see | |
77117f4f | 718 | .BR unix (7). |
77117f4f | 719 | .TP |
e6f90c3f SS |
720 | .BR SO_PEERSEC " (since Linux 2.6.2)" |
721 | Return the security context of the peer socket connected to this socket. | |
722 | For further details, see | |
71a38281 MK |
723 | .BR unix (7) |
724 | and | |
725 | .BR ip (7). | |
e6f90c3f | 726 | .TP |
77117f4f MK |
727 | .B SO_PRIORITY |
728 | Set the protocol-defined priority for all packets to be sent on | |
729 | this socket. | |
730 | Linux uses this value to order the networking queues: | |
731 | packets with a higher priority may be processed first depending | |
732 | on the selected device queueing discipline. | |
3a7ee744 MK |
733 | .\" For |
734 | .\" .BR ip (7), | |
735 | .\" this also sets the IP type-of-service (TOS) field for outgoing packets. | |
77117f4f MK |
736 | Setting a priority outside the range 0 to 6 requires the |
737 | .B CAP_NET_ADMIN | |
738 | capability. | |
739 | .TP | |
dd2127e0 JE |
740 | .BR SO_PROTOCOL " (since Linux 2.6.32)" |
741 | Retrieves the socket protocol as an integer, returning a value such as | |
742 | .BR IPPROTO_SCTP . | |
743 | See | |
744 | .BR socket (2) | |
745 | for details. | |
fa574567 | 746 | This socket option is read-only. |
dd2127e0 | 747 | .TP |
77117f4f MK |
748 | .B SO_RCVBUF |
749 | Sets or gets the maximum socket receive buffer in bytes. | |
750 | The kernel doubles this value (to allow space for bookkeeping overhead) | |
751 | when it is set using | |
752 | .\" Most (all?) other implementations do not do this -- MTK, Dec 05 | |
753 | .BR setsockopt (2), | |
754 | and this doubled value is returned by | |
755 | .BR getsockopt (2). | |
3de2d3be | 756 | .\" The following thread on LKML is quite informative: |
a1fa36af | 757 | .\" getsockopt/setsockopt with SO_RCVBUF and SO_SNDBUF "non-standard" behavior |
3de2d3be MK |
758 | .\" 17 July 2012 |
759 | .\" http://thread.gmane.org/gmane.linux.kernel/1328935 | |
77117f4f | 760 | The default value is set by the |
5a2ff571 MK |
761 | .I /proc/sys/net/core/rmem_default |
762 | file, and the maximum allowed value is set by the | |
763 | .I /proc/sys/net/core/rmem_max | |
764 | file. | |
77117f4f MK |
765 | The minimum (doubled) value for this option is 256. |
766 | .TP | |
767 | .BR SO_RCVBUFFORCE " (since Linux 2.6.14)" | |
768 | Using this socket option, a privileged | |
769 | .RB ( CAP_NET_ADMIN ) | |
770 | process can perform the same task as | |
771 | .BR SO_RCVBUF , | |
772 | but the | |
773 | .I rmem_max | |
774 | limit can be overridden. | |
775 | .TP | |
776 | .BR SO_RCVLOWAT " and " SO_SNDLOWAT | |
777 | Specify the minimum number of bytes in the buffer until the socket layer | |
778 | will pass the data to the protocol | |
779 | .RB ( SO_SNDLOWAT ) | |
780 | or the user on receiving | |
781 | .RB ( SO_RCVLOWAT ). | |
782 | These two values are initialized to 1. | |
783 | .B SO_SNDLOWAT | |
784 | is not changeable on Linux | |
785 | .RB ( setsockopt (2) | |
786 | fails with the error | |
787 | .BR ENOPROTOOPT ). | |
788 | .B SO_RCVLOWAT | |
789 | is changeable | |
790 | only since Linux 2.4. | |
44a00819 MK |
791 | .IP |
792 | Before Linux 2.6.28 | |
858c8575 | 793 | .\" Tested on kernel 2.6.14 -- mtk, 30 Nov 05 |
44a00819 MK |
794 | .BR select (2), |
795 | .BR poll (2), | |
77117f4f | 796 | and |
44a00819 MK |
797 | .BR epoll (7) |
798 | did not respect the | |
77117f4f MK |
799 | .B SO_RCVLOWAT |
800 | setting on Linux, | |
44a00819 MK |
801 | and indicated a socket as readable when even a single byte of data |
802 | was available. | |
803 | A subsequent read from the socket would then block until | |
77117f4f MK |
804 | .B SO_RCVLOWAT |
805 | bytes are available. | |
858c8575 MK |
806 | Since Linux 2.6.28, |
807 | .\" commit c7004482e8dcb7c3c72666395cfa98a216a4fb70 | |
808 | .BR select (2), | |
809 | .BR poll (2), | |
810 | and | |
811 | .BR epoll (7) | |
812 | indicate a socket as readable only if at least | |
813 | .B SO_RCVLOWAT | |
814 | bytes are available. | |
77117f4f MK |
815 | .TP |
816 | .BR SO_RCVTIMEO " and " SO_SNDTIMEO | |
817 | .\" Not implemented in 2.0. | |
818 | .\" Implemented in 2.1.11 for getsockopt: always return a zero struct. | |
819 | .\" Implemented in 2.3.41 for setsockopt, and actually used. | |
820 | Specify the receiving or sending timeouts until reporting an error. | |
821 | The argument is a | |
822 | .IR "struct timeval" . | |
823 | If an input or output function blocks for this period of time, and | |
824 | data has been sent or received, the return value of that function | |
825 | will be the amount of data transferred; if no data has been transferred | |
56db9d31 | 826 | and the timeout has been reached, then \-1 is returned with |
77117f4f MK |
827 | .I errno |
828 | set to | |
1ae6b2c7 | 829 | .B EAGAIN |
77117f4f | 830 | or |
f3277220 | 831 | .BR EWOULDBLOCK , |
77117f4f | 832 | .\" in fact to EAGAIN |
f3277220 AK |
833 | or |
834 | .B EINPROGRESS | |
835 | (for | |
836 | .BR connect (2)) | |
ff40dbb3 | 837 | just as if the socket was specified to be nonblocking. |
eebf8c09 | 838 | If the timeout is set to zero (the default), |
77117f4f MK |
839 | then the operation will never time out. |
840 | Timeouts only have effect for system calls that perform socket I/O (e.g., | |
841 | .BR read (2), | |
842 | .BR recvmsg (2), | |
843 | .BR send (2), | |
844 | .BR sendmsg (2)); | |
845 | timeouts have no effect for | |
846 | .BR select (2), | |
847 | .BR poll (2), | |
848 | .BR epoll_wait (2), | |
02f95a31 | 849 | and so on. |
77117f4f MK |
850 | .TP |
851 | .B SO_REUSEADDR | |
c28f1dd3 MK |
852 | .\" commit c617f398edd4db2b8567a28e899a88f8f574798d |
853 | .\" https://lwn.net/Articles/542629/ | |
77117f4f MK |
854 | Indicates that the rules used in validating addresses supplied in a |
855 | .BR bind (2) | |
856 | call should allow reuse of local addresses. | |
857 | For | |
d4c8c97c | 858 | .B AF_INET |
77117f4f MK |
859 | sockets this |
860 | means that a socket may bind, except when there | |
861 | is an active listening socket bound to the address. | |
862 | When the listening socket is bound to | |
863 | .B INADDR_ANY | |
864 | with a specific port then it is not possible | |
865 | to bind to this port for any local address. | |
866 | Argument is an integer boolean flag. | |
867 | .TP | |
75979920 | 868 | .BR SO_REUSEPORT " (since Linux 3.9)" |
11af2d4b | 869 | Permits multiple |
75979920 DW |
870 | .B AF_INET |
871 | or | |
872 | .B AF_INET6 | |
11af2d4b MK |
873 | sockets to be bound to an identical socket address. |
874 | This option must be set on each socket (including the first socket) | |
875 | prior to calling | |
876 | .BR bind (2) | |
877 | on the socket. | |
878 | To prevent port hijacking, | |
879 | all of the processes binding to the same address must have the same | |
880 | effective UID. | |
881 | This option can be employed with both TCP and UDP sockets. | |
5711c04f | 882 | .IP |
11af2d4b | 883 | For TCP sockets, this option allows |
75979920 DW |
884 | .BR accept (2) |
885 | load distribution in a multi-threaded server to be improved by | |
c28f1dd3 | 886 | using a distinct listener socket for each thread. |
11af2d4b MK |
887 | This provides improved load distribution as compared |
888 | to traditional techniques such as using a single | |
889 | .BR accept (2)ing | |
890 | thread that distributes connections, | |
891 | or having multiple threads that compete to | |
892 | .BR accept (2) | |
893 | from the same socket. | |
5711c04f | 894 | .IP |
11af2d4b MK |
895 | For UDP sockets, |
896 | the use of this option can provide better distribution | |
897 | of incoming datagrams to multiple processes (or threads) as compared | |
898 | to the traditional technique of having multiple processes | |
899 | compete to receive datagrams on the same socket. | |
75979920 | 900 | .TP |
9cad276e | 901 | .BR SO_RXQ_OVFL " (since Linux 2.6.33)" |
f4c644e5 | 902 | .\" commit 3b885787ea4112eaa80945999ea0901bf742707f |
91edd9ad | 903 | Indicates that an unsigned 32-bit value ancillary message (cmsg) |
f4c644e5 | 904 | should be attached to received skbs indicating |
4cee5821 | 905 | the number of packets dropped by the socket since its creation. |
8cfbaec5 | 906 | .TP |
7ded63ef RBP |
907 | .BR SO_SELECT_ERR_QUEUE " (since Linux 3.10)" |
908 | .\" commit 7d4c04fc170087119727119074e72445f2bb192b | |
909 | .\" Author: Keller, Jacob E <jacob.e.keller@intel.com> | |
112e0e60 MK |
910 | When this option is set on a socket, |
911 | an error condition on a socket causes notification not only via the | |
912 | .I exceptfds | |
913 | set of | |
914 | .BR select (2). | |
915 | Similarly, | |
916 | .BR poll (2) | |
917 | also returns a | |
7ded63ef | 918 | .B POLLPRI |
112e0e60 | 919 | whenever a |
7ded63ef | 920 | .B POLLERR |
112e0e60 MK |
921 | event is returned. |
922 | .\" It does not affect wake up. | |
7ded63ef | 923 | .IP |
112e0e60 | 924 | Background: this option was added when waking up on an error condition |
165bef47 | 925 | occurred only via the |
1ae6b2c7 | 926 | .I readfds |
112e0e60 | 927 | and |
1ae6b2c7 | 928 | .I writefds |
112e0e60 MK |
929 | sets of |
930 | .BR select (2). | |
931 | The option was added to allow monitoring for error conditions via the | |
932 | .I exceptfds | |
933 | argument without simultaneously having to receive notifications (via | |
934 | .IR readfds ) | |
935 | for regular data that can be read from the socket. | |
936 | After changes in Linux 4.16, | |
937 | .\" commit 6e5d58fdc9bedd0255a8 | |
938 | .\" ("skbuff: Fix not waking applications when errors are enqueued") | |
939 | the use of this flag to achieve the desired notifications | |
940 | is no longer necessary. | |
941 | This option is nevertheless retained for backwards compatibility. | |
7ded63ef | 942 | .TP |
77117f4f MK |
943 | .B SO_SNDBUF |
944 | Sets or gets the maximum socket send buffer in bytes. | |
945 | The kernel doubles this value (to allow space for bookkeeping overhead) | |
946 | when it is set using | |
947 | .\" Most (all?) other implementations do not do this -- MTK, Dec 05 | |
3de2d3be | 948 | .\" See also the comment to SO_RCVBUF (17 Jul 2012 LKML mail) |
77117f4f MK |
949 | .BR setsockopt (2), |
950 | and this doubled value is returned by | |
951 | .BR getsockopt (2). | |
952 | The default value is set by the | |
5a2ff571 MK |
953 | .I /proc/sys/net/core/wmem_default |
954 | file and the maximum allowed value is set by the | |
955 | .I /proc/sys/net/core/wmem_max | |
956 | file. | |
77117f4f MK |
957 | The minimum (doubled) value for this option is 2048. |
958 | .TP | |
959 | .BR SO_SNDBUFFORCE " (since Linux 2.6.14)" | |
960 | Using this socket option, a privileged | |
961 | .RB ( CAP_NET_ADMIN ) | |
962 | process can perform the same task as | |
963 | .BR SO_SNDBUF , | |
964 | but the | |
965 | .I wmem_max | |
966 | limit can be overridden. | |
967 | .TP | |
968 | .B SO_TIMESTAMP | |
969 | Enable or disable the receiving of the | |
970 | .B SO_TIMESTAMP | |
971 | control message. | |
972 | The timestamp control message is sent with level | |
973 | .B SOL_SOCKET | |
dd6b076a MK |
974 | and a |
975 | .I cmsg_type | |
976 | of | |
977 | .BR SCM_TIMESTAMP . | |
978 | The | |
77117f4f MK |
979 | .I cmsg_data |
980 | field is a | |
981 | .I "struct timeval" | |
982 | indicating the | |
983 | reception time of the last packet passed to the user in this call. | |
984 | See | |
985 | .BR cmsg (3) | |
986 | for details on control messages. | |
987 | .TP | |
3e472692 MK |
988 | .BR SO_TIMESTAMPNS " (since Linux 2.6.22)" |
989 | .\" commit 92f37fd2ee805aa77925c1e64fd56088b46094fc | |
a47d370b AC |
990 | Enable or disable the receiving of the |
991 | .B SO_TIMESTAMPNS | |
992 | control message. | |
993 | The timestamp control message is sent with level | |
994 | .B SOL_SOCKET | |
dd6b076a MK |
995 | and a |
996 | .I cmsg_type | |
997 | of | |
998 | .BR SCM_TIMESTAMPNS . | |
999 | The | |
a47d370b AC |
1000 | .I cmsg_data |
1001 | field is a | |
1002 | .I "struct timespec" | |
1003 | indicating the | |
1004 | reception time of the last packet passed to the user in this call. | |
1005 | The clock used for the timestamp is | |
1006 | .BR CLOCK_REALTIME . | |
1007 | See | |
1008 | .BR cmsg (3) | |
1009 | for details on control messages. | |
3e472692 MK |
1010 | .IP |
1011 | A socket cannot mix | |
1012 | .B SO_TIMESTAMP | |
1013 | and | |
575bac0f | 1014 | .BR SO_TIMESTAMPNS : |
3e472692 | 1015 | the two modes are mutually exclusive. |
a47d370b | 1016 | .TP |
77117f4f | 1017 | .B SO_TYPE |
fa574567 | 1018 | Gets the socket type as an integer (e.g., |
77117f4f | 1019 | .BR SOCK_STREAM ). |
fa574567 | 1020 | This socket option is read-only. |
8e57271a | 1021 | .TP |
1260477b | 1022 | .BR SO_BUSY_POLL " (since Linux 3.11)" |
8e57271a | 1023 | Sets the approximate time in microseconds to busy poll on a blocking receive |
049be102 MK |
1024 | when there is no data. |
1025 | Increasing this value requires | |
84fc2a6e | 1026 | .BR CAP_NET_ADMIN . |
8e57271a ET |
1027 | The default for this option is controlled by the |
1028 | .I /proc/sys/net/core/busy_read | |
84fc2a6e | 1029 | file. |
5711c04f | 1030 | .IP |
84fc2a6e | 1031 | The value in the |
8e57271a | 1032 | .I /proc/sys/net/core/busy_poll |
84fc2a6e | 1033 | file determines how long |
8e57271a | 1034 | .BR select (2) |
84fc2a6e | 1035 | and |
8e57271a | 1036 | .BR poll (2) |
84fc2a6e | 1037 | will busy poll when they operate on sockets with |
1ae6b2c7 | 1038 | .B SO_BUSY_POLL |
8e57271a | 1039 | set and no events to report are found. |
5711c04f | 1040 | .IP |
049be102 MK |
1041 | In both cases, |
1042 | busy polling will only be done when the socket last received data | |
8e57271a | 1043 | from a network device that supports this option. |
5711c04f | 1044 | .IP |
049be102 MK |
1045 | While busy polling may improve latency of some applications, |
1046 | care must be taken when using it since this will increase | |
1047 | both CPU utilization and power usage. | |
77117f4f MK |
1048 | .SS Signals |
1049 | When writing onto a connection-oriented socket that has been shut down | |
1050 | (by the local or the remote end) | |
1051 | .B SIGPIPE | |
1052 | is sent to the writing process and | |
1053 | .B EPIPE | |
1054 | is returned. | |
1055 | The signal is not sent when the write call | |
1056 | specified the | |
1057 | .B MSG_NOSIGNAL | |
1058 | flag. | |
1059 | .PP | |
1060 | When requested with the | |
1061 | .B FIOSETOWN | |
1062 | .BR fcntl (2) | |
1063 | or | |
1064 | .B SIOCSPGRP | |
1065 | .BR ioctl (2), | |
1066 | .B SIGIO | |
1067 | is sent when an I/O event occurs. | |
1068 | It is possible to use | |
1069 | .BR poll (2) | |
1070 | or | |
1071 | .BR select (2) | |
1072 | in the signal handler to find out which socket the event occurred on. | |
1073 | An alternative (in Linux 2.2) is to set a real-time signal using the | |
1074 | .B F_SETSIG | |
1075 | .BR fcntl (2); | |
1076 | the handler of the real-time signal will be called with | |
1077 | the file descriptor in the | |
1078 | .I si_fd | |
1079 | field of its | |
1080 | .IR siginfo_t . | |
1081 | See | |
1082 | .BR fcntl (2) | |
1083 | for more information. | |
1084 | .PP | |
1085 | Under some circumstances (e.g., multiple processes accessing a | |
1086 | single socket), the condition that caused the | |
1087 | .B SIGIO | |
1088 | may have already disappeared when the process reacts to the signal. | |
1089 | If this happens, the process should wait again because Linux | |
1090 | will resend the signal later. | |
c634028a | 1091 | .\" .SS Ancillary messages |
5a2ff571 MK |
1092 | .SS /proc interfaces |
1093 | The core socket networking parameters can be accessed | |
1094 | via files in the directory | |
1095 | .IR /proc/sys/net/core/ . | |
77117f4f MK |
1096 | .TP |
1097 | .I rmem_default | |
1098 | contains the default setting in bytes of the socket receive buffer. | |
1099 | .TP | |
1100 | .I rmem_max | |
1101 | contains the maximum socket receive buffer size in bytes which a user may | |
1102 | set by using the | |
1103 | .B SO_RCVBUF | |
1104 | socket option. | |
1105 | .TP | |
1106 | .I wmem_default | |
1107 | contains the default setting in bytes of the socket send buffer. | |
1108 | .TP | |
1109 | .I wmem_max | |
1110 | contains the maximum socket send buffer size in bytes which a user may | |
1111 | set by using the | |
1112 | .B SO_SNDBUF | |
1113 | socket option. | |
1114 | .TP | |
cabf996a | 1115 | .IR message_cost " and " message_burst |
77117f4f MK |
1116 | configure the token bucket filter used to load limit warning messages |
1117 | caused by external network events. | |
1118 | .TP | |
1119 | .I netdev_max_backlog | |
1120 | Maximum number of packets in the global input queue. | |
1121 | .TP | |
1122 | .I optmem_max | |
1123 | Maximum length of ancillary data and user control data like the iovecs | |
1124 | per socket. | |
1125 | .\" netdev_fastroute is not documented because it is experimental | |
1126 | .SS Ioctls | |
1127 | These operations can be accessed using | |
1128 | .BR ioctl (2): | |
5711c04f | 1129 | .PP |
77117f4f | 1130 | .in +4n |
b8302363 | 1131 | .EX |
77117f4f | 1132 | .IB error " = ioctl(" ip_socket ", " ioctl_type ", " &value_result ");" |
b8302363 | 1133 | .EE |
77117f4f MK |
1134 | .in |
1135 | .TP | |
1136 | .B SIOCGSTAMP | |
1137 | Return a | |
1138 | .I struct timeval | |
1139 | with the receive timestamp of the last packet passed to the user. | |
1140 | This is useful for accurate round trip time measurements. | |
1141 | See | |
1142 | .BR setitimer (2) | |
1143 | for a description of | |
1144 | .IR "struct timeval" . | |
1145 | .\" | |
a47d370b | 1146 | This ioctl should be used only if the socket options |
77117f4f | 1147 | .B SO_TIMESTAMP |
a47d370b AC |
1148 | and |
1149 | .B SO_TIMESTAMPNS | |
1150 | are not set on the socket. | |
77117f4f MK |
1151 | Otherwise, it returns the timestamp of the |
1152 | last packet that was received while | |
1153 | .B SO_TIMESTAMP | |
a47d370b AC |
1154 | and |
1155 | .B SO_TIMESTAMPNS | |
1156 | were not set, or it fails if no such packet has been received | |
77117f4f MK |
1157 | (i.e., |
1158 | .BR ioctl (2) | |
1159 | returns \-1 with | |
1160 | .I errno | |
1161 | set to | |
1162 | .BR ENOENT ). | |
1163 | .TP | |
1164 | .B SIOCSPGRP | |
0d86f490 | 1165 | Set the process or process group that is to receive |
77117f4f MK |
1166 | .B SIGIO |
1167 | or | |
1168 | .B SIGURG | |
0d86f490 | 1169 | signals when I/O becomes possible or urgent data is available. |
77117f4f MK |
1170 | The argument is a pointer to a |
1171 | .IR pid_t . | |
0d86f490 | 1172 | For further details, see the description of |
1ae6b2c7 | 1173 | .B F_SETOWN |
0d86f490 MK |
1174 | in |
1175 | .BR fcntl (2). | |
77117f4f MK |
1176 | .TP |
1177 | .B FIOASYNC | |
1178 | Change the | |
1179 | .B O_ASYNC | |
1180 | flag to enable or disable asynchronous I/O mode of the socket. | |
1181 | Asynchronous I/O mode means that the | |
1182 | .B SIGIO | |
1183 | signal or the signal set with | |
1184 | .B F_SETSIG | |
1185 | is raised when a new I/O event occurs. | |
1186 | .IP | |
1187 | Argument is an integer boolean flag. | |
1188 | (This operation is synonymous with the use of | |
1189 | .BR fcntl (2) | |
1190 | to set the | |
1191 | .B O_ASYNC | |
1192 | flag.) | |
1193 | .\" | |
1194 | .TP | |
1195 | .B SIOCGPGRP | |
1196 | Get the current process or process group that receives | |
1197 | .B SIGIO | |
1198 | or | |
1199 | .B SIGURG | |
1200 | signals, | |
1201 | or 0 | |
1202 | when none is set. | |
1203 | .PP | |
1204 | Valid | |
1205 | .BR fcntl (2) | |
1206 | operations: | |
1207 | .TP | |
1208 | .B FIOGETOWN | |
1209 | The same as the | |
1210 | .B SIOCGPGRP | |
1211 | .BR ioctl (2). | |
1212 | .TP | |
1213 | .B FIOSETOWN | |
1214 | The same as the | |
1215 | .B SIOCSPGRP | |
1216 | .BR ioctl (2). | |
1217 | .SH VERSIONS | |
1218 | .B SO_BINDTODEVICE | |
1219 | was introduced in Linux 2.0.30. | |
1220 | .B SO_PASSCRED | |
1221 | is new in Linux 2.2. | |
5a2ff571 MK |
1222 | The |
1223 | .I /proc | |
159097d4 | 1224 | interfaces were introduced in Linux 2.2. |
77117f4f MK |
1225 | .B SO_RCVTIMEO |
1226 | and | |
1227 | .B SO_SNDTIMEO | |
1228 | are supported since Linux 2.3.41. | |
1229 | Earlier, timeouts were fixed to | |
1230 | a protocol-specific setting, and could not be read or written. | |
1231 | .SH NOTES | |
1232 | Linux assumes that half of the send/receive buffer is used for internal | |
5a2ff571 MK |
1233 | kernel structures; thus the values in the corresponding |
1234 | .I /proc | |
1235 | files are twice what can be observed on the wire. | |
5711c04f | 1236 | .PP |
2a479ee4 | 1237 | Linux will allow port reuse only with the |
77117f4f MK |
1238 | .B SO_REUSEADDR |
1239 | option | |
1240 | when this option was set both in the previous program that performed a | |
1241 | .BR bind (2) | |
3b777aff | 1242 | to the port and in the program that wants to reuse the port. |
77117f4f MK |
1243 | This differs from some implementations (e.g., FreeBSD) |
1244 | where only the later program needs to set the | |
1245 | .B SO_REUSEADDR | |
1246 | option. | |
1247 | Typically this difference is invisible, since, for example, a server | |
1248 | program is designed to always set this option. | |
77117f4f MK |
1249 | .\" .SH AUTHORS |
1250 | .\" This man page was written by Andi Kleen. | |
47297adb | 1251 | .SH SEE ALSO |
6e933659 | 1252 | .BR wireshark (1), |
b1e6b7c7 | 1253 | .BR bpf (2), |
f3277220 | 1254 | .BR connect (2), |
0ec954ee | 1255 | .BR getsockopt (2), |
77117f4f MK |
1256 | .BR setsockopt (2), |
1257 | .BR socket (2), | |
587f954b | 1258 | .BR pcap (3), |
a5409af7 | 1259 | .BR address_families (7), |
77117f4f MK |
1260 | .BR capabilities (7), |
1261 | .BR ddp (7), | |
1262 | .BR ip (7), | |
999f8568 | 1263 | .BR ipv6 (7), |
0b8a4459 | 1264 | .BR packet (7), |
c24995b9 MK |
1265 | .BR tcp (7), |
1266 | .BR udp (7), | |
6e933659 MK |
1267 | .BR unix (7), |
1268 | .BR tcpdump (8) |