]> git.ipfire.org Git - people/ms/linux.git/blame - net/socket.c
Merge branch 'nfc-const'
[people/ms/linux.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
d7c08826 107#include <linux/ptp_clock_kernel.h>
06021292 108
e0d1095a 109#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
06021292 112#endif
6b96018b 113
8ae5e030
AV
114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
117
118static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
119static __poll_t sock_poll(struct file *file,
120 struct poll_table_struct *wait);
89bddce5 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
122#ifdef CONFIG_COMPAT
123static long compat_sock_ioctl(struct file *file,
89bddce5 124 unsigned int cmd, unsigned long arg);
89bbfc95 125#endif
1da177e4 126static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
127static ssize_t sock_sendpage(struct file *file, struct page *page,
128 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 130 struct pipe_inode_info *pipe, size_t len,
9c55e01c 131 unsigned int flags);
542d3065
AB
132
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: hand the request to the protocol's
 * show_fdinfo operation when the protocol provides one, otherwise
 * emit nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
/* Without procfs there is no fdinfo; leave the file operation unset. */
#define sock_show_fdinfo NULL
#endif
1da177e4 144
1da177e4
LT
145/*
146 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
147 * in the operation structures but are done directly via the socketcall() multiplexor.
148 */
149
da7071d7 150static const struct file_operations socket_file_ops = {
1da177e4
LT
151 .owner = THIS_MODULE,
152 .llseek = no_llseek,
8ae5e030
AV
153 .read_iter = sock_read_iter,
154 .write_iter = sock_write_iter,
1da177e4
LT
155 .poll = sock_poll,
156 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
157#ifdef CONFIG_COMPAT
158 .compat_ioctl = compat_sock_ioctl,
159#endif
1da177e4 160 .mmap = sock_mmap,
1da177e4
LT
161 .release = sock_close,
162 .fasync = sock_fasync,
5274f052
JA
163 .sendpage = sock_sendpage,
164 .splice_write = generic_splice_sendpage,
9c55e01c 165 .splice_read = sock_splice_read,
b4653342 166 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
167};
168
fe0bdbde
YD
169static const char * const pf_family_names[] = {
170 [PF_UNSPEC] = "PF_UNSPEC",
171 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
172 [PF_INET] = "PF_INET",
173 [PF_AX25] = "PF_AX25",
174 [PF_IPX] = "PF_IPX",
175 [PF_APPLETALK] = "PF_APPLETALK",
176 [PF_NETROM] = "PF_NETROM",
177 [PF_BRIDGE] = "PF_BRIDGE",
178 [PF_ATMPVC] = "PF_ATMPVC",
179 [PF_X25] = "PF_X25",
180 [PF_INET6] = "PF_INET6",
181 [PF_ROSE] = "PF_ROSE",
182 [PF_DECnet] = "PF_DECnet",
183 [PF_NETBEUI] = "PF_NETBEUI",
184 [PF_SECURITY] = "PF_SECURITY",
185 [PF_KEY] = "PF_KEY",
186 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
187 [PF_PACKET] = "PF_PACKET",
188 [PF_ASH] = "PF_ASH",
189 [PF_ECONET] = "PF_ECONET",
190 [PF_ATMSVC] = "PF_ATMSVC",
191 [PF_RDS] = "PF_RDS",
192 [PF_SNA] = "PF_SNA",
193 [PF_IRDA] = "PF_IRDA",
194 [PF_PPPOX] = "PF_PPPOX",
195 [PF_WANPIPE] = "PF_WANPIPE",
196 [PF_LLC] = "PF_LLC",
197 [PF_IB] = "PF_IB",
198 [PF_MPLS] = "PF_MPLS",
199 [PF_CAN] = "PF_CAN",
200 [PF_TIPC] = "PF_TIPC",
201 [PF_BLUETOOTH] = "PF_BLUETOOTH",
202 [PF_IUCV] = "PF_IUCV",
203 [PF_RXRPC] = "PF_RXRPC",
204 [PF_ISDN] = "PF_ISDN",
205 [PF_PHONET] = "PF_PHONET",
206 [PF_IEEE802154] = "PF_IEEE802154",
207 [PF_CAIF] = "PF_CAIF",
208 [PF_ALG] = "PF_ALG",
209 [PF_NFC] = "PF_NFC",
210 [PF_VSOCK] = "PF_VSOCK",
211 [PF_KCM] = "PF_KCM",
212 [PF_QIPCRTR] = "PF_QIPCRTR",
213 [PF_SMC] = "PF_SMC",
214 [PF_XDP] = "PF_XDP",
215};
216
1da177e4
LT
217/*
218 * The protocol list. Each protocol is registered in here.
219 */
220
1da177e4 221static DEFINE_SPINLOCK(net_family_lock);
190683a9 222static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 223
1da177e4 224/*
89bddce5
SH
225 * Support routines.
226 * Move socket addresses back and forth across the kernel/user
227 * divide and look after the messy bits.
1da177e4
LT
228 */
229
1da177e4
LT
230/**
231 * move_addr_to_kernel - copy a socket address into kernel space
232 * @uaddr: Address in user space
233 * @kaddr: Address in kernel space
234 * @ulen: Length in user space
235 *
236 * The address is copied into kernel space. If the provided address is
237 * too long an error code of -EINVAL is returned. If the copy gives
238 * invalid addresses -EFAULT is returned. On a success 0 is returned.
239 */
240
43db362d 241int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 242{
230b1839 243 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 244 return -EINVAL;
89bddce5 245 if (ulen == 0)
1da177e4 246 return 0;
89bddce5 247 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 248 return -EFAULT;
3ec3b2fb 249 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
250}
251
252/**
253 * move_addr_to_user - copy an address to user space
254 * @kaddr: kernel space address
255 * @klen: length of address in kernel
256 * @uaddr: user space address
257 * @ulen: pointer to user length field
258 *
259 * The value pointed to by ulen on entry is the buffer length available.
260 * This is overwritten with the buffer space used. -EINVAL is returned
261 * if an overlong buffer is specified or a negative buffer size. -EFAULT
262 * is returned if either the buffer or the length field are not
263 * accessible.
264 * After copying the data up to the limit the user specifies, the true
265 * length of the data is written over the length limit the user
266 * specified. Zero is returned for a success.
267 */
89bddce5 268
43db362d 269static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 270 void __user *uaddr, int __user *ulen)
1da177e4
LT
271{
272 int err;
273 int len;
274
68c6beb3 275 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
276 err = get_user(len, ulen);
277 if (err)
1da177e4 278 return err;
89bddce5
SH
279 if (len > klen)
280 len = klen;
68c6beb3 281 if (len < 0)
1da177e4 282 return -EINVAL;
89bddce5 283 if (len) {
d6fe3945
SG
284 if (audit_sockaddr(klen, kaddr))
285 return -ENOMEM;
89bddce5 286 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
287 return -EFAULT;
288 }
289 /*
89bddce5
SH
290 * "fromlen shall refer to the value before truncation.."
291 * 1003.1g
1da177e4
LT
292 */
293 return __put_user(klen, ulen);
294}
295
08009a76 296static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
297
298static struct inode *sock_alloc_inode(struct super_block *sb)
299{
300 struct socket_alloc *ei;
89bddce5 301
e94b1766 302 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
303 if (!ei)
304 return NULL;
333f7909
AV
305 init_waitqueue_head(&ei->socket.wq.wait);
306 ei->socket.wq.fasync_list = NULL;
307 ei->socket.wq.flags = 0;
89bddce5 308
1da177e4
LT
309 ei->socket.state = SS_UNCONNECTED;
310 ei->socket.flags = 0;
311 ei->socket.ops = NULL;
312 ei->socket.sk = NULL;
313 ei->socket.file = NULL;
1da177e4
LT
314
315 return &ei->vfs_inode;
316}
317
6d7855c5 318static void sock_free_inode(struct inode *inode)
1da177e4 319{
43815482
ED
320 struct socket_alloc *ei;
321
322 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 323 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
324}
325
51cc5068 326static void init_once(void *foo)
1da177e4 327{
89bddce5 328 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 329
a35afb83 330 inode_init_once(&ei->vfs_inode);
1da177e4 331}
89bddce5 332
1e911632 333static void init_inodecache(void)
1da177e4
LT
334{
335 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
336 sizeof(struct socket_alloc),
337 0,
338 (SLAB_HWCACHE_ALIGN |
339 SLAB_RECLAIM_ACCOUNT |
5d097056 340 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 341 init_once);
1e911632 342 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
343}
344
b87221de 345static const struct super_operations sockfs_ops = {
c6d409cf 346 .alloc_inode = sock_alloc_inode,
6d7855c5 347 .free_inode = sock_free_inode,
c6d409cf 348 .statfs = simple_statfs,
1da177e4
LT
349};
350
c23fbb6b
ED
351/*
352 * sockfs_dname() is called from d_path().
353 */
354static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
355{
356 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 357 d_inode(dentry)->i_ino);
c23fbb6b
ED
358}
359
3ba13d17 360static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 361 .d_dname = sockfs_dname,
1da177e4
LT
362};
363
bba0bd31
AG
364static int sockfs_xattr_get(const struct xattr_handler *handler,
365 struct dentry *dentry, struct inode *inode,
366 const char *suffix, void *value, size_t size)
367{
368 if (value) {
369 if (dentry->d_name.len + 1 > size)
370 return -ERANGE;
371 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
372 }
373 return dentry->d_name.len + 1;
374}
375
376#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
377#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
378#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
379
380static const struct xattr_handler sockfs_xattr_handler = {
381 .name = XATTR_NAME_SOCKPROTONAME,
382 .get = sockfs_xattr_get,
383};
384
4a590153 385static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 386 struct user_namespace *mnt_userns,
4a590153
AG
387 struct dentry *dentry, struct inode *inode,
388 const char *suffix, const void *value,
389 size_t size, int flags)
390{
391 /* Handled by LSM. */
392 return -EAGAIN;
393}
394
395static const struct xattr_handler sockfs_security_xattr_handler = {
396 .prefix = XATTR_SECURITY_PREFIX,
397 .set = sockfs_security_xattr_set,
398};
399
bba0bd31
AG
400static const struct xattr_handler *sockfs_xattr_handlers[] = {
401 &sockfs_xattr_handler,
4a590153 402 &sockfs_security_xattr_handler,
bba0bd31
AG
403 NULL
404};
405
fba9be49 406static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 407{
fba9be49
DH
408 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
409 if (!ctx)
410 return -ENOMEM;
411 ctx->ops = &sockfs_ops;
412 ctx->dops = &sockfs_dentry_operations;
413 ctx->xattr = sockfs_xattr_handlers;
414 return 0;
c74a1cbb
AV
415}
416
417static struct vfsmount *sock_mnt __read_mostly;
418
419static struct file_system_type sock_fs_type = {
420 .name = "sockfs",
fba9be49 421 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
422 .kill_sb = kill_anon_super,
423};
424
1da177e4
LT
425/*
426 * Obtains the first available file descriptor and sets it up for use.
427 *
39d8c1b6
DM
428 * These functions create file structures and map them to fd space
429 * of the current process. On success it returns file descriptor
1da177e4
LT
430 * and file struct implicitly stored in sock->file.
431 * Note that another thread may close file descriptor before we return
432 * from this function. We use the fact that now we do not refer
433 * to socket after mapping. If one day we will need it, this
434 * function will increment ref. count on file by 1.
435 *
436 * In any case returned fd MAY BE not valid!
437 * This race condition is unavoidable
438 * with shared fd spaces, we cannot solve it inside kernel,
439 * but we take care of internal coherence yet.
440 */
441
8a3c245c
PT
442/**
443 * sock_alloc_file - Bind a &socket to a &file
444 * @sock: socket
445 * @flags: file status flags
446 * @dname: protocol name
447 *
448 * Returns the &file bound with @sock, implicitly storing it
449 * in sock->file. If dname is %NULL, sets to "".
450 * On failure the return is a ERR pointer (see linux/err.h).
451 * This function uses GFP_KERNEL internally.
452 */
453
aab174f0 454struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 455{
7cbe66b6 456 struct file *file;
1da177e4 457
d93aa9d8
AV
458 if (!dname)
459 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 460
d93aa9d8
AV
461 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
462 O_RDWR | (flags & O_NONBLOCK),
463 &socket_file_ops);
b5ffe634 464 if (IS_ERR(file)) {
8e1611e2 465 sock_release(sock);
39b65252 466 return file;
cc3808f8
AV
467 }
468
469 sock->file = file;
39d8c1b6 470 file->private_data = sock;
d8e464ec 471 stream_open(SOCK_INODE(sock), file);
28407630 472 return file;
39d8c1b6 473}
56b31d1c 474EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 475
56b31d1c 476static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
477{
478 struct file *newfile;
28407630 479 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
480 if (unlikely(fd < 0)) {
481 sock_release(sock);
28407630 482 return fd;
ce4bb04c 483 }
39d8c1b6 484
aab174f0 485 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 486 if (!IS_ERR(newfile)) {
39d8c1b6 487 fd_install(fd, newfile);
28407630
AV
488 return fd;
489 }
7cbe66b6 490
28407630
AV
491 put_unused_fd(fd);
492 return PTR_ERR(newfile);
1da177e4
LT
493}
494
8a3c245c
PT
495/**
496 * sock_from_file - Return the &socket bound to @file.
497 * @file: file
8a3c245c 498 *
dba4a925 499 * On failure returns %NULL.
8a3c245c
PT
500 */
501
dba4a925 502struct socket *sock_from_file(struct file *file)
6cb153ca 503{
6cb153ca
BL
504 if (file->f_op == &socket_file_ops)
505 return file->private_data; /* set in sock_map_fd */
506
23bb80d2 507 return NULL;
6cb153ca 508}
406a3c63 509EXPORT_SYMBOL(sock_from_file);
6cb153ca 510
1da177e4 511/**
c6d409cf 512 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
513 * @fd: file handle
514 * @err: pointer to an error code return
515 *
516 * The file handle passed in is locked and the socket it is bound
241c4667 517 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
518 * with a negative errno code and NULL is returned. The function checks
519 * for both invalid handles and passing a handle which is not a socket.
520 *
521 * On a success the socket object pointer is returned.
522 */
523
524struct socket *sockfd_lookup(int fd, int *err)
525{
526 struct file *file;
1da177e4
LT
527 struct socket *sock;
528
89bddce5
SH
529 file = fget(fd);
530 if (!file) {
1da177e4
LT
531 *err = -EBADF;
532 return NULL;
533 }
89bddce5 534
dba4a925
FR
535 sock = sock_from_file(file);
536 if (!sock) {
537 *err = -ENOTSOCK;
1da177e4 538 fput(file);
dba4a925 539 }
6cb153ca
BL
540 return sock;
541}
c6d409cf 542EXPORT_SYMBOL(sockfd_lookup);
1da177e4 543
6cb153ca
BL
544static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
545{
00e188ef 546 struct fd f = fdget(fd);
6cb153ca
BL
547 struct socket *sock;
548
3672558c 549 *err = -EBADF;
00e188ef 550 if (f.file) {
dba4a925 551 sock = sock_from_file(f.file);
00e188ef 552 if (likely(sock)) {
ce787a5a 553 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 554 return sock;
00e188ef 555 }
dba4a925 556 *err = -ENOTSOCK;
00e188ef 557 fdput(f);
1da177e4 558 }
6cb153ca 559 return NULL;
1da177e4
LT
560}
561
600e1779
MY
562static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
563 size_t size)
564{
565 ssize_t len;
566 ssize_t used = 0;
567
c5ef6035 568 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
569 if (len < 0)
570 return len;
571 used += len;
572 if (buffer) {
573 if (size < used)
574 return -ERANGE;
575 buffer += len;
576 }
577
578 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
579 used += len;
580 if (buffer) {
581 if (size < used)
582 return -ERANGE;
583 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
584 buffer += len;
585 }
586
587 return used;
588}
589
549c7297
CB
590static int sockfs_setattr(struct user_namespace *mnt_userns,
591 struct dentry *dentry, struct iattr *iattr)
86741ec2 592{
549c7297 593 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 594
e1a3a60a 595 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
596 struct socket *sock = SOCKET_I(d_inode(dentry));
597
6d8c50dc
CW
598 if (sock->sk)
599 sock->sk->sk_uid = iattr->ia_uid;
600 else
601 err = -ENOENT;
86741ec2
LC
602 }
603
604 return err;
605}
606
600e1779 607static const struct inode_operations sockfs_inode_ops = {
600e1779 608 .listxattr = sockfs_listxattr,
86741ec2 609 .setattr = sockfs_setattr,
600e1779
MY
610};
611
1da177e4 612/**
8a3c245c 613 * sock_alloc - allocate a socket
89bddce5 614 *
1da177e4
LT
615 * Allocate a new inode and socket object. The two are bound together
616 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 617 * NULL is returned. This function uses GFP_KERNEL internally.
1da177e4
LT
618 */
619
f4a00aac 620struct socket *sock_alloc(void)
1da177e4 621{
89bddce5
SH
622 struct inode *inode;
623 struct socket *sock;
1da177e4 624
a209dfc7 625 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
626 if (!inode)
627 return NULL;
628
629 sock = SOCKET_I(inode);
630
85fe4025 631 inode->i_ino = get_next_ino();
89bddce5 632 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
633 inode->i_uid = current_fsuid();
634 inode->i_gid = current_fsgid();
600e1779 635 inode->i_op = &sockfs_inode_ops;
1da177e4 636
1da177e4
LT
637 return sock;
638}
f4a00aac 639EXPORT_SYMBOL(sock_alloc);
1da177e4 640
6d8c50dc 641static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
642{
643 if (sock->ops) {
644 struct module *owner = sock->ops->owner;
645
6d8c50dc
CW
646 if (inode)
647 inode_lock(inode);
1da177e4 648 sock->ops->release(sock);
ff7b11aa 649 sock->sk = NULL;
6d8c50dc
CW
650 if (inode)
651 inode_unlock(inode);
1da177e4
LT
652 sock->ops = NULL;
653 module_put(owner);
654 }
655
333f7909 656 if (sock->wq.fasync_list)
3410f22e 657 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 658
1da177e4
LT
659 if (!sock->file) {
660 iput(SOCK_INODE(sock));
661 return;
662 }
89bddce5 663 sock->file = NULL;
1da177e4 664}
6d8c50dc 665
9a8ad9ac
AL
666/**
667 * sock_release - close a socket
668 * @sock: socket to close
669 *
670 * The socket is released from the protocol stack if it has a release
671 * callback, and the inode is then released if the socket is bound to
672 * an inode not a file.
673 */
6d8c50dc
CW
674void sock_release(struct socket *sock)
675{
676 __sock_release(sock, NULL);
677}
c6d409cf 678EXPORT_SYMBOL(sock_release);
1da177e4 679
c14ac945 680void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 681{
140c55d4
ED
682 u8 flags = *tx_flags;
683
c14ac945 684 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
685 flags |= SKBTX_HW_TSTAMP;
686
c14ac945 687 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
688 flags |= SKBTX_SW_TSTAMP;
689
c14ac945 690 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
691 flags |= SKBTX_SCHED_TSTAMP;
692
140c55d4 693 *tx_flags = flags;
20d49473 694}
67cc0d40 695EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 696
8c3c447b
PA
697INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
698 size_t));
a648a592
PA
699INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
700 size_t));
d8725c86 701static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 702{
a648a592
PA
703 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
704 inet_sendmsg, sock, msg,
705 msg_data_left(msg));
d8725c86
AV
706 BUG_ON(ret == -EIOCBQUEUED);
707 return ret;
1da177e4
LT
708}
709
85806af0
RD
710/**
711 * sock_sendmsg - send a message through @sock
712 * @sock: socket
713 * @msg: message to send
714 *
715 * Sends @msg through @sock, passing through LSM.
716 * Returns the number of bytes sent, or an error code.
717 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	/* A non-zero result from the security hook ends the send here. */
	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 726
8a3c245c
PT
727/**
728 * kernel_sendmsg - send a message through @sock (kernel-space)
729 * @sock: socket
730 * @msg: message header
731 * @vec: kernel vec
732 * @num: vec array length
733 * @size: total message data size
734 *
735 * Builds the message data with @vec and sends it through @sock.
736 * Returns the number of bytes sent, or an error code.
737 */
738
1da177e4
LT
739int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
740 struct kvec *vec, size_t num, size_t size)
741{
aa563d7b 742 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 743 return sock_sendmsg(sock, msg);
1da177e4 744}
c6d409cf 745EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 746
8a3c245c
PT
747/**
748 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
749 * @sk: sock
750 * @msg: message header
751 * @vec: output s/g array
752 * @num: output s/g array length
753 * @size: total message data size
754 *
755 * Builds the message data with @vec and sends it through the socket attached to @sk (sk->sk_socket).
756 * Returns the number of bytes sent, or an error code.
757 * Caller must hold @sk.
758 */
759
306b13eb
TH
760int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
761 struct kvec *vec, size_t num, size_t size)
762{
763 struct socket *sock = sk->sk_socket;
764
765 if (!sock->ops->sendmsg_locked)
db5980d8 766 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 767
aa563d7b 768 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
769
770 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
771}
772EXPORT_SYMBOL(kernel_sendmsg_locked);
773
8605330a
SHY
774static bool skb_is_err_queue(const struct sk_buff *skb)
775{
776 /* pkt_type of skbs enqueued on the error queue are set to
777 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
778 * in recvmsg, since skbs received on a local socket will never
779 * have a pkt_type of PACKET_OUTGOING.
780 */
781 return skb->pkt_type == PACKET_OUTGOING;
782}
783
b50a5c70
ML
784/* On transmit, software and hardware timestamps are returned independently.
785 * As the two skb clones share the hardware timestamp, which may be updated
786 * before the software timestamp is received, a hardware TX timestamp may be
787 * returned only if there is no software TX timestamp. Ignore false software
788 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 789 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
790 * hardware timestamp.
791 */
792static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
793{
794 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
795}
796
aad9c8c4
ML
797static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
798{
799 struct scm_ts_pktinfo ts_pktinfo;
800 struct net_device *orig_dev;
801
802 if (!skb_mac_header_was_set(skb))
803 return;
804
805 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
806
807 rcu_read_lock();
808 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
809 if (orig_dev)
810 ts_pktinfo.if_index = orig_dev->ifindex;
811 rcu_read_unlock();
812
813 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
814 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
815 sizeof(ts_pktinfo), &ts_pktinfo);
816}
817
92f37fd2
ED
818/*
819 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
820 */
821void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
822 struct sk_buff *skb)
823{
20d49473 824 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 825 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
826 struct scm_timestamping_internal tss;
827
b50a5c70 828 int empty = 1, false_tstamp = 0;
20d49473
PO
829 struct skb_shared_hwtstamps *shhwtstamps =
830 skb_hwtstamps(skb);
831
832 /* Race occurred between timestamp enabling and packet
833 receiving. Fill in the current time for now. */
b50a5c70 834 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 835 __net_timestamp(skb);
b50a5c70
ML
836 false_tstamp = 1;
837 }
20d49473
PO
838
839 if (need_software_tstamp) {
840 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
841 if (new_tstamp) {
842 struct __kernel_sock_timeval tv;
843
844 skb_get_new_timestamp(skb, &tv);
845 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
846 sizeof(tv), &tv);
847 } else {
848 struct __kernel_old_timeval tv;
849
850 skb_get_timestamp(skb, &tv);
851 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
852 sizeof(tv), &tv);
853 }
20d49473 854 } else {
887feae3
DD
855 if (new_tstamp) {
856 struct __kernel_timespec ts;
857
858 skb_get_new_timestampns(skb, &ts);
859 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
860 sizeof(ts), &ts);
861 } else {
df1b4ba9 862 struct __kernel_old_timespec ts;
887feae3
DD
863
864 skb_get_timestampns(skb, &ts);
865 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
866 sizeof(ts), &ts);
867 }
20d49473
PO
868 }
869 }
870
f24b9be5 871 memset(&tss, 0, sizeof(tss));
c199105d 872 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 873 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 874 empty = 0;
4d276eb6 875 if (shhwtstamps &&
b9f40e21 876 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826
YL
877 !skb_is_swtx_tstamp(skb, false_tstamp)) {
878 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
879 ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
880
881 if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
882 tss.ts + 2)) {
883 empty = 0;
884
885 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
886 !skb_is_err_queue(skb))
887 put_ts_pktinfo(msg, skb);
888 }
aad9c8c4 889 }
1c885808 890 if (!empty) {
9718475e
DD
891 if (sock_flag(sk, SOCK_TSTAMP_NEW))
892 put_cmsg_scm_timestamping64(msg, &tss);
893 else
894 put_cmsg_scm_timestamping(msg, &tss);
1c885808 895
8605330a 896 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 897 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
898 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
899 skb->len, skb->data);
900 }
92f37fd2 901}
7c81fd8b
ACM
902EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
903
6e3e939f
JB
904void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
905 struct sk_buff *skb)
906{
907 int ack;
908
909 if (!sock_flag(sk, SOCK_WIFI_STATUS))
910 return;
911 if (!skb->wifi_acked_valid)
912 return;
913
914 ack = skb->wifi_acked;
915
916 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
917}
918EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
919
11165f14 920static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
921 struct sk_buff *skb)
3b885787 922{
744d5a3e 923 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 924 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 925 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
926}
927
/* Attach the timestamp and then the drop-count control messages. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 935
8c3c447b 936INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
937 size_t, int));
938INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
939 size_t, int));
1b784140 940static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 941 int flags)
1da177e4 942{
a648a592
PA
943 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
944 inet_recvmsg, sock, msg, msg_data_left(msg),
945 flags);
1da177e4
LT
946}
947
85806af0
RD
948/**
949 * sock_recvmsg - receive a message from @sock
950 * @sock: socket
951 * @msg: message to receive
952 * @flags: message flags
953 *
954 * Receives @msg from @sock, passing through LSM. Returns the total number
955 * of bytes received, or an error.
956 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	/* A non-zero result from the security hook ends the receive here. */
	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 964
c1249c0a 965/**
8a3c245c
PT
966 * kernel_recvmsg - Receive a message from a socket (kernel space)
967 * @sock: The socket to receive the message from
968 * @msg: Received message
969 * @vec: Input s/g array for message data
970 * @num: Size of input s/g array
971 * @size: Number of bytes to read
972 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 973 *
8a3c245c
PT
974 * On return the msg structure contains the scatter/gather array passed in the
975 * vec argument. The array is modified so that it consists of the unfilled
976 * portion of the original array.
c1249c0a 977 *
8a3c245c 978 * The returned value is the total number of bytes received, or an error.
c1249c0a 979 */
8a3c245c 980
89bddce5
SH
981int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
982 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 983{
1f466e1f 984 msg->msg_control_is_user = false;
aa563d7b 985 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 986 return sock_recvmsg(sock, msg, flags);
1da177e4 987}
c6d409cf 988EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 989
ce1d4d3e
CH
990static ssize_t sock_sendpage(struct file *file, struct page *page,
991 int offset, size_t size, loff_t *ppos, int more)
1da177e4 992{
1da177e4
LT
993 struct socket *sock;
994 int flags;
995
ce1d4d3e
CH
996 sock = file->private_data;
997
35f9c09f
ED
998 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
999 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1000 flags |= more;
ce1d4d3e 1001
e6949583 1002 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1003}
1da177e4 1004
9c55e01c 1005static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1006 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1007 unsigned int flags)
1008{
1009 struct socket *sock = file->private_data;
1010
997b37da 1011 if (unlikely(!sock->ops->splice_read))
95506588 1012 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1013
9c55e01c
JA
1014 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1015}
1016
8ae5e030 1017static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1018{
6d652330
AV
1019 struct file *file = iocb->ki_filp;
1020 struct socket *sock = file->private_data;
0345f931 1021 struct msghdr msg = {.msg_iter = *to,
1022 .msg_iocb = iocb};
8ae5e030 1023 ssize_t res;
ce1d4d3e 1024
ebfcd895 1025 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1026 msg.msg_flags = MSG_DONTWAIT;
1027
1028 if (iocb->ki_pos != 0)
1da177e4 1029 return -ESPIPE;
027445c3 1030
66ee59af 1031 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1032 return 0;
1033
2da62906 1034 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1035 *to = msg.msg_iter;
1036 return res;
1da177e4
LT
1037}
1038
8ae5e030 1039static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1040{
6d652330
AV
1041 struct file *file = iocb->ki_filp;
1042 struct socket *sock = file->private_data;
0345f931 1043 struct msghdr msg = {.msg_iter = *from,
1044 .msg_iocb = iocb};
8ae5e030 1045 ssize_t res;
1da177e4 1046
8ae5e030 1047 if (iocb->ki_pos != 0)
ce1d4d3e 1048 return -ESPIPE;
027445c3 1049
ebfcd895 1050 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1051 msg.msg_flags = MSG_DONTWAIT;
1052
6d652330
AV
1053 if (sock->type == SOCK_SEQPACKET)
1054 msg.msg_flags |= MSG_EOR;
1055
d8725c86 1056 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1057 *from = msg.msg_iter;
1058 return res;
1da177e4
LT
1059}
1060
1da177e4
LT
1061/*
1062 * Atomic setting of ioctl hooks to avoid race
1063 * with module unload.
1064 */
1065
4a3e2f71 1066static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1067static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1068 unsigned int cmd, struct ifreq *ifr,
1069 void __user *uarg);
1da177e4 1070
ad2f99ae
AB
1071void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1072 unsigned int cmd, struct ifreq *ifr,
1073 void __user *uarg))
1da177e4 1074{
4a3e2f71 1075 mutex_lock(&br_ioctl_mutex);
1da177e4 1076 br_ioctl_hook = hook;
4a3e2f71 1077 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1078}
1079EXPORT_SYMBOL(brioctl_set);
1080
ad2f99ae
AB
1081int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1082 struct ifreq *ifr, void __user *uarg)
1083{
1084 int err = -ENOPKG;
1085
1086 if (!br_ioctl_hook)
1087 request_module("bridge");
1088
1089 mutex_lock(&br_ioctl_mutex);
1090 if (br_ioctl_hook)
1091 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1092 mutex_unlock(&br_ioctl_mutex);
1093
1094 return err;
1095}
1096
4a3e2f71 1097static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1098static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1099
881d966b 1100void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1101{
4a3e2f71 1102 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1103 vlan_ioctl_hook = hook;
4a3e2f71 1104 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1105}
1106EXPORT_SYMBOL(vlan_ioctl_set);
1107
6b96018b 1108static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1109 unsigned int cmd, unsigned long arg)
6b96018b 1110{
876f0bf9
AB
1111 struct ifreq ifr;
1112 bool need_copyout;
6b96018b
AB
1113 int err;
1114 void __user *argp = (void __user *)arg;
a554bf96 1115 void __user *data;
6b96018b
AB
1116
1117 err = sock->ops->ioctl(sock, cmd, arg);
1118
1119 /*
1120 * If this ioctl is unknown try to hand it down
1121 * to the NIC driver.
1122 */
36fd633e
AV
1123 if (err != -ENOIOCTLCMD)
1124 return err;
6b96018b 1125
a554bf96 1126 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1127 return -EFAULT;
a554bf96 1128 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1129 if (!err && need_copyout)
a554bf96 1130 if (put_user_ifreq(&ifr, argp))
44c02a2c 1131 return -EFAULT;
876f0bf9 1132
6b96018b
AB
1133 return err;
1134}
1135
1da177e4
LT
1136/*
1137 * With an ioctl, arg may well be a user mode pointer, but we don't know
1138 * what to do with it - that's up to the protocol still.
1139 */
1140
1141static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1142{
1143 struct socket *sock;
881d966b 1144 struct sock *sk;
1da177e4
LT
1145 void __user *argp = (void __user *)arg;
1146 int pid, err;
881d966b 1147 struct net *net;
1da177e4 1148
b69aee04 1149 sock = file->private_data;
881d966b 1150 sk = sock->sk;
3b1e0a65 1151 net = sock_net(sk);
44c02a2c
AV
1152 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1153 struct ifreq ifr;
a554bf96 1154 void __user *data;
44c02a2c 1155 bool need_copyout;
a554bf96 1156 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1157 return -EFAULT;
a554bf96 1158 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1159 if (!err && need_copyout)
a554bf96 1160 if (put_user_ifreq(&ifr, argp))
44c02a2c 1161 return -EFAULT;
1da177e4 1162 } else
3d23e349 1163#ifdef CONFIG_WEXT_CORE
1da177e4 1164 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1165 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1166 } else
3d23e349 1167#endif
89bddce5 1168 switch (cmd) {
1da177e4
LT
1169 case FIOSETOWN:
1170 case SIOCSPGRP:
1171 err = -EFAULT;
1172 if (get_user(pid, (int __user *)argp))
1173 break;
393cc3f5 1174 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1175 break;
1176 case FIOGETOWN:
1177 case SIOCGPGRP:
609d7fa9 1178 err = put_user(f_getown(sock->file),
89bddce5 1179 (int __user *)argp);
1da177e4
LT
1180 break;
1181 case SIOCGIFBR:
1182 case SIOCSIFBR:
1183 case SIOCBRADDBR:
1184 case SIOCBRDELBR:
ad2f99ae 1185 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1186 break;
1187 case SIOCGIFVLAN:
1188 case SIOCSIFVLAN:
1189 err = -ENOPKG;
1190 if (!vlan_ioctl_hook)
1191 request_module("8021q");
1192
4a3e2f71 1193 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1194 if (vlan_ioctl_hook)
881d966b 1195 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1196 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1197 break;
c62cce2c
AV
1198 case SIOCGSKNS:
1199 err = -EPERM;
1200 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1201 break;
1202
1203 err = open_related_ns(&net->ns, get_net_ns);
1204 break;
0768e170
AB
1205 case SIOCGSTAMP_OLD:
1206 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1207 if (!sock->ops->gettstamp) {
1208 err = -ENOIOCTLCMD;
1209 break;
1210 }
1211 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1212 cmd == SIOCGSTAMP_OLD,
1213 !IS_ENABLED(CONFIG_64BIT));
60747828 1214 break;
0768e170
AB
1215 case SIOCGSTAMP_NEW:
1216 case SIOCGSTAMPNS_NEW:
1217 if (!sock->ops->gettstamp) {
1218 err = -ENOIOCTLCMD;
1219 break;
1220 }
1221 err = sock->ops->gettstamp(sock, argp,
1222 cmd == SIOCGSTAMP_NEW,
1223 false);
c7cbdbf2 1224 break;
876f0bf9
AB
1225
1226 case SIOCGIFCONF:
1227 err = dev_ifconf(net, argp);
1228 break;
1229
1da177e4 1230 default:
63ff03ab 1231 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1232 break;
89bddce5 1233 }
1da177e4
LT
1234 return err;
1235}
1236
8a3c245c
PT
1237/**
1238 * sock_create_lite - creates a socket
1239 * @family: protocol family (AF_INET, ...)
1240 * @type: communication type (SOCK_STREAM, ...)
1241 * @protocol: protocol (0, ...)
1242 * @res: new socket
1243 *
1244 * Creates a new socket and assigns it to @res, passing through LSM.
1245 * The new socket initialization is not complete, see kernel_accept().
1246 * Returns 0 or an error. On failure @res is set to %NULL.
1247 * This function internally uses GFP_KERNEL.
1248 */
1249
1da177e4
LT
1250int sock_create_lite(int family, int type, int protocol, struct socket **res)
1251{
1252 int err;
1253 struct socket *sock = NULL;
89bddce5 1254
1da177e4
LT
1255 err = security_socket_create(family, type, protocol, 1);
1256 if (err)
1257 goto out;
1258
1259 sock = sock_alloc();
1260 if (!sock) {
1261 err = -ENOMEM;
1262 goto out;
1263 }
1264
1da177e4 1265 sock->type = type;
7420ed23
VY
1266 err = security_socket_post_create(sock, family, type, protocol, 1);
1267 if (err)
1268 goto out_release;
1269
1da177e4
LT
1270out:
1271 *res = sock;
1272 return err;
7420ed23
VY
1273out_release:
1274 sock_release(sock);
1275 sock = NULL;
1276 goto out;
1da177e4 1277}
c6d409cf 1278EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1279
1280/* No kernel lock held - perfect */
ade994f4 1281static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1282{
3cafb376 1283 struct socket *sock = file->private_data;
a331de3b 1284 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1285
e88958e6
CH
1286 if (!sock->ops->poll)
1287 return 0;
f641f13b 1288
a331de3b
CH
1289 if (sk_can_busy_loop(sock->sk)) {
1290 /* poll once if requested by the syscall */
1291 if (events & POLL_BUSY_LOOP)
1292 sk_busy_loop(sock->sk, 1);
1293
1294 /* if this socket can poll_ll, tell the system call */
1295 flag = POLL_BUSY_LOOP;
1296 }
1297
1298 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1299}
1300
89bddce5 1301static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1302{
b69aee04 1303 struct socket *sock = file->private_data;
1da177e4
LT
1304
1305 return sock->ops->mmap(file, sock, vma);
1306}
1307
/*
 * ->release() file operation for sockets: release the socket embedded in
 * @inode.  Always returns 0; @filp is not used.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1313
1314/*
1315 * Update the socket async list
1316 *
1317 * Fasync_list locking strategy.
1318 *
1319 * 1. fasync_list is modified only under process context socket lock
1320 * i.e. under semaphore.
1321 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1322 * or under socket lock
1da177e4
LT
1323 */
1324
1325static int sock_fasync(int fd, struct file *filp, int on)
1326{
989a2979
ED
1327 struct socket *sock = filp->private_data;
1328 struct sock *sk = sock->sk;
333f7909 1329 struct socket_wq *wq = &sock->wq;
1da177e4 1330
989a2979 1331 if (sk == NULL)
1da177e4 1332 return -EINVAL;
1da177e4
LT
1333
1334 lock_sock(sk);
eaefd110 1335 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1336
eaefd110 1337 if (!wq->fasync_list)
989a2979
ED
1338 sock_reset_flag(sk, SOCK_FASYNC);
1339 else
bcdce719 1340 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1341
989a2979 1342 release_sock(sk);
1da177e4
LT
1343 return 0;
1344}
1345
ceb5d58b 1346/* This function may be called only under rcu_lock */
1da177e4 1347
ceb5d58b 1348int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1349{
ceb5d58b 1350 if (!wq || !wq->fasync_list)
1da177e4 1351 return -1;
ceb5d58b 1352
89bddce5 1353 switch (how) {
8d8ad9d7 1354 case SOCK_WAKE_WAITD:
ceb5d58b 1355 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1356 break;
1357 goto call_kill;
8d8ad9d7 1358 case SOCK_WAKE_SPACE:
ceb5d58b 1359 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1360 break;
7c7ab580 1361 fallthrough;
8d8ad9d7 1362 case SOCK_WAKE_IO:
89bddce5 1363call_kill:
43815482 1364 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1365 break;
8d8ad9d7 1366 case SOCK_WAKE_URG:
43815482 1367 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1368 }
ceb5d58b 1369
1da177e4
LT
1370 return 0;
1371}
c6d409cf 1372EXPORT_SYMBOL(sock_wake_async);
1da177e4 1373
8a3c245c
PT
1374/**
1375 * __sock_create - creates a socket
1376 * @net: net namespace
1377 * @family: protocol family (AF_INET, ...)
1378 * @type: communication type (SOCK_STREAM, ...)
1379 * @protocol: protocol (0, ...)
1380 * @res: new socket
1381 * @kern: boolean for kernel space sockets
1382 *
1383 * Creates a new socket and assigns it to @res, passing through LSM.
1384 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1385 * be set to true if the socket resides in kernel space.
1386 * This function internally uses GFP_KERNEL.
1387 */
1388
721db93a 1389int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1390 struct socket **res, int kern)
1da177e4
LT
1391{
1392 int err;
1393 struct socket *sock;
55737fda 1394 const struct net_proto_family *pf;
1da177e4
LT
1395
1396 /*
89bddce5 1397 * Check protocol is in range
1da177e4
LT
1398 */
1399 if (family < 0 || family >= NPROTO)
1400 return -EAFNOSUPPORT;
1401 if (type < 0 || type >= SOCK_MAX)
1402 return -EINVAL;
1403
1404 /* Compatibility.
1405
1406 This uglymoron is moved from INET layer to here to avoid
1407 deadlock in module load.
1408 */
1409 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1410 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1411 current->comm);
1da177e4
LT
1412 family = PF_PACKET;
1413 }
1414
1415 err = security_socket_create(family, type, protocol, kern);
1416 if (err)
1417 return err;
89bddce5 1418
55737fda
SH
1419 /*
1420 * Allocate the socket and allow the family to set things up. if
1421 * the protocol is 0, the family is instructed to select an appropriate
1422 * default.
1423 */
1424 sock = sock_alloc();
1425 if (!sock) {
e87cc472 1426 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1427 return -ENFILE; /* Not exactly a match, but its the
1428 closest posix thing */
1429 }
1430
1431 sock->type = type;
1432
95a5afca 1433#ifdef CONFIG_MODULES
89bddce5
SH
1434 /* Attempt to load a protocol module if the find failed.
1435 *
1436 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1437 * requested real, full-featured networking support upon configuration.
1438 * Otherwise module support will break!
1439 */
190683a9 1440 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1441 request_module("net-pf-%d", family);
1da177e4
LT
1442#endif
1443
55737fda
SH
1444 rcu_read_lock();
1445 pf = rcu_dereference(net_families[family]);
1446 err = -EAFNOSUPPORT;
1447 if (!pf)
1448 goto out_release;
1da177e4
LT
1449
1450 /*
1451 * We will call the ->create function, that possibly is in a loadable
1452 * module, so we have to bump that loadable module refcnt first.
1453 */
55737fda 1454 if (!try_module_get(pf->owner))
1da177e4
LT
1455 goto out_release;
1456
55737fda
SH
1457 /* Now protected by module ref count */
1458 rcu_read_unlock();
1459
3f378b68 1460 err = pf->create(net, sock, protocol, kern);
55737fda 1461 if (err < 0)
1da177e4 1462 goto out_module_put;
a79af59e 1463
1da177e4
LT
1464 /*
1465 * Now to bump the refcnt of the [loadable] module that owns this
1466 * socket at sock_release time we decrement its refcnt.
1467 */
55737fda
SH
1468 if (!try_module_get(sock->ops->owner))
1469 goto out_module_busy;
1470
1da177e4
LT
1471 /*
1472 * Now that we're done with the ->create function, the [loadable]
1473 * module can have its refcnt decremented
1474 */
55737fda 1475 module_put(pf->owner);
7420ed23
VY
1476 err = security_socket_post_create(sock, family, type, protocol, kern);
1477 if (err)
3b185525 1478 goto out_sock_release;
55737fda 1479 *res = sock;
1da177e4 1480
55737fda
SH
1481 return 0;
1482
1483out_module_busy:
1484 err = -EAFNOSUPPORT;
1da177e4 1485out_module_put:
55737fda
SH
1486 sock->ops = NULL;
1487 module_put(pf->owner);
1488out_sock_release:
1da177e4 1489 sock_release(sock);
55737fda
SH
1490 return err;
1491
1492out_release:
1493 rcu_read_unlock();
1494 goto out_sock_release;
1da177e4 1495}
721db93a 1496EXPORT_SYMBOL(__sock_create);
1da177e4 1497
8a3c245c
PT
1498/**
1499 * sock_create - creates a socket
1500 * @family: protocol family (AF_INET, ...)
1501 * @type: communication type (SOCK_STREAM, ...)
1502 * @protocol: protocol (0, ...)
1503 * @res: new socket
1504 *
1505 * A wrapper around __sock_create().
1506 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1507 */
1508
1da177e4
LT
1509int sock_create(int family, int type, int protocol, struct socket **res)
1510{
1b8d7ae4 1511 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1512}
c6d409cf 1513EXPORT_SYMBOL(sock_create);
1da177e4 1514
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1532
9d6a15c3 1533int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1534{
1535 int retval;
1536 struct socket *sock;
a677a039
UD
1537 int flags;
1538
e38b36f3
UD
1539 /* Check the SOCK_* constants for consistency. */
1540 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1541 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1542 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1543 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1544
a677a039 1545 flags = type & ~SOCK_TYPE_MASK;
77d27200 1546 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1547 return -EINVAL;
1548 type &= SOCK_TYPE_MASK;
1da177e4 1549
aaca0bdc
UD
1550 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1551 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1552
1da177e4
LT
1553 retval = sock_create(family, type, protocol, &sock);
1554 if (retval < 0)
8e1611e2 1555 return retval;
1da177e4 1556
8e1611e2 1557 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1558}
1559
9d6a15c3
DB
1560SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1561{
1562 return __sys_socket(family, type, protocol);
1563}
1564
1da177e4
LT
1565/*
1566 * Create a pair of connected sockets.
1567 */
1568
6debc8d8 1569int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1570{
1571 struct socket *sock1, *sock2;
1572 int fd1, fd2, err;
db349509 1573 struct file *newfile1, *newfile2;
a677a039
UD
1574 int flags;
1575
1576 flags = type & ~SOCK_TYPE_MASK;
77d27200 1577 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1578 return -EINVAL;
1579 type &= SOCK_TYPE_MASK;
1da177e4 1580
aaca0bdc
UD
1581 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1582 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1583
016a266b
AV
1584 /*
1585 * reserve descriptors and make sure we won't fail
1586 * to return them to userland.
1587 */
1588 fd1 = get_unused_fd_flags(flags);
1589 if (unlikely(fd1 < 0))
1590 return fd1;
1591
1592 fd2 = get_unused_fd_flags(flags);
1593 if (unlikely(fd2 < 0)) {
1594 put_unused_fd(fd1);
1595 return fd2;
1596 }
1597
1598 err = put_user(fd1, &usockvec[0]);
1599 if (err)
1600 goto out;
1601
1602 err = put_user(fd2, &usockvec[1]);
1603 if (err)
1604 goto out;
1605
1da177e4
LT
1606 /*
1607 * Obtain the first socket and check if the underlying protocol
1608 * supports the socketpair call.
1609 */
1610
1611 err = sock_create(family, type, protocol, &sock1);
016a266b 1612 if (unlikely(err < 0))
1da177e4
LT
1613 goto out;
1614
1615 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1616 if (unlikely(err < 0)) {
1617 sock_release(sock1);
1618 goto out;
bf3c23d1 1619 }
d73aa286 1620
d47cd945
DH
1621 err = security_socket_socketpair(sock1, sock2);
1622 if (unlikely(err)) {
1623 sock_release(sock2);
1624 sock_release(sock1);
1625 goto out;
1626 }
1627
016a266b
AV
1628 err = sock1->ops->socketpair(sock1, sock2);
1629 if (unlikely(err < 0)) {
1630 sock_release(sock2);
1631 sock_release(sock1);
1632 goto out;
28407630
AV
1633 }
1634
aab174f0 1635 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1636 if (IS_ERR(newfile1)) {
28407630 1637 err = PTR_ERR(newfile1);
016a266b
AV
1638 sock_release(sock2);
1639 goto out;
28407630
AV
1640 }
1641
aab174f0 1642 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1643 if (IS_ERR(newfile2)) {
1644 err = PTR_ERR(newfile2);
016a266b
AV
1645 fput(newfile1);
1646 goto out;
db349509
AV
1647 }
1648
157cf649 1649 audit_fd_pair(fd1, fd2);
d73aa286 1650
db349509
AV
1651 fd_install(fd1, newfile1);
1652 fd_install(fd2, newfile2);
d73aa286 1653 return 0;
1da177e4 1654
016a266b 1655out:
d73aa286 1656 put_unused_fd(fd2);
d73aa286 1657 put_unused_fd(fd1);
1da177e4
LT
1658 return err;
1659}
1660
6debc8d8
DB
1661SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1662 int __user *, usockvec)
1663{
1664 return __sys_socketpair(family, type, protocol, usockvec);
1665}
1666
1da177e4
LT
1667/*
1668 * Bind a name to a socket. Nothing much to do here since it's
1669 * the protocol's responsibility to handle the local address.
1670 *
1671 * We move the socket address to kernel space before we call
1672 * the protocol layer (having also checked the address is ok).
1673 */
1674
a87d35d8 1675int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1676{
1677 struct socket *sock;
230b1839 1678 struct sockaddr_storage address;
6cb153ca 1679 int err, fput_needed;
1da177e4 1680
89bddce5 1681 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1682 if (sock) {
43db362d 1683 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1684 if (!err) {
89bddce5 1685 err = security_socket_bind(sock,
230b1839 1686 (struct sockaddr *)&address,
89bddce5 1687 addrlen);
6cb153ca
BL
1688 if (!err)
1689 err = sock->ops->bind(sock,
89bddce5 1690 (struct sockaddr *)
230b1839 1691 &address, addrlen);
1da177e4 1692 }
6cb153ca 1693 fput_light(sock->file, fput_needed);
89bddce5 1694 }
1da177e4
LT
1695 return err;
1696}
1697
a87d35d8
DB
1698SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1699{
1700 return __sys_bind(fd, umyaddr, addrlen);
1701}
1702
1da177e4
LT
1703/*
1704 * Perform a listen. Basically, we allow the protocol to do anything
1705 * necessary for a listen, and if that works, we mark the socket as
1706 * ready for listening.
1707 */
1708
25e290ee 1709int __sys_listen(int fd, int backlog)
1da177e4
LT
1710{
1711 struct socket *sock;
6cb153ca 1712 int err, fput_needed;
b8e1f9b5 1713 int somaxconn;
89bddce5
SH
1714
1715 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1716 if (sock) {
8efa6e93 1717 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1718 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1719 backlog = somaxconn;
1da177e4
LT
1720
1721 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1722 if (!err)
1723 err = sock->ops->listen(sock, backlog);
1da177e4 1724
6cb153ca 1725 fput_light(sock->file, fput_needed);
1da177e4
LT
1726 }
1727 return err;
1728}
1729
25e290ee
DB
1730SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1731{
1732 return __sys_listen(fd, backlog);
1733}
1734
de2ea4b6
JA
1735int __sys_accept4_file(struct file *file, unsigned file_flags,
1736 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1737 int __user *upeer_addrlen, int flags,
1738 unsigned long nofile)
1da177e4
LT
1739{
1740 struct socket *sock, *newsock;
39d8c1b6 1741 struct file *newfile;
de2ea4b6 1742 int err, len, newfd;
230b1839 1743 struct sockaddr_storage address;
1da177e4 1744
77d27200 1745 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1746 return -EINVAL;
1747
1748 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1749 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1750
dba4a925
FR
1751 sock = sock_from_file(file);
1752 if (!sock) {
1753 err = -ENOTSOCK;
1da177e4 1754 goto out;
dba4a925 1755 }
1da177e4
LT
1756
1757 err = -ENFILE;
c6d409cf
ED
1758 newsock = sock_alloc();
1759 if (!newsock)
de2ea4b6 1760 goto out;
1da177e4
LT
1761
1762 newsock->type = sock->type;
1763 newsock->ops = sock->ops;
1764
1da177e4
LT
1765 /*
1766 * We don't need try_module_get here, as the listening socket (sock)
1767 * has the protocol module (sock->ops->owner) held.
1768 */
1769 __module_get(newsock->ops->owner);
1770
09952e3e 1771 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1772 if (unlikely(newfd < 0)) {
1773 err = newfd;
9a1875e6 1774 sock_release(newsock);
de2ea4b6 1775 goto out;
39d8c1b6 1776 }
aab174f0 1777 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1778 if (IS_ERR(newfile)) {
28407630
AV
1779 err = PTR_ERR(newfile);
1780 put_unused_fd(newfd);
de2ea4b6 1781 goto out;
28407630 1782 }
39d8c1b6 1783
a79af59e
FF
1784 err = security_socket_accept(sock, newsock);
1785 if (err)
39d8c1b6 1786 goto out_fd;
a79af59e 1787
de2ea4b6
JA
1788 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1789 false);
1da177e4 1790 if (err < 0)
39d8c1b6 1791 goto out_fd;
1da177e4
LT
1792
1793 if (upeer_sockaddr) {
9b2c45d4
DV
1794 len = newsock->ops->getname(newsock,
1795 (struct sockaddr *)&address, 2);
1796 if (len < 0) {
1da177e4 1797 err = -ECONNABORTED;
39d8c1b6 1798 goto out_fd;
1da177e4 1799 }
43db362d 1800 err = move_addr_to_user(&address,
230b1839 1801 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1802 if (err < 0)
39d8c1b6 1803 goto out_fd;
1da177e4
LT
1804 }
1805
1806 /* File flags are not inherited via accept() unlike another OSes. */
1807
39d8c1b6
DM
1808 fd_install(newfd, newfile);
1809 err = newfd;
1da177e4
LT
1810out:
1811 return err;
39d8c1b6 1812out_fd:
9606a216 1813 fput(newfile);
39d8c1b6 1814 put_unused_fd(newfd);
de2ea4b6
JA
1815 goto out;
1816
1817}
1818
1819/*
1820 * For accept, we attempt to create a new socket, set up the link
1821 * with the client, wake up the client, then return the new
1822 * connected fd. We collect the address of the connector in kernel
1823 * space and move it to user at the very end. This is unclean because
1824 * we open the socket then return an error.
1825 *
1826 * 1003.1g adds the ability to recvmsg() to query connection pending
1827 * status to recvmsg. We need to add that support in a way thats
1828 * clean when we restructure accept also.
1829 */
1830
1831int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1832 int __user *upeer_addrlen, int flags)
1833{
1834 int ret = -EBADF;
1835 struct fd f;
1836
1837 f = fdget(fd);
1838 if (f.file) {
1839 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1840 upeer_addrlen, flags,
1841 rlimit(RLIMIT_NOFILE));
6b07edeb 1842 fdput(f);
de2ea4b6
JA
1843 }
1844
1845 return ret;
1da177e4
LT
1846}
1847
4541e805
DB
1848SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1849 int __user *, upeer_addrlen, int, flags)
1850{
1851 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1852}
1853
20f37034
HC
1854SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1855 int __user *, upeer_addrlen)
aaca0bdc 1856{
4541e805 1857 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1858}
1859
1da177e4
LT
1860/*
1861 * Attempt to connect to a socket with the server address. The address
1862 * is in user space so we verify it is OK and move it to kernel space.
1863 *
1864 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1865 * break bindings
1866 *
1867 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1868 * other SEQPACKET protocols that take time to connect() as it doesn't
1869 * include the -EINPROGRESS status for such sockets.
1870 */
1871
f499a021 1872int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1873 int addrlen, int file_flags)
1da177e4
LT
1874{
1875 struct socket *sock;
bd3ded31 1876 int err;
1da177e4 1877
dba4a925
FR
1878 sock = sock_from_file(file);
1879 if (!sock) {
1880 err = -ENOTSOCK;
1da177e4 1881 goto out;
dba4a925 1882 }
1da177e4 1883
89bddce5 1884 err =
f499a021 1885 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1886 if (err)
bd3ded31 1887 goto out;
1da177e4 1888
f499a021 1889 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1890 sock->file->f_flags | file_flags);
1da177e4
LT
1891out:
1892 return err;
1893}
1894
bd3ded31
JA
1895int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1896{
1897 int ret = -EBADF;
1898 struct fd f;
1899
1900 f = fdget(fd);
1901 if (f.file) {
f499a021
JA
1902 struct sockaddr_storage address;
1903
1904 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1905 if (!ret)
1906 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1907 fdput(f);
bd3ded31
JA
1908 }
1909
1910 return ret;
1911}
1912
1387c2c2
DB
1913SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1914 int, addrlen)
1915{
1916 return __sys_connect(fd, uservaddr, addrlen);
1917}
1918
1da177e4
LT
1919/*
1920 * Get the local address ('name') of a socket object. Move the obtained
1921 * name to user space.
1922 */
1923
8882a107
DB
1924int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1925 int __user *usockaddr_len)
1da177e4
LT
1926{
1927 struct socket *sock;
230b1839 1928 struct sockaddr_storage address;
9b2c45d4 1929 int err, fput_needed;
89bddce5 1930
6cb153ca 1931 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1932 if (!sock)
1933 goto out;
1934
1935 err = security_socket_getsockname(sock);
1936 if (err)
1937 goto out_put;
1938
9b2c45d4
DV
1939 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1940 if (err < 0)
1da177e4 1941 goto out_put;
9b2c45d4
DV
1942 /* "err" is actually length in this case */
1943 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1944
1945out_put:
6cb153ca 1946 fput_light(sock->file, fput_needed);
1da177e4
LT
1947out:
1948 return err;
1949}
1950
8882a107
DB
1951SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1952 int __user *, usockaddr_len)
1953{
1954 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1955}
1956
1da177e4
LT
1957/*
1958 * Get the remote address ('name') of a socket object. Move the obtained
1959 * name to user space.
1960 */
1961
b21c8f83
DB
/*
 * __sys_getpeername - backend for the getpeername syscall and
 * sys_socketcall(SYS_GETPEERNAME).
 *
 * Same sequence as __sys_getsockname() but with the
 * security_socket_getpeername() hook and a last ->getname() argument of 1.
 * Written with nested blocks instead of goto labels; every path that
 * obtained a socket releases it with fput_light().
 *
 * Returns the lookup error, the security hook's non-zero result, a negative
 * ->getname() result, or the result of move_addr_to_user().
 */
1962int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1963 int __user *usockaddr_len)
1da177e4
LT
1964{
1965 struct socket *sock;
230b1839 1966 struct sockaddr_storage address;
9b2c45d4 1967 int err, fput_needed;
1da177e4 1968
89bddce5
SH
1969 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1970 if (sock != NULL) {
1da177e4
LT
1971 err = security_socket_getpeername(sock);
1972 if (err) {
6cb153ca 1973 fput_light(sock->file, fput_needed);
1da177e4
LT
1974 return err;
1975 }
1976
9b2c45d4
DV
1977 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1978 if (err >= 0)
1979 /* "err" is actually length in this case */
1980 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1981 usockaddr_len);
6cb153ca 1982 fput_light(sock->file, fput_needed);
1da177e4
LT
1983 }
1984 return err;
1985}
1986
b21c8f83
DB
/* getpeername syscall entry point: forwards to __sys_getpeername(). */
1987SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1988 int __user *, usockaddr_len)
1989{
1990 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1991}
1992
1da177e4
LT
1993/*
1994 * Send a datagram to a given address. We move the address into kernel
1995 * space and check the user space data area is readable before invoking
1996 * the protocol.
1997 */
211b634b
DB
/*
 * __sys_sendto - backend for the sendto and send syscalls and for
 * sys_socketcall(SYS_SEND / SYS_SENDTO).
 *
 * Builds a single-segment msghdr around the user buffer @buff / @len with
 * import_single_range(WRITE, ...) before the socket is looked up, so a bad
 * buffer is reported without touching the fd.  No control data is attached
 * (msg_control NULL, msg_controllen 0).
 *
 * @addr is optional: when non-NULL it is copied into an on-stack
 * sockaddr_storage and used as msg_name with msg_namelen = @addr_len;
 * when NULL the message is sent without a destination name.
 *
 * MSG_DONTWAIT is added to @flags when the file has O_NONBLOCK set.
 * Returns the result of sock_sendmsg(), or the first error hit on the way.
 */
1998int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1999 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2000{
2001 struct socket *sock;
230b1839 2002 struct sockaddr_storage address;
1da177e4
LT
2003 int err;
2004 struct msghdr msg;
2005 struct iovec iov;
6cb153ca 2006 int fput_needed;
6cb153ca 2007
602bd0e9
AV
2008 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
2009 if (unlikely(err))
2010 return err;
de0fa95c
PE
2011 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2012 if (!sock)
4387ff75 2013 goto out;
6cb153ca 2014
89bddce5 2015 msg.msg_name = NULL;
89bddce5
SH
2016 msg.msg_control = NULL;
2017 msg.msg_controllen = 0;
2018 msg.msg_namelen = 0;
6cb153ca 2019 if (addr) {
43db362d 2020 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2021 if (err < 0)
2022 goto out_put;
230b1839 2023 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2024 msg.msg_namelen = addr_len;
1da177e4
LT
2025 }
2026 if (sock->file->f_flags & O_NONBLOCK)
2027 flags |= MSG_DONTWAIT;
2028 msg.msg_flags = flags;
d8725c86 2029 err = sock_sendmsg(sock, &msg);
1da177e4 2030
89bddce5 2031out_put:
de0fa95c 2032 fput_light(sock->file, fput_needed);
4387ff75 2033out:
1da177e4
LT
2034 return err;
2035}
2036
211b634b
DB
/* sendto syscall entry point: forwards all six arguments to __sys_sendto(). */
2037SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2038 unsigned int, flags, struct sockaddr __user *, addr,
2039 int, addr_len)
2040{
2041 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2042}
2043
1da177e4 2044/*
89bddce5 2045 * Send a datagram down a socket.
1da177e4
LT
2046 */
2047
/* send syscall entry point: __sys_sendto() with no address (NULL, 0). */
3e0fa65f 2048SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2049 unsigned int, flags)
1da177e4 2050{
211b634b 2051 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2052}
2053
2054/*
89bddce5 2055 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2056 * sender. We verify the buffers are writable and if needed move the
2057 * sender address from kernel to user space.
2058 */
7a09e1eb
DB
/*
 * __sys_recvfrom - backend for the recvfrom and recv syscalls and for
 * sys_socketcall(SYS_RECV / SYS_RECVFROM).
 *
 * Builds a single-segment msghdr around the user buffer @ubuf / @size with
 * import_single_range(READ, ...) before the socket is looked up.  No
 * control data is requested.  The sender address is only collected when
 * @addr is non-NULL: msg_name then points at an on-stack sockaddr_storage
 * which is copied out with move_addr_to_user() after a successful receive.
 *
 * MSG_DONTWAIT is added to @flags when the file has O_NONBLOCK set.
 * Returns the result of sock_recvmsg(); if the receive succeeded but
 * copying the address out failed, that copy error is returned instead.
 */
2059int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2060 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2061{
2062 struct socket *sock;
2063 struct iovec iov;
2064 struct msghdr msg;
230b1839 2065 struct sockaddr_storage address;
89bddce5 2066 int err, err2;
6cb153ca
BL
2067 int fput_needed;
2068
602bd0e9
AV
2069 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2070 if (unlikely(err))
2071 return err;
de0fa95c 2072 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2073 if (!sock)
de0fa95c 2074 goto out;
1da177e4 2075
89bddce5
SH
2076 msg.msg_control = NULL;
2077 msg.msg_controllen = 0;
f3d33426
HFS
2078 /* Save some cycles and don't copy the address if not needed */
2079 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2080 /* We assume all kernel code knows the size of sockaddr_storage */
2081 msg.msg_namelen = 0;
130ed5d1 2082 msg.msg_iocb = NULL;
9f138fa6 2083 msg.msg_flags = 0;
1da177e4
LT
2084 if (sock->file->f_flags & O_NONBLOCK)
2085 flags |= MSG_DONTWAIT;
2da62906 2086 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2087
89bddce5 2088 if (err >= 0 && addr != NULL) {
43db362d 2089 err2 = move_addr_to_user(&address,
230b1839 2090 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2091 if (err2 < 0)
2092 err = err2;
1da177e4 2093 }
de0fa95c
PE
2094
2095 fput_light(sock->file, fput_needed);
4387ff75 2096out:
1da177e4
LT
2097 return err;
2098}
2099
7a09e1eb
DB
/* recvfrom syscall entry point: forwards all six arguments to __sys_recvfrom(). */
2100SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2101 unsigned int, flags, struct sockaddr __user *, addr,
2102 int __user *, addr_len)
2103{
2104 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2105}
2106
1da177e4 2107/*
89bddce5 2108 * Receive a datagram from a socket.
1da177e4
LT
2109 */
2110
b7c0ddf5
JG
/* recv syscall entry point: __sys_recvfrom() without a sender address. */
2111SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2112 unsigned int, flags)
1da177e4 2113{
7a09e1eb 2114 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2115}
2116
83f0c10b
FW
/*
 * sock_use_custom_sol_socket - should SOL_SOCKET options bypass
 * sock_setsockopt() for this socket?
 *
 * True only when CONFIG_MPTCP is enabled and the socket is an
 * IPPROTO_MPTCP SOCK_STREAM socket of family AF_INET or AF_INET6.
 * __sys_setsockopt() then routes SOL_SOCKET options to
 * sock->ops->setsockopt() instead of the generic handler.
 */
2117static bool sock_use_custom_sol_socket(const struct socket *sock)
2118{
2119 const struct sock *sk = sock->sk;
2120
2121 /* Use sock->ops->setsockopt() for MPTCP */
2122 return IS_ENABLED(CONFIG_MPTCP) &&
2123 sk->sk_protocol == IPPROTO_MPTCP &&
2124 sk->sk_type == SOCK_STREAM &&
2125 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2126}
2127
1da177e4
LT
2128/*
2129 * Set a socket option. Because we don't know the option lengths we have
2130 * to pass the user mode parameter for the protocols to sort out.
2131 */
/*
 * __sys_setsockopt - backend for the setsockopt syscall and
 * sys_socketcall(SYS_SETSOCKOPT).
 *
 * Order of operations:
 *  1. reject a negative @optlen with -EINVAL before touching the fd;
 *  2. look up the socket and call security_socket_setsockopt();
 *  3. outside compat syscalls, run BPF_CGROUP_RUN_PROG_SETSOCKOPT(), which
 *     may rewrite level/optname/optlen and may hand back a kernel buffer
 *     in kernel_optval.  A negative result is returned as the error; a
 *     positive result ends the call with 0 without invoking the protocol;
 *  4. dispatch: SOL_SOCKET goes to sock_setsockopt() unless
 *     sock_use_custom_sol_socket() says otherwise; everything else goes to
 *     sock->ops->setsockopt(), or -EOPNOTSUPP if the protocol has none.
 *
 * In a compat syscall step 3 is skipped and err still holds the 0 left by
 * the security hook, so execution continues at step 4.
 *
 * NOTE(review): the early "goto out_put" exits after the BPF hook skip
 * kfree(kernel_optval); presumably the hook only sets kernel_optval when
 * it returns 0 - confirm against the hook implementation.
 */
a7b75c5a 2132int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2133 int optlen)
1da177e4 2134{
519a8a6c 2135 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2136 char *kernel_optval = NULL;
6cb153ca 2137 int err, fput_needed;
1da177e4
LT
2138 struct socket *sock;
2139
2140 if (optlen < 0)
2141 return -EINVAL;
89bddce5
SH
2142
2143 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2144 if (!sock)
2145 return err;
1da177e4 2146
4a367299
CH
2147 err = security_socket_setsockopt(sock, level, optname);
2148 if (err)
2149 goto out_put;
0d01da6a 2150
55db9c0e
CH
2151 if (!in_compat_syscall())
2152 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2153 user_optval, &optlen,
55db9c0e 2154 &kernel_optval);
4a367299
CH
2155 if (err < 0)
2156 goto out_put;
2157 if (err > 0) {
2158 err = 0;
2159 goto out_put;
2160 }
0d01da6a 2161
/* A buffer supplied by the BPF hook replaces the user pointer. */
a7b75c5a
CH
2162 if (kernel_optval)
2163 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2164 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2165 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2166 else if (unlikely(!sock->ops->setsockopt))
2167 err = -EOPNOTSUPP;
4a367299
CH
2168 else
2169 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2170 optlen);
a7b75c5a 2171 kfree(kernel_optval);
4a367299
CH
2172out_put:
2173 fput_light(sock->file, fput_needed);
1da177e4
LT
2174 return err;
2175}
2176
cc36dca0
DB
/* setsockopt syscall entry point: forwards to __sys_setsockopt(). */
2177SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2178 char __user *, optval, int, optlen)
2179{
2180 return __sys_setsockopt(fd, level, optname, optval, optlen);
2181}
2182
9cacf81f
SF
/*
 * Declaration only: tcp_bpf_bypass_getsockopt(level, optname) returns bool.
 * NOTE(review): no direct call appears in the nearby code; presumably it is
 * consumed by the BPF getsockopt hook macro used in __sys_getsockopt() -
 * confirm.
 */
2183INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2184 int optname));
2185
1da177e4
LT
2186/*
2187 * Get a socket option. Because we don't know the option lengths we have
2188 * to pass a user mode parameter for the protocols to sort out.
2189 */
55db9c0e
CH
/*
 * __sys_getsockopt - backend for the getsockopt syscall and
 * sys_socketcall(SYS_GETSOCKOPT).
 *
 * Looks up the socket, calls security_socket_getsockopt(), then
 * dispatches: SOL_SOCKET always goes to sock_getsockopt() (there is no
 * sock_use_custom_sol_socket() check on this path, unlike setsockopt);
 * other levels go to sock->ops->getsockopt(), or -EOPNOTSUPP if the
 * protocol has none.
 *
 * Outside compat syscalls the result is post-processed by
 * BPF_CGROUP_RUN_PROG_GETSOCKOPT(), which receives the protocol's return
 * value and the max_optlen sampled before the protocol ran.  max_optlen is
 * only assigned and only read under !in_compat_syscall().
 */
2190int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2191 int __user *optlen)
1da177e4 2192{
6cb153ca 2193 int err, fput_needed;
1da177e4 2194 struct socket *sock;
0d01da6a 2195 int max_optlen;
1da177e4 2196
89bddce5 2197 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2198 if (!sock)
2199 return err;
2200
2201 err = security_socket_getsockopt(sock, level, optname);
2202 if (err)
2203 goto out_put;
1da177e4 2204
55db9c0e
CH
2205 if (!in_compat_syscall())
2206 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2207
d8a9b38f
CH
2208 if (level == SOL_SOCKET)
2209 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2210 else if (unlikely(!sock->ops->getsockopt))
2211 err = -EOPNOTSUPP;
d8a9b38f
CH
2212 else
2213 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2214 optlen);
0d01da6a 2215
55db9c0e
CH
2216 if (!in_compat_syscall())
2217 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2218 optval, optlen, max_optlen,
2219 err);
6cb153ca 2220out_put:
d8a9b38f 2221 fput_light(sock->file, fput_needed);
1da177e4
LT
2222 return err;
2223}
2224
13a2d70e
DB
/* getsockopt syscall entry point: forwards to __sys_getsockopt(). */
2225SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2226 char __user *, optval, int __user *, optlen)
2227{
2228 return __sys_getsockopt(fd, level, optname, optval, optlen);
2229}
2230
1da177e4
LT
2231/*
2232 * Shutdown a socket.
2233 */
2234
b713c195
JA
/*
 * __sys_shutdown_sock - shut down an already-resolved socket.
 *
 * Calls security_socket_shutdown() and, only if it returned 0,
 * sock->ops->shutdown().  Takes no reference on the socket and drops none.
 * Returns the hook's non-zero result or the protocol's result.
 */
2235int __sys_shutdown_sock(struct socket *sock, int how)
2236{
2237 int err;
2238
2239 err = security_socket_shutdown(sock, how);
2240 if (!err)
2241 err = sock->ops->shutdown(sock, how);
2242
2243 return err;
2244}
2245
/*
 * __sys_shutdown - fd-based wrapper around __sys_shutdown_sock().
 *
 * Looks up the socket for @fd, runs __sys_shutdown_sock() on it and
 * releases the reference.  When the lookup fails, the err value filled in
 * through sockfd_lookup_light()'s &err argument is returned.
 */
005a1aea 2246int __sys_shutdown(int fd, int how)
1da177e4 2247{
6cb153ca 2248 int err, fput_needed;
1da177e4
LT
2249 struct socket *sock;
2250
89bddce5
SH
2251 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2252 if (sock != NULL) {
b713c195 2253 err = __sys_shutdown_sock(sock, how);
6cb153ca 2254 fput_light(sock->file, fput_needed);
1da177e4
LT
2255 }
2256 return err;
2257}
2258
005a1aea
DB
/* shutdown syscall entry point: forwards to __sys_shutdown(). */
2259SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2260{
2261 return __sys_shutdown(fd, how);
2262}
2263
89bddce5 2264/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
2265 * fields which are the same type (int / unsigned) on our platforms.
2266 */
/*
 * COMPAT_MSG(msg, member) yields the address of @member in either the
 * compat or the native user msghdr.  The expansion is not hygienic: it
 * reads a variable named "flags" and a pointer named "<msg>_compat" from
 * the scope where it is used, so both must exist there (see
 * ____sys_recvmsg(), which declares msg_compat for exactly this purpose).
 */
2267#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
2268#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
2269#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
2270
c71d8ebe
TH
/*
 * Destination address of the last successful send within one sendmmsg()
 * call.  ____sys_sendmsg() compares the next message's destination with it
 * and, on a match, sends through sock_sendmsg_nosec().  __sys_sendmmsg()
 * initialises name_len to UINT_MAX so the first message never matches.
 */
2271struct used_address {
2272 struct sockaddr_storage name;
2273 unsigned int name_len;
2274};
2275
0a384abf
JA
/*
 * __copy_msghdr_from_user - import a native user_msghdr into a kernel
 * msghdr, leaving the iovec itself to the caller.
 *
 * @kmsg:      kernel msghdr to fill.  When @save_addr is NULL and the user
 *             supplied an address, kmsg->msg_name must already point at
 *             storage for it (___sys_sendmsg() points it at a
 *             sockaddr_storage before calling down).
 * @umsg:      user-space header to copy from.
 * @save_addr: if non-NULL, receives the user's msg_name pointer and the
 *             address is NOT copied in; if NULL, the address is copied
 *             into kmsg->msg_name with move_addr_to_kernel().
 * @uiov:      out: user pointer to the iovec array.
 * @nsegs:     out: number of iovec entries.
 *
 * The control buffer is recorded as a user pointer
 * (msg_control_is_user = true).  msg_namelen is forced to 0 when the user
 * passed no name, rejected with -EINVAL when negative, and clamped to
 * sizeof(struct sockaddr_storage) when larger.  When there is no usable
 * name, msg_name is set to NULL and msg_namelen to 0.
 *
 * Returns 0 on success, -EFAULT if the header cannot be read, -EINVAL for
 * a negative name length, -EMSGSIZE if msg_iovlen exceeds UIO_MAXIOV, or
 * the negative result of move_addr_to_kernel().
 */
2276int __copy_msghdr_from_user(struct msghdr *kmsg,
2277 struct user_msghdr __user *umsg,
2278 struct sockaddr __user **save_addr,
2279 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2280{
ffb07550 2281 struct user_msghdr msg;
08adb7da
AV
2282 ssize_t err;
2283
ffb07550 2284 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2285 return -EFAULT;
dbb490b9 2286
1f466e1f
CH
2287 kmsg->msg_control_is_user = true;
2288 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2289 kmsg->msg_controllen = msg.msg_controllen;
2290 kmsg->msg_flags = msg.msg_flags;
2291
2292 kmsg->msg_namelen = msg.msg_namelen;
2293 if (!msg.msg_name)
6a2a2b3a
AS
2294 kmsg->msg_namelen = 0;
2295
dbb490b9
ML
2296 if (kmsg->msg_namelen < 0)
2297 return -EINVAL;
2298
1661bf36 2299 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2300 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2301
2302 if (save_addr)
ffb07550 2303 *save_addr = msg.msg_name;
08adb7da 2304
ffb07550 2305 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2306 if (!save_addr) {
864d9664
PA
2307 err = move_addr_to_kernel(msg.msg_name,
2308 kmsg->msg_namelen,
08adb7da
AV
2309 kmsg->msg_name);
2310 if (err < 0)
2311 return err;
2312 }
2313 } else {
2314 kmsg->msg_name = NULL;
2315 kmsg->msg_namelen = 0;
2316 }
2317
ffb07550 2318 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2319 return -EMSGSIZE;
2320
0345f931 2321 kmsg->msg_iocb = NULL;
0a384abf
JA
2322 *uiov = msg.msg_iov;
2323 *nsegs = msg.msg_iovlen;
2324 return 0;
2325}
2326
/*
 * copy_msghdr_from_user - import a native user_msghdr including its iovec.
 *
 * Calls __copy_msghdr_from_user() for the header, then import_iovec() to
 * build kmsg->msg_iter.  The local "msg" is only scratch space for the
 * iovec pointer and count returned by the helper.
 *
 * The transfer direction is derived from @save_addr: READ when it is
 * non-NULL (the receive path passes a pointer, see recvmsg_copy_msghdr()),
 * WRITE when it is NULL (the send path, see sendmsg_copy_msghdr()).
 *
 * @iov is passed to import_iovec() together with UIO_FASTIOV; the callers
 * in this file kfree(*iov) afterwards.
 *
 * Returns 0 on success or a negative error; a non-negative import_iovec()
 * result is folded to 0.
 */
2327static int copy_msghdr_from_user(struct msghdr *kmsg,
2328 struct user_msghdr __user *umsg,
2329 struct sockaddr __user **save_addr,
2330 struct iovec **iov)
2331{
2332 struct user_msghdr msg;
2333 ssize_t err;
2334
2335 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2336 &msg.msg_iovlen);
2337 if (err)
2338 return err;
0345f931 2339
87e5e6da 2340 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2341 msg.msg_iov, msg.msg_iovlen,
da184284 2342 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2343 return err < 0 ? err : 0;
1661bf36
DC
2344}
2345
4257c8ca
JA
/*
 * ____sys_sendmsg - send one already-imported msghdr on @sock.
 *
 * @msg_sys:              kernel msghdr (name, iterator and user control
 *                        pointer already set up by the caller).
 * @flags:                send flags; msg_sys->msg_flags bits listed in
 *                        @allowed_msghdr_flags are OR-ed in.
 * @used_address:         NULL, or the sendmmsg() address cache (see
 *                        struct used_address).
 * @allowed_msghdr_flags: mask of per-message flags honoured from the
 *                        user's header.
 *
 * Control data handling:
 *  - msg_controllen > INT_MAX fails with -ENOBUFS;
 *  - with MSG_CMSG_COMPAT the data is converted by
 *    cmsghdr_from_user_compat_to_kern(), which is offered the on-stack
 *    buffer "ctl"; afterwards ctl_buf/ctl_len are re-read from msg_sys;
 *  - otherwise the data is copied from user space into "ctl" if it fits,
 *    or into a sock_kmalloc() buffer (-ENOBUFS on allocation failure,
 *    -EFAULT on copy failure), and msg_control is switched to that kernel
 *    copy.
 * Any buffer other than "ctl" is released with sock_kfree_s() on exit.
 *
 * MSG_DONTWAIT is added when the file has O_NONBLOCK set.
 * Returns the result of the send, or a negative error from the set-up.
 */
2346static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2347 unsigned int flags, struct used_address *used_address,
2348 unsigned int allowed_msghdr_flags)
1da177e4 2349{
b9d717a7 2350 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2351 __aligned(sizeof(__kernel_size_t));
89bddce5 2352 /* 20 is size of ipv6_pktinfo */
1da177e4 2353 unsigned char *ctl_buf = ctl;
d8725c86 2354 int ctl_len;
08adb7da 2355 ssize_t err;
89bddce5 2356
1da177e4
LT
2357 err = -ENOBUFS;
2358
228e548e 2359 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2360 goto out;
28a94d8f 2361 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2362 ctl_len = msg_sys->msg_controllen;
1da177e4 2363 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2364 err =
228e548e 2365 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2366 sizeof(ctl));
1da177e4 2367 if (err)
4257c8ca 2368 goto out;
228e548e
AB
2369 ctl_buf = msg_sys->msg_control;
2370 ctl_len = msg_sys->msg_controllen;
1da177e4 2371 } else if (ctl_len) {
ac4340fc
DM
2372 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2373 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2374 if (ctl_len > sizeof(ctl)) {
1da177e4 2375 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2376 if (ctl_buf == NULL)
4257c8ca 2377 goto out;
1da177e4
LT
2378 }
2379 err = -EFAULT;
1f466e1f 2380 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2381 goto out_freectl;
228e548e 2382 msg_sys->msg_control = ctl_buf;
1f466e1f 2383 msg_sys->msg_control_is_user = false;
1da177e4 2384 }
228e548e 2385 msg_sys->msg_flags = flags;
1da177e4
LT
2386
2387 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2388 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2389 /*
2390 * If this is sendmmsg() and current destination address is same as
2391 * previously succeeded address, omit asking LSM's decision.
2392 * used_address->name_len is initialized to UINT_MAX so that the first
2393 * destination address never matches.
2394 */
bc909d9d
MD
2395 if (used_address && msg_sys->msg_name &&
2396 used_address->name_len == msg_sys->msg_namelen &&
2397 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2398 used_address->name_len)) {
d8725c86 2399 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2400 goto out_freectl;
2401 }
d8725c86 2402 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2403 /*
2404 * If this is sendmmsg() and sending to current destination address was
2405 * successful, remember it.
2406 */
2407 if (used_address && err >= 0) {
2408 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2409 if (msg_sys->msg_name)
2410 memcpy(&used_address->name, msg_sys->msg_name,
2411 used_address->name_len);
c71d8ebe 2412 }
1da177e4
LT
2413
2414out_freectl:
89bddce5 2415 if (ctl_buf != ctl)
1da177e4 2416 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2417out:
2418 return err;
2419}
2420
03b1230c
JA
/*
 * sendmsg_copy_msghdr - import a user msghdr for sending.
 *
 * Picks get_compat_msghdr() when @flags has MSG_CMSG_COMPAT set and
 * copy_msghdr_from_user() otherwise.  Both are called with a NULL
 * save_addr, i.e. the destination address is copied into the kernel.
 *
 * Returns 0 on success or the importer's negative error.
 */
2421int sendmsg_copy_msghdr(struct msghdr *msg,
2422 struct user_msghdr __user *umsg, unsigned flags,
2423 struct iovec **iov)
4257c8ca
JA
2424{
2425 int err;
2426
2427 if (flags & MSG_CMSG_COMPAT) {
2428 struct compat_msghdr __user *msg_compat;
2429
2430 msg_compat = (struct compat_msghdr __user *) umsg;
2431 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2432 } else {
2433 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2434 }
2435 if (err < 0)
2436 return err;
2437
2438 return 0;
2439}
2440
/*
 * ___sys_sendmsg - import one user msghdr and send it.
 *
 * Provides the on-stack storage the import needs: a sockaddr_storage that
 * msg_sys->msg_name is pointed at before the import, and a UIO_FASTIOV
 * iovec array.  After ____sys_sendmsg() returns, kfree(iov) releases
 * whatever sendmsg_copy_msghdr() left in iov.
 *
 * Returns the import error or the result of ____sys_sendmsg().
 */
2441static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2442 struct msghdr *msg_sys, unsigned int flags,
2443 struct used_address *used_address,
2444 unsigned int allowed_msghdr_flags)
2445{
2446 struct sockaddr_storage address;
2447 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2448 ssize_t err;
2449
2450 msg_sys->msg_name = &address;
2451
2452 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2453 if (err < 0)
2454 return err;
2455
2456 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2457 allowed_msghdr_flags);
da184284 2458 kfree(iov);
228e548e
AB
2459 return err;
2460}
2461
2462/*
2463 * BSD sendmsg interface
2464 */
/*
 * __sys_sendmsg_sock - send an already-imported kernel msghdr on @sock.
 * Calls ____sys_sendmsg() with no address cache and with no msghdr flags
 * allowed through (mask 0).
 */
03b1230c 2465long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2466 unsigned int flags)
2467{
03b1230c 2468 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2469}
228e548e 2470
e1834a32
DB
/*
 * __sys_sendmsg - fd-based backend for the sendmsg syscall and
 * sys_socketcall(SYS_SENDMSG).
 *
 * @forbid_cmsg_compat: when true, a caller-supplied MSG_CMSG_COMPAT flag
 *                      is rejected with -EINVAL before the fd is looked up.
 *
 * Sends a single message through ___sys_sendmsg() with no address cache
 * and no msghdr flags allowed through.  Returns its result, or the socket
 * lookup error.
 */
2471long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2472 bool forbid_cmsg_compat)
228e548e
AB
2473{
2474 int fput_needed, err;
2475 struct msghdr msg_sys;
1be374a0
AL
2476 struct socket *sock;
2477
e1834a32
DB
2478 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2479 return -EINVAL;
2480
1be374a0 2481 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2482 if (!sock)
2483 goto out;
2484
28a94d8f 2485 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2486
6cb153ca 2487 fput_light(sock->file, fput_needed);
89bddce5 2488out:
1da177e4
LT
2489 return err;
2490}
2491
/* sendmsg syscall entry point: __sys_sendmsg() with MSG_CMSG_COMPAT forbidden. */
666547ff 2492SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2493{
e1834a32 2494 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2495}
2496
228e548e
AB
2497/*
2498 * Linux sendmmsg interface
2499 */
2500
/*
 * __sys_sendmmsg - backend for the sendmmsg syscall and
 * sys_socketcall(SYS_SENDMMSG).
 *
 * Sends up to @vlen messages from the user array @mmsg, writing the byte
 * count of each sent message to its msg_len field.  @vlen is silently
 * clamped to UIO_MAXIOV.
 *
 * MSG_BATCH is set for every message except the last of the batch: when
 * the loop reaches index vlen - 1 the caller's original flags (oflags)
 * are restored.  MSG_EOR is the only per-message msghdr flag honoured.
 *
 * The loop stops on the first send error, on a failure to store msg_len,
 * or when a message was only partially sent (msg_data_left() non-zero).
 *
 * Returns the number of messages sent if at least one went out; otherwise
 * the error from the first attempt (0 when @vlen is 0).
 */
2501int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2502 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2503{
2504 int fput_needed, err, datagrams;
2505 struct socket *sock;
2506 struct mmsghdr __user *entry;
2507 struct compat_mmsghdr __user *compat_entry;
2508 struct msghdr msg_sys;
c71d8ebe 2509 struct used_address used_address;
f092276d 2510 unsigned int oflags = flags;
228e548e 2511
e1834a32
DB
2512 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2513 return -EINVAL;
2514
98382f41
AB
2515 if (vlen > UIO_MAXIOV)
2516 vlen = UIO_MAXIOV;
228e548e
AB
2517
2518 datagrams = 0;
2519
2520 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2521 if (!sock)
2522 return err;
2523
c71d8ebe 2524 used_address.name_len = UINT_MAX;
228e548e
AB
2525 entry = mmsg;
2526 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2527 err = 0;
f092276d 2528 flags |= MSG_BATCH;
228e548e
AB
2529
2530 while (datagrams < vlen) {
f092276d
TH
2531 if (datagrams == vlen - 1)
2532 flags = oflags;
2533
228e548e 2534 if (MSG_CMSG_COMPAT & flags) {
666547ff 2535 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2536 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2537 if (err < 0)
2538 break;
2539 err = __put_user(err, &compat_entry->msg_len);
2540 ++compat_entry;
2541 } else {
a7526eb5 2542 err = ___sys_sendmsg(sock,
666547ff 2543 (struct user_msghdr __user *)entry,
28a94d8f 2544 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2545 if (err < 0)
2546 break;
2547 err = put_user(err, &entry->msg_len);
2548 ++entry;
2549 }
2550
2551 if (err)
2552 break;
2553 ++datagrams;
3023898b
SHY
2554 if (msg_data_left(&msg_sys))
2555 break;
a78cb84c 2556 cond_resched();
228e548e
AB
2557 }
2558
228e548e
AB
2559 fput_light(sock->file, fput_needed);
2560
728ffb86
AB
2561 /* We only return an error if no datagrams were able to be sent */
2562 if (datagrams != 0)
228e548e
AB
2563 return datagrams;
2564
228e548e
AB
2565 return err;
2566}
2567
/* sendmmsg syscall entry point: __sys_sendmmsg() with MSG_CMSG_COMPAT forbidden. */
2568SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2569 unsigned int, vlen, unsigned int, flags)
2570{
e1834a32 2571 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2572}
2573
03b1230c
JA
/*
 * recvmsg_copy_msghdr - import a user msghdr for receiving.
 *
 * Picks get_compat_msghdr() when @flags has MSG_CMSG_COMPAT set and
 * copy_msghdr_from_user() otherwise.  Both receive @uaddr as save_addr,
 * so the user's address pointer is stored in *@uaddr rather than the
 * address being copied into the kernel.
 *
 * Returns 0 on success or the importer's negative error.
 */
2574int recvmsg_copy_msghdr(struct msghdr *msg,
2575 struct user_msghdr __user *umsg, unsigned flags,
2576 struct sockaddr __user **uaddr,
2577 struct iovec **iov)
1da177e4 2578{
08adb7da 2579 ssize_t err;
1da177e4 2580
4257c8ca
JA
2581 if (MSG_CMSG_COMPAT & flags) {
2582 struct compat_msghdr __user *msg_compat;
1da177e4 2583
4257c8ca
JA
2584 msg_compat = (struct compat_msghdr __user *) umsg;
2585 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2586 } else {
2587 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2588 }
1da177e4 2589 if (err < 0)
da184284 2590 return err;
1da177e4 2591
4257c8ca
JA
2592 return 0;
2593}
2594
/*
 * ____sys_recvmsg - receive into an already-imported msghdr and write the
 * results back to the user's header.
 *
 * @msg_sys: kernel msghdr with iterator and control pointer set up.
 * @msg:     user header; treated as compat_msghdr when @flags has
 *           MSG_CMSG_COMPAT (msg_compat exists for the COMPAT_* macros).
 * @uaddr:   user buffer for the sender address, or NULL to skip it.
 * @nosec:   non-zero selects sock_recvmsg_nosec() instead of
 *           sock_recvmsg().
 *
 * The initial msg_control pointer is remembered in cmsg_ptr; after the
 * receive, the difference between the current and the initial pointer is
 * written to the user's msg_controllen.  The user's msg_flags receives
 * the resulting flags with MSG_CMSG_COMPAT masked out.
 *
 * MSG_DONTWAIT is added when the file has O_NONBLOCK set.
 * Returns the number of bytes received, or a negative error from the
 * receive or from any of the copy-out steps.
 */
2595static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2596 struct user_msghdr __user *msg,
2597 struct sockaddr __user *uaddr,
2598 unsigned int flags, int nosec)
2599{
2600 struct compat_msghdr __user *msg_compat =
2601 (struct compat_msghdr __user *) msg;
2602 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2603 struct sockaddr_storage addr;
2604 unsigned long cmsg_ptr;
2605 int len;
2606 ssize_t err;
2607
2608 msg_sys->msg_name = &addr;
a2e27255
ACM
2609 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2610 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2611
f3d33426
HFS
2612 /* We assume all kernel code knows the size of sockaddr_storage */
2613 msg_sys->msg_namelen = 0;
2614
1da177e4
LT
2615 if (sock->file->f_flags & O_NONBLOCK)
2616 flags |= MSG_DONTWAIT;
1af66221
ED
2617
2618 if (unlikely(nosec))
2619 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2620 else
2621 err = sock_recvmsg(sock, msg_sys, flags);
2622
1da177e4 2623 if (err < 0)
4257c8ca 2624 goto out;
1da177e4
LT
2625 len = err;
2626
2627 if (uaddr != NULL) {
43db362d 2628 err = move_addr_to_user(&addr,
a2e27255 2629 msg_sys->msg_namelen, uaddr,
89bddce5 2630 uaddr_len);
1da177e4 2631 if (err < 0)
4257c8ca 2632 goto out;
1da177e4 2633 }
a2e27255 2634 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2635 COMPAT_FLAGS(msg));
1da177e4 2636 if (err)
4257c8ca 2637 goto out;
1da177e4 2638 if (MSG_CMSG_COMPAT & flags)
a2e27255 2639 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2640 &msg_compat->msg_controllen);
2641 else
a2e27255 2642 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2643 &msg->msg_controllen);
2644 if (err)
4257c8ca 2645 goto out;
1da177e4 2646 err = len;
4257c8ca
JA
2647out:
2648 return err;
2649}
2650
/*
 * ___sys_recvmsg - import one user msghdr and receive into it.
 *
 * Supplies the on-stack UIO_FASTIOV iovec array, imports the header with
 * recvmsg_copy_msghdr() (which also yields the user's address pointer in
 * uaddr), runs ____sys_recvmsg() and then kfree()s whatever the import
 * left in iov.
 *
 * Returns the import error or the result of ____sys_recvmsg().
 */
2651static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2652 struct msghdr *msg_sys, unsigned int flags, int nosec)
2653{
2654 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2655 /* user mode address pointers */
2656 struct sockaddr __user *uaddr;
2657 ssize_t err;
2658
2659 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2660 if (err < 0)
2661 return err;
1da177e4 2662
4257c8ca 2663 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2664 kfree(iov);
a2e27255
ACM
2665 return err;
2666}
2667
2668/*
2669 * BSD recvmsg interface
2670 */
2671
03b1230c
JA
/*
 * __sys_recvmsg_sock - receive into an already-imported kernel msghdr on
 * @sock.  Calls ____sys_recvmsg() with nosec = 0, i.e. through
 * sock_recvmsg().
 */
2672long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2673 struct user_msghdr __user *umsg,
2674 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2675{
03b1230c 2676 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2677}
2678
e1834a32
DB
/*
 * __sys_recvmsg - fd-based backend for the recvmsg syscall and
 * sys_socketcall(SYS_RECVMSG).
 *
 * @forbid_cmsg_compat: when true, a caller-supplied MSG_CMSG_COMPAT flag
 *                      is rejected with -EINVAL before the fd is looked up.
 *
 * Receives a single message through ___sys_recvmsg() with nosec = 0.
 * Returns its result, or the socket lookup error.
 */
2679long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2680 bool forbid_cmsg_compat)
a2e27255
ACM
2681{
2682 int fput_needed, err;
2683 struct msghdr msg_sys;
1be374a0
AL
2684 struct socket *sock;
2685
e1834a32
DB
2686 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2687 return -EINVAL;
2688
1be374a0 2689 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2690 if (!sock)
2691 goto out;
2692
a7526eb5 2693 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2694
6cb153ca 2695 fput_light(sock->file, fput_needed);
1da177e4
LT
2696out:
2697 return err;
2698}
2699
/* recvmsg syscall entry point: __sys_recvmsg() with MSG_CMSG_COMPAT forbidden. */
666547ff 2700SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2701 unsigned int, flags)
2702{
e1834a32 2703 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2704}
2705
a2e27255
ACM
2706/*
2707 * Linux recvmmsg interface
2708 */
2709
e11d4284
AB
/*
 * do_recvmmsg - receive up to @vlen messages into the user array @mmsg.
 *
 * @timeout: NULL for no time limit; otherwise a kernel-space timespec that
 *           is converted to an absolute end_time on entry and overwritten
 *           with the remaining time after every received message.
 *
 * Behaviour visible in this function:
 *  - unless MSG_ERRQUEUE is requested, a pending sock_error() is returned
 *    immediately and nothing is received;
 *  - the loop's datagram counter doubles as the "nosec" argument of
 *    ___sys_recvmsg(): 0 for the first message, non-zero afterwards;
 *  - MSG_WAITFORONE is stripped from the flags passed down and, after the
 *    first message, turns on MSG_DONTWAIT for the rest of the batch;
 *  - the loop ends on a receive error, a failure to store msg_len, an
 *    expired timeout, or a message flagged MSG_OOB.
 *
 * Returns the number of messages received; if none were received, the
 * error instead.  An error other than -EAGAIN that occurs after some
 * messages were received is stored in sk_err rather than returned.
 */
2710static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2711 unsigned int vlen, unsigned int flags,
2712 struct timespec64 *timeout)
a2e27255
ACM
2713{
2714 int fput_needed, err, datagrams;
2715 struct socket *sock;
2716 struct mmsghdr __user *entry;
d7256d0e 2717 struct compat_mmsghdr __user *compat_entry;
a2e27255 2718 struct msghdr msg_sys;
766b9f92
DD
2719 struct timespec64 end_time;
2720 struct timespec64 timeout64;
a2e27255
ACM
2721
2722 if (timeout &&
2723 poll_select_set_timeout(&end_time, timeout->tv_sec,
2724 timeout->tv_nsec))
2725 return -EINVAL;
2726
2727 datagrams = 0;
2728
2729 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2730 if (!sock)
2731 return err;
2732
7797dc41
SHY
2733 if (likely(!(flags & MSG_ERRQUEUE))) {
2734 err = sock_error(sock->sk);
2735 if (err) {
2736 datagrams = err;
2737 goto out_put;
2738 }
e623a9e9 2739 }
a2e27255
ACM
2740
2741 entry = mmsg;
d7256d0e 2742 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2743
2744 while (datagrams < vlen) {
2745 /*
2746 * No need to ask LSM for more than the first datagram.
2747 */
d7256d0e 2748 if (MSG_CMSG_COMPAT & flags) {
666547ff 2749 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2750 &msg_sys, flags & ~MSG_WAITFORONE,
2751 datagrams);
d7256d0e
JMG
2752 if (err < 0)
2753 break;
2754 err = __put_user(err, &compat_entry->msg_len);
2755 ++compat_entry;
2756 } else {
a7526eb5 2757 err = ___sys_recvmsg(sock,
666547ff 2758 (struct user_msghdr __user *)entry,
a7526eb5
AL
2759 &msg_sys, flags & ~MSG_WAITFORONE,
2760 datagrams);
d7256d0e
JMG
2761 if (err < 0)
2762 break;
2763 err = put_user(err, &entry->msg_len);
2764 ++entry;
2765 }
2766
a2e27255
ACM
2767 if (err)
2768 break;
a2e27255
ACM
2769 ++datagrams;
2770
71c5c159
BB
2771 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2772 if (flags & MSG_WAITFORONE)
2773 flags |= MSG_DONTWAIT;
2774
a2e27255 2775 if (timeout) {
766b9f92 2776 ktime_get_ts64(&timeout64);
c2e6c856 2777 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2778 if (timeout->tv_sec < 0) {
2779 timeout->tv_sec = timeout->tv_nsec = 0;
2780 break;
2781 }
2782
2783 /* Timeout, return less than vlen datagrams */
2784 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2785 break;
2786 }
2787
2788 /* Out of band data, return right away */
2789 if (msg_sys.msg_flags & MSG_OOB)
2790 break;
a78cb84c 2791 cond_resched();
a2e27255
ACM
2792 }
2793
a2e27255 2794 if (err == 0)
34b88a68
ACM
2795 goto out_put;
2796
2797 if (datagrams == 0) {
2798 datagrams = err;
2799 goto out_put;
2800 }
a2e27255 2801
34b88a68
ACM
2802 /*
2803 * We may return less entries than requested (vlen) if the
2804 * sock is non block and there aren't enough datagrams...
2805 */
2806 if (err != -EAGAIN) {
a2e27255 2807 /*
34b88a68
ACM
2808 * ... or if recvmsg returns an error after we
2809 * received some datagrams, where we record the
2810 * error to return on the next call or if the
2811 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2812 */
34b88a68 2813 sock->sk->sk_err = -err;
a2e27255 2814 }
34b88a68
ACM
2815out_put:
2816 fput_light(sock->file, fput_needed);
a2e27255 2817
34b88a68 2818 return datagrams;
a2e27255
ACM
2819}
2820
e11d4284
AB
/*
 * __sys_recvmmsg - user-pointer front end for do_recvmmsg().
 *
 * The timeout may arrive in one of two user-space layouts: @timeout
 * (__kernel_timespec) or @timeout32 (old_timespec32).  The callers in this
 * file pass at most one of them.  Whichever is non-NULL is read into a
 * kernel timespec64 (-EFAULT on failure); with neither, do_recvmmsg() runs
 * without a time limit.
 *
 * The remaining time is written back to user space only when at least one
 * message was received; a failure of that write-back turns the result
 * into -EFAULT.
 *
 * Returns the result of do_recvmmsg(), or -EFAULT as described above.
 */
2821int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2822 unsigned int vlen, unsigned int flags,
2823 struct __kernel_timespec __user *timeout,
2824 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2825{
2826 int datagrams;
c2e6c856 2827 struct timespec64 timeout_sys;
a2e27255 2828
e11d4284
AB
2829 if (timeout && get_timespec64(&timeout_sys, timeout))
2830 return -EFAULT;
a2e27255 2831
e11d4284 2832 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2833 return -EFAULT;
2834
e11d4284
AB
2835 if (!timeout && !timeout32)
2836 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2837
2838 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2839
e11d4284
AB
2840 if (datagrams <= 0)
2841 return datagrams;
2842
2843 if (timeout && put_timespec64(&timeout_sys, timeout))
2844 datagrams = -EFAULT;
2845
2846 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2847 datagrams = -EFAULT;
2848
2849 return datagrams;
2850}
2851
1255e269
DB
/*
 * recvmmsg syscall entry point (__kernel_timespec timeout).  Rejects a
 * caller-supplied MSG_CMSG_COMPAT with -EINVAL, then forwards to
 * __sys_recvmmsg() with no 32-bit timeout.
 */
2852SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2853 unsigned int, vlen, unsigned int, flags,
c2e6c856 2854 struct __kernel_timespec __user *, timeout)
1255e269 2855{
e11d4284
AB
2856 if (flags & MSG_CMSG_COMPAT)
2857 return -EINVAL;
2858
2859 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2860}
2861
2862#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg_time32 syscall entry point (old_timespec32 timeout), built only
 * with CONFIG_COMPAT_32BIT_TIME.  Same MSG_CMSG_COMPAT check as recvmmsg;
 * the timeout is passed in __sys_recvmmsg()'s timeout32 slot.
 */
2863SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2864 unsigned int, vlen, unsigned int, flags,
2865 struct old_timespec32 __user *, timeout)
2866{
2867 if (flags & MSG_CMSG_COMPAT)
2868 return -EINVAL;
2869
2870 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2871}
e11d4284 2872#endif
1255e269 2873
a2e27255 2874#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2875/* Argument list sizes for sys_socketcall */
/*
 * nargs[call] is the size in BYTES of the argument block that
 * sys_socketcall copies from user space for sub-call number @call:
 * AL(n) expands to n * sizeof(unsigned long).  The table has 21 entries,
 * indexed 0..20; entry 0 is AL(0) and socketcall rejects call numbers
 * below 1.  AL is undefined again right after the table.
 */
2876#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2877static const unsigned char nargs[21] = {
c6d409cf
ED
2878 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2879 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2880 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2881 AL(4), AL(5), AL(4)
89bddce5
SH
2882};
2883
1da177e4
LT
2884#undef AL
2885
2886/*
89bddce5 2887 * System call vectors.
1da177e4
LT
2888 *
2889 * Argument checking cleaned up. Saved 20% in size.
2890 * This function doesn't need to set the kernel lock because
89bddce5 2891 * it is set by the callees.
1da177e4
LT
2892 */
2893
/*
 * socketcall multiplexer: @call selects the socket operation, @args points
 * at a user-space array of unsigned long arguments for it.
 *
 * Steps:
 *  1. @call must be within 1..SYS_SENDMMSG (-EINVAL otherwise); it is then
 *     passed through array_index_nospec() before indexing nargs[];
 *  2. nargs[call] bytes are copied from @args into a[] (-EINVAL if that
 *     would not fit in a[], -EFAULT if the copy fails);
 *  3. audit_socketcall() is given the argument count and values and may
 *     veto the call;
 *  4. the switch dispatches to the matching __sys_*() helper, casting the
 *     raw unsigned long values to the pointer types each helper expects.
 *
 * SYS_SEND and SYS_RECV reuse the sendto/recvfrom helpers with no address.
 * The sendmsg/sendmmsg/recvmsg cases pass forbid_cmsg_compat = true.
 * For SYS_RECVMMSG the fifth argument is interpreted as a
 * __kernel_timespec when CONFIG_64BIT is enabled and as an old_timespec32
 * otherwise.
 */
3e0fa65f 2894SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2895{
2950fa9d 2896 unsigned long a[AUDITSC_ARGS];
89bddce5 2897 unsigned long a0, a1;
1da177e4 2898 int err;
47379052 2899 unsigned int len;
1da177e4 2900
228e548e 2901 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2902 return -EINVAL;
c8e8cd57 2903 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2904
47379052
AV
2905 len = nargs[call];
2906 if (len > sizeof(a))
2907 return -EINVAL;
2908
1da177e4 2909 /* copy_from_user should be SMP safe. */
47379052 2910 if (copy_from_user(a, args, len))
1da177e4 2911 return -EFAULT;
3ec3b2fb 2912
2950fa9d
CG
2913 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2914 if (err)
2915 return err;
3ec3b2fb 2916
89bddce5
SH
2917 a0 = a[0];
2918 a1 = a[1];
2919
2920 switch (call) {
2921 case SYS_SOCKET:
9d6a15c3 2922 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2923 break;
2924 case SYS_BIND:
a87d35d8 2925 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2926 break;
2927 case SYS_CONNECT:
1387c2c2 2928 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2929 break;
2930 case SYS_LISTEN:
25e290ee 2931 err = __sys_listen(a0, a1);
89bddce5
SH
2932 break;
2933 case SYS_ACCEPT:
4541e805
DB
2934 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2935 (int __user *)a[2], 0);
89bddce5
SH
2936 break;
2937 case SYS_GETSOCKNAME:
2938 err =
8882a107
DB
2939 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2940 (int __user *)a[2]);
89bddce5
SH
2941 break;
2942 case SYS_GETPEERNAME:
2943 err =
b21c8f83
DB
2944 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2945 (int __user *)a[2]);
89bddce5
SH
2946 break;
2947 case SYS_SOCKETPAIR:
6debc8d8 2948 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2949 break;
2950 case SYS_SEND:
f3bf896b
DB
2951 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2952 NULL, 0);
89bddce5
SH
2953 break;
2954 case SYS_SENDTO:
211b634b
DB
2955 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2956 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2957 break;
2958 case SYS_RECV:
d27e9afc
DB
2959 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2960 NULL, NULL);
89bddce5
SH
2961 break;
2962 case SYS_RECVFROM:
7a09e1eb
DB
2963 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2964 (struct sockaddr __user *)a[4],
2965 (int __user *)a[5]);
89bddce5
SH
2966 break;
2967 case SYS_SHUTDOWN:
005a1aea 2968 err = __sys_shutdown(a0, a1);
89bddce5
SH
2969 break;
2970 case SYS_SETSOCKOPT:
cc36dca0
DB
2971 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2972 a[4]);
89bddce5
SH
2973 break;
2974 case SYS_GETSOCKOPT:
2975 err =
13a2d70e
DB
2976 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2977 (int __user *)a[4]);
89bddce5
SH
2978 break;
2979 case SYS_SENDMSG:
e1834a32
DB
2980 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2981 a[2], true);
89bddce5 2982 break;
228e548e 2983 case SYS_SENDMMSG:
e1834a32
DB
2984 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2985 a[3], true);
228e548e 2986 break;
89bddce5 2987 case SYS_RECVMSG:
e1834a32
DB
2988 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2989 a[2], true);
89bddce5 2990 break;
a2e27255 2991 case SYS_RECVMMSG:
3ca47e95 2992 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2993 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2994 a[2], a[3],
2995 (struct __kernel_timespec __user *)a[4],
2996 NULL);
2997 else
2998 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2999 a[2], a[3], NULL,
3000 (struct old_timespec32 __user *)a[4]);
a2e27255 3001 break;
de11defe 3002 case SYS_ACCEPT4:
4541e805
DB
3003 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3004 (int __user *)a[2], a[3]);
aaca0bdc 3005 break;
89bddce5
SH
3006 default:
3007 err = -EINVAL;
3008 break;
1da177e4
LT
3009 }
3010 return err;
3011}
3012
89bddce5 3013#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3014
55737fda
SH
3015/**
3016 * sock_register - add a socket protocol handler
3017 * @ops: description of protocol
3018 *
1da177e4
LT
3019 * This function is called by a protocol handler that wants to
3020 * advertise its address family, and have it linked into the
e793c0f7 3021 * socket interface. The value ops->family corresponds to the
55737fda 3022 * socket system call protocol family.
1da177e4 3023 */
f0fd27d4 3024int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3025{
3026 int err;
3027
3028 if (ops->family >= NPROTO) {
3410f22e 3029 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3030 return -ENOBUFS;
3031 }
55737fda
SH
3032
3033 spin_lock(&net_family_lock);
190683a9
ED
3034 if (rcu_dereference_protected(net_families[ops->family],
3035 lockdep_is_held(&net_family_lock)))
55737fda
SH
3036 err = -EEXIST;
3037 else {
cf778b00 3038 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3039 err = 0;
3040 }
55737fda
SH
3041 spin_unlock(&net_family_lock);
3042
fe0bdbde 3043 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3044 return err;
3045}
c6d409cf 3046EXPORT_SYMBOL(sock_register);
1da177e4 3047
55737fda
SH
3048/**
3049 * sock_unregister - remove a protocol handler
3050 * @family: protocol family to remove
3051 *
1da177e4
LT
3052 * This function is called by a protocol handler that wants to
3053 * remove its address family, and have it unlinked from the
55737fda
SH
3054 * new socket creation.
3055 *
3056 * If protocol handler is a module, then it can use module reference
3057 * counts to protect against new references. If protocol handler is not
3058 * a module then it needs to provide its own protection in
3059 * the ops->create routine.
1da177e4 3060 */
f0fd27d4 3061void sock_unregister(int family)
1da177e4 3062{
f0fd27d4 3063 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3064
55737fda 3065 spin_lock(&net_family_lock);
a9b3cd7f 3066 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3067 spin_unlock(&net_family_lock);
3068
3069 synchronize_rcu();
3070
fe0bdbde 3071 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3072}
c6d409cf 3073EXPORT_SYMBOL(sock_unregister);
1da177e4 3074
bf2ae2e4
XL
3075bool sock_is_registered(int family)
3076{
66b51b0a 3077 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3078}
3079
77d76ea3 3080static int __init sock_init(void)
1da177e4 3081{
b3e19d92 3082 int err;
2ca794e5
EB
3083 /*
3084 * Initialize the network sysctl infrastructure.
3085 */
3086 err = net_sysctl_init();
3087 if (err)
3088 goto out;
b3e19d92 3089
1da177e4 3090 /*
89bddce5 3091 * Initialize skbuff SLAB cache
1da177e4
LT
3092 */
3093 skb_init();
1da177e4
LT
3094
3095 /*
89bddce5 3096 * Initialize the protocols module.
1da177e4
LT
3097 */
3098
3099 init_inodecache();
b3e19d92
NP
3100
3101 err = register_filesystem(&sock_fs_type);
3102 if (err)
47260ba9 3103 goto out;
1da177e4 3104 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3105 if (IS_ERR(sock_mnt)) {
3106 err = PTR_ERR(sock_mnt);
3107 goto out_mount;
3108 }
77d76ea3
AK
3109
3110 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3111 */
3112
3113#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3114 err = netfilter_init();
3115 if (err)
3116 goto out;
1da177e4 3117#endif
cbeb321a 3118
408eccce 3119 ptp_classifier_init();
c1f19b51 3120
b3e19d92
NP
3121out:
3122 return err;
3123
3124out_mount:
3125 unregister_filesystem(&sock_fs_type);
b3e19d92 3126 goto out;
1da177e4
LT
3127}
3128
77d76ea3
AK
3129core_initcall(sock_init); /* early initcall */
3130
#ifdef CONFIG_PROC_FS
/* Print the "sockets: used N" line, with N taken from sock_inuse_get(). */
void socket_seq_show(struct seq_file *seq)
{
	int used = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", used);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 3138
29c49648
AB
3139/* Handle the fact that while struct ifreq has the same *layout* on
3140 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3141 * which are handled elsewhere, it still has different *size* due to
3142 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3143 * resulting in struct ifreq being 32 and 40 bytes respectively).
3144 * As a result, if the struct happens to be at the end of a page and
3145 * the next page isn't readable/writable, we get a fault. To prevent
3146 * that, copy back and forth to the full size.
3147 */
3148int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
3149{
3150 if (in_compat_syscall()) {
3151 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
3152
3153 memset(ifr, 0, sizeof(*ifr));
3154 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3155 return -EFAULT;
3156
3157 if (ifrdata)
3158 *ifrdata = compat_ptr(ifr32->ifr_data);
3159
3160 return 0;
3161 }
3162
3163 if (copy_from_user(ifr, arg, sizeof(*ifr)))
3164 return -EFAULT;
3165
3166 if (ifrdata)
3167 *ifrdata = ifr->ifr_data;
3168
3169 return 0;
3170}
3171EXPORT_SYMBOL(get_user_ifreq);
3172
3173int put_user_ifreq(struct ifreq *ifr, void __user *arg)
3174{
3175 size_t size = sizeof(*ifr);
3176
3177 if (in_compat_syscall())
3178 size = sizeof(struct compat_ifreq);
3179
3180 if (copy_to_user(arg, ifr, size))
3181 return -EFAULT;
3182
3183 return 0;
3184}
3185EXPORT_SYMBOL(put_user_ifreq);
3186
89bbfc95 3187#ifdef CONFIG_COMPAT
7a50a240
AB
3188static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3189{
7a50a240 3190 compat_uptr_t uptr32;
44c02a2c
AV
3191 struct ifreq ifr;
3192 void __user *saved;
3193 int err;
7a50a240 3194
29c49648 3195 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3196 return -EFAULT;
3197
3198 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3199 return -EFAULT;
3200
44c02a2c
AV
3201 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3202 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3203
a554bf96 3204 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3205 if (!err) {
3206 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3207 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3208 err = -EFAULT;
ccbd6a5a 3209 }
44c02a2c 3210 return err;
7a229387
AB
3211}
3212
590d4693
BH
3213/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3214static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3215 struct compat_ifreq __user *u_ifreq32)
7a229387 3216{
44c02a2c 3217 struct ifreq ifreq;
a554bf96 3218 void __user *data;
7a229387 3219
a554bf96 3220 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3221 return -EFAULT;
a554bf96 3222 ifreq.ifr_data = data;
7a229387 3223
a554bf96 3224 return dev_ioctl(net, cmd, &ifreq, data, NULL);
37ac39bd
JB
3225}
3226
7a229387
AB
3227/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3228 * for some operations; this forces use of the newer bridge-utils that
25985edc 3229 * use compatible ioctls
7a229387 3230 */
6b96018b 3231static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3232{
6b96018b 3233 compat_ulong_t tmp;
7a229387 3234
6b96018b 3235 if (get_user(tmp, argp))
7a229387
AB
3236 return -EFAULT;
3237 if (tmp == BRCTL_GET_VERSION)
3238 return BRCTL_VERSION + 1;
3239 return -EINVAL;
3240}
3241
6b96018b
AB
3242static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3243 unsigned int cmd, unsigned long arg)
3244{
3245 void __user *argp = compat_ptr(arg);
3246 struct sock *sk = sock->sk;
3247 struct net *net = sock_net(sk);
7a229387 3248
6b96018b 3249 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3250 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3251
3252 switch (cmd) {
3253 case SIOCSIFBR:
3254 case SIOCGIFBR:
3255 return old_bridge_ioctl(argp);
7a50a240
AB
3256 case SIOCWANDEV:
3257 return compat_siocwandev(net, argp);
0768e170
AB
3258 case SIOCGSTAMP_OLD:
3259 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3260 if (!sock->ops->gettstamp)
3261 return -ENOIOCTLCMD;
0768e170 3262 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3263 !COMPAT_USE_64BIT_TIME);
3264
dd98d289 3265 case SIOCETHTOOL:
590d4693
BH
3266 case SIOCBONDSLAVEINFOQUERY:
3267 case SIOCBONDINFOQUERY:
a2116ed2 3268 case SIOCSHWTSTAMP:
fd468c74 3269 case SIOCGHWTSTAMP:
590d4693 3270 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3271
3272 case FIOSETOWN:
3273 case SIOCSPGRP:
3274 case FIOGETOWN:
3275 case SIOCGPGRP:
3276 case SIOCBRADDBR:
3277 case SIOCBRDELBR:
3278 case SIOCGIFVLAN:
3279 case SIOCSIFVLAN:
c62cce2c 3280 case SIOCGSKNS:
0768e170
AB
3281 case SIOCGSTAMP_NEW:
3282 case SIOCGSTAMPNS_NEW:
876f0bf9 3283 case SIOCGIFCONF:
6b96018b
AB
3284 return sock_ioctl(file, cmd, arg);
3285
3286 case SIOCGIFFLAGS:
3287 case SIOCSIFFLAGS:
709566d7
AB
3288 case SIOCGIFMAP:
3289 case SIOCSIFMAP:
6b96018b
AB
3290 case SIOCGIFMETRIC:
3291 case SIOCSIFMETRIC:
3292 case SIOCGIFMTU:
3293 case SIOCSIFMTU:
3294 case SIOCGIFMEM:
3295 case SIOCSIFMEM:
3296 case SIOCGIFHWADDR:
3297 case SIOCSIFHWADDR:
3298 case SIOCADDMULTI:
3299 case SIOCDELMULTI:
3300 case SIOCGIFINDEX:
6b96018b
AB
3301 case SIOCGIFADDR:
3302 case SIOCSIFADDR:
3303 case SIOCSIFHWBROADCAST:
6b96018b 3304 case SIOCDIFADDR:
6b96018b
AB
3305 case SIOCGIFBRDADDR:
3306 case SIOCSIFBRDADDR:
3307 case SIOCGIFDSTADDR:
3308 case SIOCSIFDSTADDR:
3309 case SIOCGIFNETMASK:
3310 case SIOCSIFNETMASK:
3311 case SIOCSIFPFLAGS:
3312 case SIOCGIFPFLAGS:
3313 case SIOCGIFTXQLEN:
3314 case SIOCSIFTXQLEN:
3315 case SIOCBRADDIF:
3316 case SIOCBRDELIF:
c6c9fee3 3317 case SIOCGIFNAME:
9177efd3
AB
3318 case SIOCSIFNAME:
3319 case SIOCGMIIPHY:
3320 case SIOCGMIIREG:
3321 case SIOCSMIIREG:
f92d4fc9
AV
3322 case SIOCBONDENSLAVE:
3323 case SIOCBONDRELEASE:
3324 case SIOCBONDSETHWADDR:
3325 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3326 case SIOCSARP:
3327 case SIOCGARP:
3328 case SIOCDARP:
c7dc504e 3329 case SIOCOUTQ:
9d7bf41f 3330 case SIOCOUTQNSD:
6b96018b 3331 case SIOCATMARK:
63ff03ab 3332 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3333 }
3334
6b96018b
AB
3335 return -ENOIOCTLCMD;
3336}
7a229387 3337
95c96174 3338static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3339 unsigned long arg)
89bbfc95
SP
3340{
3341 struct socket *sock = file->private_data;
3342 int ret = -ENOIOCTLCMD;
87de87d5
DM
3343 struct sock *sk;
3344 struct net *net;
3345
3346 sk = sock->sk;
3347 net = sock_net(sk);
89bbfc95
SP
3348
3349 if (sock->ops->compat_ioctl)
3350 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3351
87de87d5
DM
3352 if (ret == -ENOIOCTLCMD &&
3353 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3354 ret = compat_wext_handle_ioctl(net, cmd, arg);
3355
6b96018b
AB
3356 if (ret == -ENOIOCTLCMD)
3357 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3358
89bbfc95
SP
3359 return ret;
3360}
3361#endif
3362
8a3c245c
PT
3363/**
3364 * kernel_bind - bind an address to a socket (kernel space)
3365 * @sock: socket
3366 * @addr: address
3367 * @addrlen: length of address
3368 *
3369 * Returns 0 or an error.
3370 */
3371
ac5a488e
SS
3372int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3373{
3374 return sock->ops->bind(sock, addr, addrlen);
3375}
c6d409cf 3376EXPORT_SYMBOL(kernel_bind);
ac5a488e 3377
8a3c245c
PT
3378/**
3379 * kernel_listen - move socket to listening state (kernel space)
3380 * @sock: socket
3381 * @backlog: pending connections queue size
3382 *
3383 * Returns 0 or an error.
3384 */
3385
ac5a488e
SS
3386int kernel_listen(struct socket *sock, int backlog)
3387{
3388 return sock->ops->listen(sock, backlog);
3389}
c6d409cf 3390EXPORT_SYMBOL(kernel_listen);
ac5a488e 3391
8a3c245c
PT
3392/**
3393 * kernel_accept - accept a connection (kernel space)
3394 * @sock: listening socket
3395 * @newsock: new connected socket
3396 * @flags: flags
3397 *
3398 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3399 * If it fails, @newsock is guaranteed to be %NULL.
3400 * Returns 0 or an error.
3401 */
3402
ac5a488e
SS
3403int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3404{
3405 struct sock *sk = sock->sk;
3406 int err;
3407
3408 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3409 newsock);
3410 if (err < 0)
3411 goto done;
3412
cdfbabfb 3413 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3414 if (err < 0) {
3415 sock_release(*newsock);
fa8705b0 3416 *newsock = NULL;
ac5a488e
SS
3417 goto done;
3418 }
3419
3420 (*newsock)->ops = sock->ops;
1b08534e 3421 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3422
3423done:
3424 return err;
3425}
c6d409cf 3426EXPORT_SYMBOL(kernel_accept);
ac5a488e 3427
8a3c245c
PT
3428/**
3429 * kernel_connect - connect a socket (kernel space)
3430 * @sock: socket
3431 * @addr: address
3432 * @addrlen: address length
3433 * @flags: flags (O_NONBLOCK, ...)
3434 *
f1dcffcc 3435 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3436 * by default, and the only address from which datagrams are received.
3437 * For stream sockets, attempts to connect to @addr.
3438 * Returns 0 or an error code.
3439 */
3440
ac5a488e 3441int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3442 int flags)
ac5a488e
SS
3443{
3444 return sock->ops->connect(sock, addr, addrlen, flags);
3445}
c6d409cf 3446EXPORT_SYMBOL(kernel_connect);
ac5a488e 3447
8a3c245c
PT
3448/**
3449 * kernel_getsockname - get the address which the socket is bound (kernel space)
3450 * @sock: socket
3451 * @addr: address holder
3452 *
3453 * Fills the @addr pointer with the address which the socket is bound.
3454 * Returns 0 or an error code.
3455 */
3456
9b2c45d4 3457int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3458{
9b2c45d4 3459 return sock->ops->getname(sock, addr, 0);
ac5a488e 3460}
c6d409cf 3461EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3462
8a3c245c 3463/**
645f0897 3464 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3465 * @sock: socket
3466 * @addr: address holder
3467 *
3468 * Fills the @addr pointer with the address which the socket is connected.
3469 * Returns 0 or an error code.
3470 */
3471
9b2c45d4 3472int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3473{
9b2c45d4 3474 return sock->ops->getname(sock, addr, 1);
ac5a488e 3475}
c6d409cf 3476EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3477
8a3c245c
PT
3478/**
3479 * kernel_sendpage - send a &page through a socket (kernel space)
3480 * @sock: socket
3481 * @page: page
3482 * @offset: page offset
3483 * @size: total size in bytes
3484 * @flags: flags (MSG_DONTWAIT, ...)
3485 *
3486 * Returns the total amount sent in bytes or an error.
3487 */
3488
ac5a488e
SS
3489int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3490 size_t size, int flags)
3491{
7b62d31d
CL
3492 if (sock->ops->sendpage) {
3493 /* Warn in case the improper page to zero-copy send */
3494 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3495 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3496 }
ac5a488e
SS
3497 return sock_no_sendpage(sock, page, offset, size, flags);
3498}
c6d409cf 3499EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3500
8a3c245c
PT
3501/**
3502 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3503 * @sk: sock
3504 * @page: page
3505 * @offset: page offset
3506 * @size: total size in bytes
3507 * @flags: flags (MSG_DONTWAIT, ...)
3508 *
3509 * Returns the total amount sent in bytes or an error.
3510 * Caller must hold @sk.
3511 */
3512
306b13eb
TH
3513int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3514 size_t size, int flags)
3515{
3516 struct socket *sock = sk->sk_socket;
3517
3518 if (sock->ops->sendpage_locked)
3519 return sock->ops->sendpage_locked(sk, page, offset, size,
3520 flags);
3521
3522 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3523}
3524EXPORT_SYMBOL(kernel_sendpage_locked);
3525
8a3c245c 3526/**
645f0897 3527 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3528 * @sock: socket
3529 * @how: connection part
3530 *
3531 * Returns 0 or an error.
3532 */
3533
91cf45f0
TM
3534int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3535{
3536 return sock->ops->shutdown(sock, how);
3537}
91cf45f0 3538EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3539
8a3c245c
PT
3540/**
3541 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3542 * @sk: socket
3543 *
3544 * This routine returns the IP overhead imposed by a socket i.e.
3545 * the length of the underlying IP header, depending on whether
3546 * this is an IPv4 or IPv6 socket and the length from IP options turned
3547 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3548 */
8a3c245c 3549
113c3075
P
3550u32 kernel_sock_ip_overhead(struct sock *sk)
3551{
3552 struct inet_sock *inet;
3553 struct ip_options_rcu *opt;
3554 u32 overhead = 0;
113c3075
P
3555#if IS_ENABLED(CONFIG_IPV6)
3556 struct ipv6_pinfo *np;
3557 struct ipv6_txoptions *optv6 = NULL;
3558#endif /* IS_ENABLED(CONFIG_IPV6) */
3559
3560 if (!sk)
3561 return overhead;
3562
113c3075
P
3563 switch (sk->sk_family) {
3564 case AF_INET:
3565 inet = inet_sk(sk);
3566 overhead += sizeof(struct iphdr);
3567 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3568 sock_owned_by_user(sk));
113c3075
P
3569 if (opt)
3570 overhead += opt->opt.optlen;
3571 return overhead;
3572#if IS_ENABLED(CONFIG_IPV6)
3573 case AF_INET6:
3574 np = inet6_sk(sk);
3575 overhead += sizeof(struct ipv6hdr);
3576 if (np)
3577 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3578 sock_owned_by_user(sk));
113c3075
P
3579 if (optv6)
3580 overhead += (optv6->opt_flen + optv6->opt_nflen);
3581 return overhead;
3582#endif /* IS_ENABLED(CONFIG_IPV6) */
3583 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3584 return overhead;
3585 }
3586}
3587EXPORT_SYMBOL(kernel_sock_ip_overhead);