]> git.ipfire.org Git - people/ms/linux.git/blame - net/socket.c
io_uring: only return -EBUSY for submit on non-flushed backlog
[people/ms/linux.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
6b96018b 103#include <linux/sockios.h>
076bb0c8 104#include <net/busy_poll.h>
f24b9be5 105#include <linux/errqueue.h>
06021292 106
e0d1095a 107#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
108unsigned int sysctl_net_busy_read __read_mostly;
109unsigned int sysctl_net_busy_poll __read_mostly;
06021292 110#endif
6b96018b 111
8ae5e030
AV
112static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
113static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 114static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
115
116static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
117static __poll_t sock_poll(struct file *file,
118 struct poll_table_struct *wait);
89bddce5 119static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
120#ifdef CONFIG_COMPAT
121static long compat_sock_ioctl(struct file *file,
89bddce5 122 unsigned int cmd, unsigned long arg);
89bbfc95 123#endif
1da177e4 124static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
125static ssize_t sock_sendpage(struct file *file, struct page *page,
126 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 127static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 128 struct pipe_inode_info *pipe, size_t len,
9c55e01c 129 unsigned int flags);
1da177e4 130
1da177e4
LT
131/*
132 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
133 * in the operation structures but are done directly via the socketcall() multiplexor.
134 */
135
da7071d7 136static const struct file_operations socket_file_ops = {
1da177e4
LT
137 .owner = THIS_MODULE,
138 .llseek = no_llseek,
8ae5e030
AV
139 .read_iter = sock_read_iter,
140 .write_iter = sock_write_iter,
1da177e4
LT
141 .poll = sock_poll,
142 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
143#ifdef CONFIG_COMPAT
144 .compat_ioctl = compat_sock_ioctl,
145#endif
1da177e4 146 .mmap = sock_mmap,
1da177e4
LT
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4 161/*
89bddce5
SH
162 * Support routines.
163 * Move socket addresses back and forth across the kernel/user
164 * divide and look after the messy bits.
1da177e4
LT
165 */
166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
43db362d 178int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 179{
230b1839 180 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5 205
43db362d 206static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 207 void __user *uaddr, int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
68c6beb3 212 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
213 err = get_user(len, ulen);
214 if (err)
1da177e4 215 return err;
89bddce5
SH
216 if (len > klen)
217 len = klen;
68c6beb3 218 if (len < 0)
1da177e4 219 return -EINVAL;
89bddce5 220 if (len) {
d6fe3945
SG
221 if (audit_sockaddr(klen, kaddr))
222 return -ENOMEM;
89bddce5 223 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
224 return -EFAULT;
225 }
226 /*
89bddce5
SH
227 * "fromlen shall refer to the value before truncation.."
228 * 1003.1g
1da177e4
LT
229 */
230 return __put_user(klen, ulen);
231}
232
08009a76 233static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
333f7909
AV
242 init_waitqueue_head(&ei->socket.wq.wait);
243 ei->socket.wq.fasync_list = NULL;
244 ei->socket.wq.flags = 0;
89bddce5 245
1da177e4
LT
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
6d7855c5 255static void sock_free_inode(struct inode *inode)
1da177e4 256{
43815482
ED
257 struct socket_alloc *ei;
258
259 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 260 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
261}
262
51cc5068 263static void init_once(void *foo)
1da177e4 264{
89bddce5 265 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 266
a35afb83 267 inode_init_once(&ei->vfs_inode);
1da177e4 268}
89bddce5 269
1e911632 270static void init_inodecache(void)
1da177e4
LT
271{
272 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
273 sizeof(struct socket_alloc),
274 0,
275 (SLAB_HWCACHE_ALIGN |
276 SLAB_RECLAIM_ACCOUNT |
5d097056 277 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 278 init_once);
1e911632 279 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
280}
281
b87221de 282static const struct super_operations sockfs_ops = {
c6d409cf 283 .alloc_inode = sock_alloc_inode,
6d7855c5 284 .free_inode = sock_free_inode,
c6d409cf 285 .statfs = simple_statfs,
1da177e4
LT
286};
287
c23fbb6b
ED
288/*
289 * sockfs_dname() is called from d_path().
290 */
291static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
292{
293 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 294 d_inode(dentry)->i_ino);
c23fbb6b
ED
295}
296
3ba13d17 297static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 298 .d_dname = sockfs_dname,
1da177e4
LT
299};
300
bba0bd31
AG
301static int sockfs_xattr_get(const struct xattr_handler *handler,
302 struct dentry *dentry, struct inode *inode,
303 const char *suffix, void *value, size_t size)
304{
305 if (value) {
306 if (dentry->d_name.len + 1 > size)
307 return -ERANGE;
308 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
309 }
310 return dentry->d_name.len + 1;
311}
312
313#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
314#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
315#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
316
317static const struct xattr_handler sockfs_xattr_handler = {
318 .name = XATTR_NAME_SOCKPROTONAME,
319 .get = sockfs_xattr_get,
320};
321
4a590153
AG
322static int sockfs_security_xattr_set(const struct xattr_handler *handler,
323 struct dentry *dentry, struct inode *inode,
324 const char *suffix, const void *value,
325 size_t size, int flags)
326{
327 /* Handled by LSM. */
328 return -EAGAIN;
329}
330
331static const struct xattr_handler sockfs_security_xattr_handler = {
332 .prefix = XATTR_SECURITY_PREFIX,
333 .set = sockfs_security_xattr_set,
334};
335
bba0bd31
AG
336static const struct xattr_handler *sockfs_xattr_handlers[] = {
337 &sockfs_xattr_handler,
4a590153 338 &sockfs_security_xattr_handler,
bba0bd31
AG
339 NULL
340};
341
fba9be49 342static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 343{
fba9be49
DH
344 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
345 if (!ctx)
346 return -ENOMEM;
347 ctx->ops = &sockfs_ops;
348 ctx->dops = &sockfs_dentry_operations;
349 ctx->xattr = sockfs_xattr_handlers;
350 return 0;
c74a1cbb
AV
351}
352
353static struct vfsmount *sock_mnt __read_mostly;
354
355static struct file_system_type sock_fs_type = {
356 .name = "sockfs",
fba9be49 357 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
358 .kill_sb = kill_anon_super,
359};
360
1da177e4
LT
361/*
362 * Obtains the first available file descriptor and sets it up for use.
363 *
39d8c1b6
DM
364 * These functions create file structures and maps them to fd space
365 * of the current process. On success it returns file descriptor
1da177e4
LT
366 * and file struct implicitly stored in sock->file.
367 * Note that another thread may close file descriptor before we return
368 * from this function. We use the fact that now we do not refer
369 * to socket after mapping. If one day we will need it, this
370 * function will increment ref. count on file by 1.
371 *
372 * In any case returned fd MAY BE not valid!
373 * This race condition is unavoidable
374 * with shared fd spaces, we cannot solve it inside kernel,
375 * but we take care of internal coherence yet.
376 */
377
8a3c245c
PT
378/**
379 * sock_alloc_file - Bind a &socket to a &file
380 * @sock: socket
381 * @flags: file status flags
382 * @dname: protocol name
383 *
384 * Returns the &file bound with @sock, implicitly storing it
385 * in sock->file. If dname is %NULL, sets to "".
386 * On failure the return is a ERR pointer (see linux/err.h).
387 * This function uses GFP_KERNEL internally.
388 */
389
aab174f0 390struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 391{
7cbe66b6 392 struct file *file;
1da177e4 393
d93aa9d8
AV
394 if (!dname)
395 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 396
d93aa9d8
AV
397 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
398 O_RDWR | (flags & O_NONBLOCK),
399 &socket_file_ops);
b5ffe634 400 if (IS_ERR(file)) {
8e1611e2 401 sock_release(sock);
39b65252 402 return file;
cc3808f8
AV
403 }
404
405 sock->file = file;
39d8c1b6 406 file->private_data = sock;
d8e464ec 407 stream_open(SOCK_INODE(sock), file);
28407630 408 return file;
39d8c1b6 409}
56b31d1c 410EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 411
56b31d1c 412static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
413{
414 struct file *newfile;
28407630 415 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
416 if (unlikely(fd < 0)) {
417 sock_release(sock);
28407630 418 return fd;
ce4bb04c 419 }
39d8c1b6 420
aab174f0 421 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 422 if (!IS_ERR(newfile)) {
39d8c1b6 423 fd_install(fd, newfile);
28407630
AV
424 return fd;
425 }
7cbe66b6 426
28407630
AV
427 put_unused_fd(fd);
428 return PTR_ERR(newfile);
1da177e4
LT
429}
430
8a3c245c
PT
431/**
432 * sock_from_file - Return the &socket bounded to @file.
433 * @file: file
434 * @err: pointer to an error code return
435 *
436 * On failure returns %NULL and assigns -ENOTSOCK to @err.
437 */
438
406a3c63 439struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 440{
6cb153ca
BL
441 if (file->f_op == &socket_file_ops)
442 return file->private_data; /* set in sock_map_fd */
443
23bb80d2
ED
444 *err = -ENOTSOCK;
445 return NULL;
6cb153ca 446}
406a3c63 447EXPORT_SYMBOL(sock_from_file);
6cb153ca 448
1da177e4 449/**
c6d409cf 450 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
451 * @fd: file handle
452 * @err: pointer to an error code return
453 *
454 * The file handle passed in is locked and the socket it is bound
241c4667 455 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
456 * with a negative errno code and NULL is returned. The function checks
457 * for both invalid handles and passing a handle which is not a socket.
458 *
459 * On a success the socket object pointer is returned.
460 */
461
462struct socket *sockfd_lookup(int fd, int *err)
463{
464 struct file *file;
1da177e4
LT
465 struct socket *sock;
466
89bddce5
SH
467 file = fget(fd);
468 if (!file) {
1da177e4
LT
469 *err = -EBADF;
470 return NULL;
471 }
89bddce5 472
6cb153ca
BL
473 sock = sock_from_file(file, err);
474 if (!sock)
1da177e4 475 fput(file);
6cb153ca
BL
476 return sock;
477}
c6d409cf 478EXPORT_SYMBOL(sockfd_lookup);
1da177e4 479
6cb153ca
BL
480static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
481{
00e188ef 482 struct fd f = fdget(fd);
6cb153ca
BL
483 struct socket *sock;
484
3672558c 485 *err = -EBADF;
00e188ef
AV
486 if (f.file) {
487 sock = sock_from_file(f.file, err);
488 if (likely(sock)) {
489 *fput_needed = f.flags;
6cb153ca 490 return sock;
00e188ef
AV
491 }
492 fdput(f);
1da177e4 493 }
6cb153ca 494 return NULL;
1da177e4
LT
495}
496
600e1779
MY
497static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
498 size_t size)
499{
500 ssize_t len;
501 ssize_t used = 0;
502
c5ef6035 503 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
504 if (len < 0)
505 return len;
506 used += len;
507 if (buffer) {
508 if (size < used)
509 return -ERANGE;
510 buffer += len;
511 }
512
513 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
519 buffer += len;
520 }
521
522 return used;
523}
524
dc647ec8 525static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
526{
527 int err = simple_setattr(dentry, iattr);
528
e1a3a60a 529 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
530 struct socket *sock = SOCKET_I(d_inode(dentry));
531
6d8c50dc
CW
532 if (sock->sk)
533 sock->sk->sk_uid = iattr->ia_uid;
534 else
535 err = -ENOENT;
86741ec2
LC
536 }
537
538 return err;
539}
540
600e1779 541static const struct inode_operations sockfs_inode_ops = {
600e1779 542 .listxattr = sockfs_listxattr,
86741ec2 543 .setattr = sockfs_setattr,
600e1779
MY
544};
545
1da177e4 546/**
8a3c245c 547 * sock_alloc - allocate a socket
89bddce5 548 *
1da177e4
LT
549 * Allocate a new inode and socket object. The two are bound together
550 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 551 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
552 */
553
f4a00aac 554struct socket *sock_alloc(void)
1da177e4 555{
89bddce5
SH
556 struct inode *inode;
557 struct socket *sock;
1da177e4 558
a209dfc7 559 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
560 if (!inode)
561 return NULL;
562
563 sock = SOCKET_I(inode);
564
85fe4025 565 inode->i_ino = get_next_ino();
89bddce5 566 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
567 inode->i_uid = current_fsuid();
568 inode->i_gid = current_fsgid();
600e1779 569 inode->i_op = &sockfs_inode_ops;
1da177e4 570
1da177e4
LT
571 return sock;
572}
f4a00aac 573EXPORT_SYMBOL(sock_alloc);
1da177e4 574
1da177e4 575/**
8a3c245c 576 * sock_release - close a socket
1da177e4
LT
577 * @sock: socket to close
578 *
579 * The socket is released from the protocol stack if it has a release
580 * callback, and the inode is then released if the socket is bound to
89bddce5 581 * an inode not a file.
1da177e4 582 */
89bddce5 583
6d8c50dc 584static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
585{
586 if (sock->ops) {
587 struct module *owner = sock->ops->owner;
588
6d8c50dc
CW
589 if (inode)
590 inode_lock(inode);
1da177e4 591 sock->ops->release(sock);
ff7b11aa 592 sock->sk = NULL;
6d8c50dc
CW
593 if (inode)
594 inode_unlock(inode);
1da177e4
LT
595 sock->ops = NULL;
596 module_put(owner);
597 }
598
333f7909 599 if (sock->wq.fasync_list)
3410f22e 600 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 601
1da177e4
LT
602 if (!sock->file) {
603 iput(SOCK_INODE(sock));
604 return;
605 }
89bddce5 606 sock->file = NULL;
1da177e4 607}
6d8c50dc
CW
608
609void sock_release(struct socket *sock)
610{
611 __sock_release(sock, NULL);
612}
c6d409cf 613EXPORT_SYMBOL(sock_release);
1da177e4 614
c14ac945 615void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 616{
140c55d4
ED
617 u8 flags = *tx_flags;
618
c14ac945 619 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
620 flags |= SKBTX_HW_TSTAMP;
621
c14ac945 622 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
623 flags |= SKBTX_SW_TSTAMP;
624
c14ac945 625 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
626 flags |= SKBTX_SCHED_TSTAMP;
627
140c55d4 628 *tx_flags = flags;
20d49473 629}
67cc0d40 630EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 631
8c3c447b
PA
632INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
633 size_t));
a648a592
PA
634INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
635 size_t));
d8725c86 636static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 637{
a648a592
PA
638 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
639 inet_sendmsg, sock, msg,
640 msg_data_left(msg));
d8725c86
AV
641 BUG_ON(ret == -EIOCBQUEUED);
642 return ret;
1da177e4
LT
643}
644
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM.  A non-zero result
 *	from the security hook is returned as is and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 661
8a3c245c
PT
662/**
663 * kernel_sendmsg - send a message through @sock (kernel-space)
664 * @sock: socket
665 * @msg: message header
666 * @vec: kernel vec
667 * @num: vec array length
668 * @size: total message data size
669 *
670 * Builds the message data with @vec and sends it through @sock.
671 * Returns the number of bytes sent, or an error code.
672 */
673
1da177e4
LT
674int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
675 struct kvec *vec, size_t num, size_t size)
676{
aa563d7b 677 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 678 return sock_sendmsg(sock, msg);
1da177e4 679}
c6d409cf 680EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 681
8a3c245c
PT
682/**
683 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
684 * @sk: sock
685 * @msg: message header
686 * @vec: output s/g array
687 * @num: output s/g array length
688 * @size: total message data size
689 *
690 * Builds the message data with @vec and sends it through @sock.
691 * Returns the number of bytes sent, or an error code.
692 * Caller must hold @sk.
693 */
694
306b13eb
TH
695int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
696 struct kvec *vec, size_t num, size_t size)
697{
698 struct socket *sock = sk->sk_socket;
699
700 if (!sock->ops->sendmsg_locked)
db5980d8 701 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 702
aa563d7b 703 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
704
705 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
706}
707EXPORT_SYMBOL(kernel_sendmsg_locked);
708
8605330a
SHY
709static bool skb_is_err_queue(const struct sk_buff *skb)
710{
711 /* pkt_type of skbs enqueued on the error queue are set to
712 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
713 * in recvmsg, since skbs received on a local socket will never
714 * have a pkt_type of PACKET_OUTGOING.
715 */
716 return skb->pkt_type == PACKET_OUTGOING;
717}
718
b50a5c70
ML
719/* On transmit, software and hardware timestamps are returned independently.
720 * As the two skb clones share the hardware timestamp, which may be updated
721 * before the software timestamp is received, a hardware TX timestamp may be
722 * returned only if there is no software TX timestamp. Ignore false software
723 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 724 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
725 * hardware timestamp.
726 */
727static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
728{
729 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
730}
731
aad9c8c4
ML
732static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
733{
734 struct scm_ts_pktinfo ts_pktinfo;
735 struct net_device *orig_dev;
736
737 if (!skb_mac_header_was_set(skb))
738 return;
739
740 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
741
742 rcu_read_lock();
743 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
744 if (orig_dev)
745 ts_pktinfo.if_index = orig_dev->ifindex;
746 rcu_read_unlock();
747
748 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
749 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
750 sizeof(ts_pktinfo), &ts_pktinfo);
751}
752
92f37fd2
ED
753/*
754 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
755 */
756void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
757 struct sk_buff *skb)
758{
20d49473 759 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 760 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
761 struct scm_timestamping_internal tss;
762
b50a5c70 763 int empty = 1, false_tstamp = 0;
20d49473
PO
764 struct skb_shared_hwtstamps *shhwtstamps =
765 skb_hwtstamps(skb);
766
767 /* Race occurred between timestamp enabling and packet
768 receiving. Fill in the current time for now. */
b50a5c70 769 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 770 __net_timestamp(skb);
b50a5c70
ML
771 false_tstamp = 1;
772 }
20d49473
PO
773
774 if (need_software_tstamp) {
775 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
776 if (new_tstamp) {
777 struct __kernel_sock_timeval tv;
778
779 skb_get_new_timestamp(skb, &tv);
780 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
781 sizeof(tv), &tv);
782 } else {
783 struct __kernel_old_timeval tv;
784
785 skb_get_timestamp(skb, &tv);
786 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
787 sizeof(tv), &tv);
788 }
20d49473 789 } else {
887feae3
DD
790 if (new_tstamp) {
791 struct __kernel_timespec ts;
792
793 skb_get_new_timestampns(skb, &ts);
794 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
795 sizeof(ts), &ts);
796 } else {
797 struct timespec ts;
798
799 skb_get_timestampns(skb, &ts);
800 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
801 sizeof(ts), &ts);
802 }
20d49473
PO
803 }
804 }
805
f24b9be5 806 memset(&tss, 0, sizeof(tss));
c199105d 807 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 808 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 809 empty = 0;
4d276eb6 810 if (shhwtstamps &&
b9f40e21 811 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 812 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 813 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 814 empty = 0;
aad9c8c4
ML
815 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
816 !skb_is_err_queue(skb))
817 put_ts_pktinfo(msg, skb);
818 }
1c885808 819 if (!empty) {
9718475e
DD
820 if (sock_flag(sk, SOCK_TSTAMP_NEW))
821 put_cmsg_scm_timestamping64(msg, &tss);
822 else
823 put_cmsg_scm_timestamping(msg, &tss);
1c885808 824
8605330a 825 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 826 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
827 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
828 skb->len, skb->data);
829 }
92f37fd2 830}
7c81fd8b
ACM
831EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
832
6e3e939f
JB
833void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
834 struct sk_buff *skb)
835{
836 int ack;
837
838 if (!sock_flag(sk, SOCK_WIFI_STATUS))
839 return;
840 if (!skb->wifi_acked_valid)
841 return;
842
843 ack = skb->wifi_acked;
844
845 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
846}
847EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
848
11165f14 849static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
850 struct sk_buff *skb)
3b885787 851{
744d5a3e 852 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 853 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 854 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
855}
856
/*
 * Attach the receive control messages for @skb to @msg: first the
 * timestamp ones, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 864
8c3c447b 865INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
866 size_t, int));
867INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
868 size_t, int));
1b784140 869static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 870 int flags)
1da177e4 871{
a648a592
PA
872 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
873 inet_recvmsg, sock, msg, msg_data_left(msg),
874 flags);
1da177e4
LT
875}
876
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM.  A non-zero result
 *	from the security hook is returned as is and nothing is received.
 *	Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 893
c1249c0a 894/**
8a3c245c
PT
895 * kernel_recvmsg - Receive a message from a socket (kernel space)
896 * @sock: The socket to receive the message from
897 * @msg: Received message
898 * @vec: Input s/g array for message data
899 * @num: Size of input s/g array
900 * @size: Number of bytes to read
901 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 902 *
8a3c245c
PT
903 * On return the msg structure contains the scatter/gather array passed in the
904 * vec argument. The array is modified so that it consists of the unfilled
905 * portion of the original array.
c1249c0a 906 *
8a3c245c 907 * The returned value is the total number of bytes received, or an error.
c1249c0a 908 */
8a3c245c 909
89bddce5
SH
910int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
911 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
912{
913 mm_segment_t oldfs = get_fs();
914 int result;
915
aa563d7b 916 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 917 set_fs(KERNEL_DS);
2da62906 918 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
919 set_fs(oldfs);
920 return result;
921}
c6d409cf 922EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 923
ce1d4d3e
CH
924static ssize_t sock_sendpage(struct file *file, struct page *page,
925 int offset, size_t size, loff_t *ppos, int more)
1da177e4 926{
1da177e4
LT
927 struct socket *sock;
928 int flags;
929
ce1d4d3e
CH
930 sock = file->private_data;
931
35f9c09f
ED
932 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
933 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
934 flags |= more;
ce1d4d3e 935
e6949583 936 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 937}
1da177e4 938
9c55e01c 939static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 940 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
941 unsigned int flags)
942{
943 struct socket *sock = file->private_data;
944
997b37da 945 if (unlikely(!sock->ops->splice_read))
95506588 946 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 947
9c55e01c
JA
948 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
949}
950
8ae5e030 951static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 952{
6d652330
AV
953 struct file *file = iocb->ki_filp;
954 struct socket *sock = file->private_data;
0345f931 955 struct msghdr msg = {.msg_iter = *to,
956 .msg_iocb = iocb};
8ae5e030 957 ssize_t res;
ce1d4d3e 958
8ae5e030
AV
959 if (file->f_flags & O_NONBLOCK)
960 msg.msg_flags = MSG_DONTWAIT;
961
962 if (iocb->ki_pos != 0)
1da177e4 963 return -ESPIPE;
027445c3 964
66ee59af 965 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
966 return 0;
967
2da62906 968 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
969 *to = msg.msg_iter;
970 return res;
1da177e4
LT
971}
972
8ae5e030 973static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 974{
6d652330
AV
975 struct file *file = iocb->ki_filp;
976 struct socket *sock = file->private_data;
0345f931 977 struct msghdr msg = {.msg_iter = *from,
978 .msg_iocb = iocb};
8ae5e030 979 ssize_t res;
1da177e4 980
8ae5e030 981 if (iocb->ki_pos != 0)
ce1d4d3e 982 return -ESPIPE;
027445c3 983
8ae5e030
AV
984 if (file->f_flags & O_NONBLOCK)
985 msg.msg_flags = MSG_DONTWAIT;
986
6d652330
AV
987 if (sock->type == SOCK_SEQPACKET)
988 msg.msg_flags |= MSG_EOR;
989
d8725c86 990 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
991 *from = msg.msg_iter;
992 return res;
1da177e4
LT
993}
994
1da177e4
LT
995/*
996 * Atomic setting of ioctl hooks to avoid race
997 * with module unload.
998 */
999
4a3e2f71 1000static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1001static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1002
881d966b 1003void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1004{
4a3e2f71 1005 mutex_lock(&br_ioctl_mutex);
1da177e4 1006 br_ioctl_hook = hook;
4a3e2f71 1007 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1008}
1009EXPORT_SYMBOL(brioctl_set);
1010
4a3e2f71 1011static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1012static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1013
881d966b 1014void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1015{
4a3e2f71 1016 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1017 vlan_ioctl_hook = hook;
4a3e2f71 1018 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1019}
1020EXPORT_SYMBOL(vlan_ioctl_set);
1021
4a3e2f71 1022static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1023static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1024
89bddce5 1025void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1026{
4a3e2f71 1027 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1028 dlci_ioctl_hook = hook;
4a3e2f71 1029 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1030}
1031EXPORT_SYMBOL(dlci_ioctl_set);
1032
6b96018b 1033static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1034 unsigned int cmd, unsigned long arg)
6b96018b
AB
1035{
1036 int err;
1037 void __user *argp = (void __user *)arg;
1038
1039 err = sock->ops->ioctl(sock, cmd, arg);
1040
1041 /*
1042 * If this ioctl is unknown try to hand it down
1043 * to the NIC driver.
1044 */
36fd633e
AV
1045 if (err != -ENOIOCTLCMD)
1046 return err;
6b96018b 1047
36fd633e
AV
1048 if (cmd == SIOCGIFCONF) {
1049 struct ifconf ifc;
1050 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1051 return -EFAULT;
1052 rtnl_lock();
1053 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1054 rtnl_unlock();
1055 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1056 err = -EFAULT;
44c02a2c
AV
1057 } else {
1058 struct ifreq ifr;
1059 bool need_copyout;
63ff03ab 1060 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1061 return -EFAULT;
1062 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1063 if (!err && need_copyout)
63ff03ab 1064 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1065 return -EFAULT;
36fd633e 1066 }
6b96018b
AB
1067 return err;
1068}
1069
1da177e4
LT
1070/*
1071 * With an ioctl, arg may well be a user mode pointer, but we don't know
1072 * what to do with it - that's up to the protocol still.
1073 */
1074
8a3c245c
PT
1075/**
1076 * get_net_ns - increment the refcount of the network namespace
1077 * @ns: common namespace (net)
1078 *
1079 * Returns the net's common namespace.
1080 */
1081
d8d211a2 1082struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1083{
1084 return &get_net(container_of(ns, struct net, ns))->ns;
1085}
d8d211a2 1086EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1087
1da177e4
LT
1088static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1089{
1090 struct socket *sock;
881d966b 1091 struct sock *sk;
1da177e4
LT
1092 void __user *argp = (void __user *)arg;
1093 int pid, err;
881d966b 1094 struct net *net;
1da177e4 1095
b69aee04 1096 sock = file->private_data;
881d966b 1097 sk = sock->sk;
3b1e0a65 1098 net = sock_net(sk);
44c02a2c
AV
1099 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1100 struct ifreq ifr;
1101 bool need_copyout;
1102 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1103 return -EFAULT;
1104 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1105 if (!err && need_copyout)
1106 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1107 return -EFAULT;
1da177e4 1108 } else
3d23e349 1109#ifdef CONFIG_WEXT_CORE
1da177e4 1110 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1111 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1112 } else
3d23e349 1113#endif
89bddce5 1114 switch (cmd) {
1da177e4
LT
1115 case FIOSETOWN:
1116 case SIOCSPGRP:
1117 err = -EFAULT;
1118 if (get_user(pid, (int __user *)argp))
1119 break;
393cc3f5 1120 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1121 break;
1122 case FIOGETOWN:
1123 case SIOCGPGRP:
609d7fa9 1124 err = put_user(f_getown(sock->file),
89bddce5 1125 (int __user *)argp);
1da177e4
LT
1126 break;
1127 case SIOCGIFBR:
1128 case SIOCSIFBR:
1129 case SIOCBRADDBR:
1130 case SIOCBRDELBR:
1131 err = -ENOPKG;
1132 if (!br_ioctl_hook)
1133 request_module("bridge");
1134
4a3e2f71 1135 mutex_lock(&br_ioctl_mutex);
89bddce5 1136 if (br_ioctl_hook)
881d966b 1137 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1138 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1139 break;
1140 case SIOCGIFVLAN:
1141 case SIOCSIFVLAN:
1142 err = -ENOPKG;
1143 if (!vlan_ioctl_hook)
1144 request_module("8021q");
1145
4a3e2f71 1146 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1147 if (vlan_ioctl_hook)
881d966b 1148 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1149 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1150 break;
1da177e4
LT
1151 case SIOCADDDLCI:
1152 case SIOCDELDLCI:
1153 err = -ENOPKG;
1154 if (!dlci_ioctl_hook)
1155 request_module("dlci");
1156
7512cbf6
PE
1157 mutex_lock(&dlci_ioctl_mutex);
1158 if (dlci_ioctl_hook)
1da177e4 1159 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1160 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1161 break;
c62cce2c
AV
1162 case SIOCGSKNS:
1163 err = -EPERM;
1164 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1165 break;
1166
1167 err = open_related_ns(&net->ns, get_net_ns);
1168 break;
0768e170
AB
1169 case SIOCGSTAMP_OLD:
1170 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1171 if (!sock->ops->gettstamp) {
1172 err = -ENOIOCTLCMD;
1173 break;
1174 }
1175 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1176 cmd == SIOCGSTAMP_OLD,
1177 !IS_ENABLED(CONFIG_64BIT));
60747828 1178 break;
0768e170
AB
1179 case SIOCGSTAMP_NEW:
1180 case SIOCGSTAMPNS_NEW:
1181 if (!sock->ops->gettstamp) {
1182 err = -ENOIOCTLCMD;
1183 break;
1184 }
1185 err = sock->ops->gettstamp(sock, argp,
1186 cmd == SIOCGSTAMP_NEW,
1187 false);
c7cbdbf2 1188 break;
1da177e4 1189 default:
63ff03ab 1190 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1191 break;
89bddce5 1192 }
1da177e4
LT
1193 return err;
1194}
1195
8a3c245c
PT
1196/**
1197 * sock_create_lite - creates a socket
1198 * @family: protocol family (AF_INET, ...)
1199 * @type: communication type (SOCK_STREAM, ...)
1200 * @protocol: protocol (0, ...)
1201 * @res: new socket
1202 *
1203 * Creates a new socket and assigns it to @res, passing through LSM.
1204 * The new socket initialization is not complete, see kernel_accept().
1205 * Returns 0 or an error. On failure @res is set to %NULL.
1206 * This function internally uses GFP_KERNEL.
1207 */
1208
1da177e4
LT
1209int sock_create_lite(int family, int type, int protocol, struct socket **res)
1210{
1211 int err;
1212 struct socket *sock = NULL;
89bddce5 1213
1da177e4
LT
1214 err = security_socket_create(family, type, protocol, 1);
1215 if (err)
1216 goto out;
1217
1218 sock = sock_alloc();
1219 if (!sock) {
1220 err = -ENOMEM;
1221 goto out;
1222 }
1223
1da177e4 1224 sock->type = type;
7420ed23
VY
1225 err = security_socket_post_create(sock, family, type, protocol, 1);
1226 if (err)
1227 goto out_release;
1228
1da177e4
LT
1229out:
1230 *res = sock;
1231 return err;
7420ed23
VY
1232out_release:
1233 sock_release(sock);
1234 sock = NULL;
1235 goto out;
1da177e4 1236}
c6d409cf 1237EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1238
1239/* No kernel lock held - perfect */
ade994f4 1240static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1241{
3cafb376 1242 struct socket *sock = file->private_data;
a331de3b 1243 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1244
e88958e6
CH
1245 if (!sock->ops->poll)
1246 return 0;
f641f13b 1247
a331de3b
CH
1248 if (sk_can_busy_loop(sock->sk)) {
1249 /* poll once if requested by the syscall */
1250 if (events & POLL_BUSY_LOOP)
1251 sk_busy_loop(sock->sk, 1);
1252
1253 /* if this socket can poll_ll, tell the system call */
1254 flag = POLL_BUSY_LOOP;
1255 }
1256
1257 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1258}
1259
89bddce5 1260static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1261{
b69aee04 1262 struct socket *sock = file->private_data;
1da177e4
LT
1263
1264 return sock->ops->mmap(file, sock, vma);
1265}
1266
/* Release the socket embedded in @inode; always returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1272
1273/*
1274 * Update the socket async list
1275 *
1276 * Fasync_list locking strategy.
1277 *
1278 * 1. fasync_list is modified only under process context socket lock
1279 * i.e. under semaphore.
1280 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1281 * or under socket lock
1da177e4
LT
1282 */
1283
1284static int sock_fasync(int fd, struct file *filp, int on)
1285{
989a2979
ED
1286 struct socket *sock = filp->private_data;
1287 struct sock *sk = sock->sk;
333f7909 1288 struct socket_wq *wq = &sock->wq;
1da177e4 1289
989a2979 1290 if (sk == NULL)
1da177e4 1291 return -EINVAL;
1da177e4
LT
1292
1293 lock_sock(sk);
eaefd110 1294 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1295
eaefd110 1296 if (!wq->fasync_list)
989a2979
ED
1297 sock_reset_flag(sk, SOCK_FASYNC);
1298 else
bcdce719 1299 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1300
989a2979 1301 release_sock(sk);
1da177e4
LT
1302 return 0;
1303}
1304
ceb5d58b 1305/* This function may be called only under rcu_lock */
1da177e4 1306
ceb5d58b 1307int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1308{
ceb5d58b 1309 if (!wq || !wq->fasync_list)
1da177e4 1310 return -1;
ceb5d58b 1311
89bddce5 1312 switch (how) {
8d8ad9d7 1313 case SOCK_WAKE_WAITD:
ceb5d58b 1314 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1315 break;
1316 goto call_kill;
8d8ad9d7 1317 case SOCK_WAKE_SPACE:
ceb5d58b 1318 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1319 break;
1320 /* fall through */
8d8ad9d7 1321 case SOCK_WAKE_IO:
89bddce5 1322call_kill:
43815482 1323 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1324 break;
8d8ad9d7 1325 case SOCK_WAKE_URG:
43815482 1326 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1327 }
ceb5d58b 1328
1da177e4
LT
1329 return 0;
1330}
c6d409cf 1331EXPORT_SYMBOL(sock_wake_async);
1da177e4 1332
8a3c245c
PT
1333/**
1334 * __sock_create - creates a socket
1335 * @net: net namespace
1336 * @family: protocol family (AF_INET, ...)
1337 * @type: communication type (SOCK_STREAM, ...)
1338 * @protocol: protocol (0, ...)
1339 * @res: new socket
1340 * @kern: boolean for kernel space sockets
1341 *
1342 * Creates a new socket and assigns it to @res, passing through LSM.
1343 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1344 * be set to true if the socket resides in kernel space.
1345 * This function internally uses GFP_KERNEL.
1346 */
1347
721db93a 1348int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1349 struct socket **res, int kern)
1da177e4
LT
1350{
1351 int err;
1352 struct socket *sock;
55737fda 1353 const struct net_proto_family *pf;
1da177e4
LT
1354
1355 /*
89bddce5 1356 * Check protocol is in range
1da177e4
LT
1357 */
1358 if (family < 0 || family >= NPROTO)
1359 return -EAFNOSUPPORT;
1360 if (type < 0 || type >= SOCK_MAX)
1361 return -EINVAL;
1362
1363 /* Compatibility.
1364
1365 This uglymoron is moved from INET layer to here to avoid
1366 deadlock in module load.
1367 */
1368 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1369 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1370 current->comm);
1da177e4
LT
1371 family = PF_PACKET;
1372 }
1373
1374 err = security_socket_create(family, type, protocol, kern);
1375 if (err)
1376 return err;
89bddce5 1377
55737fda
SH
1378 /*
1379 * Allocate the socket and allow the family to set things up. if
1380 * the protocol is 0, the family is instructed to select an appropriate
1381 * default.
1382 */
1383 sock = sock_alloc();
1384 if (!sock) {
e87cc472 1385 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1386 return -ENFILE; /* Not exactly a match, but its the
1387 closest posix thing */
1388 }
1389
1390 sock->type = type;
1391
95a5afca 1392#ifdef CONFIG_MODULES
89bddce5
SH
1393 /* Attempt to load a protocol module if the find failed.
1394 *
1395 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1396 * requested real, full-featured networking support upon configuration.
1397 * Otherwise module support will break!
1398 */
190683a9 1399 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1400 request_module("net-pf-%d", family);
1da177e4
LT
1401#endif
1402
55737fda
SH
1403 rcu_read_lock();
1404 pf = rcu_dereference(net_families[family]);
1405 err = -EAFNOSUPPORT;
1406 if (!pf)
1407 goto out_release;
1da177e4
LT
1408
1409 /*
1410 * We will call the ->create function, that possibly is in a loadable
1411 * module, so we have to bump that loadable module refcnt first.
1412 */
55737fda 1413 if (!try_module_get(pf->owner))
1da177e4
LT
1414 goto out_release;
1415
55737fda
SH
1416 /* Now protected by module ref count */
1417 rcu_read_unlock();
1418
3f378b68 1419 err = pf->create(net, sock, protocol, kern);
55737fda 1420 if (err < 0)
1da177e4 1421 goto out_module_put;
a79af59e 1422
1da177e4
LT
1423 /*
1424 * Now to bump the refcnt of the [loadable] module that owns this
1425 * socket at sock_release time we decrement its refcnt.
1426 */
55737fda
SH
1427 if (!try_module_get(sock->ops->owner))
1428 goto out_module_busy;
1429
1da177e4
LT
1430 /*
1431 * Now that we're done with the ->create function, the [loadable]
1432 * module can have its refcnt decremented
1433 */
55737fda 1434 module_put(pf->owner);
7420ed23
VY
1435 err = security_socket_post_create(sock, family, type, protocol, kern);
1436 if (err)
3b185525 1437 goto out_sock_release;
55737fda 1438 *res = sock;
1da177e4 1439
55737fda
SH
1440 return 0;
1441
1442out_module_busy:
1443 err = -EAFNOSUPPORT;
1da177e4 1444out_module_put:
55737fda
SH
1445 sock->ops = NULL;
1446 module_put(pf->owner);
1447out_sock_release:
1da177e4 1448 sock_release(sock);
55737fda
SH
1449 return err;
1450
1451out_release:
1452 rcu_read_unlock();
1453 goto out_sock_release;
1da177e4 1454}
721db93a 1455EXPORT_SYMBOL(__sock_create);
1da177e4 1456
8a3c245c
PT
1457/**
1458 * sock_create - creates a socket
1459 * @family: protocol family (AF_INET, ...)
1460 * @type: communication type (SOCK_STREAM, ...)
1461 * @protocol: protocol (0, ...)
1462 * @res: new socket
1463 *
1464 * A wrapper around __sock_create().
1465 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1466 */
1467
1da177e4
LT
1468int sock_create(int family, int type, int protocol, struct socket **res)
1469{
1b8d7ae4 1470 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1471}
c6d409cf 1472EXPORT_SYMBOL(sock_create);
1da177e4 1473
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern as 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	int err;

	err = __sock_create(net, family, type, protocol, res, 1);
	return err;
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1491
9d6a15c3 1492int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1493{
1494 int retval;
1495 struct socket *sock;
a677a039
UD
1496 int flags;
1497
e38b36f3
UD
1498 /* Check the SOCK_* constants for consistency. */
1499 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1500 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1501 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1502 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1503
a677a039 1504 flags = type & ~SOCK_TYPE_MASK;
77d27200 1505 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1506 return -EINVAL;
1507 type &= SOCK_TYPE_MASK;
1da177e4 1508
aaca0bdc
UD
1509 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1510 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1511
1da177e4
LT
1512 retval = sock_create(family, type, protocol, &sock);
1513 if (retval < 0)
8e1611e2 1514 return retval;
1da177e4 1515
8e1611e2 1516 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1517}
1518
9d6a15c3
DB
1519SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1520{
1521 return __sys_socket(family, type, protocol);
1522}
1523
1da177e4
LT
1524/*
1525 * Create a pair of connected sockets.
1526 */
1527
6debc8d8 1528int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1529{
1530 struct socket *sock1, *sock2;
1531 int fd1, fd2, err;
db349509 1532 struct file *newfile1, *newfile2;
a677a039
UD
1533 int flags;
1534
1535 flags = type & ~SOCK_TYPE_MASK;
77d27200 1536 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1537 return -EINVAL;
1538 type &= SOCK_TYPE_MASK;
1da177e4 1539
aaca0bdc
UD
1540 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1541 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1542
016a266b
AV
1543 /*
1544 * reserve descriptors and make sure we won't fail
1545 * to return them to userland.
1546 */
1547 fd1 = get_unused_fd_flags(flags);
1548 if (unlikely(fd1 < 0))
1549 return fd1;
1550
1551 fd2 = get_unused_fd_flags(flags);
1552 if (unlikely(fd2 < 0)) {
1553 put_unused_fd(fd1);
1554 return fd2;
1555 }
1556
1557 err = put_user(fd1, &usockvec[0]);
1558 if (err)
1559 goto out;
1560
1561 err = put_user(fd2, &usockvec[1]);
1562 if (err)
1563 goto out;
1564
1da177e4
LT
1565 /*
1566 * Obtain the first socket and check if the underlying protocol
1567 * supports the socketpair call.
1568 */
1569
1570 err = sock_create(family, type, protocol, &sock1);
016a266b 1571 if (unlikely(err < 0))
1da177e4
LT
1572 goto out;
1573
1574 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1575 if (unlikely(err < 0)) {
1576 sock_release(sock1);
1577 goto out;
bf3c23d1 1578 }
d73aa286 1579
d47cd945
DH
1580 err = security_socket_socketpair(sock1, sock2);
1581 if (unlikely(err)) {
1582 sock_release(sock2);
1583 sock_release(sock1);
1584 goto out;
1585 }
1586
016a266b
AV
1587 err = sock1->ops->socketpair(sock1, sock2);
1588 if (unlikely(err < 0)) {
1589 sock_release(sock2);
1590 sock_release(sock1);
1591 goto out;
28407630
AV
1592 }
1593
aab174f0 1594 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1595 if (IS_ERR(newfile1)) {
28407630 1596 err = PTR_ERR(newfile1);
016a266b
AV
1597 sock_release(sock2);
1598 goto out;
28407630
AV
1599 }
1600
aab174f0 1601 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1602 if (IS_ERR(newfile2)) {
1603 err = PTR_ERR(newfile2);
016a266b
AV
1604 fput(newfile1);
1605 goto out;
db349509
AV
1606 }
1607
157cf649 1608 audit_fd_pair(fd1, fd2);
d73aa286 1609
db349509
AV
1610 fd_install(fd1, newfile1);
1611 fd_install(fd2, newfile2);
d73aa286 1612 return 0;
1da177e4 1613
016a266b 1614out:
d73aa286 1615 put_unused_fd(fd2);
d73aa286 1616 put_unused_fd(fd1);
1da177e4
LT
1617 return err;
1618}
1619
6debc8d8
DB
1620SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1621 int __user *, usockvec)
1622{
1623 return __sys_socketpair(family, type, protocol, usockvec);
1624}
1625
1da177e4
LT
1626/*
1627 * Bind a name to a socket. Nothing much to do here since it's
1628 * the protocol's responsibility to handle the local address.
1629 *
1630 * We move the socket address to kernel space before we call
1631 * the protocol layer (having also checked the address is ok).
1632 */
1633
a87d35d8 1634int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1635{
1636 struct socket *sock;
230b1839 1637 struct sockaddr_storage address;
6cb153ca 1638 int err, fput_needed;
1da177e4 1639
89bddce5 1640 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1641 if (sock) {
43db362d 1642 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1643 if (!err) {
89bddce5 1644 err = security_socket_bind(sock,
230b1839 1645 (struct sockaddr *)&address,
89bddce5 1646 addrlen);
6cb153ca
BL
1647 if (!err)
1648 err = sock->ops->bind(sock,
89bddce5 1649 (struct sockaddr *)
230b1839 1650 &address, addrlen);
1da177e4 1651 }
6cb153ca 1652 fput_light(sock->file, fput_needed);
89bddce5 1653 }
1da177e4
LT
1654 return err;
1655}
1656
a87d35d8
DB
1657SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1658{
1659 return __sys_bind(fd, umyaddr, addrlen);
1660}
1661
1da177e4
LT
1662/*
1663 * Perform a listen. Basically, we allow the protocol to do anything
1664 * necessary for a listen, and if that works, we mark the socket as
1665 * ready for listening.
1666 */
1667
25e290ee 1668int __sys_listen(int fd, int backlog)
1da177e4
LT
1669{
1670 struct socket *sock;
6cb153ca 1671 int err, fput_needed;
b8e1f9b5 1672 int somaxconn;
89bddce5
SH
1673
1674 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1675 if (sock) {
8efa6e93 1676 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1677 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1678 backlog = somaxconn;
1da177e4
LT
1679
1680 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1681 if (!err)
1682 err = sock->ops->listen(sock, backlog);
1da177e4 1683
6cb153ca 1684 fput_light(sock->file, fput_needed);
1da177e4
LT
1685 }
1686 return err;
1687}
1688
25e290ee
DB
1689SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1690{
1691 return __sys_listen(fd, backlog);
1692}
1693
de2ea4b6
JA
1694int __sys_accept4_file(struct file *file, unsigned file_flags,
1695 struct sockaddr __user *upeer_sockaddr,
1696 int __user *upeer_addrlen, int flags)
1da177e4
LT
1697{
1698 struct socket *sock, *newsock;
39d8c1b6 1699 struct file *newfile;
de2ea4b6 1700 int err, len, newfd;
230b1839 1701 struct sockaddr_storage address;
1da177e4 1702
77d27200 1703 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1704 return -EINVAL;
1705
1706 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1707 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1708
de2ea4b6 1709 sock = sock_from_file(file, &err);
1da177e4
LT
1710 if (!sock)
1711 goto out;
1712
1713 err = -ENFILE;
c6d409cf
ED
1714 newsock = sock_alloc();
1715 if (!newsock)
de2ea4b6 1716 goto out;
1da177e4
LT
1717
1718 newsock->type = sock->type;
1719 newsock->ops = sock->ops;
1720
1da177e4
LT
1721 /*
1722 * We don't need try_module_get here, as the listening socket (sock)
1723 * has the protocol module (sock->ops->owner) held.
1724 */
1725 __module_get(newsock->ops->owner);
1726
28407630 1727 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1728 if (unlikely(newfd < 0)) {
1729 err = newfd;
9a1875e6 1730 sock_release(newsock);
de2ea4b6 1731 goto out;
39d8c1b6 1732 }
aab174f0 1733 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1734 if (IS_ERR(newfile)) {
28407630
AV
1735 err = PTR_ERR(newfile);
1736 put_unused_fd(newfd);
de2ea4b6 1737 goto out;
28407630 1738 }
39d8c1b6 1739
a79af59e
FF
1740 err = security_socket_accept(sock, newsock);
1741 if (err)
39d8c1b6 1742 goto out_fd;
a79af59e 1743
de2ea4b6
JA
1744 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1745 false);
1da177e4 1746 if (err < 0)
39d8c1b6 1747 goto out_fd;
1da177e4
LT
1748
1749 if (upeer_sockaddr) {
9b2c45d4
DV
1750 len = newsock->ops->getname(newsock,
1751 (struct sockaddr *)&address, 2);
1752 if (len < 0) {
1da177e4 1753 err = -ECONNABORTED;
39d8c1b6 1754 goto out_fd;
1da177e4 1755 }
43db362d 1756 err = move_addr_to_user(&address,
230b1839 1757 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1758 if (err < 0)
39d8c1b6 1759 goto out_fd;
1da177e4
LT
1760 }
1761
1762 /* File flags are not inherited via accept() unlike another OSes. */
1763
39d8c1b6
DM
1764 fd_install(newfd, newfile);
1765 err = newfd;
1da177e4
LT
1766out:
1767 return err;
39d8c1b6 1768out_fd:
9606a216 1769 fput(newfile);
39d8c1b6 1770 put_unused_fd(newfd);
de2ea4b6
JA
1771 goto out;
1772
1773}
1774
1775/*
1776 * For accept, we attempt to create a new socket, set up the link
1777 * with the client, wake up the client, then return the new
1778 * connected fd. We collect the address of the connector in kernel
1779 * space and move it to user at the very end. This is unclean because
1780 * we open the socket then return an error.
1781 *
1782 * 1003.1g adds the ability to recvmsg() to query connection pending
1783 * status to recvmsg. We need to add that support in a way thats
1784 * clean when we restructure accept also.
1785 */
1786
1787int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1788 int __user *upeer_addrlen, int flags)
1789{
1790 int ret = -EBADF;
1791 struct fd f;
1792
1793 f = fdget(fd);
1794 if (f.file) {
1795 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1796 upeer_addrlen, flags);
1797 if (f.flags)
1798 fput(f.file);
1799 }
1800
1801 return ret;
1da177e4
LT
1802}
1803
4541e805
DB
1804SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1805 int __user *, upeer_addrlen, int, flags)
1806{
1807 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1808}
1809
20f37034
HC
1810SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1811 int __user *, upeer_addrlen)
aaca0bdc 1812{
4541e805 1813 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1814}
1815
1da177e4
LT
1816/*
1817 * Attempt to connect to a socket with the server address. The address
1818 * is in user space so we verify it is OK and move it to kernel space.
1819 *
1820 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1821 * break bindings
1822 *
1823 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1824 * other SEQPACKET protocols that take time to connect() as it doesn't
1825 * include the -EINPROGRESS status for such sockets.
1826 */
1827
1387c2c2 1828int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1829{
1830 struct socket *sock;
230b1839 1831 struct sockaddr_storage address;
6cb153ca 1832 int err, fput_needed;
1da177e4 1833
6cb153ca 1834 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1835 if (!sock)
1836 goto out;
43db362d 1837 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1838 if (err < 0)
1839 goto out_put;
1840
89bddce5 1841 err =
230b1839 1842 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1843 if (err)
1844 goto out_put;
1845
230b1839 1846 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1847 sock->file->f_flags);
1848out_put:
6cb153ca 1849 fput_light(sock->file, fput_needed);
1da177e4
LT
1850out:
1851 return err;
1852}
1853
1387c2c2
DB
1854SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1855 int, addrlen)
1856{
1857 return __sys_connect(fd, uservaddr, addrlen);
1858}
1859
1da177e4
LT
1860/*
1861 * Get the local address ('name') of a socket object. Move the obtained
1862 * name to user space.
1863 */
1864
8882a107
DB
1865int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1866 int __user *usockaddr_len)
1da177e4
LT
1867{
1868 struct socket *sock;
230b1839 1869 struct sockaddr_storage address;
9b2c45d4 1870 int err, fput_needed;
89bddce5 1871
6cb153ca 1872 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1873 if (!sock)
1874 goto out;
1875
1876 err = security_socket_getsockname(sock);
1877 if (err)
1878 goto out_put;
1879
9b2c45d4
DV
1880 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1881 if (err < 0)
1da177e4 1882 goto out_put;
9b2c45d4
DV
1883 /* "err" is actually length in this case */
1884 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1885
1886out_put:
6cb153ca 1887 fput_light(sock->file, fput_needed);
1da177e4
LT
1888out:
1889 return err;
1890}
1891
8882a107
DB
1892SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1893 int __user *, usockaddr_len)
1894{
1895 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1896}
1897
1da177e4
LT
1898/*
1899 * Get the remote address ('name') of a socket object. Move the obtained
1900 * name to user space.
1901 */
1902
b21c8f83
DB
1903int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1904 int __user *usockaddr_len)
1da177e4
LT
1905{
1906 struct socket *sock;
230b1839 1907 struct sockaddr_storage address;
9b2c45d4 1908 int err, fput_needed;
1da177e4 1909
89bddce5
SH
1910 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1911 if (sock != NULL) {
1da177e4
LT
1912 err = security_socket_getpeername(sock);
1913 if (err) {
6cb153ca 1914 fput_light(sock->file, fput_needed);
1da177e4
LT
1915 return err;
1916 }
1917
9b2c45d4
DV
1918 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1919 if (err >= 0)
1920 /* "err" is actually length in this case */
1921 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1922 usockaddr_len);
6cb153ca 1923 fput_light(sock->file, fput_needed);
1da177e4
LT
1924 }
1925 return err;
1926}
1927
b21c8f83
DB
1928SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1929 int __user *, usockaddr_len)
1930{
1931 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1932}
1933
1da177e4
LT
1934/*
1935 * Send a datagram to a given address. We move the address into kernel
1936 * space and check the user space data area is readable before invoking
1937 * the protocol.
1938 */
211b634b
DB
1939int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1940 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1941{
1942 struct socket *sock;
230b1839 1943 struct sockaddr_storage address;
1da177e4
LT
1944 int err;
1945 struct msghdr msg;
1946 struct iovec iov;
6cb153ca 1947 int fput_needed;
6cb153ca 1948
602bd0e9
AV
1949 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1950 if (unlikely(err))
1951 return err;
de0fa95c
PE
1952 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1953 if (!sock)
4387ff75 1954 goto out;
6cb153ca 1955
89bddce5 1956 msg.msg_name = NULL;
89bddce5
SH
1957 msg.msg_control = NULL;
1958 msg.msg_controllen = 0;
1959 msg.msg_namelen = 0;
6cb153ca 1960 if (addr) {
43db362d 1961 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1962 if (err < 0)
1963 goto out_put;
230b1839 1964 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1965 msg.msg_namelen = addr_len;
1da177e4
LT
1966 }
1967 if (sock->file->f_flags & O_NONBLOCK)
1968 flags |= MSG_DONTWAIT;
1969 msg.msg_flags = flags;
d8725c86 1970 err = sock_sendmsg(sock, &msg);
1da177e4 1971
89bddce5 1972out_put:
de0fa95c 1973 fput_light(sock->file, fput_needed);
4387ff75 1974out:
1da177e4
LT
1975 return err;
1976}
1977
211b634b
DB
1978SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1979 unsigned int, flags, struct sockaddr __user *, addr,
1980 int, addr_len)
1981{
1982 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1983}
1984
1da177e4 1985/*
89bddce5 1986 * Send a datagram down a socket.
1da177e4
LT
1987 */
1988
3e0fa65f 1989SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1990 unsigned int, flags)
1da177e4 1991{
211b634b 1992 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1993}
1994
1995/*
89bddce5 1996 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1997 * sender. We verify the buffers are writable and if needed move the
1998 * sender address from kernel to user space.
1999 */
7a09e1eb
DB
2000int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2001 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2002{
2003 struct socket *sock;
2004 struct iovec iov;
2005 struct msghdr msg;
230b1839 2006 struct sockaddr_storage address;
89bddce5 2007 int err, err2;
6cb153ca
BL
2008 int fput_needed;
2009
602bd0e9
AV
2010 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2011 if (unlikely(err))
2012 return err;
de0fa95c 2013 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2014 if (!sock)
de0fa95c 2015 goto out;
1da177e4 2016
89bddce5
SH
2017 msg.msg_control = NULL;
2018 msg.msg_controllen = 0;
f3d33426
HFS
2019 /* Save some cycles and don't copy the address if not needed */
2020 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2021 /* We assume all kernel code knows the size of sockaddr_storage */
2022 msg.msg_namelen = 0;
130ed5d1 2023 msg.msg_iocb = NULL;
9f138fa6 2024 msg.msg_flags = 0;
1da177e4
LT
2025 if (sock->file->f_flags & O_NONBLOCK)
2026 flags |= MSG_DONTWAIT;
2da62906 2027 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2028
89bddce5 2029 if (err >= 0 && addr != NULL) {
43db362d 2030 err2 = move_addr_to_user(&address,
230b1839 2031 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2032 if (err2 < 0)
2033 err = err2;
1da177e4 2034 }
de0fa95c
PE
2035
2036 fput_light(sock->file, fput_needed);
4387ff75 2037out:
1da177e4
LT
2038 return err;
2039}
2040
7a09e1eb
DB
2041SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2042 unsigned int, flags, struct sockaddr __user *, addr,
2043 int __user *, addr_len)
2044{
2045 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2046}
2047
1da177e4 2048/*
89bddce5 2049 * Receive a datagram from a socket.
1da177e4
LT
2050 */
2051
b7c0ddf5
JG
2052SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2053 unsigned int, flags)
1da177e4 2054{
7a09e1eb 2055 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2056}
2057
2058/*
2059 * Set a socket option. Because we don't know the option lengths we have
2060 * to pass the user mode parameter for the protocols to sort out.
2061 */
2062
cc36dca0
DB
2063static int __sys_setsockopt(int fd, int level, int optname,
2064 char __user *optval, int optlen)
1da177e4 2065{
0d01da6a
SF
2066 mm_segment_t oldfs = get_fs();
2067 char *kernel_optval = NULL;
6cb153ca 2068 int err, fput_needed;
1da177e4
LT
2069 struct socket *sock;
2070
2071 if (optlen < 0)
2072 return -EINVAL;
89bddce5
SH
2073
2074 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2075 if (sock != NULL) {
2076 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2077 if (err)
2078 goto out_put;
1da177e4 2079
0d01da6a
SF
2080 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2081 &optname, optval, &optlen,
2082 &kernel_optval);
2083
2084 if (err < 0) {
2085 goto out_put;
2086 } else if (err > 0) {
2087 err = 0;
2088 goto out_put;
2089 }
2090
2091 if (kernel_optval) {
2092 set_fs(KERNEL_DS);
2093 optval = (char __user __force *)kernel_optval;
2094 }
2095
1da177e4 2096 if (level == SOL_SOCKET)
89bddce5
SH
2097 err =
2098 sock_setsockopt(sock, level, optname, optval,
2099 optlen);
1da177e4 2100 else
89bddce5
SH
2101 err =
2102 sock->ops->setsockopt(sock, level, optname, optval,
2103 optlen);
0d01da6a
SF
2104
2105 if (kernel_optval) {
2106 set_fs(oldfs);
2107 kfree(kernel_optval);
2108 }
6cb153ca
BL
2109out_put:
2110 fput_light(sock->file, fput_needed);
1da177e4
LT
2111 }
2112 return err;
2113}
2114
cc36dca0
DB
2115SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2116 char __user *, optval, int, optlen)
2117{
2118 return __sys_setsockopt(fd, level, optname, optval, optlen);
2119}
2120
1da177e4
LT
2121/*
2122 * Get a socket option. Because we don't know the option lengths we have
2123 * to pass a user mode parameter for the protocols to sort out.
2124 */
2125
13a2d70e
DB
2126static int __sys_getsockopt(int fd, int level, int optname,
2127 char __user *optval, int __user *optlen)
1da177e4 2128{
6cb153ca 2129 int err, fput_needed;
1da177e4 2130 struct socket *sock;
0d01da6a 2131 int max_optlen;
1da177e4 2132
89bddce5
SH
2133 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2134 if (sock != NULL) {
6cb153ca
BL
2135 err = security_socket_getsockopt(sock, level, optname);
2136 if (err)
2137 goto out_put;
1da177e4 2138
0d01da6a
SF
2139 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2140
1da177e4 2141 if (level == SOL_SOCKET)
89bddce5
SH
2142 err =
2143 sock_getsockopt(sock, level, optname, optval,
2144 optlen);
1da177e4 2145 else
89bddce5
SH
2146 err =
2147 sock->ops->getsockopt(sock, level, optname, optval,
2148 optlen);
0d01da6a
SF
2149
2150 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2151 optval, optlen,
2152 max_optlen, err);
6cb153ca
BL
2153out_put:
2154 fput_light(sock->file, fput_needed);
1da177e4
LT
2155 }
2156 return err;
2157}
2158
13a2d70e
DB
2159SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2160 char __user *, optval, int __user *, optlen)
2161{
2162 return __sys_getsockopt(fd, level, optname, optval, optlen);
2163}
2164
1da177e4
LT
2165/*
2166 * Shutdown a socket.
2167 */
2168
005a1aea 2169int __sys_shutdown(int fd, int how)
1da177e4 2170{
6cb153ca 2171 int err, fput_needed;
1da177e4
LT
2172 struct socket *sock;
2173
89bddce5
SH
2174 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2175 if (sock != NULL) {
1da177e4 2176 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2177 if (!err)
2178 err = sock->ops->shutdown(sock, how);
2179 fput_light(sock->file, fput_needed);
1da177e4
LT
2180 }
2181 return err;
2182}
2183
005a1aea
DB
2184SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2185{
2186 return __sys_shutdown(fd, how);
2187}
2188
/*
 * Helpers yielding the address of a msghdr field that has the same type
 * (int / unsigned) in both the native and the 32-bit compat layout.
 *
 * They expand in place and therefore require the caller to have `flags`,
 * `<msg>` and `<msg>_compat` in scope; MSG_CMSG_COMPAT in `flags` selects
 * the compat structure.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2195
c71d8ebe
TH
/*
 * Destination address of the most recent successful send within one
 * sendmmsg() call; lets ___sys_sendmsg() skip the security check when the
 * next message goes to the same address.
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the destination address */
	unsigned int name_len;		/* bytes of @name in use; UINT_MAX = none yet */
};
2200
da184284
AV
2201static int copy_msghdr_from_user(struct msghdr *kmsg,
2202 struct user_msghdr __user *umsg,
2203 struct sockaddr __user **save_addr,
2204 struct iovec **iov)
1661bf36 2205{
ffb07550 2206 struct user_msghdr msg;
08adb7da
AV
2207 ssize_t err;
2208
ffb07550 2209 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2210 return -EFAULT;
dbb490b9 2211
864d9664 2212 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2213 kmsg->msg_controllen = msg.msg_controllen;
2214 kmsg->msg_flags = msg.msg_flags;
2215
2216 kmsg->msg_namelen = msg.msg_namelen;
2217 if (!msg.msg_name)
6a2a2b3a
AS
2218 kmsg->msg_namelen = 0;
2219
dbb490b9
ML
2220 if (kmsg->msg_namelen < 0)
2221 return -EINVAL;
2222
1661bf36 2223 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2224 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2225
2226 if (save_addr)
ffb07550 2227 *save_addr = msg.msg_name;
08adb7da 2228
ffb07550 2229 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2230 if (!save_addr) {
864d9664
PA
2231 err = move_addr_to_kernel(msg.msg_name,
2232 kmsg->msg_namelen,
08adb7da
AV
2233 kmsg->msg_name);
2234 if (err < 0)
2235 return err;
2236 }
2237 } else {
2238 kmsg->msg_name = NULL;
2239 kmsg->msg_namelen = 0;
2240 }
2241
ffb07550 2242 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2243 return -EMSGSIZE;
2244
0345f931 2245 kmsg->msg_iocb = NULL;
2246
87e5e6da 2247 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2248 msg.msg_iov, msg.msg_iovlen,
da184284 2249 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2250 return err < 0 ? err : 0;
1661bf36
DC
2251}
2252
666547ff 2253static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2254 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2255 struct used_address *used_address,
2256 unsigned int allowed_msghdr_flags)
1da177e4 2257{
89bddce5
SH
2258 struct compat_msghdr __user *msg_compat =
2259 (struct compat_msghdr __user *)msg;
230b1839 2260 struct sockaddr_storage address;
1da177e4 2261 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2262 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2263 __aligned(sizeof(__kernel_size_t));
89bddce5 2264 /* 20 is size of ipv6_pktinfo */
1da177e4 2265 unsigned char *ctl_buf = ctl;
d8725c86 2266 int ctl_len;
08adb7da 2267 ssize_t err;
89bddce5 2268
08adb7da 2269 msg_sys->msg_name = &address;
1da177e4 2270
08449320 2271 if (MSG_CMSG_COMPAT & flags)
08adb7da 2272 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2273 else
08adb7da 2274 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2275 if (err < 0)
da184284 2276 return err;
1da177e4
LT
2277
2278 err = -ENOBUFS;
2279
228e548e 2280 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2281 goto out_freeiov;
28a94d8f 2282 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2283 ctl_len = msg_sys->msg_controllen;
1da177e4 2284 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2285 err =
228e548e 2286 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2287 sizeof(ctl));
1da177e4
LT
2288 if (err)
2289 goto out_freeiov;
228e548e
AB
2290 ctl_buf = msg_sys->msg_control;
2291 ctl_len = msg_sys->msg_controllen;
1da177e4 2292 } else if (ctl_len) {
ac4340fc
DM
2293 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2294 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2295 if (ctl_len > sizeof(ctl)) {
1da177e4 2296 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2297 if (ctl_buf == NULL)
1da177e4
LT
2298 goto out_freeiov;
2299 }
2300 err = -EFAULT;
2301 /*
228e548e 2302 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2303 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2304 * checking falls down on this.
2305 */
fb8621bb 2306 if (copy_from_user(ctl_buf,
228e548e 2307 (void __user __force *)msg_sys->msg_control,
89bddce5 2308 ctl_len))
1da177e4 2309 goto out_freectl;
228e548e 2310 msg_sys->msg_control = ctl_buf;
1da177e4 2311 }
228e548e 2312 msg_sys->msg_flags = flags;
1da177e4
LT
2313
2314 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2315 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2316 /*
2317 * If this is sendmmsg() and current destination address is same as
2318 * previously succeeded address, omit asking LSM's decision.
2319 * used_address->name_len is initialized to UINT_MAX so that the first
2320 * destination address never matches.
2321 */
bc909d9d
MD
2322 if (used_address && msg_sys->msg_name &&
2323 used_address->name_len == msg_sys->msg_namelen &&
2324 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2325 used_address->name_len)) {
d8725c86 2326 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2327 goto out_freectl;
2328 }
d8725c86 2329 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2330 /*
2331 * If this is sendmmsg() and sending to current destination address was
2332 * successful, remember it.
2333 */
2334 if (used_address && err >= 0) {
2335 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2336 if (msg_sys->msg_name)
2337 memcpy(&used_address->name, msg_sys->msg_name,
2338 used_address->name_len);
c71d8ebe 2339 }
1da177e4
LT
2340
2341out_freectl:
89bddce5 2342 if (ctl_buf != ctl)
1da177e4
LT
2343 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2344out_freeiov:
da184284 2345 kfree(iov);
228e548e
AB
2346 return err;
2347}
2348
2349/*
2350 * BSD sendmsg interface
2351 */
0fa03c62
JA
2352long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2353 unsigned int flags)
2354{
2355 struct msghdr msg_sys;
2356
2357 return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2358}
228e548e 2359
e1834a32
DB
2360long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2361 bool forbid_cmsg_compat)
228e548e
AB
2362{
2363 int fput_needed, err;
2364 struct msghdr msg_sys;
1be374a0
AL
2365 struct socket *sock;
2366
e1834a32
DB
2367 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2368 return -EINVAL;
2369
1be374a0 2370 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2371 if (!sock)
2372 goto out;
2373
28a94d8f 2374 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2375
6cb153ca 2376 fput_light(sock->file, fput_needed);
89bddce5 2377out:
1da177e4
LT
2378 return err;
2379}
2380
666547ff 2381SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2382{
e1834a32 2383 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2384}
2385
228e548e
AB
2386/*
2387 * Linux sendmmsg interface
2388 */
2389
2390int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2391 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2392{
2393 int fput_needed, err, datagrams;
2394 struct socket *sock;
2395 struct mmsghdr __user *entry;
2396 struct compat_mmsghdr __user *compat_entry;
2397 struct msghdr msg_sys;
c71d8ebe 2398 struct used_address used_address;
f092276d 2399 unsigned int oflags = flags;
228e548e 2400
e1834a32
DB
2401 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2402 return -EINVAL;
2403
98382f41
AB
2404 if (vlen > UIO_MAXIOV)
2405 vlen = UIO_MAXIOV;
228e548e
AB
2406
2407 datagrams = 0;
2408
2409 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2410 if (!sock)
2411 return err;
2412
c71d8ebe 2413 used_address.name_len = UINT_MAX;
228e548e
AB
2414 entry = mmsg;
2415 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2416 err = 0;
f092276d 2417 flags |= MSG_BATCH;
228e548e
AB
2418
2419 while (datagrams < vlen) {
f092276d
TH
2420 if (datagrams == vlen - 1)
2421 flags = oflags;
2422
228e548e 2423 if (MSG_CMSG_COMPAT & flags) {
666547ff 2424 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2425 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2426 if (err < 0)
2427 break;
2428 err = __put_user(err, &compat_entry->msg_len);
2429 ++compat_entry;
2430 } else {
a7526eb5 2431 err = ___sys_sendmsg(sock,
666547ff 2432 (struct user_msghdr __user *)entry,
28a94d8f 2433 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2434 if (err < 0)
2435 break;
2436 err = put_user(err, &entry->msg_len);
2437 ++entry;
2438 }
2439
2440 if (err)
2441 break;
2442 ++datagrams;
3023898b
SHY
2443 if (msg_data_left(&msg_sys))
2444 break;
a78cb84c 2445 cond_resched();
228e548e
AB
2446 }
2447
228e548e
AB
2448 fput_light(sock->file, fput_needed);
2449
728ffb86
AB
2450 /* We only return an error if no datagrams were able to be sent */
2451 if (datagrams != 0)
228e548e
AB
2452 return datagrams;
2453
228e548e
AB
2454 return err;
2455}
2456
2457SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2458 unsigned int, vlen, unsigned int, flags)
2459{
e1834a32 2460 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2461}
2462
666547ff 2463static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2464 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2465{
89bddce5
SH
2466 struct compat_msghdr __user *msg_compat =
2467 (struct compat_msghdr __user *)msg;
1da177e4 2468 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2469 struct iovec *iov = iovstack;
1da177e4 2470 unsigned long cmsg_ptr;
2da62906 2471 int len;
08adb7da 2472 ssize_t err;
1da177e4
LT
2473
2474 /* kernel mode address */
230b1839 2475 struct sockaddr_storage addr;
1da177e4
LT
2476
2477 /* user mode address pointers */
2478 struct sockaddr __user *uaddr;
08adb7da 2479 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2480
08adb7da 2481 msg_sys->msg_name = &addr;
1da177e4 2482
f3d33426 2483 if (MSG_CMSG_COMPAT & flags)
08adb7da 2484 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2485 else
08adb7da 2486 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2487 if (err < 0)
da184284 2488 return err;
1da177e4 2489
a2e27255
ACM
2490 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2491 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2492
f3d33426
HFS
2493 /* We assume all kernel code knows the size of sockaddr_storage */
2494 msg_sys->msg_namelen = 0;
2495
1da177e4
LT
2496 if (sock->file->f_flags & O_NONBLOCK)
2497 flags |= MSG_DONTWAIT;
2da62906 2498 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2499 if (err < 0)
2500 goto out_freeiov;
2501 len = err;
2502
2503 if (uaddr != NULL) {
43db362d 2504 err = move_addr_to_user(&addr,
a2e27255 2505 msg_sys->msg_namelen, uaddr,
89bddce5 2506 uaddr_len);
1da177e4
LT
2507 if (err < 0)
2508 goto out_freeiov;
2509 }
a2e27255 2510 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2511 COMPAT_FLAGS(msg));
1da177e4
LT
2512 if (err)
2513 goto out_freeiov;
2514 if (MSG_CMSG_COMPAT & flags)
a2e27255 2515 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2516 &msg_compat->msg_controllen);
2517 else
a2e27255 2518 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2519 &msg->msg_controllen);
2520 if (err)
2521 goto out_freeiov;
2522 err = len;
2523
2524out_freeiov:
da184284 2525 kfree(iov);
a2e27255
ACM
2526 return err;
2527}
2528
2529/*
2530 * BSD recvmsg interface
2531 */
2532
aa1fa28f
JA
2533long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2534 unsigned int flags)
2535{
2536 struct msghdr msg_sys;
2537
2538 return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2539}
2540
e1834a32
DB
2541long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2542 bool forbid_cmsg_compat)
a2e27255
ACM
2543{
2544 int fput_needed, err;
2545 struct msghdr msg_sys;
1be374a0
AL
2546 struct socket *sock;
2547
e1834a32
DB
2548 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2549 return -EINVAL;
2550
1be374a0 2551 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2552 if (!sock)
2553 goto out;
2554
a7526eb5 2555 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2556
6cb153ca 2557 fput_light(sock->file, fput_needed);
1da177e4
LT
2558out:
2559 return err;
2560}
2561
666547ff 2562SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2563 unsigned int, flags)
2564{
e1834a32 2565 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2566}
2567
a2e27255
ACM
2568/*
2569 * Linux recvmmsg interface
2570 */
2571
e11d4284
AB
2572static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2573 unsigned int vlen, unsigned int flags,
2574 struct timespec64 *timeout)
a2e27255
ACM
2575{
2576 int fput_needed, err, datagrams;
2577 struct socket *sock;
2578 struct mmsghdr __user *entry;
d7256d0e 2579 struct compat_mmsghdr __user *compat_entry;
a2e27255 2580 struct msghdr msg_sys;
766b9f92
DD
2581 struct timespec64 end_time;
2582 struct timespec64 timeout64;
a2e27255
ACM
2583
2584 if (timeout &&
2585 poll_select_set_timeout(&end_time, timeout->tv_sec,
2586 timeout->tv_nsec))
2587 return -EINVAL;
2588
2589 datagrams = 0;
2590
2591 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2592 if (!sock)
2593 return err;
2594
7797dc41
SHY
2595 if (likely(!(flags & MSG_ERRQUEUE))) {
2596 err = sock_error(sock->sk);
2597 if (err) {
2598 datagrams = err;
2599 goto out_put;
2600 }
e623a9e9 2601 }
a2e27255
ACM
2602
2603 entry = mmsg;
d7256d0e 2604 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2605
2606 while (datagrams < vlen) {
2607 /*
2608 * No need to ask LSM for more than the first datagram.
2609 */
d7256d0e 2610 if (MSG_CMSG_COMPAT & flags) {
666547ff 2611 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2612 &msg_sys, flags & ~MSG_WAITFORONE,
2613 datagrams);
d7256d0e
JMG
2614 if (err < 0)
2615 break;
2616 err = __put_user(err, &compat_entry->msg_len);
2617 ++compat_entry;
2618 } else {
a7526eb5 2619 err = ___sys_recvmsg(sock,
666547ff 2620 (struct user_msghdr __user *)entry,
a7526eb5
AL
2621 &msg_sys, flags & ~MSG_WAITFORONE,
2622 datagrams);
d7256d0e
JMG
2623 if (err < 0)
2624 break;
2625 err = put_user(err, &entry->msg_len);
2626 ++entry;
2627 }
2628
a2e27255
ACM
2629 if (err)
2630 break;
a2e27255
ACM
2631 ++datagrams;
2632
71c5c159
BB
2633 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2634 if (flags & MSG_WAITFORONE)
2635 flags |= MSG_DONTWAIT;
2636
a2e27255 2637 if (timeout) {
766b9f92 2638 ktime_get_ts64(&timeout64);
c2e6c856 2639 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2640 if (timeout->tv_sec < 0) {
2641 timeout->tv_sec = timeout->tv_nsec = 0;
2642 break;
2643 }
2644
2645 /* Timeout, return less than vlen datagrams */
2646 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2647 break;
2648 }
2649
2650 /* Out of band data, return right away */
2651 if (msg_sys.msg_flags & MSG_OOB)
2652 break;
a78cb84c 2653 cond_resched();
a2e27255
ACM
2654 }
2655
a2e27255 2656 if (err == 0)
34b88a68
ACM
2657 goto out_put;
2658
2659 if (datagrams == 0) {
2660 datagrams = err;
2661 goto out_put;
2662 }
a2e27255 2663
34b88a68
ACM
2664 /*
2665 * We may return less entries than requested (vlen) if the
2666 * sock is non block and there aren't enough datagrams...
2667 */
2668 if (err != -EAGAIN) {
a2e27255 2669 /*
34b88a68
ACM
2670 * ... or if recvmsg returns an error after we
2671 * received some datagrams, where we record the
2672 * error to return on the next call or if the
2673 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2674 */
34b88a68 2675 sock->sk->sk_err = -err;
a2e27255 2676 }
34b88a68
ACM
2677out_put:
2678 fput_light(sock->file, fput_needed);
a2e27255 2679
34b88a68 2680 return datagrams;
a2e27255
ACM
2681}
2682
e11d4284
AB
2683int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2684 unsigned int vlen, unsigned int flags,
2685 struct __kernel_timespec __user *timeout,
2686 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2687{
2688 int datagrams;
c2e6c856 2689 struct timespec64 timeout_sys;
a2e27255 2690
e11d4284
AB
2691 if (timeout && get_timespec64(&timeout_sys, timeout))
2692 return -EFAULT;
a2e27255 2693
e11d4284 2694 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2695 return -EFAULT;
2696
e11d4284
AB
2697 if (!timeout && !timeout32)
2698 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2699
2700 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2701
e11d4284
AB
2702 if (datagrams <= 0)
2703 return datagrams;
2704
2705 if (timeout && put_timespec64(&timeout_sys, timeout))
2706 datagrams = -EFAULT;
2707
2708 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2709 datagrams = -EFAULT;
2710
2711 return datagrams;
2712}
2713
1255e269
DB
2714SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2715 unsigned int, vlen, unsigned int, flags,
c2e6c856 2716 struct __kernel_timespec __user *, timeout)
1255e269 2717{
e11d4284
AB
2718 if (flags & MSG_CMSG_COMPAT)
2719 return -EINVAL;
2720
2721 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2722}
2723
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) entry point with a legacy 32-bit timespec timeout.
 * MSG_CMSG_COMPAT from user space is refused.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2735
a2e27255 2736#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of bytes
 * of unsigned long arguments to copy from user space for that call number.
 * The names below follow the SYS_* numbering of <linux/net.h>.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: SYS_SOCKET */
	AL(3),	/*  2: SYS_BIND */
	AL(3),	/*  3: SYS_CONNECT */
	AL(2),	/*  4: SYS_LISTEN */
	AL(3),	/*  5: SYS_ACCEPT */
	AL(3),	/*  6: SYS_GETSOCKNAME */
	AL(3),	/*  7: SYS_GETPEERNAME */
	AL(4),	/*  8: SYS_SOCKETPAIR */
	AL(4),	/*  9: SYS_SEND */
	AL(4),	/* 10: SYS_RECV */
	AL(6),	/* 11: SYS_SENDTO */
	AL(6),	/* 12: SYS_RECVFROM */
	AL(2),	/* 13: SYS_SHUTDOWN */
	AL(5),	/* 14: SYS_SETSOCKOPT */
	AL(5),	/* 15: SYS_GETSOCKOPT */
	AL(3),	/* 16: SYS_SENDMSG */
	AL(3),	/* 17: SYS_RECVMSG */
	AL(4),	/* 18: SYS_ACCEPT4 */
	AL(5),	/* 19: SYS_RECVMMSG */
	AL(4),	/* 20: SYS_SENDMMSG */
};

#undef AL
2747
2748/*
89bddce5 2749 * System call vectors.
1da177e4
LT
2750 *
2751 * Argument checking cleaned up. Saved 20% in size.
2752 * This function doesn't need to set the kernel lock because
89bddce5 2753 * it is set by the callees.
1da177e4
LT
2754 */
2755
3e0fa65f 2756SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2757{
2950fa9d 2758 unsigned long a[AUDITSC_ARGS];
89bddce5 2759 unsigned long a0, a1;
1da177e4 2760 int err;
47379052 2761 unsigned int len;
1da177e4 2762
228e548e 2763 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2764 return -EINVAL;
c8e8cd57 2765 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2766
47379052
AV
2767 len = nargs[call];
2768 if (len > sizeof(a))
2769 return -EINVAL;
2770
1da177e4 2771 /* copy_from_user should be SMP safe. */
47379052 2772 if (copy_from_user(a, args, len))
1da177e4 2773 return -EFAULT;
3ec3b2fb 2774
2950fa9d
CG
2775 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2776 if (err)
2777 return err;
3ec3b2fb 2778
89bddce5
SH
2779 a0 = a[0];
2780 a1 = a[1];
2781
2782 switch (call) {
2783 case SYS_SOCKET:
9d6a15c3 2784 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2785 break;
2786 case SYS_BIND:
a87d35d8 2787 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2788 break;
2789 case SYS_CONNECT:
1387c2c2 2790 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2791 break;
2792 case SYS_LISTEN:
25e290ee 2793 err = __sys_listen(a0, a1);
89bddce5
SH
2794 break;
2795 case SYS_ACCEPT:
4541e805
DB
2796 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2797 (int __user *)a[2], 0);
89bddce5
SH
2798 break;
2799 case SYS_GETSOCKNAME:
2800 err =
8882a107
DB
2801 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2802 (int __user *)a[2]);
89bddce5
SH
2803 break;
2804 case SYS_GETPEERNAME:
2805 err =
b21c8f83
DB
2806 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2807 (int __user *)a[2]);
89bddce5
SH
2808 break;
2809 case SYS_SOCKETPAIR:
6debc8d8 2810 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2811 break;
2812 case SYS_SEND:
f3bf896b
DB
2813 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2814 NULL, 0);
89bddce5
SH
2815 break;
2816 case SYS_SENDTO:
211b634b
DB
2817 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2818 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2819 break;
2820 case SYS_RECV:
d27e9afc
DB
2821 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2822 NULL, NULL);
89bddce5
SH
2823 break;
2824 case SYS_RECVFROM:
7a09e1eb
DB
2825 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2826 (struct sockaddr __user *)a[4],
2827 (int __user *)a[5]);
89bddce5
SH
2828 break;
2829 case SYS_SHUTDOWN:
005a1aea 2830 err = __sys_shutdown(a0, a1);
89bddce5
SH
2831 break;
2832 case SYS_SETSOCKOPT:
cc36dca0
DB
2833 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2834 a[4]);
89bddce5
SH
2835 break;
2836 case SYS_GETSOCKOPT:
2837 err =
13a2d70e
DB
2838 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2839 (int __user *)a[4]);
89bddce5
SH
2840 break;
2841 case SYS_SENDMSG:
e1834a32
DB
2842 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2843 a[2], true);
89bddce5 2844 break;
228e548e 2845 case SYS_SENDMMSG:
e1834a32
DB
2846 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2847 a[3], true);
228e548e 2848 break;
89bddce5 2849 case SYS_RECVMSG:
e1834a32
DB
2850 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2851 a[2], true);
89bddce5 2852 break;
a2e27255 2853 case SYS_RECVMMSG:
e11d4284
AB
2854 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2855 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2856 a[2], a[3],
2857 (struct __kernel_timespec __user *)a[4],
2858 NULL);
2859 else
2860 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2861 a[2], a[3], NULL,
2862 (struct old_timespec32 __user *)a[4]);
a2e27255 2863 break;
de11defe 2864 case SYS_ACCEPT4:
4541e805
DB
2865 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2866 (int __user *)a[2], a[3]);
aaca0bdc 2867 break;
89bddce5
SH
2868 default:
2869 err = -EINVAL;
2870 break;
1da177e4
LT
2871 }
2872 return err;
2873}
2874
89bddce5 2875#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2876
55737fda
SH
2877/**
2878 * sock_register - add a socket protocol handler
2879 * @ops: description of protocol
2880 *
1da177e4
LT
2881 * This function is called by a protocol handler that wants to
2882 * advertise its address family, and have it linked into the
e793c0f7 2883 * socket interface. The value ops->family corresponds to the
55737fda 2884 * socket system call protocol family.
1da177e4 2885 */
f0fd27d4 2886int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2887{
2888 int err;
2889
2890 if (ops->family >= NPROTO) {
3410f22e 2891 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2892 return -ENOBUFS;
2893 }
55737fda
SH
2894
2895 spin_lock(&net_family_lock);
190683a9
ED
2896 if (rcu_dereference_protected(net_families[ops->family],
2897 lockdep_is_held(&net_family_lock)))
55737fda
SH
2898 err = -EEXIST;
2899 else {
cf778b00 2900 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2901 err = 0;
2902 }
55737fda
SH
2903 spin_unlock(&net_family_lock);
2904
3410f22e 2905 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2906 return err;
2907}
c6d409cf 2908EXPORT_SYMBOL(sock_register);
1da177e4 2909
55737fda
SH
2910/**
2911 * sock_unregister - remove a protocol handler
2912 * @family: protocol family to remove
2913 *
1da177e4
LT
2914 * This function is called by a protocol handler that wants to
2915 * remove its address family, and have it unlinked from the
55737fda
SH
2916 * new socket creation.
2917 *
2918 * If protocol handler is a module, then it can use module reference
2919 * counts to protect against new references. If protocol handler is not
2920 * a module then it needs to provide its own protection in
2921 * the ops->create routine.
1da177e4 2922 */
f0fd27d4 2923void sock_unregister(int family)
1da177e4 2924{
f0fd27d4 2925 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2926
55737fda 2927 spin_lock(&net_family_lock);
a9b3cd7f 2928 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2929 spin_unlock(&net_family_lock);
2930
2931 synchronize_rcu();
2932
3410f22e 2933 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2934}
c6d409cf 2935EXPORT_SYMBOL(sock_unregister);
1da177e4 2936
bf2ae2e4
XL
2937bool sock_is_registered(int family)
2938{
66b51b0a 2939 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2940}
2941
77d76ea3 2942static int __init sock_init(void)
1da177e4 2943{
b3e19d92 2944 int err;
2ca794e5
EB
2945 /*
2946 * Initialize the network sysctl infrastructure.
2947 */
2948 err = net_sysctl_init();
2949 if (err)
2950 goto out;
b3e19d92 2951
1da177e4 2952 /*
89bddce5 2953 * Initialize skbuff SLAB cache
1da177e4
LT
2954 */
2955 skb_init();
1da177e4
LT
2956
2957 /*
89bddce5 2958 * Initialize the protocols module.
1da177e4
LT
2959 */
2960
2961 init_inodecache();
b3e19d92
NP
2962
2963 err = register_filesystem(&sock_fs_type);
2964 if (err)
2965 goto out_fs;
1da177e4 2966 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2967 if (IS_ERR(sock_mnt)) {
2968 err = PTR_ERR(sock_mnt);
2969 goto out_mount;
2970 }
77d76ea3
AK
2971
2972 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2973 */
2974
2975#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2976 err = netfilter_init();
2977 if (err)
2978 goto out;
1da177e4 2979#endif
cbeb321a 2980
408eccce 2981 ptp_classifier_init();
c1f19b51 2982
b3e19d92
NP
2983out:
2984 return err;
2985
2986out_mount:
2987 unregister_filesystem(&sock_fs_type);
2988out_fs:
2989 goto out;
1da177e4
LT
2990}
2991
77d76ea3
AK
2992core_initcall(sock_init); /* early initcall */
2993
1da177e4
LT
2994#ifdef CONFIG_PROC_FS
2995void socket_seq_show(struct seq_file *seq)
2996{
648845ab
TZ
2997 seq_printf(seq, "sockets: used %d\n",
2998 sock_inuse_get(seq->private));
1da177e4 2999}
89bddce5 3000#endif /* CONFIG_PROC_FS */
1da177e4 3001
89bbfc95 3002#ifdef CONFIG_COMPAT
36fd633e 3003static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3004{
6b96018b 3005 struct compat_ifconf ifc32;
7a229387 3006 struct ifconf ifc;
7a229387
AB
3007 int err;
3008
6b96018b 3009 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3010 return -EFAULT;
3011
36fd633e
AV
3012 ifc.ifc_len = ifc32.ifc_len;
3013 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3014
36fd633e
AV
3015 rtnl_lock();
3016 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3017 rtnl_unlock();
7a229387
AB
3018 if (err)
3019 return err;
3020
36fd633e 3021 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3022 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3023 return -EFAULT;
3024
3025 return 0;
3026}
3027
6b96018b 3028static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3029{
3a7da39d
BH
3030 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3031 bool convert_in = false, convert_out = false;
44c02a2c
AV
3032 size_t buf_size = 0;
3033 struct ethtool_rxnfc __user *rxnfc = NULL;
3034 struct ifreq ifr;
3a7da39d
BH
3035 u32 rule_cnt = 0, actual_rule_cnt;
3036 u32 ethcmd;
7a229387 3037 u32 data;
3a7da39d 3038 int ret;
7a229387 3039
3a7da39d
BH
3040 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3041 return -EFAULT;
7a229387 3042
3a7da39d
BH
3043 compat_rxnfc = compat_ptr(data);
3044
3045 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3046 return -EFAULT;
3047
3a7da39d
BH
3048 /* Most ethtool structures are defined without padding.
3049 * Unfortunately struct ethtool_rxnfc is an exception.
3050 */
3051 switch (ethcmd) {
3052 default:
3053 break;
3054 case ETHTOOL_GRXCLSRLALL:
3055 /* Buffer size is variable */
3056 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3057 return -EFAULT;
3058 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3059 return -ENOMEM;
3060 buf_size += rule_cnt * sizeof(u32);
3061 /* fall through */
3062 case ETHTOOL_GRXRINGS:
3063 case ETHTOOL_GRXCLSRLCNT:
3064 case ETHTOOL_GRXCLSRULE:
55664f32 3065 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3066 convert_out = true;
3067 /* fall through */
3068 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3069 buf_size += sizeof(struct ethtool_rxnfc);
3070 convert_in = true;
44c02a2c 3071 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3072 break;
3073 }
3074
44c02a2c 3075 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3076 return -EFAULT;
3077
44c02a2c 3078 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3079
3a7da39d 3080 if (convert_in) {
127fe533 3081 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3082 * fs.ring_cookie and at the end of fs, but nowhere else.
3083 */
127fe533
AD
3084 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3085 sizeof(compat_rxnfc->fs.m_ext) !=
3086 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3087 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3088 BUILD_BUG_ON(
3089 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3090 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3091 offsetof(struct ethtool_rxnfc, fs.location) -
3092 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3093
3094 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3095 (void __user *)(&rxnfc->fs.m_ext + 1) -
3096 (void __user *)rxnfc) ||
3a7da39d
BH
3097 copy_in_user(&rxnfc->fs.ring_cookie,
3098 &compat_rxnfc->fs.ring_cookie,
954b1244 3099 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3100 (void __user *)&rxnfc->fs.ring_cookie))
3101 return -EFAULT;
3102 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3103 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3104 return -EFAULT;
3105 } else if (copy_in_user(&rxnfc->rule_cnt,
3106 &compat_rxnfc->rule_cnt,
3107 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3108 return -EFAULT;
3109 }
3110
44c02a2c 3111 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3112 if (ret)
3113 return ret;
3114
3115 if (convert_out) {
3116 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3117 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3118 (const void __user *)rxnfc) ||
3a7da39d
BH
3119 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3120 &rxnfc->fs.ring_cookie,
954b1244
SH
3121 (const void __user *)(&rxnfc->fs.location + 1) -
3122 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3123 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3124 sizeof(rxnfc->rule_cnt)))
3125 return -EFAULT;
3126
3127 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3128 /* As an optimisation, we only copy the actual
3129 * number of rules that the underlying
3130 * function returned. Since Mallory might
3131 * change the rule count in user memory, we
3132 * check that it is less than the rule count
3133 * originally given (as the user buffer size),
3134 * which has been range-checked.
3135 */
3136 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3137 return -EFAULT;
3138 if (actual_rule_cnt < rule_cnt)
3139 rule_cnt = actual_rule_cnt;
3140 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3141 &rxnfc->rule_locs[0],
3142 rule_cnt * sizeof(u32)))
3143 return -EFAULT;
3144 }
3145 }
3146
3147 return 0;
7a229387
AB
3148}
3149
7a50a240
AB
3150static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3151{
7a50a240 3152 compat_uptr_t uptr32;
44c02a2c
AV
3153 struct ifreq ifr;
3154 void __user *saved;
3155 int err;
7a50a240 3156
44c02a2c 3157 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3158 return -EFAULT;
3159
3160 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3161 return -EFAULT;
3162
44c02a2c
AV
3163 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3164 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3165
44c02a2c
AV
3166 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3167 if (!err) {
3168 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3169 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3170 err = -EFAULT;
ccbd6a5a 3171 }
44c02a2c 3172 return err;
7a229387
AB
3173}
3174
590d4693
BH
3175/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3176static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3177 struct compat_ifreq __user *u_ifreq32)
7a229387 3178{
44c02a2c 3179 struct ifreq ifreq;
7a229387
AB
3180 u32 data32;
3181
44c02a2c 3182 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3183 return -EFAULT;
44c02a2c 3184 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3185 return -EFAULT;
44c02a2c 3186 ifreq.ifr_data = compat_ptr(data32);
7a229387 3187
44c02a2c 3188 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3189}
3190
37ac39bd
JB
3191static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3192 unsigned int cmd,
3193 struct compat_ifreq __user *uifr32)
3194{
3195 struct ifreq __user *uifr;
3196 int err;
3197
3198 /* Handle the fact that while struct ifreq has the same *layout* on
3199 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3200 * which are handled elsewhere, it still has different *size* due to
3201 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3202 * resulting in struct ifreq being 32 and 40 bytes respectively).
3203 * As a result, if the struct happens to be at the end of a page and
3204 * the next page isn't readable/writable, we get a fault. To prevent
3205 * that, copy back and forth to the full size.
3206 */
3207
3208 uifr = compat_alloc_user_space(sizeof(*uifr));
3209 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3210 return -EFAULT;
3211
3212 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3213
3214 if (!err) {
3215 switch (cmd) {
3216 case SIOCGIFFLAGS:
3217 case SIOCGIFMETRIC:
3218 case SIOCGIFMTU:
3219 case SIOCGIFMEM:
3220 case SIOCGIFHWADDR:
3221 case SIOCGIFINDEX:
3222 case SIOCGIFADDR:
3223 case SIOCGIFBRDADDR:
3224 case SIOCGIFDSTADDR:
3225 case SIOCGIFNETMASK:
3226 case SIOCGIFPFLAGS:
3227 case SIOCGIFTXQLEN:
3228 case SIOCGMIIPHY:
3229 case SIOCGMIIREG:
c6c9fee3 3230 case SIOCGIFNAME:
37ac39bd
JB
3231 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3232 err = -EFAULT;
3233 break;
3234 }
3235 }
3236 return err;
3237}
3238
a2116ed2
AB
3239static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3240 struct compat_ifreq __user *uifr32)
3241{
3242 struct ifreq ifr;
3243 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3244 int err;
3245
3246 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3247 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3248 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3249 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3250 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3251 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3252 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3253 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3254 if (err)
3255 return -EFAULT;
3256
44c02a2c 3257 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3258
3259 if (cmd == SIOCGIFMAP && !err) {
3260 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3261 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3262 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3263 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3264 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3265 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3266 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3267 if (err)
3268 err = -EFAULT;
3269 }
3270 return err;
3271}
3272
7a229387 3273struct rtentry32 {
c6d409cf 3274 u32 rt_pad1;
7a229387
AB
3275 struct sockaddr rt_dst; /* target address */
3276 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3277 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3278 unsigned short rt_flags;
3279 short rt_pad2;
3280 u32 rt_pad3;
3281 unsigned char rt_tos;
3282 unsigned char rt_class;
3283 short rt_pad4;
3284 short rt_metric; /* +1 for binary compatibility! */
7a229387 3285 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3286 u32 rt_mtu; /* per route MTU/Window */
3287 u32 rt_window; /* Window clamping */
7a229387
AB
3288 unsigned short rt_irtt; /* Initial RTT */
3289};
3290
3291struct in6_rtmsg32 {
3292 struct in6_addr rtmsg_dst;
3293 struct in6_addr rtmsg_src;
3294 struct in6_addr rtmsg_gateway;
3295 u32 rtmsg_type;
3296 u16 rtmsg_dst_len;
3297 u16 rtmsg_src_len;
3298 u32 rtmsg_metric;
3299 u32 rtmsg_info;
3300 u32 rtmsg_flags;
3301 s32 rtmsg_ifindex;
3302};
3303
6b96018b
AB
3304static int routing_ioctl(struct net *net, struct socket *sock,
3305 unsigned int cmd, void __user *argp)
7a229387
AB
3306{
3307 int ret;
3308 void *r = NULL;
3309 struct in6_rtmsg r6;
3310 struct rtentry r4;
3311 char devname[16];
3312 u32 rtdev;
3313 mm_segment_t old_fs = get_fs();
3314
6b96018b
AB
3315 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3316 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3317 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3318 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3319 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3320 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3321 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3322 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3323 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3324 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3325 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3326
3327 r = (void *) &r6;
3328 } else { /* ipv4 */
6b96018b 3329 struct rtentry32 __user *ur4 = argp;
c6d409cf 3330 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3331 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3332 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3333 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3334 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3335 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3336 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3337 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3338 if (rtdev) {
c6d409cf 3339 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3340 r4.rt_dev = (char __user __force *)devname;
3341 devname[15] = 0;
7a229387
AB
3342 } else
3343 r4.rt_dev = NULL;
3344
3345 r = (void *) &r4;
3346 }
3347
3348 if (ret) {
3349 ret = -EFAULT;
3350 goto out;
3351 }
3352
c6d409cf 3353 set_fs(KERNEL_DS);
63ff03ab 3354 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3355 set_fs(old_fs);
7a229387
AB
3356
3357out:
7a229387
AB
3358 return ret;
3359}
3360
3361/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3362 * for some operations; this forces use of the newer bridge-utils that
25985edc 3363 * use compatible ioctls
7a229387 3364 */
6b96018b 3365static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3366{
6b96018b 3367 compat_ulong_t tmp;
7a229387 3368
6b96018b 3369 if (get_user(tmp, argp))
7a229387
AB
3370 return -EFAULT;
3371 if (tmp == BRCTL_GET_VERSION)
3372 return BRCTL_VERSION + 1;
3373 return -EINVAL;
3374}
3375
6b96018b
AB
3376static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3377 unsigned int cmd, unsigned long arg)
3378{
3379 void __user *argp = compat_ptr(arg);
3380 struct sock *sk = sock->sk;
3381 struct net *net = sock_net(sk);
7a229387 3382
6b96018b 3383 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3384 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3385
3386 switch (cmd) {
3387 case SIOCSIFBR:
3388 case SIOCGIFBR:
3389 return old_bridge_ioctl(argp);
6b96018b 3390 case SIOCGIFCONF:
36fd633e 3391 return compat_dev_ifconf(net, argp);
6b96018b
AB
3392 case SIOCETHTOOL:
3393 return ethtool_ioctl(net, argp);
7a50a240
AB
3394 case SIOCWANDEV:
3395 return compat_siocwandev(net, argp);
a2116ed2
AB
3396 case SIOCGIFMAP:
3397 case SIOCSIFMAP:
3398 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3399 case SIOCADDRT:
3400 case SIOCDELRT:
3401 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3402 case SIOCGSTAMP_OLD:
3403 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3404 if (!sock->ops->gettstamp)
3405 return -ENOIOCTLCMD;
0768e170 3406 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3407 !COMPAT_USE_64BIT_TIME);
3408
590d4693
BH
3409 case SIOCBONDSLAVEINFOQUERY:
3410 case SIOCBONDINFOQUERY:
a2116ed2 3411 case SIOCSHWTSTAMP:
fd468c74 3412 case SIOCGHWTSTAMP:
590d4693 3413 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3414
3415 case FIOSETOWN:
3416 case SIOCSPGRP:
3417 case FIOGETOWN:
3418 case SIOCGPGRP:
3419 case SIOCBRADDBR:
3420 case SIOCBRDELBR:
3421 case SIOCGIFVLAN:
3422 case SIOCSIFVLAN:
3423 case SIOCADDDLCI:
3424 case SIOCDELDLCI:
c62cce2c 3425 case SIOCGSKNS:
0768e170
AB
3426 case SIOCGSTAMP_NEW:
3427 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3428 return sock_ioctl(file, cmd, arg);
3429
3430 case SIOCGIFFLAGS:
3431 case SIOCSIFFLAGS:
3432 case SIOCGIFMETRIC:
3433 case SIOCSIFMETRIC:
3434 case SIOCGIFMTU:
3435 case SIOCSIFMTU:
3436 case SIOCGIFMEM:
3437 case SIOCSIFMEM:
3438 case SIOCGIFHWADDR:
3439 case SIOCSIFHWADDR:
3440 case SIOCADDMULTI:
3441 case SIOCDELMULTI:
3442 case SIOCGIFINDEX:
6b96018b
AB
3443 case SIOCGIFADDR:
3444 case SIOCSIFADDR:
3445 case SIOCSIFHWBROADCAST:
6b96018b 3446 case SIOCDIFADDR:
6b96018b
AB
3447 case SIOCGIFBRDADDR:
3448 case SIOCSIFBRDADDR:
3449 case SIOCGIFDSTADDR:
3450 case SIOCSIFDSTADDR:
3451 case SIOCGIFNETMASK:
3452 case SIOCSIFNETMASK:
3453 case SIOCSIFPFLAGS:
3454 case SIOCGIFPFLAGS:
3455 case SIOCGIFTXQLEN:
3456 case SIOCSIFTXQLEN:
3457 case SIOCBRADDIF:
3458 case SIOCBRDELIF:
c6c9fee3 3459 case SIOCGIFNAME:
9177efd3
AB
3460 case SIOCSIFNAME:
3461 case SIOCGMIIPHY:
3462 case SIOCGMIIREG:
3463 case SIOCSMIIREG:
f92d4fc9
AV
3464 case SIOCBONDENSLAVE:
3465 case SIOCBONDRELEASE:
3466 case SIOCBONDSETHWADDR:
3467 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3468 return compat_ifreq_ioctl(net, sock, cmd, argp);
3469
6b96018b
AB
3470 case SIOCSARP:
3471 case SIOCGARP:
3472 case SIOCDARP:
6b96018b 3473 case SIOCATMARK:
63ff03ab 3474 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3475 }
3476
6b96018b
AB
3477 return -ENOIOCTLCMD;
3478}
7a229387 3479
95c96174 3480static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3481 unsigned long arg)
89bbfc95
SP
3482{
3483 struct socket *sock = file->private_data;
3484 int ret = -ENOIOCTLCMD;
87de87d5
DM
3485 struct sock *sk;
3486 struct net *net;
3487
3488 sk = sock->sk;
3489 net = sock_net(sk);
89bbfc95
SP
3490
3491 if (sock->ops->compat_ioctl)
3492 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3493
87de87d5
DM
3494 if (ret == -ENOIOCTLCMD &&
3495 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3496 ret = compat_wext_handle_ioctl(net, cmd, arg);
3497
6b96018b
AB
3498 if (ret == -ENOIOCTLCMD)
3499 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3500
89bbfc95
SP
3501 return ret;
3502}
3503#endif
3504
8a3c245c
PT
3505/**
3506 * kernel_bind - bind an address to a socket (kernel space)
3507 * @sock: socket
3508 * @addr: address
3509 * @addrlen: length of address
3510 *
3511 * Returns 0 or an error.
3512 */
3513
ac5a488e
SS
3514int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3515{
3516 return sock->ops->bind(sock, addr, addrlen);
3517}
c6d409cf 3518EXPORT_SYMBOL(kernel_bind);
ac5a488e 3519
8a3c245c
PT
3520/**
3521 * kernel_listen - move socket to listening state (kernel space)
3522 * @sock: socket
3523 * @backlog: pending connections queue size
3524 *
3525 * Returns 0 or an error.
3526 */
3527
ac5a488e
SS
3528int kernel_listen(struct socket *sock, int backlog)
3529{
3530 return sock->ops->listen(sock, backlog);
3531}
c6d409cf 3532EXPORT_SYMBOL(kernel_listen);
ac5a488e 3533
8a3c245c
PT
3534/**
3535 * kernel_accept - accept a connection (kernel space)
3536 * @sock: listening socket
3537 * @newsock: new connected socket
3538 * @flags: flags
3539 *
3540 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3541 * If it fails, @newsock is guaranteed to be %NULL.
3542 * Returns 0 or an error.
3543 */
3544
ac5a488e
SS
3545int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3546{
3547 struct sock *sk = sock->sk;
3548 int err;
3549
3550 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3551 newsock);
3552 if (err < 0)
3553 goto done;
3554
cdfbabfb 3555 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3556 if (err < 0) {
3557 sock_release(*newsock);
fa8705b0 3558 *newsock = NULL;
ac5a488e
SS
3559 goto done;
3560 }
3561
3562 (*newsock)->ops = sock->ops;
1b08534e 3563 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3564
3565done:
3566 return err;
3567}
c6d409cf 3568EXPORT_SYMBOL(kernel_accept);
ac5a488e 3569
8a3c245c
PT
3570/**
3571 * kernel_connect - connect a socket (kernel space)
3572 * @sock: socket
3573 * @addr: address
3574 * @addrlen: address length
3575 * @flags: flags (O_NONBLOCK, ...)
3576 *
3577 * For datagram sockets, @addr is the addres to which datagrams are sent
3578 * by default, and the only address from which datagrams are received.
3579 * For stream sockets, attempts to connect to @addr.
3580 * Returns 0 or an error code.
3581 */
3582
ac5a488e 3583int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3584 int flags)
ac5a488e
SS
3585{
3586 return sock->ops->connect(sock, addr, addrlen, flags);
3587}
c6d409cf 3588EXPORT_SYMBOL(kernel_connect);
ac5a488e 3589
8a3c245c
PT
3590/**
3591 * kernel_getsockname - get the address which the socket is bound (kernel space)
3592 * @sock: socket
3593 * @addr: address holder
3594 *
3595 * Fills the @addr pointer with the address which the socket is bound.
3596 * Returns 0 or an error code.
3597 */
3598
9b2c45d4 3599int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3600{
9b2c45d4 3601 return sock->ops->getname(sock, addr, 0);
ac5a488e 3602}
c6d409cf 3603EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3604
8a3c245c
PT
3605/**
3606 * kernel_peername - get the address which the socket is connected (kernel space)
3607 * @sock: socket
3608 * @addr: address holder
3609 *
3610 * Fills the @addr pointer with the address which the socket is connected.
3611 * Returns 0 or an error code.
3612 */
3613
9b2c45d4 3614int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3615{
9b2c45d4 3616 return sock->ops->getname(sock, addr, 1);
ac5a488e 3617}
c6d409cf 3618EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3619
8a3c245c
PT
3620/**
3621 * kernel_getsockopt - get a socket option (kernel space)
3622 * @sock: socket
3623 * @level: API level (SOL_SOCKET, ...)
3624 * @optname: option tag
3625 * @optval: option value
3626 * @optlen: option length
3627 *
3628 * Assigns the option length to @optlen.
3629 * Returns 0 or an error.
3630 */
3631
ac5a488e
SS
3632int kernel_getsockopt(struct socket *sock, int level, int optname,
3633 char *optval, int *optlen)
3634{
3635 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3636 char __user *uoptval;
3637 int __user *uoptlen;
ac5a488e
SS
3638 int err;
3639
fb8621bb
NK
3640 uoptval = (char __user __force *) optval;
3641 uoptlen = (int __user __force *) optlen;
3642
ac5a488e
SS
3643 set_fs(KERNEL_DS);
3644 if (level == SOL_SOCKET)
fb8621bb 3645 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3646 else
fb8621bb
NK
3647 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3648 uoptlen);
ac5a488e
SS
3649 set_fs(oldfs);
3650 return err;
3651}
c6d409cf 3652EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3653
8a3c245c
PT
3654/**
3655 * kernel_setsockopt - set a socket option (kernel space)
3656 * @sock: socket
3657 * @level: API level (SOL_SOCKET, ...)
3658 * @optname: option tag
3659 * @optval: option value
3660 * @optlen: option length
3661 *
3662 * Returns 0 or an error.
3663 */
3664
ac5a488e 3665int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3666 char *optval, unsigned int optlen)
ac5a488e
SS
3667{
3668 mm_segment_t oldfs = get_fs();
fb8621bb 3669 char __user *uoptval;
ac5a488e
SS
3670 int err;
3671
fb8621bb
NK
3672 uoptval = (char __user __force *) optval;
3673
ac5a488e
SS
3674 set_fs(KERNEL_DS);
3675 if (level == SOL_SOCKET)
fb8621bb 3676 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3677 else
fb8621bb 3678 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3679 optlen);
3680 set_fs(oldfs);
3681 return err;
3682}
c6d409cf 3683EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3684
8a3c245c
PT
3685/**
3686 * kernel_sendpage - send a &page through a socket (kernel space)
3687 * @sock: socket
3688 * @page: page
3689 * @offset: page offset
3690 * @size: total size in bytes
3691 * @flags: flags (MSG_DONTWAIT, ...)
3692 *
3693 * Returns the total amount sent in bytes or an error.
3694 */
3695
ac5a488e
SS
3696int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3697 size_t size, int flags)
3698{
3699 if (sock->ops->sendpage)
3700 return sock->ops->sendpage(sock, page, offset, size, flags);
3701
3702 return sock_no_sendpage(sock, page, offset, size, flags);
3703}
c6d409cf 3704EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3705
8a3c245c
PT
3706/**
3707 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3708 * @sk: sock
3709 * @page: page
3710 * @offset: page offset
3711 * @size: total size in bytes
3712 * @flags: flags (MSG_DONTWAIT, ...)
3713 *
3714 * Returns the total amount sent in bytes or an error.
3715 * Caller must hold @sk.
3716 */
3717
306b13eb
TH
3718int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3719 size_t size, int flags)
3720{
3721 struct socket *sock = sk->sk_socket;
3722
3723 if (sock->ops->sendpage_locked)
3724 return sock->ops->sendpage_locked(sk, page, offset, size,
3725 flags);
3726
3727 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3728}
3729EXPORT_SYMBOL(kernel_sendpage_locked);
3730
8a3c245c
PT
3731/**
3732 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3733 * @sock: socket
3734 * @how: connection part
3735 *
3736 * Returns 0 or an error.
3737 */
3738
91cf45f0
TM
3739int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3740{
3741 return sock->ops->shutdown(sock, how);
3742}
91cf45f0 3743EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3744
8a3c245c
PT
3745/**
3746 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3747 * @sk: socket
3748 *
3749 * This routine returns the IP overhead imposed by a socket i.e.
3750 * the length of the underlying IP header, depending on whether
3751 * this is an IPv4 or IPv6 socket and the length from IP options turned
3752 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3753 */
8a3c245c 3754
113c3075
P
3755u32 kernel_sock_ip_overhead(struct sock *sk)
3756{
3757 struct inet_sock *inet;
3758 struct ip_options_rcu *opt;
3759 u32 overhead = 0;
113c3075
P
3760#if IS_ENABLED(CONFIG_IPV6)
3761 struct ipv6_pinfo *np;
3762 struct ipv6_txoptions *optv6 = NULL;
3763#endif /* IS_ENABLED(CONFIG_IPV6) */
3764
3765 if (!sk)
3766 return overhead;
3767
113c3075
P
3768 switch (sk->sk_family) {
3769 case AF_INET:
3770 inet = inet_sk(sk);
3771 overhead += sizeof(struct iphdr);
3772 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3773 sock_owned_by_user(sk));
113c3075
P
3774 if (opt)
3775 overhead += opt->opt.optlen;
3776 return overhead;
3777#if IS_ENABLED(CONFIG_IPV6)
3778 case AF_INET6:
3779 np = inet6_sk(sk);
3780 overhead += sizeof(struct ipv6hdr);
3781 if (np)
3782 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3783 sock_owned_by_user(sk));
113c3075
P
3784 if (optv6)
3785 overhead += (optv6->opt_flen + optv6->opt_nflen);
3786 return overhead;
3787#endif /* IS_ENABLED(CONFIG_IPV6) */
3788 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3789 return overhead;
3790 }
3791}
3792EXPORT_SYMBOL(kernel_sock_ip_overhead);