]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/socket.c
net: stmmac: Set dma ring length before enabling the DMA
[thirdparty/kernel/stable.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
45c8178c 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b
AB
107#include <linux/sockios.h>
108#include <linux/atalk.h>
076bb0c8 109#include <net/busy_poll.h>
f24b9be5 110#include <linux/errqueue.h>
06021292 111
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll settings, only built when CONFIG_NET_RX_BUSY_POLL is enabled.
 * Both start out as zero (static storage).
 * NOTE(review): presumably exported through sysctl entries defined
 * elsewhere - confirm before relying on it.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 116
8ae5e030
AV
117static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
118static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
8ae5e030
AV
144 .read_iter = sock_read_iter,
145 .write_iter = sock_write_iter,
1da177e4
LT
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4 151 .mmap = sock_mmap,
1da177e4
LT
152 .release = sock_close,
153 .fasync = sock_fasync,
5274f052
JA
154 .sendpage = sock_sendpage,
155 .splice_write = generic_splice_sendpage,
9c55e01c 156 .splice_read = sock_splice_read,
1da177e4
LT
157};
158
159/*
160 * The protocol list. Each protocol is registered in here.
161 */
162
1da177e4 163static DEFINE_SPINLOCK(net_family_lock);
190683a9 164static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 165
1da177e4
LT
166/*
167 * Statistics counters of the socket lists
168 */
169
c6d409cf 170static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
171
172/*
89bddce5
SH
173 * Support routines.
174 * Move socket addresses back and forth across the kernel/user
175 * divide and look after the messy bits.
1da177e4
LT
176 */
177
1da177e4
LT
178/**
179 * move_addr_to_kernel - copy a socket address into kernel space
180 * @uaddr: Address in user space
181 * @kaddr: Address in kernel space
182 * @ulen: Length in user space
183 *
184 * The address is copied into kernel space. If the provided address is
185 * too long an error code of -EINVAL is returned. If the copy gives
186 * invalid addresses -EFAULT is returned. On a success 0 is returned.
187 */
188
43db362d 189int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 190{
230b1839 191 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 192 return -EINVAL;
89bddce5 193 if (ulen == 0)
1da177e4 194 return 0;
89bddce5 195 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 196 return -EFAULT;
3ec3b2fb 197 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
198}
199
200/**
201 * move_addr_to_user - copy an address to user space
202 * @kaddr: kernel space address
203 * @klen: length of address in kernel
204 * @uaddr: user space address
205 * @ulen: pointer to user length field
206 *
207 * The value pointed to by ulen on entry is the buffer length available.
208 * This is overwritten with the buffer space used. -EINVAL is returned
209 * if an overlong buffer is specified or a negative buffer size. -EFAULT
210 * is returned if either the buffer or the length field are not
211 * accessible.
212 * After copying the data up to the limit the user specifies, the true
213 * length of the data is written over the length limit the user
214 * specified. Zero is returned for a success.
215 */
89bddce5 216
43db362d 217static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 218 void __user *uaddr, int __user *ulen)
1da177e4
LT
219{
220 int err;
221 int len;
222
68c6beb3 223 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
224 err = get_user(len, ulen);
225 if (err)
1da177e4 226 return err;
89bddce5
SH
227 if (len > klen)
228 len = klen;
68c6beb3 229 if (len < 0)
1da177e4 230 return -EINVAL;
89bddce5 231 if (len) {
d6fe3945
SG
232 if (audit_sockaddr(klen, kaddr))
233 return -ENOMEM;
89bddce5 234 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
235 return -EFAULT;
236 }
237 /*
89bddce5
SH
238 * "fromlen shall refer to the value before truncation.."
239 * 1003.1g
1da177e4
LT
240 */
241 return __put_user(klen, ulen);
242}
243
e18b890b 244static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
245
246static struct inode *sock_alloc_inode(struct super_block *sb)
247{
248 struct socket_alloc *ei;
eaefd110 249 struct socket_wq *wq;
89bddce5 250
e94b1766 251 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
252 if (!ei)
253 return NULL;
eaefd110
ED
254 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
255 if (!wq) {
43815482
ED
256 kmem_cache_free(sock_inode_cachep, ei);
257 return NULL;
258 }
eaefd110
ED
259 init_waitqueue_head(&wq->wait);
260 wq->fasync_list = NULL;
574aab1e 261 wq->flags = 0;
eaefd110 262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1e911632 291static void init_inodecache(void)
1da177e4
LT
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
5d097056 298 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 299 init_once);
1e911632 300 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
301}
302
b87221de 303static const struct super_operations sockfs_ops = {
c6d409cf
ED
304 .alloc_inode = sock_alloc_inode,
305 .destroy_inode = sock_destroy_inode,
306 .statfs = simple_statfs,
1da177e4
LT
307};
308
c23fbb6b
ED
309/*
310 * sockfs_dname() is called from d_path().
311 */
312static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
313{
314 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 315 d_inode(dentry)->i_ino);
c23fbb6b
ED
316}
317
3ba13d17 318static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 319 .d_dname = sockfs_dname,
1da177e4
LT
320};
321
bba0bd31
AG
322static int sockfs_xattr_get(const struct xattr_handler *handler,
323 struct dentry *dentry, struct inode *inode,
324 const char *suffix, void *value, size_t size)
325{
326 if (value) {
327 if (dentry->d_name.len + 1 > size)
328 return -ERANGE;
329 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
330 }
331 return dentry->d_name.len + 1;
332}
333
334#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
335#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
336#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
337
338static const struct xattr_handler sockfs_xattr_handler = {
339 .name = XATTR_NAME_SOCKPROTONAME,
340 .get = sockfs_xattr_get,
341};
342
4a590153
AG
343static int sockfs_security_xattr_set(const struct xattr_handler *handler,
344 struct dentry *dentry, struct inode *inode,
345 const char *suffix, const void *value,
346 size_t size, int flags)
347{
348 /* Handled by LSM. */
349 return -EAGAIN;
350}
351
352static const struct xattr_handler sockfs_security_xattr_handler = {
353 .prefix = XATTR_SECURITY_PREFIX,
354 .set = sockfs_security_xattr_set,
355};
356
bba0bd31
AG
357static const struct xattr_handler *sockfs_xattr_handlers[] = {
358 &sockfs_xattr_handler,
4a590153 359 &sockfs_security_xattr_handler,
bba0bd31
AG
360 NULL
361};
362
c74a1cbb
AV
363static struct dentry *sockfs_mount(struct file_system_type *fs_type,
364 int flags, const char *dev_name, void *data)
365{
bba0bd31
AG
366 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
367 sockfs_xattr_handlers,
368 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
369}
370
371static struct vfsmount *sock_mnt __read_mostly;
372
373static struct file_system_type sock_fs_type = {
374 .name = "sockfs",
375 .mount = sockfs_mount,
376 .kill_sb = kill_anon_super,
377};
378
1da177e4
LT
379/*
380 * Obtains the first available file descriptor and sets it up for use.
381 *
39d8c1b6
DM
382 * These functions create file structures and maps them to fd space
383 * of the current process. On success it returns file descriptor
1da177e4
LT
384 * and file struct implicitly stored in sock->file.
385 * Note that another thread may close file descriptor before we return
386 * from this function. We use the fact that now we do not refer
387 * to socket after mapping. If one day we will need it, this
388 * function will increment ref. count on file by 1.
389 *
390 * In any case returned fd MAY BE not valid!
391 * This race condition is unavoidable
392 * with shared fd spaces, we cannot solve it inside kernel,
393 * but we take care of internal coherence yet.
394 */
395
aab174f0 396struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 397{
7cbe66b6 398 struct qstr name = { .name = "" };
2c48b9c4 399 struct path path;
7cbe66b6 400 struct file *file;
1da177e4 401
600e1779
MY
402 if (dname) {
403 name.name = dname;
404 name.len = strlen(name.name);
405 } else if (sock->sk) {
406 name.name = sock->sk->sk_prot_creator->name;
407 name.len = strlen(name.name);
408 }
4b936885 409 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
410 if (unlikely(!path.dentry))
411 return ERR_PTR(-ENOMEM);
2c48b9c4 412 path.mnt = mntget(sock_mnt);
39d8c1b6 413
2c48b9c4 414 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 415
2c48b9c4 416 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 417 &socket_file_ops);
b5ffe634 418 if (IS_ERR(file)) {
cc3808f8 419 /* drop dentry, keep inode */
c5ef6035 420 ihold(d_inode(path.dentry));
2c48b9c4 421 path_put(&path);
39b65252 422 return file;
cc3808f8
AV
423 }
424
425 sock->file = file;
77d27200 426 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 427 file->private_data = sock;
28407630 428 return file;
39d8c1b6 429}
56b31d1c 430EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 431
56b31d1c 432static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
433{
434 struct file *newfile;
28407630
AV
435 int fd = get_unused_fd_flags(flags);
436 if (unlikely(fd < 0))
437 return fd;
39d8c1b6 438
aab174f0 439 newfile = sock_alloc_file(sock, flags, NULL);
28407630 440 if (likely(!IS_ERR(newfile))) {
39d8c1b6 441 fd_install(fd, newfile);
28407630
AV
442 return fd;
443 }
7cbe66b6 444
28407630
AV
445 put_unused_fd(fd);
446 return PTR_ERR(newfile);
1da177e4
LT
447}
448
406a3c63 449struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 450{
6cb153ca
BL
451 if (file->f_op == &socket_file_ops)
452 return file->private_data; /* set in sock_map_fd */
453
23bb80d2
ED
454 *err = -ENOTSOCK;
455 return NULL;
6cb153ca 456}
406a3c63 457EXPORT_SYMBOL(sock_from_file);
6cb153ca 458
1da177e4 459/**
c6d409cf 460 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
461 * @fd: file handle
462 * @err: pointer to an error code return
463 *
464 * The file handle passed in is locked and the socket it is bound
241c4667 465 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
466 * with a negative errno code and NULL is returned. The function checks
467 * for both invalid handles and passing a handle which is not a socket.
468 *
469 * On a success the socket object pointer is returned.
470 */
471
472struct socket *sockfd_lookup(int fd, int *err)
473{
474 struct file *file;
1da177e4
LT
475 struct socket *sock;
476
89bddce5
SH
477 file = fget(fd);
478 if (!file) {
1da177e4
LT
479 *err = -EBADF;
480 return NULL;
481 }
89bddce5 482
6cb153ca
BL
483 sock = sock_from_file(file, err);
484 if (!sock)
1da177e4 485 fput(file);
6cb153ca
BL
486 return sock;
487}
c6d409cf 488EXPORT_SYMBOL(sockfd_lookup);
1da177e4 489
6cb153ca
BL
490static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
491{
00e188ef 492 struct fd f = fdget(fd);
6cb153ca
BL
493 struct socket *sock;
494
3672558c 495 *err = -EBADF;
00e188ef
AV
496 if (f.file) {
497 sock = sock_from_file(f.file, err);
498 if (likely(sock)) {
499 *fput_needed = f.flags;
6cb153ca 500 return sock;
00e188ef
AV
501 }
502 fdput(f);
1da177e4 503 }
6cb153ca 504 return NULL;
1da177e4
LT
505}
506
600e1779
MY
507static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
508 size_t size)
509{
510 ssize_t len;
511 ssize_t used = 0;
512
c5ef6035 513 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
514 if (len < 0)
515 return len;
516 used += len;
517 if (buffer) {
518 if (size < used)
519 return -ERANGE;
520 buffer += len;
521 }
522
523 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
524 used += len;
525 if (buffer) {
526 if (size < used)
527 return -ERANGE;
528 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
529 buffer += len;
530 }
531
532 return used;
533}
534
dc647ec8 535static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
536{
537 int err = simple_setattr(dentry, iattr);
538
e1a3a60a 539 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
540 struct socket *sock = SOCKET_I(d_inode(dentry));
541
91717ffc
CW
542 if (sock->sk)
543 sock->sk->sk_uid = iattr->ia_uid;
544 else
545 err = -ENOENT;
86741ec2
LC
546 }
547
548 return err;
549}
550
600e1779 551static const struct inode_operations sockfs_inode_ops = {
600e1779 552 .listxattr = sockfs_listxattr,
86741ec2 553 .setattr = sockfs_setattr,
600e1779
MY
554};
555
1da177e4
LT
556/**
557 * sock_alloc - allocate a socket
89bddce5 558 *
1da177e4
LT
559 * Allocate a new inode and socket object. The two are bound together
560 * and initialised. The socket is then returned. If we are out of inodes
561 * NULL is returned.
562 */
563
f4a00aac 564struct socket *sock_alloc(void)
1da177e4 565{
89bddce5
SH
566 struct inode *inode;
567 struct socket *sock;
1da177e4 568
a209dfc7 569 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
570 if (!inode)
571 return NULL;
572
573 sock = SOCKET_I(inode);
574
85fe4025 575 inode->i_ino = get_next_ino();
89bddce5 576 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
577 inode->i_uid = current_fsuid();
578 inode->i_gid = current_fsgid();
600e1779 579 inode->i_op = &sockfs_inode_ops;
1da177e4 580
19e8d69c 581 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
582 return sock;
583}
f4a00aac 584EXPORT_SYMBOL(sock_alloc);
1da177e4 585
1da177e4
LT
586/**
587 * sock_release - close a socket
588 * @sock: socket to close
589 *
590 * The socket is released from the protocol stack if it has a release
591 * callback, and the inode is then released if the socket is bound to
89bddce5 592 * an inode not a file.
1da177e4 593 */
89bddce5 594
91717ffc 595static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
596{
597 if (sock->ops) {
598 struct module *owner = sock->ops->owner;
599
91717ffc
CW
600 if (inode)
601 inode_lock(inode);
1da177e4 602 sock->ops->release(sock);
e5e8350d 603 sock->sk = NULL;
91717ffc
CW
604 if (inode)
605 inode_unlock(inode);
1da177e4
LT
606 sock->ops = NULL;
607 module_put(owner);
608 }
609
eaefd110 610 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 611 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 612
19e8d69c 613 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
614 if (!sock->file) {
615 iput(SOCK_INODE(sock));
616 return;
617 }
89bddce5 618 sock->file = NULL;
1da177e4 619}
91717ffc
CW
620
621void sock_release(struct socket *sock)
622{
623 __sock_release(sock, NULL);
624}
c6d409cf 625EXPORT_SYMBOL(sock_release);
1da177e4 626
c14ac945 627void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 628{
140c55d4
ED
629 u8 flags = *tx_flags;
630
c14ac945 631 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
632 flags |= SKBTX_HW_TSTAMP;
633
c14ac945 634 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
635 flags |= SKBTX_SW_TSTAMP;
636
c14ac945 637 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
638 flags |= SKBTX_SCHED_TSTAMP;
639
140c55d4 640 *tx_flags = flags;
20d49473 641}
67cc0d40 642EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 643
d8725c86 644static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 645{
01e97e65 646 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
647 BUG_ON(ret == -EIOCBQUEUED);
648 return ret;
1da177e4
LT
649}
650
/*
 * Send @msg on @sock. The security hook is consulted first; a non-zero
 * answer from it is returned and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
659
660int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
661 struct kvec *vec, size_t num, size_t size)
662{
6aa24814 663 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 664 return sock_sendmsg(sock, msg);
1da177e4 665}
c6d409cf 666EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 667
306b13eb
TH
668int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
669 struct kvec *vec, size_t num, size_t size)
670{
671 struct socket *sock = sk->sk_socket;
672
673 if (!sock->ops->sendmsg_locked)
db5980d8 674 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
675
676 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
677
678 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
679}
680EXPORT_SYMBOL(kernel_sendmsg_locked);
681
8605330a
SHY
682static bool skb_is_err_queue(const struct sk_buff *skb)
683{
684 /* pkt_type of skbs enqueued on the error queue are set to
685 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
686 * in recvmsg, since skbs received on a local socket will never
687 * have a pkt_type of PACKET_OUTGOING.
688 */
689 return skb->pkt_type == PACKET_OUTGOING;
690}
691
b50a5c70
ML
692/* On transmit, software and hardware timestamps are returned independently.
693 * As the two skb clones share the hardware timestamp, which may be updated
694 * before the software timestamp is received, a hardware TX timestamp may be
695 * returned only if there is no software TX timestamp. Ignore false software
696 * timestamps, which may be made in the __sock_recv_timestamp() call when the
697 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
698 * hardware timestamp.
699 */
700static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
701{
702 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
703}
704
aad9c8c4
ML
705static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
706{
707 struct scm_ts_pktinfo ts_pktinfo;
708 struct net_device *orig_dev;
709
710 if (!skb_mac_header_was_set(skb))
711 return;
712
713 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
714
715 rcu_read_lock();
716 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
717 if (orig_dev)
718 ts_pktinfo.if_index = orig_dev->ifindex;
719 rcu_read_unlock();
720
721 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
722 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
723 sizeof(ts_pktinfo), &ts_pktinfo);
724}
725
92f37fd2
ED
726/*
727 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
728 */
729void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
730 struct sk_buff *skb)
731{
20d49473 732 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 733 struct scm_timestamping tss;
b50a5c70 734 int empty = 1, false_tstamp = 0;
20d49473
PO
735 struct skb_shared_hwtstamps *shhwtstamps =
736 skb_hwtstamps(skb);
737
738 /* Race occurred between timestamp enabling and packet
739 receiving. Fill in the current time for now. */
b50a5c70 740 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 741 __net_timestamp(skb);
b50a5c70
ML
742 false_tstamp = 1;
743 }
20d49473
PO
744
745 if (need_software_tstamp) {
746 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
747 struct timeval tv;
748 skb_get_timestamp(skb, &tv);
749 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
750 sizeof(tv), &tv);
751 } else {
f24b9be5
WB
752 struct timespec ts;
753 skb_get_timestampns(skb, &ts);
20d49473 754 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 755 sizeof(ts), &ts);
20d49473
PO
756 }
757 }
758
f24b9be5 759 memset(&tss, 0, sizeof(tss));
c199105d 760 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 761 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 762 empty = 0;
4d276eb6 763 if (shhwtstamps &&
b9f40e21 764 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 765 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 766 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 767 empty = 0;
aad9c8c4
ML
768 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
769 !skb_is_err_queue(skb))
770 put_ts_pktinfo(msg, skb);
771 }
1c885808 772 if (!empty) {
20d49473 773 put_cmsg(msg, SOL_SOCKET,
f24b9be5 774 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 775
8605330a 776 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 777 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
778 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
779 skb->len, skb->data);
780 }
92f37fd2 781}
7c81fd8b
ACM
782EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
783
6e3e939f
JB
784void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
785 struct sk_buff *skb)
786{
787 int ack;
788
789 if (!sock_flag(sk, SOCK_WIFI_STATUS))
790 return;
791 if (!skb->wifi_acked_valid)
792 return;
793
794 ack = skb->wifi_acked;
795
796 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
797}
798EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
799
11165f14 800static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
801 struct sk_buff *skb)
3b885787 802{
744d5a3e 803 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 804 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 805 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
806}
807
/*
 * Convenience wrapper: emit the timestamp control messages first and the
 * drop counter control message second.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 815
1b784140 816static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 817 int flags)
1da177e4 818{
2da62906 819 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
820}
821
/*
 * Receive into @msg from @sock. The security hook is consulted first; a
 * non-zero answer from it is returned and nothing is received.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 829
c1249c0a
ML
830/**
831 * kernel_recvmsg - Receive a message from a socket (kernel space)
832 * @sock: The socket to receive the message from
833 * @msg: Received message
834 * @vec: Input s/g array for message data
835 * @num: Size of input s/g array
836 * @size: Number of bytes to read
837 * @flags: Message flags (MSG_DONTWAIT, etc...)
838 *
839 * On return the msg structure contains the scatter/gather array passed in the
840 * vec argument. The array is modified so that it consists of the unfilled
841 * portion of the original array.
842 *
843 * The returned value is the total number of bytes received, or an error.
844 */
89bddce5
SH
845int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
846 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
847{
848 mm_segment_t oldfs = get_fs();
849 int result;
850
6aa24814 851 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 852 set_fs(KERNEL_DS);
2da62906 853 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
854 set_fs(oldfs);
855 return result;
856}
c6d409cf 857EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 858
ce1d4d3e
CH
859static ssize_t sock_sendpage(struct file *file, struct page *page,
860 int offset, size_t size, loff_t *ppos, int more)
1da177e4 861{
1da177e4
LT
862 struct socket *sock;
863 int flags;
864
ce1d4d3e
CH
865 sock = file->private_data;
866
35f9c09f
ED
867 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
869 flags |= more;
ce1d4d3e 870
e6949583 871 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 872}
1da177e4 873
9c55e01c 874static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 875 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
876 unsigned int flags)
877{
878 struct socket *sock = file->private_data;
879
997b37da
RDC
880 if (unlikely(!sock->ops->splice_read))
881 return -EINVAL;
882
9c55e01c
JA
883 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
884}
885
8ae5e030 886static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 887{
6d652330
AV
888 struct file *file = iocb->ki_filp;
889 struct socket *sock = file->private_data;
0345f931 890 struct msghdr msg = {.msg_iter = *to,
891 .msg_iocb = iocb};
8ae5e030 892 ssize_t res;
ce1d4d3e 893
8ae5e030
AV
894 if (file->f_flags & O_NONBLOCK)
895 msg.msg_flags = MSG_DONTWAIT;
896
897 if (iocb->ki_pos != 0)
1da177e4 898 return -ESPIPE;
027445c3 899
66ee59af 900 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
901 return 0;
902
2da62906 903 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
904 *to = msg.msg_iter;
905 return res;
1da177e4
LT
906}
907
8ae5e030 908static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 909{
6d652330
AV
910 struct file *file = iocb->ki_filp;
911 struct socket *sock = file->private_data;
0345f931 912 struct msghdr msg = {.msg_iter = *from,
913 .msg_iocb = iocb};
8ae5e030 914 ssize_t res;
1da177e4 915
8ae5e030 916 if (iocb->ki_pos != 0)
ce1d4d3e 917 return -ESPIPE;
027445c3 918
8ae5e030
AV
919 if (file->f_flags & O_NONBLOCK)
920 msg.msg_flags = MSG_DONTWAIT;
921
6d652330
AV
922 if (sock->type == SOCK_SEQPACKET)
923 msg.msg_flags |= MSG_EOR;
924
d8725c86 925 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
926 *from = msg.msg_iter;
927 return res;
1da177e4
LT
928}
929
1da177e4
LT
930/*
931 * Atomic setting of ioctl hooks to avoid race
932 * with module unload.
933 */
934
4a3e2f71 935static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 936static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 937
881d966b 938void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 939{
4a3e2f71 940 mutex_lock(&br_ioctl_mutex);
1da177e4 941 br_ioctl_hook = hook;
4a3e2f71 942 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
943}
944EXPORT_SYMBOL(brioctl_set);
945
4a3e2f71 946static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 947static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 948
881d966b 949void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 950{
4a3e2f71 951 mutex_lock(&vlan_ioctl_mutex);
1da177e4 952 vlan_ioctl_hook = hook;
4a3e2f71 953 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
954}
955EXPORT_SYMBOL(vlan_ioctl_set);
956
4a3e2f71 957static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 958static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 959
89bddce5 960void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 961{
4a3e2f71 962 mutex_lock(&dlci_ioctl_mutex);
1da177e4 963 dlci_ioctl_hook = hook;
4a3e2f71 964 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
965}
966EXPORT_SYMBOL(dlci_ioctl_set);
967
6b96018b
AB
968static long sock_do_ioctl(struct net *net, struct socket *sock,
969 unsigned int cmd, unsigned long arg)
970{
971 int err;
972 void __user *argp = (void __user *)arg;
973
974 err = sock->ops->ioctl(sock, cmd, arg);
975
976 /*
977 * If this ioctl is unknown try to hand it down
978 * to the NIC driver.
979 */
980 if (err == -ENOIOCTLCMD)
981 err = dev_ioctl(net, cmd, argp);
982
983 return err;
984}
985
1da177e4
LT
986/*
987 * With an ioctl, arg may well be a user mode pointer, but we don't know
988 * what to do with it - that's up to the protocol still.
989 */
990
c62cce2c
AV
991static struct ns_common *get_net_ns(struct ns_common *ns)
992{
993 return &get_net(container_of(ns, struct net, ns))->ns;
994}
995
1da177e4
LT
996static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
997{
998 struct socket *sock;
881d966b 999 struct sock *sk;
1da177e4
LT
1000 void __user *argp = (void __user *)arg;
1001 int pid, err;
881d966b 1002 struct net *net;
1da177e4 1003
b69aee04 1004 sock = file->private_data;
881d966b 1005 sk = sock->sk;
3b1e0a65 1006 net = sock_net(sk);
1da177e4 1007 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1008 err = dev_ioctl(net, cmd, argp);
1da177e4 1009 } else
3d23e349 1010#ifdef CONFIG_WEXT_CORE
1da177e4 1011 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1012 err = dev_ioctl(net, cmd, argp);
1da177e4 1013 } else
3d23e349 1014#endif
89bddce5 1015 switch (cmd) {
1da177e4
LT
1016 case FIOSETOWN:
1017 case SIOCSPGRP:
1018 err = -EFAULT;
1019 if (get_user(pid, (int __user *)argp))
1020 break;
393cc3f5 1021 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1022 break;
1023 case FIOGETOWN:
1024 case SIOCGPGRP:
609d7fa9 1025 err = put_user(f_getown(sock->file),
89bddce5 1026 (int __user *)argp);
1da177e4
LT
1027 break;
1028 case SIOCGIFBR:
1029 case SIOCSIFBR:
1030 case SIOCBRADDBR:
1031 case SIOCBRDELBR:
1032 err = -ENOPKG;
1033 if (!br_ioctl_hook)
1034 request_module("bridge");
1035
4a3e2f71 1036 mutex_lock(&br_ioctl_mutex);
89bddce5 1037 if (br_ioctl_hook)
881d966b 1038 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1039 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1040 break;
1041 case SIOCGIFVLAN:
1042 case SIOCSIFVLAN:
1043 err = -ENOPKG;
1044 if (!vlan_ioctl_hook)
1045 request_module("8021q");
1046
4a3e2f71 1047 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1048 if (vlan_ioctl_hook)
881d966b 1049 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1050 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1051 break;
1da177e4
LT
1052 case SIOCADDDLCI:
1053 case SIOCDELDLCI:
1054 err = -ENOPKG;
1055 if (!dlci_ioctl_hook)
1056 request_module("dlci");
1057
7512cbf6
PE
1058 mutex_lock(&dlci_ioctl_mutex);
1059 if (dlci_ioctl_hook)
1da177e4 1060 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1061 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1062 break;
c62cce2c
AV
1063 case SIOCGSKNS:
1064 err = -EPERM;
1065 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1066 break;
1067
1068 err = open_related_ns(&net->ns, get_net_ns);
1069 break;
1da177e4 1070 default:
6b96018b 1071 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1072 break;
89bddce5 1073 }
1da177e4
LT
1074 return err;
1075}
1076
1077int sock_create_lite(int family, int type, int protocol, struct socket **res)
1078{
1079 int err;
1080 struct socket *sock = NULL;
89bddce5 1081
1da177e4
LT
1082 err = security_socket_create(family, type, protocol, 1);
1083 if (err)
1084 goto out;
1085
1086 sock = sock_alloc();
1087 if (!sock) {
1088 err = -ENOMEM;
1089 goto out;
1090 }
1091
1da177e4 1092 sock->type = type;
7420ed23
VY
1093 err = security_socket_post_create(sock, family, type, protocol, 1);
1094 if (err)
1095 goto out_release;
1096
1da177e4
LT
1097out:
1098 *res = sock;
1099 return err;
7420ed23
VY
1100out_release:
1101 sock_release(sock);
1102 sock = NULL;
1103 goto out;
1da177e4 1104}
c6d409cf 1105EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1106
1107/* No kernel lock held - perfect */
89bddce5 1108static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1109{
cbf55001 1110 unsigned int busy_flag = 0;
1da177e4
LT
1111 struct socket *sock;
1112
1113 /*
89bddce5 1114 * We can't return errors to poll, so it's either yes or no.
1da177e4 1115 */
b69aee04 1116 sock = file->private_data;
2d48d67f 1117
cbf55001 1118 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1119 /* this socket can poll_ll so tell the system call */
cbf55001 1120 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1121
1122 /* once, only if requested by syscall */
cbf55001
ET
1123 if (wait && (wait->_key & POLL_BUSY_LOOP))
1124 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1125 }
1126
cbf55001 1127 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1128}
1129
89bddce5 1130static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1131{
b69aee04 1132 struct socket *sock = file->private_data;
1da177e4
LT
1133
1134 return sock->ops->mmap(file, sock, vma);
1135}
1136
/*
 * release handler for socket files: hand the socket obtained from
 * @inode via SOCKET_I() to __sock_release().  @filp is unused and the
 * result is always 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1142
1143/*
1144 * Update the socket async list
1145 *
1146 * Fasync_list locking strategy.
1147 *
1148 * 1. fasync_list is modified only under process context socket lock
1149 * i.e. under semaphore.
1150 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1151 * or under socket lock
1da177e4
LT
1152 */
1153
1154static int sock_fasync(int fd, struct file *filp, int on)
1155{
989a2979
ED
1156 struct socket *sock = filp->private_data;
1157 struct sock *sk = sock->sk;
eaefd110 1158 struct socket_wq *wq;
1da177e4 1159
989a2979 1160 if (sk == NULL)
1da177e4 1161 return -EINVAL;
1da177e4
LT
1162
1163 lock_sock(sk);
1e1d04e6 1164 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1165 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1166
eaefd110 1167 if (!wq->fasync_list)
989a2979
ED
1168 sock_reset_flag(sk, SOCK_FASYNC);
1169 else
bcdce719 1170 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1171
989a2979 1172 release_sock(sk);
1da177e4
LT
1173 return 0;
1174}
1175
ceb5d58b 1176/* This function may be called only under rcu_lock */
1da177e4 1177
ceb5d58b 1178int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1179{
ceb5d58b 1180 if (!wq || !wq->fasync_list)
1da177e4 1181 return -1;
ceb5d58b 1182
89bddce5 1183 switch (how) {
8d8ad9d7 1184 case SOCK_WAKE_WAITD:
ceb5d58b 1185 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1186 break;
1187 goto call_kill;
8d8ad9d7 1188 case SOCK_WAKE_SPACE:
ceb5d58b 1189 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1190 break;
1191 /* fall through */
8d8ad9d7 1192 case SOCK_WAKE_IO:
89bddce5 1193call_kill:
43815482 1194 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1195 break;
8d8ad9d7 1196 case SOCK_WAKE_URG:
43815482 1197 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1198 }
ceb5d58b 1199
1da177e4
LT
1200 return 0;
1201}
c6d409cf 1202EXPORT_SYMBOL(sock_wake_async);
1da177e4 1203
721db93a 1204int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1205 struct socket **res, int kern)
1da177e4
LT
1206{
1207 int err;
1208 struct socket *sock;
55737fda 1209 const struct net_proto_family *pf;
1da177e4
LT
1210
1211 /*
89bddce5 1212 * Check protocol is in range
1da177e4
LT
1213 */
1214 if (family < 0 || family >= NPROTO)
1215 return -EAFNOSUPPORT;
1216 if (type < 0 || type >= SOCK_MAX)
1217 return -EINVAL;
1218
1219 /* Compatibility.
1220
1221 This uglymoron is moved from INET layer to here to avoid
1222 deadlock in module load.
1223 */
1224 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1225 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1226 current->comm);
1da177e4
LT
1227 family = PF_PACKET;
1228 }
1229
1230 err = security_socket_create(family, type, protocol, kern);
1231 if (err)
1232 return err;
89bddce5 1233
55737fda
SH
1234 /*
1235 * Allocate the socket and allow the family to set things up. if
1236 * the protocol is 0, the family is instructed to select an appropriate
1237 * default.
1238 */
1239 sock = sock_alloc();
1240 if (!sock) {
e87cc472 1241 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1242 return -ENFILE; /* Not exactly a match, but its the
1243 closest posix thing */
1244 }
1245
1246 sock->type = type;
1247
95a5afca 1248#ifdef CONFIG_MODULES
89bddce5
SH
1249 /* Attempt to load a protocol module if the find failed.
1250 *
1251 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1252 * requested real, full-featured networking support upon configuration.
1253 * Otherwise module support will break!
1254 */
190683a9 1255 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1256 request_module("net-pf-%d", family);
1da177e4
LT
1257#endif
1258
55737fda
SH
1259 rcu_read_lock();
1260 pf = rcu_dereference(net_families[family]);
1261 err = -EAFNOSUPPORT;
1262 if (!pf)
1263 goto out_release;
1da177e4
LT
1264
1265 /*
1266 * We will call the ->create function, that possibly is in a loadable
1267 * module, so we have to bump that loadable module refcnt first.
1268 */
55737fda 1269 if (!try_module_get(pf->owner))
1da177e4
LT
1270 goto out_release;
1271
55737fda
SH
1272 /* Now protected by module ref count */
1273 rcu_read_unlock();
1274
3f378b68 1275 err = pf->create(net, sock, protocol, kern);
55737fda 1276 if (err < 0)
1da177e4 1277 goto out_module_put;
a79af59e 1278
1da177e4
LT
1279 /*
1280 * Now to bump the refcnt of the [loadable] module that owns this
1281 * socket at sock_release time we decrement its refcnt.
1282 */
55737fda
SH
1283 if (!try_module_get(sock->ops->owner))
1284 goto out_module_busy;
1285
1da177e4
LT
1286 /*
1287 * Now that we're done with the ->create function, the [loadable]
1288 * module can have its refcnt decremented
1289 */
55737fda 1290 module_put(pf->owner);
7420ed23
VY
1291 err = security_socket_post_create(sock, family, type, protocol, kern);
1292 if (err)
3b185525 1293 goto out_sock_release;
55737fda 1294 *res = sock;
1da177e4 1295
55737fda
SH
1296 return 0;
1297
1298out_module_busy:
1299 err = -EAFNOSUPPORT;
1da177e4 1300out_module_put:
55737fda
SH
1301 sock->ops = NULL;
1302 module_put(pf->owner);
1303out_sock_release:
1da177e4 1304 sock_release(sock);
55737fda
SH
1305 return err;
1306
1307out_release:
1308 rcu_read_unlock();
1309 goto out_sock_release;
1da177e4 1310}
721db93a 1311EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1312
1313int sock_create(int family, int type, int protocol, struct socket **res)
1314{
1b8d7ae4 1315 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1316}
c6d409cf 1317EXPORT_SYMBOL(sock_create);
1da177e4 1318
/*
 * Create a socket in namespace @net with kern == 1.  See
 * __sock_create() for the return value and *@res.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1324
3e0fa65f 1325SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1326{
1327 int retval;
1328 struct socket *sock;
a677a039
UD
1329 int flags;
1330
e38b36f3
UD
1331 /* Check the SOCK_* constants for consistency. */
1332 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1333 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1334 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1335 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1336
a677a039 1337 flags = type & ~SOCK_TYPE_MASK;
77d27200 1338 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1339 return -EINVAL;
1340 type &= SOCK_TYPE_MASK;
1da177e4 1341
aaca0bdc
UD
1342 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1343 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1344
1da177e4
LT
1345 retval = sock_create(family, type, protocol, &sock);
1346 if (retval < 0)
1347 goto out;
1348
77d27200 1349 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1350 if (retval < 0)
1351 goto out_release;
1352
1353out:
1354 /* It may be already another descriptor 8) Not kernel problem. */
1355 return retval;
1356
1357out_release:
1358 sock_release(sock);
1359 return retval;
1360}
1361
1362/*
1363 * Create a pair of connected sockets.
1364 */
1365
3e0fa65f
HC
1366SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1367 int __user *, usockvec)
1da177e4
LT
1368{
1369 struct socket *sock1, *sock2;
1370 int fd1, fd2, err;
db349509 1371 struct file *newfile1, *newfile2;
a677a039
UD
1372 int flags;
1373
1374 flags = type & ~SOCK_TYPE_MASK;
77d27200 1375 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1376 return -EINVAL;
1377 type &= SOCK_TYPE_MASK;
1da177e4 1378
aaca0bdc
UD
1379 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1380 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1381
1da177e4
LT
1382 /*
1383 * Obtain the first socket and check if the underlying protocol
1384 * supports the socketpair call.
1385 */
1386
1387 err = sock_create(family, type, protocol, &sock1);
1388 if (err < 0)
1389 goto out;
1390
1391 err = sock_create(family, type, protocol, &sock2);
1392 if (err < 0)
1393 goto out_release_1;
1394
1395 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1396 if (err < 0)
1da177e4
LT
1397 goto out_release_both;
1398
28407630 1399 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1400 if (unlikely(fd1 < 0)) {
1401 err = fd1;
db349509 1402 goto out_release_both;
bf3c23d1 1403 }
d73aa286 1404
28407630 1405 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1406 if (unlikely(fd2 < 0)) {
1407 err = fd2;
d73aa286 1408 goto out_put_unused_1;
28407630
AV
1409 }
1410
aab174f0 1411 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1412 if (IS_ERR(newfile1)) {
28407630 1413 err = PTR_ERR(newfile1);
d73aa286 1414 goto out_put_unused_both;
28407630
AV
1415 }
1416
aab174f0 1417 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1418 if (IS_ERR(newfile2)) {
1419 err = PTR_ERR(newfile2);
d73aa286 1420 goto out_fput_1;
db349509
AV
1421 }
1422
d73aa286
YD
1423 err = put_user(fd1, &usockvec[0]);
1424 if (err)
1425 goto out_fput_both;
1426
1427 err = put_user(fd2, &usockvec[1]);
1428 if (err)
1429 goto out_fput_both;
1430
157cf649 1431 audit_fd_pair(fd1, fd2);
d73aa286 1432
db349509
AV
1433 fd_install(fd1, newfile1);
1434 fd_install(fd2, newfile2);
1da177e4
LT
1435 /* fd1 and fd2 may be already another descriptors.
1436 * Not kernel problem.
1437 */
1438
d73aa286 1439 return 0;
1da177e4 1440
d73aa286
YD
1441out_fput_both:
1442 fput(newfile2);
1443 fput(newfile1);
1444 put_unused_fd(fd2);
1445 put_unused_fd(fd1);
1446 goto out;
1447
1448out_fput_1:
1449 fput(newfile1);
1450 put_unused_fd(fd2);
1451 put_unused_fd(fd1);
1452 sock_release(sock2);
1453 goto out;
1da177e4 1454
d73aa286
YD
1455out_put_unused_both:
1456 put_unused_fd(fd2);
1457out_put_unused_1:
1458 put_unused_fd(fd1);
1da177e4 1459out_release_both:
89bddce5 1460 sock_release(sock2);
1da177e4 1461out_release_1:
89bddce5 1462 sock_release(sock1);
1da177e4
LT
1463out:
1464 return err;
1465}
1466
1da177e4
LT
1467/*
1468 * Bind a name to a socket. Nothing much to do here since it's
1469 * the protocol's responsibility to handle the local address.
1470 *
1471 * We move the socket address to kernel space before we call
1472 * the protocol layer (having also checked the address is ok).
1473 */
1474
20f37034 1475SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1476{
1477 struct socket *sock;
230b1839 1478 struct sockaddr_storage address;
6cb153ca 1479 int err, fput_needed;
1da177e4 1480
89bddce5 1481 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1482 if (sock) {
43db362d 1483 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1484 if (err >= 0) {
1485 err = security_socket_bind(sock,
230b1839 1486 (struct sockaddr *)&address,
89bddce5 1487 addrlen);
6cb153ca
BL
1488 if (!err)
1489 err = sock->ops->bind(sock,
89bddce5 1490 (struct sockaddr *)
230b1839 1491 &address, addrlen);
1da177e4 1492 }
6cb153ca 1493 fput_light(sock->file, fput_needed);
89bddce5 1494 }
1da177e4
LT
1495 return err;
1496}
1497
1da177e4
LT
1498/*
1499 * Perform a listen. Basically, we allow the protocol to do anything
1500 * necessary for a listen, and if that works, we mark the socket as
1501 * ready for listening.
1502 */
1503
3e0fa65f 1504SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1505{
1506 struct socket *sock;
6cb153ca 1507 int err, fput_needed;
b8e1f9b5 1508 int somaxconn;
89bddce5
SH
1509
1510 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1511 if (sock) {
8efa6e93 1512 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1513 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1514 backlog = somaxconn;
1da177e4
LT
1515
1516 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1517 if (!err)
1518 err = sock->ops->listen(sock, backlog);
1da177e4 1519
6cb153ca 1520 fput_light(sock->file, fput_needed);
1da177e4
LT
1521 }
1522 return err;
1523}
1524
1da177e4
LT
1525/*
1526 * For accept, we attempt to create a new socket, set up the link
1527 * with the client, wake up the client, then return the new
1528 * connected fd. We collect the address of the connector in kernel
1529 * space and move it to user at the very end. This is unclean because
1530 * we open the socket then return an error.
1531 *
1532 * 1003.1g adds the ability to recvmsg() to query connection pending
1533 * status to recvmsg. We need to add that support in a way thats
1534 * clean when we restucture accept also.
1535 */
1536
20f37034
HC
1537SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1538 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1539{
1540 struct socket *sock, *newsock;
39d8c1b6 1541 struct file *newfile;
6cb153ca 1542 int err, len, newfd, fput_needed;
230b1839 1543 struct sockaddr_storage address;
1da177e4 1544
77d27200 1545 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1546 return -EINVAL;
1547
1548 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1549 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1550
6cb153ca 1551 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1552 if (!sock)
1553 goto out;
1554
1555 err = -ENFILE;
c6d409cf
ED
1556 newsock = sock_alloc();
1557 if (!newsock)
1da177e4
LT
1558 goto out_put;
1559
1560 newsock->type = sock->type;
1561 newsock->ops = sock->ops;
1562
1da177e4
LT
1563 /*
1564 * We don't need try_module_get here, as the listening socket (sock)
1565 * has the protocol module (sock->ops->owner) held.
1566 */
1567 __module_get(newsock->ops->owner);
1568
28407630 1569 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1570 if (unlikely(newfd < 0)) {
1571 err = newfd;
9a1875e6
DM
1572 sock_release(newsock);
1573 goto out_put;
39d8c1b6 1574 }
aab174f0 1575 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1576 if (IS_ERR(newfile)) {
28407630
AV
1577 err = PTR_ERR(newfile);
1578 put_unused_fd(newfd);
1579 sock_release(newsock);
1580 goto out_put;
1581 }
39d8c1b6 1582
a79af59e
FF
1583 err = security_socket_accept(sock, newsock);
1584 if (err)
39d8c1b6 1585 goto out_fd;
a79af59e 1586
cdfbabfb 1587 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1588 if (err < 0)
39d8c1b6 1589 goto out_fd;
1da177e4
LT
1590
1591 if (upeer_sockaddr) {
230b1839 1592 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1593 &len, 2) < 0) {
1da177e4 1594 err = -ECONNABORTED;
39d8c1b6 1595 goto out_fd;
1da177e4 1596 }
43db362d 1597 err = move_addr_to_user(&address,
230b1839 1598 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1599 if (err < 0)
39d8c1b6 1600 goto out_fd;
1da177e4
LT
1601 }
1602
1603 /* File flags are not inherited via accept() unlike another OSes. */
1604
39d8c1b6
DM
1605 fd_install(newfd, newfile);
1606 err = newfd;
1da177e4 1607
1da177e4 1608out_put:
6cb153ca 1609 fput_light(sock->file, fput_needed);
1da177e4
LT
1610out:
1611 return err;
39d8c1b6 1612out_fd:
9606a216 1613 fput(newfile);
39d8c1b6 1614 put_unused_fd(newfd);
1da177e4
LT
1615 goto out_put;
1616}
1617
20f37034
HC
1618SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1619 int __user *, upeer_addrlen)
aaca0bdc 1620{
de11defe 1621 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1622}
1623
1da177e4
LT
1624/*
1625 * Attempt to connect to a socket with the server address. The address
1626 * is in user space so we verify it is OK and move it to kernel space.
1627 *
1628 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1629 * break bindings
1630 *
1631 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1632 * other SEQPACKET protocols that take time to connect() as it doesn't
1633 * include the -EINPROGRESS status for such sockets.
1634 */
1635
20f37034
HC
1636SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1637 int, addrlen)
1da177e4
LT
1638{
1639 struct socket *sock;
230b1839 1640 struct sockaddr_storage address;
6cb153ca 1641 int err, fput_needed;
1da177e4 1642
6cb153ca 1643 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1644 if (!sock)
1645 goto out;
43db362d 1646 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1647 if (err < 0)
1648 goto out_put;
1649
89bddce5 1650 err =
230b1839 1651 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1652 if (err)
1653 goto out_put;
1654
230b1839 1655 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1656 sock->file->f_flags);
1657out_put:
6cb153ca 1658 fput_light(sock->file, fput_needed);
1da177e4
LT
1659out:
1660 return err;
1661}
1662
1663/*
1664 * Get the local address ('name') of a socket object. Move the obtained
1665 * name to user space.
1666 */
1667
20f37034
HC
1668SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1669 int __user *, usockaddr_len)
1da177e4
LT
1670{
1671 struct socket *sock;
230b1839 1672 struct sockaddr_storage address;
6cb153ca 1673 int len, err, fput_needed;
89bddce5 1674
6cb153ca 1675 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1676 if (!sock)
1677 goto out;
1678
1679 err = security_socket_getsockname(sock);
1680 if (err)
1681 goto out_put;
1682
230b1839 1683 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1684 if (err)
1685 goto out_put;
43db362d 1686 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1687
1688out_put:
6cb153ca 1689 fput_light(sock->file, fput_needed);
1da177e4
LT
1690out:
1691 return err;
1692}
1693
1694/*
1695 * Get the remote address ('name') of a socket object. Move the obtained
1696 * name to user space.
1697 */
1698
20f37034
HC
1699SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1700 int __user *, usockaddr_len)
1da177e4
LT
1701{
1702 struct socket *sock;
230b1839 1703 struct sockaddr_storage address;
6cb153ca 1704 int len, err, fput_needed;
1da177e4 1705
89bddce5
SH
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (sock != NULL) {
1da177e4
LT
1708 err = security_socket_getpeername(sock);
1709 if (err) {
6cb153ca 1710 fput_light(sock->file, fput_needed);
1da177e4
LT
1711 return err;
1712 }
1713
89bddce5 1714 err =
230b1839 1715 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1716 1);
1da177e4 1717 if (!err)
43db362d 1718 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1719 usockaddr_len);
6cb153ca 1720 fput_light(sock->file, fput_needed);
1da177e4
LT
1721 }
1722 return err;
1723}
1724
1725/*
1726 * Send a datagram to a given address. We move the address into kernel
1727 * space and check the user space data area is readable before invoking
1728 * the protocol.
1729 */
1730
3e0fa65f 1731SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1732 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1733 int, addr_len)
1da177e4
LT
1734{
1735 struct socket *sock;
230b1839 1736 struct sockaddr_storage address;
1da177e4
LT
1737 int err;
1738 struct msghdr msg;
1739 struct iovec iov;
6cb153ca 1740 int fput_needed;
6cb153ca 1741
602bd0e9
AV
1742 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1743 if (unlikely(err))
1744 return err;
de0fa95c
PE
1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1746 if (!sock)
4387ff75 1747 goto out;
6cb153ca 1748
89bddce5 1749 msg.msg_name = NULL;
89bddce5
SH
1750 msg.msg_control = NULL;
1751 msg.msg_controllen = 0;
1752 msg.msg_namelen = 0;
6cb153ca 1753 if (addr) {
43db362d 1754 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1755 if (err < 0)
1756 goto out_put;
230b1839 1757 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1758 msg.msg_namelen = addr_len;
1da177e4
LT
1759 }
1760 if (sock->file->f_flags & O_NONBLOCK)
1761 flags |= MSG_DONTWAIT;
1762 msg.msg_flags = flags;
d8725c86 1763 err = sock_sendmsg(sock, &msg);
1da177e4 1764
89bddce5 1765out_put:
de0fa95c 1766 fput_light(sock->file, fput_needed);
4387ff75 1767out:
1da177e4
LT
1768 return err;
1769}
1770
1771/*
89bddce5 1772 * Send a datagram down a socket.
1da177e4
LT
1773 */
1774
3e0fa65f 1775SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1776 unsigned int, flags)
1da177e4
LT
1777{
1778 return sys_sendto(fd, buff, len, flags, NULL, 0);
1779}
1780
1781/*
89bddce5 1782 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1783 * sender. We verify the buffers are writable and if needed move the
1784 * sender address from kernel to user space.
1785 */
1786
3e0fa65f 1787SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1788 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1789 int __user *, addr_len)
1da177e4
LT
1790{
1791 struct socket *sock;
1792 struct iovec iov;
1793 struct msghdr msg;
230b1839 1794 struct sockaddr_storage address;
89bddce5 1795 int err, err2;
6cb153ca
BL
1796 int fput_needed;
1797
602bd0e9
AV
1798 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1799 if (unlikely(err))
1800 return err;
de0fa95c 1801 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1802 if (!sock)
de0fa95c 1803 goto out;
1da177e4 1804
89bddce5
SH
1805 msg.msg_control = NULL;
1806 msg.msg_controllen = 0;
f3d33426
HFS
1807 /* Save some cycles and don't copy the address if not needed */
1808 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1809 /* We assume all kernel code knows the size of sockaddr_storage */
1810 msg.msg_namelen = 0;
130ed5d1 1811 msg.msg_iocb = NULL;
9f138fa6 1812 msg.msg_flags = 0;
1da177e4
LT
1813 if (sock->file->f_flags & O_NONBLOCK)
1814 flags |= MSG_DONTWAIT;
2da62906 1815 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1816
89bddce5 1817 if (err >= 0 && addr != NULL) {
43db362d 1818 err2 = move_addr_to_user(&address,
230b1839 1819 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1820 if (err2 < 0)
1821 err = err2;
1da177e4 1822 }
de0fa95c
PE
1823
1824 fput_light(sock->file, fput_needed);
4387ff75 1825out:
1da177e4
LT
1826 return err;
1827}
1828
1829/*
89bddce5 1830 * Receive a datagram from a socket.
1da177e4
LT
1831 */
1832
b7c0ddf5
JG
1833SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1834 unsigned int, flags)
1da177e4
LT
1835{
1836 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1837}
1838
1839/*
1840 * Set a socket option. Because we don't know the option lengths we have
1841 * to pass the user mode parameter for the protocols to sort out.
1842 */
1843
20f37034
HC
1844SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1845 char __user *, optval, int, optlen)
1da177e4 1846{
6cb153ca 1847 int err, fput_needed;
1da177e4
LT
1848 struct socket *sock;
1849
1850 if (optlen < 0)
1851 return -EINVAL;
89bddce5
SH
1852
1853 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1854 if (sock != NULL) {
1855 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1856 if (err)
1857 goto out_put;
1da177e4
LT
1858
1859 if (level == SOL_SOCKET)
89bddce5
SH
1860 err =
1861 sock_setsockopt(sock, level, optname, optval,
1862 optlen);
1da177e4 1863 else
89bddce5
SH
1864 err =
1865 sock->ops->setsockopt(sock, level, optname, optval,
1866 optlen);
6cb153ca
BL
1867out_put:
1868 fput_light(sock->file, fput_needed);
1da177e4
LT
1869 }
1870 return err;
1871}
1872
1873/*
1874 * Get a socket option. Because we don't know the option lengths we have
1875 * to pass a user mode parameter for the protocols to sort out.
1876 */
1877
20f37034
HC
1878SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1879 char __user *, optval, int __user *, optlen)
1da177e4 1880{
6cb153ca 1881 int err, fput_needed;
1da177e4
LT
1882 struct socket *sock;
1883
89bddce5
SH
1884 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1885 if (sock != NULL) {
6cb153ca
BL
1886 err = security_socket_getsockopt(sock, level, optname);
1887 if (err)
1888 goto out_put;
1da177e4
LT
1889
1890 if (level == SOL_SOCKET)
89bddce5
SH
1891 err =
1892 sock_getsockopt(sock, level, optname, optval,
1893 optlen);
1da177e4 1894 else
89bddce5
SH
1895 err =
1896 sock->ops->getsockopt(sock, level, optname, optval,
1897 optlen);
6cb153ca
BL
1898out_put:
1899 fput_light(sock->file, fput_needed);
1da177e4
LT
1900 }
1901 return err;
1902}
1903
1da177e4
LT
1904/*
1905 * Shutdown a socket.
1906 */
1907
754fe8d2 1908SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1909{
6cb153ca 1910 int err, fput_needed;
1da177e4
LT
1911 struct socket *sock;
1912
89bddce5
SH
1913 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1914 if (sock != NULL) {
1da177e4 1915 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1916 if (!err)
1917 err = sock->ops->shutdown(sock, how);
1918 fput_light(sock->file, fput_needed);
1da177e4
LT
1919 }
1920 return err;
1921}
1922
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * Each one expands to a pointer to @member in either msg##_compat or
 * msg, chosen by MSG_CMSG_COMPAT in a variable named `flags`; both that
 * variable and the msg##_compat pointer must be in scope where the macro
 * is used.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1929
c71d8ebe
TH
1930struct used_address {
1931 struct sockaddr_storage name;
1932 unsigned int name_len;
1933};
1934
da184284
AV
1935static int copy_msghdr_from_user(struct msghdr *kmsg,
1936 struct user_msghdr __user *umsg,
1937 struct sockaddr __user **save_addr,
1938 struct iovec **iov)
1661bf36 1939{
ffb07550 1940 struct user_msghdr msg;
08adb7da
AV
1941 ssize_t err;
1942
ffb07550 1943 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 1944 return -EFAULT;
dbb490b9 1945
864d9664 1946 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
1947 kmsg->msg_controllen = msg.msg_controllen;
1948 kmsg->msg_flags = msg.msg_flags;
1949
1950 kmsg->msg_namelen = msg.msg_namelen;
1951 if (!msg.msg_name)
6a2a2b3a
AS
1952 kmsg->msg_namelen = 0;
1953
dbb490b9
ML
1954 if (kmsg->msg_namelen < 0)
1955 return -EINVAL;
1956
1661bf36 1957 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1958 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1959
1960 if (save_addr)
ffb07550 1961 *save_addr = msg.msg_name;
08adb7da 1962
ffb07550 1963 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 1964 if (!save_addr) {
864d9664
PA
1965 err = move_addr_to_kernel(msg.msg_name,
1966 kmsg->msg_namelen,
08adb7da
AV
1967 kmsg->msg_name);
1968 if (err < 0)
1969 return err;
1970 }
1971 } else {
1972 kmsg->msg_name = NULL;
1973 kmsg->msg_namelen = 0;
1974 }
1975
ffb07550 1976 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
1977 return -EMSGSIZE;
1978
0345f931 1979 kmsg->msg_iocb = NULL;
1980
ffb07550
AV
1981 return import_iovec(save_addr ? READ : WRITE,
1982 msg.msg_iov, msg.msg_iovlen,
da184284 1983 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1984}
1985
666547ff 1986static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1987 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1988 struct used_address *used_address,
1989 unsigned int allowed_msghdr_flags)
1da177e4 1990{
89bddce5
SH
1991 struct compat_msghdr __user *msg_compat =
1992 (struct compat_msghdr __user *)msg;
230b1839 1993 struct sockaddr_storage address;
1da177e4 1994 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1995 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1996 __aligned(sizeof(__kernel_size_t));
89bddce5 1997 /* 20 is size of ipv6_pktinfo */
1da177e4 1998 unsigned char *ctl_buf = ctl;
d8725c86 1999 int ctl_len;
08adb7da 2000 ssize_t err;
89bddce5 2001
08adb7da 2002 msg_sys->msg_name = &address;
1da177e4 2003
08449320 2004 if (MSG_CMSG_COMPAT & flags)
08adb7da 2005 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2006 else
08adb7da 2007 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2008 if (err < 0)
da184284 2009 return err;
1da177e4
LT
2010
2011 err = -ENOBUFS;
2012
228e548e 2013 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2014 goto out_freeiov;
28a94d8f 2015 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2016 ctl_len = msg_sys->msg_controllen;
1da177e4 2017 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2018 err =
228e548e 2019 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2020 sizeof(ctl));
1da177e4
LT
2021 if (err)
2022 goto out_freeiov;
228e548e
AB
2023 ctl_buf = msg_sys->msg_control;
2024 ctl_len = msg_sys->msg_controllen;
1da177e4 2025 } else if (ctl_len) {
ac4340fc
DM
2026 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2027 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2028 if (ctl_len > sizeof(ctl)) {
1da177e4 2029 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2030 if (ctl_buf == NULL)
1da177e4
LT
2031 goto out_freeiov;
2032 }
2033 err = -EFAULT;
2034 /*
228e548e 2035 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2036 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2037 * checking falls down on this.
2038 */
fb8621bb 2039 if (copy_from_user(ctl_buf,
228e548e 2040 (void __user __force *)msg_sys->msg_control,
89bddce5 2041 ctl_len))
1da177e4 2042 goto out_freectl;
228e548e 2043 msg_sys->msg_control = ctl_buf;
1da177e4 2044 }
228e548e 2045 msg_sys->msg_flags = flags;
1da177e4
LT
2046
2047 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2048 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2049 /*
2050 * If this is sendmmsg() and current destination address is same as
2051 * previously succeeded address, omit asking LSM's decision.
2052 * used_address->name_len is initialized to UINT_MAX so that the first
2053 * destination address never matches.
2054 */
bc909d9d
MD
2055 if (used_address && msg_sys->msg_name &&
2056 used_address->name_len == msg_sys->msg_namelen &&
2057 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2058 used_address->name_len)) {
d8725c86 2059 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2060 goto out_freectl;
2061 }
d8725c86 2062 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2063 /*
2064 * If this is sendmmsg() and sending to current destination address was
2065 * successful, remember it.
2066 */
2067 if (used_address && err >= 0) {
2068 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2069 if (msg_sys->msg_name)
2070 memcpy(&used_address->name, msg_sys->msg_name,
2071 used_address->name_len);
c71d8ebe 2072 }
1da177e4
LT
2073
2074out_freectl:
89bddce5 2075 if (ctl_buf != ctl)
1da177e4
LT
2076 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2077out_freeiov:
da184284 2078 kfree(iov);
228e548e
AB
2079 return err;
2080}
2081
2082/*
2083 * BSD sendmsg interface
2084 */
2085
666547ff 2086long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2087{
2088 int fput_needed, err;
2089 struct msghdr msg_sys;
1be374a0
AL
2090 struct socket *sock;
2091
1be374a0 2092 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2093 if (!sock)
2094 goto out;
2095
28a94d8f 2096 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2097
6cb153ca 2098 fput_light(sock->file, fput_needed);
89bddce5 2099out:
1da177e4
LT
2100 return err;
2101}
2102
666547ff 2103SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2104{
2105 if (flags & MSG_CMSG_COMPAT)
2106 return -EINVAL;
2107 return __sys_sendmsg(fd, msg, flags);
2108}
2109
228e548e
AB
2110/*
2111 * Linux sendmmsg interface
2112 */
2113
2114int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2115 unsigned int flags)
2116{
2117 int fput_needed, err, datagrams;
2118 struct socket *sock;
2119 struct mmsghdr __user *entry;
2120 struct compat_mmsghdr __user *compat_entry;
2121 struct msghdr msg_sys;
c71d8ebe 2122 struct used_address used_address;
f092276d 2123 unsigned int oflags = flags;
228e548e 2124
98382f41
AB
2125 if (vlen > UIO_MAXIOV)
2126 vlen = UIO_MAXIOV;
228e548e
AB
2127
2128 datagrams = 0;
2129
2130 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2131 if (!sock)
2132 return err;
2133
c71d8ebe 2134 used_address.name_len = UINT_MAX;
228e548e
AB
2135 entry = mmsg;
2136 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2137 err = 0;
f092276d 2138 flags |= MSG_BATCH;
228e548e
AB
2139
2140 while (datagrams < vlen) {
f092276d
TH
2141 if (datagrams == vlen - 1)
2142 flags = oflags;
2143
228e548e 2144 if (MSG_CMSG_COMPAT & flags) {
666547ff 2145 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2146 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2147 if (err < 0)
2148 break;
2149 err = __put_user(err, &compat_entry->msg_len);
2150 ++compat_entry;
2151 } else {
a7526eb5 2152 err = ___sys_sendmsg(sock,
666547ff 2153 (struct user_msghdr __user *)entry,
28a94d8f 2154 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2155 if (err < 0)
2156 break;
2157 err = put_user(err, &entry->msg_len);
2158 ++entry;
2159 }
2160
2161 if (err)
2162 break;
2163 ++datagrams;
3023898b
SHY
2164 if (msg_data_left(&msg_sys))
2165 break;
a78cb84c 2166 cond_resched();
228e548e
AB
2167 }
2168
228e548e
AB
2169 fput_light(sock->file, fput_needed);
2170
728ffb86
AB
2171 /* We only return an error if no datagrams were able to be sent */
2172 if (datagrams != 0)
228e548e
AB
2173 return datagrams;
2174
228e548e
AB
2175 return err;
2176}
2177
2178SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2179 unsigned int, vlen, unsigned int, flags)
2180{
1be374a0
AL
2181 if (flags & MSG_CMSG_COMPAT)
2182 return -EINVAL;
228e548e
AB
2183 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2184}
2185
666547ff 2186static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2187 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2188{
89bddce5
SH
2189 struct compat_msghdr __user *msg_compat =
2190 (struct compat_msghdr __user *)msg;
1da177e4 2191 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2192 struct iovec *iov = iovstack;
1da177e4 2193 unsigned long cmsg_ptr;
2da62906 2194 int len;
08adb7da 2195 ssize_t err;
1da177e4
LT
2196
2197 /* kernel mode address */
230b1839 2198 struct sockaddr_storage addr;
1da177e4
LT
2199
2200 /* user mode address pointers */
2201 struct sockaddr __user *uaddr;
08adb7da 2202 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2203
08adb7da 2204 msg_sys->msg_name = &addr;
1da177e4 2205
f3d33426 2206 if (MSG_CMSG_COMPAT & flags)
08adb7da 2207 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2208 else
08adb7da 2209 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2210 if (err < 0)
da184284 2211 return err;
1da177e4 2212
a2e27255
ACM
2213 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2214 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2215
f3d33426
HFS
2216 /* We assume all kernel code knows the size of sockaddr_storage */
2217 msg_sys->msg_namelen = 0;
2218
1da177e4
LT
2219 if (sock->file->f_flags & O_NONBLOCK)
2220 flags |= MSG_DONTWAIT;
2da62906 2221 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2222 if (err < 0)
2223 goto out_freeiov;
2224 len = err;
2225
2226 if (uaddr != NULL) {
43db362d 2227 err = move_addr_to_user(&addr,
a2e27255 2228 msg_sys->msg_namelen, uaddr,
89bddce5 2229 uaddr_len);
1da177e4
LT
2230 if (err < 0)
2231 goto out_freeiov;
2232 }
a2e27255 2233 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2234 COMPAT_FLAGS(msg));
1da177e4
LT
2235 if (err)
2236 goto out_freeiov;
2237 if (MSG_CMSG_COMPAT & flags)
a2e27255 2238 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2239 &msg_compat->msg_controllen);
2240 else
a2e27255 2241 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2242 &msg->msg_controllen);
2243 if (err)
2244 goto out_freeiov;
2245 err = len;
2246
2247out_freeiov:
da184284 2248 kfree(iov);
a2e27255
ACM
2249 return err;
2250}
2251
2252/*
2253 * BSD recvmsg interface
2254 */
2255
666547ff 2256long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2257{
2258 int fput_needed, err;
2259 struct msghdr msg_sys;
1be374a0
AL
2260 struct socket *sock;
2261
1be374a0 2262 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2263 if (!sock)
2264 goto out;
2265
a7526eb5 2266 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2267
6cb153ca 2268 fput_light(sock->file, fput_needed);
1da177e4
LT
2269out:
2270 return err;
2271}
2272
666547ff 2273SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2274 unsigned int, flags)
2275{
2276 if (flags & MSG_CMSG_COMPAT)
2277 return -EINVAL;
2278 return __sys_recvmsg(fd, msg, flags);
2279}
2280
a2e27255
ACM
2281/*
2282 * Linux recvmmsg interface
2283 */
2284
2285int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2286 unsigned int flags, struct timespec *timeout)
2287{
2288 int fput_needed, err, datagrams;
2289 struct socket *sock;
2290 struct mmsghdr __user *entry;
d7256d0e 2291 struct compat_mmsghdr __user *compat_entry;
a2e27255 2292 struct msghdr msg_sys;
766b9f92
DD
2293 struct timespec64 end_time;
2294 struct timespec64 timeout64;
a2e27255
ACM
2295
2296 if (timeout &&
2297 poll_select_set_timeout(&end_time, timeout->tv_sec,
2298 timeout->tv_nsec))
2299 return -EINVAL;
2300
2301 datagrams = 0;
2302
2303 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2304 if (!sock)
2305 return err;
2306
2307 err = sock_error(sock->sk);
e623a9e9
MJ
2308 if (err) {
2309 datagrams = err;
a2e27255 2310 goto out_put;
e623a9e9 2311 }
a2e27255
ACM
2312
2313 entry = mmsg;
d7256d0e 2314 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2315
2316 while (datagrams < vlen) {
2317 /*
2318 * No need to ask LSM for more than the first datagram.
2319 */
d7256d0e 2320 if (MSG_CMSG_COMPAT & flags) {
666547ff 2321 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2322 &msg_sys, flags & ~MSG_WAITFORONE,
2323 datagrams);
d7256d0e
JMG
2324 if (err < 0)
2325 break;
2326 err = __put_user(err, &compat_entry->msg_len);
2327 ++compat_entry;
2328 } else {
a7526eb5 2329 err = ___sys_recvmsg(sock,
666547ff 2330 (struct user_msghdr __user *)entry,
a7526eb5
AL
2331 &msg_sys, flags & ~MSG_WAITFORONE,
2332 datagrams);
d7256d0e
JMG
2333 if (err < 0)
2334 break;
2335 err = put_user(err, &entry->msg_len);
2336 ++entry;
2337 }
2338
a2e27255
ACM
2339 if (err)
2340 break;
a2e27255
ACM
2341 ++datagrams;
2342
71c5c159
BB
2343 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2344 if (flags & MSG_WAITFORONE)
2345 flags |= MSG_DONTWAIT;
2346
a2e27255 2347 if (timeout) {
766b9f92
DD
2348 ktime_get_ts64(&timeout64);
2349 *timeout = timespec64_to_timespec(
2350 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2351 if (timeout->tv_sec < 0) {
2352 timeout->tv_sec = timeout->tv_nsec = 0;
2353 break;
2354 }
2355
2356 /* Timeout, return less than vlen datagrams */
2357 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2358 break;
2359 }
2360
2361 /* Out of band data, return right away */
2362 if (msg_sys.msg_flags & MSG_OOB)
2363 break;
a78cb84c 2364 cond_resched();
a2e27255
ACM
2365 }
2366
a2e27255 2367 if (err == 0)
34b88a68
ACM
2368 goto out_put;
2369
2370 if (datagrams == 0) {
2371 datagrams = err;
2372 goto out_put;
2373 }
a2e27255 2374
34b88a68
ACM
2375 /*
2376 * We may return less entries than requested (vlen) if the
2377 * sock is non block and there aren't enough datagrams...
2378 */
2379 if (err != -EAGAIN) {
a2e27255 2380 /*
34b88a68
ACM
2381 * ... or if recvmsg returns an error after we
2382 * received some datagrams, where we record the
2383 * error to return on the next call or if the
2384 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2385 */
34b88a68 2386 sock->sk->sk_err = -err;
a2e27255 2387 }
34b88a68
ACM
2388out_put:
2389 fput_light(sock->file, fput_needed);
a2e27255 2390
34b88a68 2391 return datagrams;
a2e27255
ACM
2392}
2393
2394SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2395 unsigned int, vlen, unsigned int, flags,
2396 struct timespec __user *, timeout)
2397{
2398 int datagrams;
2399 struct timespec timeout_sys;
2400
1be374a0
AL
2401 if (flags & MSG_CMSG_COMPAT)
2402 return -EINVAL;
2403
a2e27255
ACM
2404 if (!timeout)
2405 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2406
2407 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2408 return -EFAULT;
2409
2410 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2411
2412 if (datagrams > 0 &&
2413 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2414 datagrams = -EFAULT;
2415
2416 return datagrams;
2417}
2418
2419#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block sys_socketcall copies from user
 * space, indexed by call number.  AL(n) is the size of n unsigned longs.
 * NOTE(review): the indices presumably follow the SYS_* call numbering
 * used by the socketcall switch -- confirm against the uapi header.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	[0]  = AL(0),
	[1]  = AL(3),
	[2]  = AL(3),
	[3]  = AL(3),
	[4]  = AL(2),
	[5]  = AL(3),
	[6]  = AL(3),
	[7]  = AL(3),
	[8]  = AL(4),
	[9]  = AL(4),
	[10] = AL(4),
	[11] = AL(6),
	[12] = AL(6),
	[13] = AL(2),
	[14] = AL(5),
	[15] = AL(5),
	[16] = AL(3),
	[17] = AL(3),
	[18] = AL(4),
	[19] = AL(5),
	[20] = AL(4),
};

#undef AL
2430
2431/*
89bddce5 2432 * System call vectors.
1da177e4
LT
2433 *
2434 * Argument checking cleaned up. Saved 20% in size.
2435 * This function doesn't need to set the kernel lock because
89bddce5 2436 * it is set by the callees.
1da177e4
LT
2437 */
2438
3e0fa65f 2439SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2440{
2950fa9d 2441 unsigned long a[AUDITSC_ARGS];
89bddce5 2442 unsigned long a0, a1;
1da177e4 2443 int err;
47379052 2444 unsigned int len;
1da177e4 2445
228e548e 2446 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2447 return -EINVAL;
45c8178c 2448 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2449
47379052
AV
2450 len = nargs[call];
2451 if (len > sizeof(a))
2452 return -EINVAL;
2453
1da177e4 2454 /* copy_from_user should be SMP safe. */
47379052 2455 if (copy_from_user(a, args, len))
1da177e4 2456 return -EFAULT;
3ec3b2fb 2457
2950fa9d
CG
2458 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2459 if (err)
2460 return err;
3ec3b2fb 2461
89bddce5
SH
2462 a0 = a[0];
2463 a1 = a[1];
2464
2465 switch (call) {
2466 case SYS_SOCKET:
2467 err = sys_socket(a0, a1, a[2]);
2468 break;
2469 case SYS_BIND:
2470 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2471 break;
2472 case SYS_CONNECT:
2473 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2474 break;
2475 case SYS_LISTEN:
2476 err = sys_listen(a0, a1);
2477 break;
2478 case SYS_ACCEPT:
de11defe
UD
2479 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2480 (int __user *)a[2], 0);
89bddce5
SH
2481 break;
2482 case SYS_GETSOCKNAME:
2483 err =
2484 sys_getsockname(a0, (struct sockaddr __user *)a1,
2485 (int __user *)a[2]);
2486 break;
2487 case SYS_GETPEERNAME:
2488 err =
2489 sys_getpeername(a0, (struct sockaddr __user *)a1,
2490 (int __user *)a[2]);
2491 break;
2492 case SYS_SOCKETPAIR:
2493 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2494 break;
2495 case SYS_SEND:
2496 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2497 break;
2498 case SYS_SENDTO:
2499 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2500 (struct sockaddr __user *)a[4], a[5]);
2501 break;
2502 case SYS_RECV:
2503 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2504 break;
2505 case SYS_RECVFROM:
2506 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2507 (struct sockaddr __user *)a[4],
2508 (int __user *)a[5]);
2509 break;
2510 case SYS_SHUTDOWN:
2511 err = sys_shutdown(a0, a1);
2512 break;
2513 case SYS_SETSOCKOPT:
2514 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2515 break;
2516 case SYS_GETSOCKOPT:
2517 err =
2518 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2519 (int __user *)a[4]);
2520 break;
2521 case SYS_SENDMSG:
666547ff 2522 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2523 break;
228e548e
AB
2524 case SYS_SENDMMSG:
2525 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2526 break;
89bddce5 2527 case SYS_RECVMSG:
666547ff 2528 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2529 break;
a2e27255
ACM
2530 case SYS_RECVMMSG:
2531 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2532 (struct timespec __user *)a[4]);
2533 break;
de11defe
UD
2534 case SYS_ACCEPT4:
2535 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2536 (int __user *)a[2], a[3]);
aaca0bdc 2537 break;
89bddce5
SH
2538 default:
2539 err = -EINVAL;
2540 break;
1da177e4
LT
2541 }
2542 return err;
2543}
2544
89bddce5 2545#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2546
55737fda
SH
2547/**
2548 * sock_register - add a socket protocol handler
2549 * @ops: description of protocol
2550 *
1da177e4
LT
2551 * This function is called by a protocol handler that wants to
2552 * advertise its address family, and have it linked into the
e793c0f7 2553 * socket interface. The value ops->family corresponds to the
55737fda 2554 * socket system call protocol family.
1da177e4 2555 */
f0fd27d4 2556int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2557{
2558 int err;
2559
2560 if (ops->family >= NPROTO) {
3410f22e 2561 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2562 return -ENOBUFS;
2563 }
55737fda
SH
2564
2565 spin_lock(&net_family_lock);
190683a9
ED
2566 if (rcu_dereference_protected(net_families[ops->family],
2567 lockdep_is_held(&net_family_lock)))
55737fda
SH
2568 err = -EEXIST;
2569 else {
cf778b00 2570 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2571 err = 0;
2572 }
55737fda
SH
2573 spin_unlock(&net_family_lock);
2574
3410f22e 2575 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2576 return err;
2577}
c6d409cf 2578EXPORT_SYMBOL(sock_register);
1da177e4 2579
55737fda
SH
2580/**
2581 * sock_unregister - remove a protocol handler
2582 * @family: protocol family to remove
2583 *
1da177e4
LT
2584 * This function is called by a protocol handler that wants to
2585 * remove its address family, and have it unlinked from the
55737fda
SH
2586 * new socket creation.
2587 *
2588 * If protocol handler is a module, then it can use module reference
2589 * counts to protect against new references. If protocol handler is not
2590 * a module then it needs to provide its own protection in
2591 * the ops->create routine.
1da177e4 2592 */
f0fd27d4 2593void sock_unregister(int family)
1da177e4 2594{
f0fd27d4 2595 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2596
55737fda 2597 spin_lock(&net_family_lock);
a9b3cd7f 2598 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2599 spin_unlock(&net_family_lock);
2600
2601 synchronize_rcu();
2602
3410f22e 2603 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2604}
c6d409cf 2605EXPORT_SYMBOL(sock_unregister);
1da177e4 2606
77d76ea3 2607static int __init sock_init(void)
1da177e4 2608{
b3e19d92 2609 int err;
2ca794e5
EB
2610 /*
2611 * Initialize the network sysctl infrastructure.
2612 */
2613 err = net_sysctl_init();
2614 if (err)
2615 goto out;
b3e19d92 2616
1da177e4 2617 /*
89bddce5 2618 * Initialize skbuff SLAB cache
1da177e4
LT
2619 */
2620 skb_init();
1da177e4
LT
2621
2622 /*
89bddce5 2623 * Initialize the protocols module.
1da177e4
LT
2624 */
2625
2626 init_inodecache();
b3e19d92
NP
2627
2628 err = register_filesystem(&sock_fs_type);
2629 if (err)
2630 goto out_fs;
1da177e4 2631 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2632 if (IS_ERR(sock_mnt)) {
2633 err = PTR_ERR(sock_mnt);
2634 goto out_mount;
2635 }
77d76ea3
AK
2636
2637 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2638 */
2639
2640#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2641 err = netfilter_init();
2642 if (err)
2643 goto out;
1da177e4 2644#endif
cbeb321a 2645
408eccce 2646 ptp_classifier_init();
c1f19b51 2647
b3e19d92
NP
2648out:
2649 return err;
2650
2651out_mount:
2652 unregister_filesystem(&sock_fs_type);
2653out_fs:
2654 goto out;
1da177e4
LT
2655}
2656
77d76ea3
AK
2657core_initcall(sock_init); /* early initcall */
2658
6fde36d5
AS
2659static int __init jit_init(void)
2660{
2661#ifdef CONFIG_BPF_JIT_ALWAYS_ON
2662 bpf_jit_enable = 1;
2663#endif
2664 return 0;
2665}
2666pure_initcall(jit_init);
2667
1da177e4
LT
2668#ifdef CONFIG_PROC_FS
2669void socket_seq_show(struct seq_file *seq)
2670{
2671 int cpu;
2672 int counter = 0;
2673
6f912042 2674 for_each_possible_cpu(cpu)
89bddce5 2675 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2676
2677 /* It can be negative, by the way. 8) */
2678 if (counter < 0)
2679 counter = 0;
2680
2681 seq_printf(seq, "sockets: used %d\n", counter);
2682}
89bddce5 2683#endif /* CONFIG_PROC_FS */
1da177e4 2684
89bbfc95 2685#ifdef CONFIG_COMPAT
6b96018b 2686static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2687 unsigned int cmd, void __user *up)
7a229387 2688{
7a229387
AB
2689 mm_segment_t old_fs = get_fs();
2690 struct timeval ktv;
2691 int err;
2692
2693 set_fs(KERNEL_DS);
6b96018b 2694 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2695 set_fs(old_fs);
644595f8 2696 if (!err)
ed6fe9d6 2697 err = compat_put_timeval(&ktv, up);
644595f8 2698
7a229387
AB
2699 return err;
2700}
2701
6b96018b 2702static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2703 unsigned int cmd, void __user *up)
7a229387 2704{
7a229387
AB
2705 mm_segment_t old_fs = get_fs();
2706 struct timespec kts;
2707 int err;
2708
2709 set_fs(KERNEL_DS);
6b96018b 2710 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2711 set_fs(old_fs);
644595f8 2712 if (!err)
ed6fe9d6 2713 err = compat_put_timespec(&kts, up);
644595f8 2714
7a229387
AB
2715 return err;
2716}
2717
6b96018b 2718static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2719{
2720 struct ifreq __user *uifr;
2721 int err;
2722
2723 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2724 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2725 return -EFAULT;
2726
6b96018b 2727 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2728 if (err)
2729 return err;
2730
6b96018b 2731 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2732 return -EFAULT;
2733
2734 return 0;
2735}
2736
6b96018b 2737static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2738{
6b96018b 2739 struct compat_ifconf ifc32;
7a229387
AB
2740 struct ifconf ifc;
2741 struct ifconf __user *uifc;
6b96018b 2742 struct compat_ifreq __user *ifr32;
7a229387
AB
2743 struct ifreq __user *ifr;
2744 unsigned int i, j;
2745 int err;
2746
6b96018b 2747 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2748 return -EFAULT;
2749
43da5f2e 2750 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2751 if (ifc32.ifcbuf == 0) {
2752 ifc32.ifc_len = 0;
2753 ifc.ifc_len = 0;
2754 ifc.ifc_req = NULL;
2755 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2756 } else {
c6d409cf
ED
2757 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2758 sizeof(struct ifreq);
7a229387
AB
2759 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2760 ifc.ifc_len = len;
2761 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2762 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2763 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2764 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2765 return -EFAULT;
2766 ifr++;
2767 ifr32++;
2768 }
2769 }
2770 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2771 return -EFAULT;
2772
6b96018b 2773 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2774 if (err)
2775 return err;
2776
2777 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2778 return -EFAULT;
2779
2780 ifr = ifc.ifc_req;
2781 ifr32 = compat_ptr(ifc32.ifcbuf);
2782 for (i = 0, j = 0;
c6d409cf
ED
2783 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2784 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2785 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2786 return -EFAULT;
2787 ifr32++;
2788 ifr++;
2789 }
2790
2791 if (ifc32.ifcbuf == 0) {
2792 /* Translate from 64-bit structure multiple to
2793 * a 32-bit one.
2794 */
2795 i = ifc.ifc_len;
6b96018b 2796 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2797 ifc32.ifc_len = i;
2798 } else {
2799 ifc32.ifc_len = i;
2800 }
6b96018b 2801 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2802 return -EFAULT;
2803
2804 return 0;
2805}
2806
6b96018b 2807static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2808{
3a7da39d
BH
2809 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2810 bool convert_in = false, convert_out = false;
2811 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2812 struct ethtool_rxnfc __user *rxnfc;
7a229387 2813 struct ifreq __user *ifr;
3a7da39d
BH
2814 u32 rule_cnt = 0, actual_rule_cnt;
2815 u32 ethcmd;
7a229387 2816 u32 data;
3a7da39d 2817 int ret;
7a229387 2818
3a7da39d
BH
2819 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2820 return -EFAULT;
7a229387 2821
3a7da39d
BH
2822 compat_rxnfc = compat_ptr(data);
2823
2824 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2825 return -EFAULT;
2826
3a7da39d
BH
2827 /* Most ethtool structures are defined without padding.
2828 * Unfortunately struct ethtool_rxnfc is an exception.
2829 */
2830 switch (ethcmd) {
2831 default:
2832 break;
2833 case ETHTOOL_GRXCLSRLALL:
2834 /* Buffer size is variable */
2835 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2836 return -EFAULT;
2837 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2838 return -ENOMEM;
2839 buf_size += rule_cnt * sizeof(u32);
2840 /* fall through */
2841 case ETHTOOL_GRXRINGS:
2842 case ETHTOOL_GRXCLSRLCNT:
2843 case ETHTOOL_GRXCLSRULE:
55664f32 2844 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2845 convert_out = true;
2846 /* fall through */
2847 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2848 buf_size += sizeof(struct ethtool_rxnfc);
2849 convert_in = true;
2850 break;
2851 }
2852
2853 ifr = compat_alloc_user_space(buf_size);
954b1244 2854 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2855
2856 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2857 return -EFAULT;
2858
3a7da39d
BH
2859 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2860 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2861 return -EFAULT;
2862
3a7da39d 2863 if (convert_in) {
127fe533 2864 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2865 * fs.ring_cookie and at the end of fs, but nowhere else.
2866 */
127fe533
AD
2867 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2868 sizeof(compat_rxnfc->fs.m_ext) !=
2869 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2870 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2871 BUILD_BUG_ON(
2872 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2873 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2874 offsetof(struct ethtool_rxnfc, fs.location) -
2875 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2876
2877 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2878 (void __user *)(&rxnfc->fs.m_ext + 1) -
2879 (void __user *)rxnfc) ||
3a7da39d
BH
2880 copy_in_user(&rxnfc->fs.ring_cookie,
2881 &compat_rxnfc->fs.ring_cookie,
954b1244 2882 (void __user *)(&rxnfc->fs.location + 1) -
7d584568
WW
2883 (void __user *)&rxnfc->fs.ring_cookie))
2884 return -EFAULT;
2885 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2886 if (put_user(rule_cnt, &rxnfc->rule_cnt))
2887 return -EFAULT;
2888 } else if (copy_in_user(&rxnfc->rule_cnt,
2889 &compat_rxnfc->rule_cnt,
2890 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
2891 return -EFAULT;
2892 }
2893
2894 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2895 if (ret)
2896 return ret;
2897
2898 if (convert_out) {
2899 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2900 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2901 (const void __user *)rxnfc) ||
3a7da39d
BH
2902 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2903 &rxnfc->fs.ring_cookie,
954b1244
SH
2904 (const void __user *)(&rxnfc->fs.location + 1) -
2905 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2906 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2907 sizeof(rxnfc->rule_cnt)))
2908 return -EFAULT;
2909
2910 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2911 /* As an optimisation, we only copy the actual
2912 * number of rules that the underlying
2913 * function returned. Since Mallory might
2914 * change the rule count in user memory, we
2915 * check that it is less than the rule count
2916 * originally given (as the user buffer size),
2917 * which has been range-checked.
2918 */
2919 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2920 return -EFAULT;
2921 if (actual_rule_cnt < rule_cnt)
2922 rule_cnt = actual_rule_cnt;
2923 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2924 &rxnfc->rule_locs[0],
2925 rule_cnt * sizeof(u32)))
2926 return -EFAULT;
2927 }
2928 }
2929
2930 return 0;
7a229387
AB
2931}
2932
7a50a240
AB
2933static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2934{
2935 void __user *uptr;
2936 compat_uptr_t uptr32;
2937 struct ifreq __user *uifr;
2938
c6d409cf 2939 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2940 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2941 return -EFAULT;
2942
2943 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2944 return -EFAULT;
2945
2946 uptr = compat_ptr(uptr32);
2947
2948 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2949 return -EFAULT;
2950
2951 return dev_ioctl(net, SIOCWANDEV, uifr);
2952}
2953
6b96018b
AB
2954static int bond_ioctl(struct net *net, unsigned int cmd,
2955 struct compat_ifreq __user *ifr32)
7a229387
AB
2956{
2957 struct ifreq kifr;
7a229387
AB
2958 mm_segment_t old_fs;
2959 int err;
7a229387
AB
2960
2961 switch (cmd) {
2962 case SIOCBONDENSLAVE:
2963 case SIOCBONDRELEASE:
2964 case SIOCBONDSETHWADDR:
2965 case SIOCBONDCHANGEACTIVE:
6b96018b 2966 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2967 return -EFAULT;
2968
2969 old_fs = get_fs();
c6d409cf 2970 set_fs(KERNEL_DS);
c3f52ae6 2971 err = dev_ioctl(net, cmd,
2972 (struct ifreq __user __force *) &kifr);
c6d409cf 2973 set_fs(old_fs);
7a229387
AB
2974
2975 return err;
7a229387 2976 default:
07d106d0 2977 return -ENOIOCTLCMD;
ccbd6a5a 2978 }
7a229387
AB
2979}
2980
590d4693
BH
2981/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2982static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2983 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2984{
2985 struct ifreq __user *u_ifreq64;
7a229387
AB
2986 char tmp_buf[IFNAMSIZ];
2987 void __user *data64;
2988 u32 data32;
2989
2990 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2991 IFNAMSIZ))
2992 return -EFAULT;
417c3522 2993 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2994 return -EFAULT;
2995 data64 = compat_ptr(data32);
2996
2997 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2998
7a229387
AB
2999 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3000 IFNAMSIZ))
3001 return -EFAULT;
417c3522 3002 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3003 return -EFAULT;
3004
6b96018b 3005 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3006}
3007
6b96018b
AB
3008static int dev_ifsioc(struct net *net, struct socket *sock,
3009 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3010{
a2116ed2 3011 struct ifreq __user *uifr;
7a229387
AB
3012 int err;
3013
a2116ed2
AB
3014 uifr = compat_alloc_user_space(sizeof(*uifr));
3015 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3016 return -EFAULT;
3017
3018 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3019
7a229387
AB
3020 if (!err) {
3021 switch (cmd) {
3022 case SIOCGIFFLAGS:
3023 case SIOCGIFMETRIC:
3024 case SIOCGIFMTU:
3025 case SIOCGIFMEM:
3026 case SIOCGIFHWADDR:
3027 case SIOCGIFINDEX:
3028 case SIOCGIFADDR:
3029 case SIOCGIFBRDADDR:
3030 case SIOCGIFDSTADDR:
3031 case SIOCGIFNETMASK:
fab2532b 3032 case SIOCGIFPFLAGS:
7a229387 3033 case SIOCGIFTXQLEN:
fab2532b
AB
3034 case SIOCGMIIPHY:
3035 case SIOCGMIIREG:
a2116ed2 3036 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3037 err = -EFAULT;
3038 break;
3039 }
3040 }
3041 return err;
3042}
3043
a2116ed2
AB
3044static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3045 struct compat_ifreq __user *uifr32)
3046{
3047 struct ifreq ifr;
3048 struct compat_ifmap __user *uifmap32;
3049 mm_segment_t old_fs;
3050 int err;
3051
3052 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3053 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3054 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3055 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3056 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3057 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3058 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3059 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3060 if (err)
3061 return -EFAULT;
3062
3063 old_fs = get_fs();
c6d409cf 3064 set_fs(KERNEL_DS);
c3f52ae6 3065 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3066 set_fs(old_fs);
a2116ed2
AB
3067
3068 if (cmd == SIOCGIFMAP && !err) {
3069 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3070 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3071 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3072 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3073 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3074 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3075 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3076 if (err)
3077 err = -EFAULT;
3078 }
3079 return err;
3080}
3081
7a229387 3082struct rtentry32 {
c6d409cf 3083 u32 rt_pad1;
7a229387
AB
3084 struct sockaddr rt_dst; /* target address */
3085 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3086 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3087 unsigned short rt_flags;
3088 short rt_pad2;
3089 u32 rt_pad3;
3090 unsigned char rt_tos;
3091 unsigned char rt_class;
3092 short rt_pad4;
3093 short rt_metric; /* +1 for binary compatibility! */
7a229387 3094 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3095 u32 rt_mtu; /* per route MTU/Window */
3096 u32 rt_window; /* Window clamping */
7a229387
AB
3097 unsigned short rt_irtt; /* Initial RTT */
3098};
3099
3100struct in6_rtmsg32 {
3101 struct in6_addr rtmsg_dst;
3102 struct in6_addr rtmsg_src;
3103 struct in6_addr rtmsg_gateway;
3104 u32 rtmsg_type;
3105 u16 rtmsg_dst_len;
3106 u16 rtmsg_src_len;
3107 u32 rtmsg_metric;
3108 u32 rtmsg_info;
3109 u32 rtmsg_flags;
3110 s32 rtmsg_ifindex;
3111};
3112
6b96018b
AB
3113static int routing_ioctl(struct net *net, struct socket *sock,
3114 unsigned int cmd, void __user *argp)
7a229387
AB
3115{
3116 int ret;
3117 void *r = NULL;
3118 struct in6_rtmsg r6;
3119 struct rtentry r4;
3120 char devname[16];
3121 u32 rtdev;
3122 mm_segment_t old_fs = get_fs();
3123
6b96018b
AB
3124 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3125 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3126 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3127 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3128 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3129 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3130 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3131 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3132 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3133 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3134 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3135
3136 r = (void *) &r6;
3137 } else { /* ipv4 */
6b96018b 3138 struct rtentry32 __user *ur4 = argp;
c6d409cf 3139 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3140 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3141 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3142 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3143 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3144 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3145 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3146 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3147 if (rtdev) {
c6d409cf 3148 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3149 r4.rt_dev = (char __user __force *)devname;
3150 devname[15] = 0;
7a229387
AB
3151 } else
3152 r4.rt_dev = NULL;
3153
3154 r = (void *) &r4;
3155 }
3156
3157 if (ret) {
3158 ret = -EFAULT;
3159 goto out;
3160 }
3161
c6d409cf 3162 set_fs(KERNEL_DS);
6b96018b 3163 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3164 set_fs(old_fs);
7a229387
AB
3165
3166out:
7a229387
AB
3167 return ret;
3168}
3169
3170/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3171 * for some operations; this forces use of the newer bridge-utils that
25985edc 3172 * use compatible ioctls
7a229387 3173 */
6b96018b 3174static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3175{
6b96018b 3176 compat_ulong_t tmp;
7a229387 3177
6b96018b 3178 if (get_user(tmp, argp))
7a229387
AB
3179 return -EFAULT;
3180 if (tmp == BRCTL_GET_VERSION)
3181 return BRCTL_VERSION + 1;
3182 return -EINVAL;
3183}
3184
6b96018b
AB
3185static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3186 unsigned int cmd, unsigned long arg)
3187{
3188 void __user *argp = compat_ptr(arg);
3189 struct sock *sk = sock->sk;
3190 struct net *net = sock_net(sk);
7a229387 3191
6b96018b 3192 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3193 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3194
3195 switch (cmd) {
3196 case SIOCSIFBR:
3197 case SIOCGIFBR:
3198 return old_bridge_ioctl(argp);
3199 case SIOCGIFNAME:
3200 return dev_ifname32(net, argp);
3201 case SIOCGIFCONF:
3202 return dev_ifconf(net, argp);
3203 case SIOCETHTOOL:
3204 return ethtool_ioctl(net, argp);
7a50a240
AB
3205 case SIOCWANDEV:
3206 return compat_siocwandev(net, argp);
a2116ed2
AB
3207 case SIOCGIFMAP:
3208 case SIOCSIFMAP:
3209 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3210 case SIOCBONDENSLAVE:
3211 case SIOCBONDRELEASE:
3212 case SIOCBONDSETHWADDR:
6b96018b
AB
3213 case SIOCBONDCHANGEACTIVE:
3214 return bond_ioctl(net, cmd, argp);
3215 case SIOCADDRT:
3216 case SIOCDELRT:
3217 return routing_ioctl(net, sock, cmd, argp);
3218 case SIOCGSTAMP:
3219 return do_siocgstamp(net, sock, cmd, argp);
3220 case SIOCGSTAMPNS:
3221 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3222 case SIOCBONDSLAVEINFOQUERY:
3223 case SIOCBONDINFOQUERY:
a2116ed2 3224 case SIOCSHWTSTAMP:
fd468c74 3225 case SIOCGHWTSTAMP:
590d4693 3226 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3227
3228 case FIOSETOWN:
3229 case SIOCSPGRP:
3230 case FIOGETOWN:
3231 case SIOCGPGRP:
3232 case SIOCBRADDBR:
3233 case SIOCBRDELBR:
3234 case SIOCGIFVLAN:
3235 case SIOCSIFVLAN:
3236 case SIOCADDDLCI:
3237 case SIOCDELDLCI:
c62cce2c 3238 case SIOCGSKNS:
6b96018b
AB
3239 return sock_ioctl(file, cmd, arg);
3240
3241 case SIOCGIFFLAGS:
3242 case SIOCSIFFLAGS:
3243 case SIOCGIFMETRIC:
3244 case SIOCSIFMETRIC:
3245 case SIOCGIFMTU:
3246 case SIOCSIFMTU:
3247 case SIOCGIFMEM:
3248 case SIOCSIFMEM:
3249 case SIOCGIFHWADDR:
3250 case SIOCSIFHWADDR:
3251 case SIOCADDMULTI:
3252 case SIOCDELMULTI:
3253 case SIOCGIFINDEX:
6b96018b
AB
3254 case SIOCGIFADDR:
3255 case SIOCSIFADDR:
3256 case SIOCSIFHWBROADCAST:
6b96018b 3257 case SIOCDIFADDR:
6b96018b
AB
3258 case SIOCGIFBRDADDR:
3259 case SIOCSIFBRDADDR:
3260 case SIOCGIFDSTADDR:
3261 case SIOCSIFDSTADDR:
3262 case SIOCGIFNETMASK:
3263 case SIOCSIFNETMASK:
3264 case SIOCSIFPFLAGS:
3265 case SIOCGIFPFLAGS:
3266 case SIOCGIFTXQLEN:
3267 case SIOCSIFTXQLEN:
3268 case SIOCBRADDIF:
3269 case SIOCBRDELIF:
9177efd3
AB
3270 case SIOCSIFNAME:
3271 case SIOCGMIIPHY:
3272 case SIOCGMIIREG:
3273 case SIOCSMIIREG:
6b96018b 3274 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3275
6b96018b
AB
3276 case SIOCSARP:
3277 case SIOCGARP:
3278 case SIOCDARP:
6b96018b 3279 case SIOCATMARK:
9177efd3
AB
3280 return sock_do_ioctl(net, sock, cmd, arg);
3281 }
3282
6b96018b
AB
3283 return -ENOIOCTLCMD;
3284}
7a229387 3285
95c96174 3286static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3287 unsigned long arg)
89bbfc95
SP
3288{
3289 struct socket *sock = file->private_data;
3290 int ret = -ENOIOCTLCMD;
87de87d5
DM
3291 struct sock *sk;
3292 struct net *net;
3293
3294 sk = sock->sk;
3295 net = sock_net(sk);
89bbfc95
SP
3296
3297 if (sock->ops->compat_ioctl)
3298 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3299
87de87d5
DM
3300 if (ret == -ENOIOCTLCMD &&
3301 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3302 ret = compat_wext_handle_ioctl(net, cmd, arg);
3303
6b96018b
AB
3304 if (ret == -ENOIOCTLCMD)
3305 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3306
89bbfc95
SP
3307 return ret;
3308}
3309#endif
3310
ac5a488e
SS
3311int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3312{
3313 return sock->ops->bind(sock, addr, addrlen);
3314}
c6d409cf 3315EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3316
3317int kernel_listen(struct socket *sock, int backlog)
3318{
3319 return sock->ops->listen(sock, backlog);
3320}
c6d409cf 3321EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3322
3323int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3324{
3325 struct sock *sk = sock->sk;
3326 int err;
3327
3328 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3329 newsock);
3330 if (err < 0)
3331 goto done;
3332
cdfbabfb 3333 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3334 if (err < 0) {
3335 sock_release(*newsock);
fa8705b0 3336 *newsock = NULL;
ac5a488e
SS
3337 goto done;
3338 }
3339
3340 (*newsock)->ops = sock->ops;
1b08534e 3341 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3342
3343done:
3344 return err;
3345}
c6d409cf 3346EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3347
3348int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3349 int flags)
ac5a488e
SS
3350{
3351 return sock->ops->connect(sock, addr, addrlen, flags);
3352}
c6d409cf 3353EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3354
3355int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3356 int *addrlen)
3357{
3358 return sock->ops->getname(sock, addr, addrlen, 0);
3359}
c6d409cf 3360EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3361
3362int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3363 int *addrlen)
3364{
3365 return sock->ops->getname(sock, addr, addrlen, 1);
3366}
c6d409cf 3367EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3368
3369int kernel_getsockopt(struct socket *sock, int level, int optname,
3370 char *optval, int *optlen)
3371{
3372 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3373 char __user *uoptval;
3374 int __user *uoptlen;
ac5a488e
SS
3375 int err;
3376
fb8621bb
NK
3377 uoptval = (char __user __force *) optval;
3378 uoptlen = (int __user __force *) optlen;
3379
ac5a488e
SS
3380 set_fs(KERNEL_DS);
3381 if (level == SOL_SOCKET)
fb8621bb 3382 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3383 else
fb8621bb
NK
3384 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3385 uoptlen);
ac5a488e
SS
3386 set_fs(oldfs);
3387 return err;
3388}
c6d409cf 3389EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3390
3391int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3392 char *optval, unsigned int optlen)
ac5a488e
SS
3393{
3394 mm_segment_t oldfs = get_fs();
fb8621bb 3395 char __user *uoptval;
ac5a488e
SS
3396 int err;
3397
fb8621bb
NK
3398 uoptval = (char __user __force *) optval;
3399
ac5a488e
SS
3400 set_fs(KERNEL_DS);
3401 if (level == SOL_SOCKET)
fb8621bb 3402 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3403 else
fb8621bb 3404 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3405 optlen);
3406 set_fs(oldfs);
3407 return err;
3408}
c6d409cf 3409EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3410
3411int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3412 size_t size, int flags)
3413{
3414 if (sock->ops->sendpage)
3415 return sock->ops->sendpage(sock, page, offset, size, flags);
3416
3417 return sock_no_sendpage(sock, page, offset, size, flags);
3418}
c6d409cf 3419EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3420
306b13eb
TH
3421int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3422 size_t size, int flags)
3423{
3424 struct socket *sock = sk->sk_socket;
3425
3426 if (sock->ops->sendpage_locked)
3427 return sock->ops->sendpage_locked(sk, page, offset, size,
3428 flags);
3429
3430 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3431}
3432EXPORT_SYMBOL(kernel_sendpage_locked);
3433
ac5a488e
SS
3434int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3435{
3436 mm_segment_t oldfs = get_fs();
3437 int err;
3438
3439 set_fs(KERNEL_DS);
3440 err = sock->ops->ioctl(sock, cmd, arg);
3441 set_fs(oldfs);
3442
3443 return err;
3444}
c6d409cf 3445EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3446
91cf45f0
TM
3447int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3448{
3449 return sock->ops->shutdown(sock, how);
3450}
91cf45f0 3451EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3452
3453/* This routine returns the IP overhead imposed by a socket i.e.
3454 * the length of the underlying IP header, depending on whether
3455 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3456 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3457 */
3458u32 kernel_sock_ip_overhead(struct sock *sk)
3459{
3460 struct inet_sock *inet;
3461 struct ip_options_rcu *opt;
3462 u32 overhead = 0;
113c3075
P
3463#if IS_ENABLED(CONFIG_IPV6)
3464 struct ipv6_pinfo *np;
3465 struct ipv6_txoptions *optv6 = NULL;
3466#endif /* IS_ENABLED(CONFIG_IPV6) */
3467
3468 if (!sk)
3469 return overhead;
3470
113c3075
P
3471 switch (sk->sk_family) {
3472 case AF_INET:
3473 inet = inet_sk(sk);
3474 overhead += sizeof(struct iphdr);
3475 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3476 sock_owned_by_user(sk));
113c3075
P
3477 if (opt)
3478 overhead += opt->opt.optlen;
3479 return overhead;
3480#if IS_ENABLED(CONFIG_IPV6)
3481 case AF_INET6:
3482 np = inet6_sk(sk);
3483 overhead += sizeof(struct ipv6hdr);
3484 if (np)
3485 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3486 sock_owned_by_user(sk));
113c3075
P
3487 if (optv6)
3488 overhead += (optv6->opt_flen + optv6->opt_nflen);
3489 return overhead;
3490#endif /* IS_ENABLED(CONFIG_IPV6) */
3491 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3492 return overhead;
3493 }
3494}
3495EXPORT_SYMBOL(kernel_sock_ip_overhead);