]> git.ipfire.org Git - thirdparty/linux.git/blame - kernel/sysctl.c
sysctl: Fix data-races in proc_dou8vec_minmax().
[thirdparty/linux.git] / kernel / sysctl.c
CommitLineData
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
1da177e4
LT
2/*
3 * sysctl.c: General linux system control interface
4 *
5 * Begun 24 March 1995, Stephen Tweedie
6 * Added /proc support, Dec 1995
7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10 * Dynamic registration fixes, Stephen Tweedie.
11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13 * Horn.
14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17 * Wendling.
18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
19 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
20 */
21
1da177e4
LT
22#include <linux/module.h>
23#include <linux/mm.h>
24#include <linux/swap.h>
25#include <linux/slab.h>
26#include <linux/sysctl.h>
5a04cca6 27#include <linux/bitmap.h>
d33ed52d 28#include <linux/signal.h>
f39650de 29#include <linux/panic.h>
455cd5ab 30#include <linux/printk.h>
1da177e4 31#include <linux/proc_fs.h>
72c2d582 32#include <linux/security.h>
1da177e4 33#include <linux/ctype.h>
fd4b616b 34#include <linux/kmemleak.h>
b6459415 35#include <linux/filter.h>
62239ac2 36#include <linux/fs.h>
1da177e4
LT
37#include <linux/init.h>
38#include <linux/kernel.h>
0296b228 39#include <linux/kobject.h>
20380731 40#include <linux/net.h>
1da177e4
LT
41#include <linux/sysrq.h>
42#include <linux/highuid.h>
43#include <linux/writeback.h>
3fff4c42 44#include <linux/ratelimit.h>
76ab0f53 45#include <linux/compaction.h>
1da177e4 46#include <linux/hugetlb.h>
1da177e4 47#include <linux/initrd.h>
0b77f5bf 48#include <linux/key.h>
1da177e4
LT
49#include <linux/times.h>
50#include <linux/limits.h>
51#include <linux/dcache.h>
52#include <linux/syscalls.h>
c748e134 53#include <linux/vmstat.h>
c255d844
PM
54#include <linux/nfs_fs.h>
55#include <linux/acpi.h>
10a0a8d4 56#include <linux/reboot.h>
b0fc494f 57#include <linux/ftrace.h>
cdd6c482 58#include <linux/perf_event.h>
8e4228e1 59#include <linux/oom.h>
17f60a7d 60#include <linux/kmod.h>
73efc039 61#include <linux/capability.h>
40401530 62#include <linux/binfmts.h>
cf4aebc2 63#include <linux/sched/sysctl.h>
d2921684 64#include <linux/mount.h>
cefdca0a 65#include <linux/userfaultfd_k.h>
2374c09b 66#include <linux/pid.h>
1da177e4 67
7f2923c4
CB
68#include "../lib/kstrtox.h"
69
7c0f6ba6 70#include <linux/uaccess.h>
1da177e4
LT
71#include <asm/processor.h>
72
29cbc78b
AK
73#ifdef CONFIG_X86
74#include <asm/nmi.h>
0741f4d2 75#include <asm/stacktrace.h>
6e7c4025 76#include <asm/io.h>
29cbc78b 77#endif
d550bbd4
DH
78#ifdef CONFIG_SPARC
79#include <asm/setup.h>
80#endif
4f0e056f
DY
81#ifdef CONFIG_RT_MUTEXES
82#include <linux/rtmutex.h>
83#endif
504d7cf1 84
1da177e4
LT
85#if defined(CONFIG_SYSCTL)
86
c4f3b63f 87/* Constants used for minimum and maximum */
c4f3b63f 88
c5dfd78e 89#ifdef CONFIG_PERF_EVENTS
d73840ec 90static const int six_hundred_forty_kb = 640 * 1024;
c5dfd78e 91#endif
c4f3b63f 92
9e4a5bda 93
f628867d 94static const int ngroups_max = NGROUPS_MAX;
73efc039 95static const int cap_last_cap = CAP_LAST_CAP;
1da177e4 96
d6f8ff73 97#ifdef CONFIG_PROC_SYSCTL
f4aacea2 98
a19ac337
LR
/**
 * enum sysctl_writes_mode - how the file position affects sysctl writes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall must carry the complete value;
 *	repeated writes on the same file descriptor overwrite the value no
 *	matter what the file position is, and no warning is printed when the
 *	initial position is non-zero.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, but a warning is logged
 *	when the initial file position is non-zero.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *	position 0 and the value must fit entirely in the buffer handed to
 *	the write syscall. String entries honour the file position, limited
 *	to the maximum length of the buffer; anything beyond that length is
 *	ignored, and successive writes append to the buffer.
 *
 * The selected mode decides how the current file position influences updates
 * made to sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};
f4aacea2 123
a19ac337 124static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
f461d2dc 125#endif /* CONFIG_PROC_SYSCTL */
ceb18132 126
67f3977f
AG
127#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
128 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
1da177e4
LT
129int sysctl_legacy_va_layout;
130#endif
131
5e771905 132#ifdef CONFIG_COMPACTION
/*
 * Range limit used as a sysctl maximum; the matching minimum
 * (min_extfrag_threshold) is SYSCTL_ZERO and needs no variable of its own.
 */
static const int max_extfrag_threshold = 1000;
5e771905
MG
135#endif
136
f461d2dc
CH
137#endif /* CONFIG_SYSCTL */
138
139/*
140 * /proc/sys support
141 */
142
b89a8171 143#ifdef CONFIG_PROC_SYSCTL
1da177e4 144
f8808300 145static int _proc_do_string(char *data, int maxlen, int write,
32927393 146 char *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
147{
148 size_t len;
32927393 149 char c, *p;
8d060877
ON
150
151 if (!data || !maxlen || !*lenp) {
1da177e4
LT
152 *lenp = 0;
153 return 0;
154 }
8d060877 155
1da177e4 156 if (write) {
f4aacea2
KC
157 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
158 /* Only continue writes not past the end of buffer. */
159 len = strlen(data);
160 if (len > maxlen - 1)
161 len = maxlen - 1;
162
163 if (*ppos > len)
164 return 0;
165 len = *ppos;
166 } else {
167 /* Start writing from beginning of buffer. */
168 len = 0;
169 }
170
2ca9bb45 171 *ppos += *lenp;
1da177e4 172 p = buffer;
2ca9bb45 173 while ((p - buffer) < *lenp && len < maxlen - 1) {
32927393 174 c = *(p++);
1da177e4
LT
175 if (c == 0 || c == '\n')
176 break;
2ca9bb45 177 data[len++] = c;
1da177e4 178 }
f8808300 179 data[len] = 0;
1da177e4 180 } else {
f5dd3d6f
SV
181 len = strlen(data);
182 if (len > maxlen)
183 len = maxlen;
8d060877
ON
184
185 if (*ppos > len) {
186 *lenp = 0;
187 return 0;
188 }
189
190 data += *ppos;
191 len -= *ppos;
192
1da177e4
LT
193 if (len > *lenp)
194 len = *lenp;
195 if (len)
32927393 196 memcpy(buffer, data, len);
1da177e4 197 if (len < *lenp) {
32927393 198 buffer[len] = '\n';
1da177e4
LT
199 len++;
200 }
201 *lenp = len;
202 *ppos += len;
203 }
204 return 0;
205}
206
f4aacea2
KC
207static void warn_sysctl_write(struct ctl_table *table)
208{
209 pr_warn_once("%s wrote to %s when file position was not 0!\n"
210 "This will not be supported in the future. To silence this\n"
211 "warning, set kernel.sysctl_writes_strict = -1\n",
212 current->comm, table->procname);
213}
214
d383d484 215/**
5f733e8a 216 * proc_first_pos_non_zero_ignore - check if first position is allowed
d383d484
LR
217 * @ppos: file position
218 * @table: the sysctl table
219 *
220 * Returns true if the first position is non-zero and the sysctl_writes_strict
221 * mode indicates this is not allowed for numeric input types. String proc
5f733e8a 222 * handlers can ignore the return value.
d383d484
LR
223 */
224static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
225 struct ctl_table *table)
226{
227 if (!*ppos)
228 return false;
229
230 switch (sysctl_writes_strict) {
231 case SYSCTL_WRITES_STRICT:
232 return true;
233 case SYSCTL_WRITES_WARN:
234 warn_sysctl_write(table);
235 return false;
236 default:
237 return false;
238 }
239}
240
f5dd3d6f
SV
241/**
242 * proc_dostring - read a string sysctl
243 * @table: the sysctl table
244 * @write: %TRUE if this is a write to the sysctl file
f5dd3d6f
SV
245 * @buffer: the user buffer
246 * @lenp: the size of the user buffer
247 * @ppos: file position
248 *
249 * Reads/writes a string from/to the user buffer. If the kernel
250 * buffer provided is not large enough to hold the string, the
251 * string is truncated. The copied string is %NULL-terminated.
252 * If the string is being read by the user process, it is copied
253 * and a newline '\n' is added. It is truncated if the buffer is
254 * not large enough.
255 *
256 * Returns 0 on success.
257 */
8d65af78 258int proc_dostring(struct ctl_table *table, int write,
32927393 259 void *buffer, size_t *lenp, loff_t *ppos)
f5dd3d6f 260{
d383d484
LR
261 if (write)
262 proc_first_pos_non_zero_ignore(ppos, table);
f4aacea2 263
32927393
CH
264 return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
265 ppos);
f5dd3d6f
SV
266}
267
00b7c339
AW
/*
 * Advance *@buf past leading whitespace, as determined by skip_spaces(),
 * and return how many characters were skipped.
 *
 * NOTE(review): no length is passed in, so where the scan stops depends
 * entirely on skip_spaces(); callers tracking a remaining byte count should
 * confirm the result cannot exceed it.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
276
9f977fb7
OP
/*
 * Consume a leading run of the character @v: *@buf is advanced and *@size
 * reduced by one for every occurrence, stopping at the first different
 * character or when *@size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
286
7f2923c4
CB
287/**
288 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
289 * fail on overflow
290 *
291 * @cp: kernel buffer containing the string to parse
292 * @endp: pointer to store the trailing characters
293 * @base: the base to use
294 * @res: where the parsed integer will be stored
295 *
296 * In case of success 0 is returned and @res will contain the parsed integer,
297 * @endp will hold any trailing characters.
298 * This function will fail the parse on overflow. If there wasn't an overflow
299 * the function will defer the decision what characters count as invalid to the
300 * caller.
301 */
302static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
303 unsigned long *res)
304{
305 unsigned long long result;
306 unsigned int rv;
307
308 cp = _parse_integer_fixup_radix(cp, &base);
309 rv = _parse_integer(cp, base, &result);
310 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
311 return -ERANGE;
312
313 cp += rv;
314
315 if (endp)
316 *endp = (char *)cp;
317
318 *res = (unsigned long)result;
319 return 0;
320}
321
00b7c339
AW
322#define TMPBUFLEN 22
323/**
0fc377bd 324 * proc_get_long - reads an ASCII formatted integer from a user buffer
00b7c339 325 *
0fc377bd
RD
326 * @buf: a kernel buffer
327 * @size: size of the kernel buffer
328 * @val: this is where the number will be stored
329 * @neg: set to %TRUE if number is negative
330 * @perm_tr: a vector which contains the allowed trailers
331 * @perm_tr_len: size of the perm_tr vector
332 * @tr: pointer to store the trailer character
00b7c339 333 *
0fc377bd
RD
334 * In case of success %0 is returned and @buf and @size are updated with
335 * the amount of bytes read. If @tr is non-NULL and a trailing
336 * character exists (size is non-zero after returning from this
337 * function), @tr is updated with the trailing character.
00b7c339
AW
338 */
339static int proc_get_long(char **buf, size_t *size,
340 unsigned long *val, bool *neg,
341 const char *perm_tr, unsigned perm_tr_len, char *tr)
342{
343 int len;
344 char *p, tmp[TMPBUFLEN];
345
346 if (!*size)
347 return -EINVAL;
348
349 len = *size;
350 if (len > TMPBUFLEN - 1)
351 len = TMPBUFLEN - 1;
352
353 memcpy(tmp, *buf, len);
354
355 tmp[len] = 0;
356 p = tmp;
357 if (*p == '-' && *size > 1) {
358 *neg = true;
359 p++;
360 } else
361 *neg = false;
362 if (!isdigit(*p))
363 return -EINVAL;
364
7f2923c4
CB
365 if (strtoul_lenient(p, &p, 0, val))
366 return -EINVAL;
00b7c339
AW
367
368 len = p - tmp;
369
370 /* We don't know if the next char is whitespace thus we may accept
371 * invalid integers (e.g. 1234...a) or two integers instead of one
372 * (e.g. 123...1). So lets not allow such large numbers. */
373 if (len == TMPBUFLEN - 1)
374 return -EINVAL;
375
376 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
377 return -EINVAL;
1da177e4 378
00b7c339
AW
379 if (tr && (len < *size))
380 *tr = *p;
381
382 *buf += len;
383 *size -= len;
384
385 return 0;
386}
387
388/**
0fc377bd 389 * proc_put_long - converts an integer to a decimal ASCII formatted string
00b7c339 390 *
0fc377bd
RD
391 * @buf: the user buffer
392 * @size: the size of the user buffer
393 * @val: the integer to be converted
394 * @neg: sign of the number, %TRUE for negative
00b7c339 395 *
32927393
CH
396 * In case of success @buf and @size are updated with the amount of bytes
397 * written.
00b7c339 398 */
32927393 399static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
00b7c339
AW
400{
401 int len;
402 char tmp[TMPBUFLEN], *p = tmp;
403
404 sprintf(p, "%s%lu", neg ? "-" : "", val);
405 len = strlen(tmp);
406 if (len > *size)
407 len = *size;
32927393 408 memcpy(*buf, tmp, len);
00b7c339
AW
409 *size -= len;
410 *buf += len;
00b7c339
AW
411}
412#undef TMPBUFLEN
413
/*
 * Store the single character @c at *@buf and step *@buf forward while
 * shrinking *@size by one.  Nothing happens when *@size is already zero.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *out;

	if (!*size)
		return;

	out = *buf;
	*out++ = c;
	*buf = out;
	(*size)--;
}
1da177e4 425
a2071573
JH
/*
 * Converter between the unsigned long used by the parser and a bool slot.
 * @valp is typed int * to match the converter signature but is accessed as
 * a bool.  On a write *@lvalp is stored as a bool; on a read the bool is
 * widened into *@lvalp and *@negp is cleared.  Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (!write) {
		int val = *flag;

		*lvalp = (unsigned long)val;
		*negp = false;
		return 0;
	}

	*flag = *lvalp;
	return 0;
}
440
/*
 * Default converter between the parser's sign/magnitude pair and an int.
 *
 * Write: stores the magnitude *@lvalp, negated when *@negp is set, into
 * *@valp; returns -EINVAL when the magnitude exceeds INT_MAX (or
 * INT_MAX + 1 for a negative number).
 * Read: splits *@valp into *@negp and the magnitude in *@lvalp.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int val = READ_ONCE(*valp);

		if (val < 0) {
			*negp = true;
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
		return 0;
	}

	if (*negp) {
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		WRITE_ONCE(*valp, -*lvalp);
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
467
4f2fec00
LR
/*
 * Default converter between the parser's unsigned long and an unsigned int.
 * A write stores *@lvalp into *@valp and fails with -EINVAL when the value
 * exceeds UINT_MAX; a read widens *@valp into *@lvalp.  Returns 0 on
 * success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		unsigned int cur = READ_ONCE(*valp);

		*lvalp = (unsigned long)cur;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
482
00b7c339
AW
483static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
484
d8217f07 485static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
32927393 486 int write, void *buffer,
fcfbd547 487 size_t *lenp, loff_t *ppos,
00b7c339 488 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
1da177e4
LT
489 int write, void *data),
490 void *data)
491{
00b7c339 492 int *i, vleft, first = 1, err = 0;
00b7c339 493 size_t left;
32927393 494 char *p;
1da177e4 495
00b7c339 496 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
1da177e4
LT
497 *lenp = 0;
498 return 0;
499 }
500
fcfbd547 501 i = (int *) tbl_data;
1da177e4
LT
502 vleft = table->maxlen / sizeof(*i);
503 left = *lenp;
504
505 if (!conv)
506 conv = do_proc_dointvec_conv;
507
00b7c339 508 if (write) {
d383d484
LR
509 if (proc_first_pos_non_zero_ignore(ppos, table))
510 goto out;
f4aacea2 511
00b7c339
AW
512 if (left > PAGE_SIZE - 1)
513 left = PAGE_SIZE - 1;
32927393 514 p = buffer;
00b7c339
AW
515 }
516
1da177e4 517 for (; left && vleft--; i++, first=0) {
00b7c339
AW
518 unsigned long lval;
519 bool neg;
1da177e4 520
00b7c339 521 if (write) {
70f6cbb6 522 left -= proc_skip_spaces(&p);
1da177e4 523
563b0467
O
524 if (!left)
525 break;
70f6cbb6 526 err = proc_get_long(&p, &left, &lval, &neg,
00b7c339
AW
527 proc_wspace_sep,
528 sizeof(proc_wspace_sep), NULL);
529 if (err)
1da177e4 530 break;
00b7c339
AW
531 if (conv(&neg, &lval, i, 1, data)) {
532 err = -EINVAL;
1da177e4 533 break;
00b7c339 534 }
1da177e4 535 } else {
00b7c339
AW
536 if (conv(&neg, &lval, i, 0, data)) {
537 err = -EINVAL;
538 break;
539 }
1da177e4 540 if (!first)
32927393
CH
541 proc_put_char(&buffer, &left, '\t');
542 proc_put_long(&buffer, &left, lval, neg);
1da177e4
LT
543 }
544 }
545
00b7c339 546 if (!write && !first && left && !err)
32927393 547 proc_put_char(&buffer, &left, '\n');
563b0467 548 if (write && !err && left)
70f6cbb6 549 left -= proc_skip_spaces(&p);
32927393
CH
550 if (write && first)
551 return err ? : -EINVAL;
1da177e4 552 *lenp -= left;
f4aacea2 553out:
1da177e4 554 *ppos += *lenp;
00b7c339 555 return err;
1da177e4
LT
556}
557
8d65af78 558static int do_proc_dointvec(struct ctl_table *table, int write,
32927393 559 void *buffer, size_t *lenp, loff_t *ppos,
00b7c339 560 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
fcfbd547
KK
561 int write, void *data),
562 void *data)
563{
8d65af78 564 return __do_proc_dointvec(table->data, table, write,
fcfbd547
KK
565 buffer, lenp, ppos, conv, data);
566}
567
4f2fec00
LR
568static int do_proc_douintvec_w(unsigned int *tbl_data,
569 struct ctl_table *table,
32927393 570 void *buffer,
4f2fec00
LR
571 size_t *lenp, loff_t *ppos,
572 int (*conv)(unsigned long *lvalp,
573 unsigned int *valp,
574 int write, void *data),
575 void *data)
576{
577 unsigned long lval;
578 int err = 0;
579 size_t left;
580 bool neg;
32927393 581 char *p = buffer;
4f2fec00
LR
582
583 left = *lenp;
584
585 if (proc_first_pos_non_zero_ignore(ppos, table))
586 goto bail_early;
587
588 if (left > PAGE_SIZE - 1)
589 left = PAGE_SIZE - 1;
590
4f2fec00
LR
591 left -= proc_skip_spaces(&p);
592 if (!left) {
593 err = -EINVAL;
594 goto out_free;
595 }
596
597 err = proc_get_long(&p, &left, &lval, &neg,
598 proc_wspace_sep,
599 sizeof(proc_wspace_sep), NULL);
600 if (err || neg) {
601 err = -EINVAL;
602 goto out_free;
603 }
604
605 if (conv(&lval, tbl_data, 1, data)) {
606 err = -EINVAL;
607 goto out_free;
608 }
609
610 if (!err && left)
611 left -= proc_skip_spaces(&p);
612
613out_free:
4f2fec00
LR
614 if (err)
615 return -EINVAL;
616
617 return 0;
618
619 /* This is in keeping with old __do_proc_dointvec() */
620bail_early:
621 *ppos += *lenp;
622 return err;
623}
624
32927393 625static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
4f2fec00
LR
626 size_t *lenp, loff_t *ppos,
627 int (*conv)(unsigned long *lvalp,
628 unsigned int *valp,
629 int write, void *data),
630 void *data)
631{
632 unsigned long lval;
633 int err = 0;
634 size_t left;
635
636 left = *lenp;
637
638 if (conv(&lval, tbl_data, 0, data)) {
639 err = -EINVAL;
640 goto out;
641 }
642
32927393
CH
643 proc_put_long(&buffer, &left, lval, false);
644 if (!left)
4f2fec00
LR
645 goto out;
646
32927393 647 proc_put_char(&buffer, &left, '\n');
4f2fec00
LR
648
649out:
650 *lenp -= left;
651 *ppos += *lenp;
652
653 return err;
654}
655
656static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
32927393 657 int write, void *buffer,
4f2fec00
LR
658 size_t *lenp, loff_t *ppos,
659 int (*conv)(unsigned long *lvalp,
660 unsigned int *valp,
661 int write, void *data),
662 void *data)
663{
664 unsigned int *i, vleft;
665
666 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
667 *lenp = 0;
668 return 0;
669 }
670
671 i = (unsigned int *) tbl_data;
672 vleft = table->maxlen / sizeof(*i);
673
674 /*
675 * Arrays are not supported, keep this simple. *Do not* add
676 * support for them.
677 */
678 if (vleft != 1) {
679 *lenp = 0;
680 return -EINVAL;
681 }
682
683 if (!conv)
684 conv = do_proc_douintvec_conv;
685
686 if (write)
687 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
688 conv, data);
689 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
690}
691
1998f193
LC
692int do_proc_douintvec(struct ctl_table *table, int write,
693 void *buffer, size_t *lenp, loff_t *ppos,
694 int (*conv)(unsigned long *lvalp,
695 unsigned int *valp,
696 int write, void *data),
697 void *data)
4f2fec00
LR
698{
699 return __do_proc_douintvec(table->data, table, write,
700 buffer, lenp, ppos, conv, data);
701}
702
a2071573
JH
703/**
704 * proc_dobool - read/write a bool
705 * @table: the sysctl table
706 * @write: %TRUE if this is a write to the sysctl file
707 * @buffer: the user buffer
708 * @lenp: the size of the user buffer
709 * @ppos: file position
710 *
711 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
712 * values from/to the user buffer, treated as an ASCII string.
713 *
714 * Returns 0 on success.
715 */
716int proc_dobool(struct ctl_table *table, int write, void *buffer,
717 size_t *lenp, loff_t *ppos)
718{
719 return do_proc_dointvec(table, write, buffer, lenp, ppos,
720 do_proc_dobool_conv, NULL);
721}
722
1da177e4
LT
723/**
724 * proc_dointvec - read a vector of integers
725 * @table: the sysctl table
726 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
727 * @buffer: the user buffer
728 * @lenp: the size of the user buffer
729 * @ppos: file position
730 *
731 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
732 * values from/to the user buffer, treated as an ASCII string.
733 *
734 * Returns 0 on success.
735 */
32927393
CH
736int proc_dointvec(struct ctl_table *table, int write, void *buffer,
737 size_t *lenp, loff_t *ppos)
1da177e4 738{
e7d316a0
SAK
739 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
740}
741
6923aa0d
SAS
742#ifdef CONFIG_COMPACTION
743static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
32927393 744 int write, void *buffer, size_t *lenp, loff_t *ppos)
6923aa0d
SAS
745{
746 int ret, old;
747
748 if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
749 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
750
751 old = *(int *)table->data;
752 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
753 if (ret)
754 return ret;
755 if (old != *(int *)table->data)
756 pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
757 table->procname, current->comm,
758 task_pid_nr(current));
759 return ret;
760}
761#endif
762
e7d316a0
SAK
763/**
764 * proc_douintvec - read a vector of unsigned integers
765 * @table: the sysctl table
766 * @write: %TRUE if this is a write to the sysctl file
767 * @buffer: the user buffer
768 * @lenp: the size of the user buffer
769 * @ppos: file position
770 *
771 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
772 * values from/to the user buffer, treated as an ASCII string.
773 *
774 * Returns 0 on success.
775 */
32927393
CH
776int proc_douintvec(struct ctl_table *table, int write, void *buffer,
777 size_t *lenp, loff_t *ppos)
e7d316a0 778{
4f2fec00
LR
779 return do_proc_douintvec(table, write, buffer, lenp, ppos,
780 do_proc_douintvec_conv, NULL);
1da177e4
LT
781}
782
34f5a398 783/*
25ddbb18
AK
784 * Taint values can only be increased
785 * This means we can safely use a temporary.
34f5a398 786 */
8d65af78 787static int proc_taint(struct ctl_table *table, int write,
32927393 788 void *buffer, size_t *lenp, loff_t *ppos)
34f5a398 789{
25ddbb18
AK
790 struct ctl_table t;
791 unsigned long tmptaint = get_taint();
792 int err;
34f5a398 793
91fcd412 794 if (write && !capable(CAP_SYS_ADMIN))
34f5a398
TT
795 return -EPERM;
796
25ddbb18
AK
797 t = *table;
798 t.data = &tmptaint;
8d65af78 799 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
25ddbb18
AK
800 if (err < 0)
801 return err;
802
803 if (write) {
db38d5c1
RA
804 int i;
805
806 /*
807 * If we are relying on panic_on_taint not producing
808 * false positives due to userspace input, bail out
809 * before setting the requested taint flags.
810 */
811 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
812 return -EINVAL;
813
25ddbb18
AK
814 /*
815 * Poor man's atomic or. Not worth adding a primitive
816 * to everyone's atomic.h for this
817 */
e77132e7
RA
818 for (i = 0; i < TAINT_FLAGS_COUNT; i++)
819 if ((1UL << i) & tmptaint)
373d4d09 820 add_taint(i, LOCKDEP_STILL_OK);
25ddbb18
AK
821 }
822
823 return err;
34f5a398
TT
824}
825
24704f36
WL
/**
 * struct do_proc_dointvec_minmax_conv_param - limits for proc_dointvec_minmax()
 * @min: pointer to the smallest value accepted, or NULL for no lower bound
 * @max: pointer to the largest value accepted, or NULL for no upper bound
 *
 * Carries the bounds that the proc_dointvec_minmax() converter checks a
 * written value against before storing it.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lower bound */
	int *max;	/* upper bound */
};
839
00b7c339
AW
840static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
841 int *valp,
1da177e4
LT
842 int write, void *data)
843{
2bc4fc60 844 int tmp, ret;
1da177e4 845 struct do_proc_dointvec_minmax_conv_param *param = data;
2bc4fc60
ZW
846 /*
847 * If writing, first do so via a temporary local int so we can
848 * bounds-check it before touching *valp.
849 */
850 int *ip = write ? &tmp : valp;
851
852 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
853 if (ret)
854 return ret;
855
1da177e4 856 if (write) {
2bc4fc60
ZW
857 if ((param->min && *param->min > tmp) ||
858 (param->max && *param->max < tmp))
1da177e4 859 return -EINVAL;
f613d86d 860 WRITE_ONCE(*valp, tmp);
1da177e4 861 }
2bc4fc60 862
1da177e4
LT
863 return 0;
864}
865
866/**
867 * proc_dointvec_minmax - read a vector of integers with min/max values
868 * @table: the sysctl table
869 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
870 * @buffer: the user buffer
871 * @lenp: the size of the user buffer
872 * @ppos: file position
873 *
874 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
875 * values from/to the user buffer, treated as an ASCII string.
876 *
877 * This routine will ensure the values are within the range specified by
878 * table->extra1 (min) and table->extra2 (max).
879 *
24704f36 880 * Returns 0 on success or -EINVAL on write when the range check fails.
1da177e4 881 */
8d65af78 882int proc_dointvec_minmax(struct ctl_table *table, int write,
32927393 883 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
884{
885 struct do_proc_dointvec_minmax_conv_param param = {
886 .min = (int *) table->extra1,
887 .max = (int *) table->extra2,
888 };
8d65af78 889 return do_proc_dointvec(table, write, buffer, lenp, ppos,
1da177e4
LT
890 do_proc_dointvec_minmax_conv, &param);
891}
892
24704f36
WL
/**
 * struct do_proc_douintvec_minmax_conv_param - limits for proc_douintvec_minmax()
 * @min: pointer to the smallest value accepted, or NULL for no lower bound
 * @max: pointer to the largest value accepted, or NULL for no upper bound
 *
 * Carries the bounds that the proc_douintvec_minmax() converter checks a
 * written value against before storing it.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;	/* lower bound */
	unsigned int *max;	/* upper bound */
};
906
907static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
908 unsigned int *valp,
909 int write, void *data)
910{
2bc4fc60
ZW
911 int ret;
912 unsigned int tmp;
61d9b56a 913 struct do_proc_douintvec_minmax_conv_param *param = data;
2bc4fc60
ZW
914 /* write via temporary local uint for bounds-checking */
915 unsigned int *up = write ? &tmp : valp;
61d9b56a 916
2bc4fc60
ZW
917 ret = do_proc_douintvec_conv(lvalp, up, write, data);
918 if (ret)
919 return ret;
fb910c42 920
2bc4fc60
ZW
921 if (write) {
922 if ((param->min && *param->min > tmp) ||
923 (param->max && *param->max < tmp))
61d9b56a
LR
924 return -ERANGE;
925
2d3b559d 926 WRITE_ONCE(*valp, tmp);
61d9b56a
LR
927 }
928
929 return 0;
930}
931
932/**
933 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
934 * @table: the sysctl table
935 * @write: %TRUE if this is a write to the sysctl file
936 * @buffer: the user buffer
937 * @lenp: the size of the user buffer
938 * @ppos: file position
939 *
940 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
941 * values from/to the user buffer, treated as an ASCII string. Negative
942 * strings are not allowed.
943 *
944 * This routine will ensure the values are within the range specified by
945 * table->extra1 (min) and table->extra2 (max). There is a final sanity
946 * check for UINT_MAX to avoid having to support wrap around uses from
947 * userspace.
948 *
24704f36 949 * Returns 0 on success or -ERANGE on write when the range check fails.
61d9b56a
LR
950 */
951int proc_douintvec_minmax(struct ctl_table *table, int write,
32927393 952 void *buffer, size_t *lenp, loff_t *ppos)
61d9b56a
LR
953{
954 struct do_proc_douintvec_minmax_conv_param param = {
955 .min = (unsigned int *) table->extra1,
956 .max = (unsigned int *) table->extra2,
957 };
958 return do_proc_douintvec(table, write, buffer, lenp, ppos,
959 do_proc_douintvec_minmax_conv, &param);
960}
961
cb944413
ED
962/**
963 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
964 * @table: the sysctl table
965 * @write: %TRUE if this is a write to the sysctl file
966 * @buffer: the user buffer
967 * @lenp: the size of the user buffer
968 * @ppos: file position
969 *
970 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
971 * values from/to the user buffer, treated as an ASCII string. Negative
972 * strings are not allowed.
973 *
974 * This routine will ensure the values are within the range specified by
975 * table->extra1 (min) and table->extra2 (max).
976 *
977 * Returns 0 on success or an error on write when the range check fails.
978 */
979int proc_dou8vec_minmax(struct ctl_table *table, int write,
980 void *buffer, size_t *lenp, loff_t *ppos)
981{
982 struct ctl_table tmp;
983 unsigned int min = 0, max = 255U, val;
984 u8 *data = table->data;
985 struct do_proc_douintvec_minmax_conv_param param = {
986 .min = &min,
987 .max = &max,
988 };
989 int res;
990
991 /* Do not support arrays yet. */
992 if (table->maxlen != sizeof(u8))
993 return -EINVAL;
994
995 if (table->extra1) {
996 min = *(unsigned int *) table->extra1;
997 if (min > 255U)
998 return -EINVAL;
999 }
1000 if (table->extra2) {
1001 max = *(unsigned int *) table->extra2;
1002 if (max > 255U)
1003 return -EINVAL;
1004 }
1005
1006 tmp = *table;
1007
1008 tmp.maxlen = sizeof(val);
1009 tmp.data = &val;
7dee5d77 1010 val = READ_ONCE(*data);
cb944413
ED
1011 res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1012 do_proc_douintvec_minmax_conv, &param);
1013 if (res)
1014 return res;
1015 if (write)
7dee5d77 1016 WRITE_ONCE(*data, val);
cb944413
ED
1017 return 0;
1018}
1019EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1020
eaee4172
DS
1021#ifdef CONFIG_MAGIC_SYSRQ
1022static int sysrq_sysctl_handler(struct ctl_table *table, int write,
32927393 1023 void *buffer, size_t *lenp, loff_t *ppos)
eaee4172
DS
1024{
1025 int tmp, ret;
1026
1027 tmp = sysrq_mask();
1028
1029 ret = __do_proc_dointvec(&tmp, table, write, buffer,
1030 lenp, ppos, NULL, NULL);
1031 if (ret || !write)
1032 return ret;
1033
1034 if (write)
1035 sysrq_toggle_support(tmp);
1036
1037 return 0;
1038}
1039#endif
1040
32927393
CH
/*
 * Core of the "vector of unsigned long" sysctl handlers.
 *
 * @data:    array of unsigned long values to read from / write to
 * @table:   sysctl entry; maxlen gives the array size in bytes, extra1 and
 *           extra2 (when non-NULL) point to the minimum/maximum accepted
 *           on write
 * @write:   non-zero when user input is being parsed into @data
 * @buffer:  text buffer (input on write, output on read)
 * @lenp:    in: buffer length; out: number of bytes consumed/produced
 * @ppos:    file position, advanced by the final *lenp
 * @convmul: on write the parsed value is multiplied by this ...
 * @convdiv: ... and divided by this; a read applies the inverse
 *
 * Returns 0 on success or -EINVAL for bad/out-of-range input.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos,
		unsigned long convmul, unsigned long convdiv)
{
	unsigned long *i, *min, *max;
	int vleft, first = 1, err = 0;
	size_t left;
	char *p;

	/* Nothing to do: no data, empty buffer, or a read past offset 0. */
	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (unsigned long *) data;
	min = (unsigned long *) table->extra1;
	max = (unsigned long *) table->extra2;
	vleft = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		/* Skip parsing entirely; only *ppos is advanced at "out". */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* Never parse more than PAGE_SIZE - 1 bytes of input. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	/* One iteration per array element; "first" is cleared after each. */
	for (; left && vleft--; i++, first = 0) {
		unsigned long val;

		if (write) {
			bool neg;

			left -= proc_skip_spaces(&p);
			if (!left)
				break;

			err = proc_get_long(&p, &left, &val, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			/* Any parse error or a negative number is -EINVAL. */
			if (err || neg) {
				err = -EINVAL;
				break;
			}

			/* Scale first, then range-check the scaled value. */
			val = convmul * val / convdiv;
			if ((min && val < *min) || (max && val > *max)) {
				err = -EINVAL;
				break;
			}
			WRITE_ONCE(*i, val);
		} else {
			/* Inverse scaling; values are tab separated. */
			val = convdiv * READ_ONCE(*i) / convmul;
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
		}
	}

	/* Terminate read output with a newline if anything was emitted. */
	if (!write && !first && left && !err)
		proc_put_char(&buffer, &left, '\n');
	if (write && !err)
		left -= proc_skip_spaces(&p);
	/* A write that stored no value at all is an error. */
	if (write && first)
		return err ? : -EINVAL;
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
1113
d8217f07 1114static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
32927393
CH
1115 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1116 unsigned long convdiv)
fcfbd547
KK
1117{
1118 return __do_proc_doulongvec_minmax(table->data, table, write,
8d65af78 1119 buffer, lenp, ppos, convmul, convdiv);
fcfbd547
KK
1120}
1121
1da177e4
LT
1122/**
1123 * proc_doulongvec_minmax - read a vector of long integers with min/max values
1124 * @table: the sysctl table
1125 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1126 * @buffer: the user buffer
1127 * @lenp: the size of the user buffer
1128 * @ppos: file position
1129 *
1130 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1131 * values from/to the user buffer, treated as an ASCII string.
1132 *
1133 * This routine will ensure the values are within the range specified by
1134 * table->extra1 (min) and table->extra2 (max).
1135 *
1136 * Returns 0 on success.
1137 */
8d65af78 1138int proc_doulongvec_minmax(struct ctl_table *table, int write,
32927393 1139 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1140{
8d65af78 1141 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1da177e4
LT
1142}
1143
1144/**
1145 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1146 * @table: the sysctl table
1147 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1148 * @buffer: the user buffer
1149 * @lenp: the size of the user buffer
1150 * @ppos: file position
1151 *
1152 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1153 * values from/to the user buffer, treated as an ASCII string. The values
1154 * are treated as milliseconds, and converted to jiffies when they are stored.
1155 *
1156 * This routine will ensure the values are within the range specified by
1157 * table->extra1 (min) and table->extra2 (max).
1158 *
1159 * Returns 0 on success.
1160 */
d8217f07 1161int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
32927393 1162 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1163{
8d65af78 1164 return do_proc_doulongvec_minmax(table, write, buffer,
1da177e4
LT
1165 lenp, ppos, HZ, 1000l);
1166}
1167
1168
00b7c339 1169static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
1170 int *valp,
1171 int write, void *data)
1172{
1173 if (write) {
63259457 1174 if (*lvalp > INT_MAX / HZ)
cba9f33d 1175 return 1;
e8778208
KI
1176 if (*negp)
1177 WRITE_ONCE(*valp, -*lvalp * HZ);
1178 else
1179 WRITE_ONCE(*valp, *lvalp * HZ);
1da177e4 1180 } else {
e8778208 1181 int val = READ_ONCE(*valp);
1da177e4
LT
1182 unsigned long lval;
1183 if (val < 0) {
00b7c339 1184 *negp = true;
9a5bc726 1185 lval = -(unsigned long)val;
1da177e4 1186 } else {
00b7c339 1187 *negp = false;
1da177e4
LT
1188 lval = (unsigned long)val;
1189 }
1190 *lvalp = lval / HZ;
1191 }
1192 return 0;
1193}
1194
00b7c339 1195static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
1196 int *valp,
1197 int write, void *data)
1198{
1199 if (write) {
cba9f33d
BS
1200 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1201 return 1;
1da177e4
LT
1202 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1203 } else {
1204 int val = *valp;
1205 unsigned long lval;
1206 if (val < 0) {
00b7c339 1207 *negp = true;
9a5bc726 1208 lval = -(unsigned long)val;
1da177e4 1209 } else {
00b7c339 1210 *negp = false;
1da177e4
LT
1211 lval = (unsigned long)val;
1212 }
1213 *lvalp = jiffies_to_clock_t(lval);
1214 }
1215 return 0;
1216}
1217
00b7c339 1218static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
1219 int *valp,
1220 int write, void *data)
1221{
1222 if (write) {
d738ce8f
FF
1223 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
1224
1225 if (jif > INT_MAX)
1226 return 1;
1227 *valp = (int)jif;
1da177e4
LT
1228 } else {
1229 int val = *valp;
1230 unsigned long lval;
1231 if (val < 0) {
00b7c339 1232 *negp = true;
9a5bc726 1233 lval = -(unsigned long)val;
1da177e4 1234 } else {
00b7c339 1235 *negp = false;
1da177e4
LT
1236 lval = (unsigned long)val;
1237 }
1238 *lvalp = jiffies_to_msecs(lval);
1239 }
1240 return 0;
1241}
1242
1243/**
1244 * proc_dointvec_jiffies - read a vector of integers as seconds
1245 * @table: the sysctl table
1246 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1247 * @buffer: the user buffer
1248 * @lenp: the size of the user buffer
1249 * @ppos: file position
1250 *
1251 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1252 * values from/to the user buffer, treated as an ASCII string.
1253 * The values read are assumed to be in seconds, and are converted into
1254 * jiffies.
1255 *
1256 * Returns 0 on success.
1257 */
8d65af78 1258int proc_dointvec_jiffies(struct ctl_table *table, int write,
32927393 1259 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1260{
8d65af78 1261 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
1262 do_proc_dointvec_jiffies_conv,NULL);
1263}
1264
1265/**
1266 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1267 * @table: the sysctl table
1268 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1269 * @buffer: the user buffer
1270 * @lenp: the size of the user buffer
1e5d5331 1271 * @ppos: pointer to the file position
1da177e4
LT
1272 *
1273 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1274 * values from/to the user buffer, treated as an ASCII string.
1275 * The values read are assumed to be in 1/USER_HZ seconds, and
1276 * are converted into jiffies.
1277 *
1278 * Returns 0 on success.
1279 */
8d65af78 1280int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
32927393 1281 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1282{
8d65af78 1283 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
1284 do_proc_dointvec_userhz_jiffies_conv,NULL);
1285}
1286
1287/**
1288 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1289 * @table: the sysctl table
1290 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1291 * @buffer: the user buffer
1292 * @lenp: the size of the user buffer
67be2dd1
MW
1293 * @ppos: file position
1294 * @ppos: the current position in the file
1da177e4
LT
1295 *
1296 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1297 * values from/to the user buffer, treated as an ASCII string.
1298 * The values read are assumed to be in 1/1000 seconds, and
1299 * are converted into jiffies.
1300 *
1301 * Returns 0 on success.
1302 */
32927393
CH
1303int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1304 size_t *lenp, loff_t *ppos)
1da177e4 1305{
8d65af78 1306 return do_proc_dointvec(table, write, buffer, lenp, ppos,
1da177e4
LT
1307 do_proc_dointvec_ms_jiffies_conv, NULL);
1308}
1309
32927393
CH
1310static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1311 size_t *lenp, loff_t *ppos)
9ec52099
CLG
1312{
1313 struct pid *new_pid;
1314 pid_t tmp;
1315 int r;
1316
6c5f3e7b 1317 tmp = pid_vnr(cad_pid);
9ec52099 1318
8d65af78 1319 r = __do_proc_dointvec(&tmp, table, write, buffer,
9ec52099
CLG
1320 lenp, ppos, NULL, NULL);
1321 if (r || !write)
1322 return r;
1323
1324 new_pid = find_get_pid(tmp);
1325 if (!new_pid)
1326 return -ESRCH;
1327
1328 put_pid(xchg(&cad_pid, new_pid));
1329 return 0;
1330}
1331
9f977fb7
OP
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The bitmap is stored at table->data and the bitmap length (in bits)
 * in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. Writing into
 * the file will clear the bitmap then update it with the given input;
 * a write at a non-zero file position is OR-ed into the existing bitmap
 * instead of replacing it.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, -EINVAL for a negative, out-of-range or reversed range, or
 * the error returned by proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	/* table->data holds a pointer to the bitmap pointer. */
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/* Terminators accepted after the first / second number of a range. */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		/* Parse into a scratch bitmap; commit only on success. */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A single number is the range val_a-val_a. */
			val_b = val_a;
			/* Step over the terminator character, if any. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			proc_skip_char(&p, &left, '\n');
		}
		/* Account for the part of the buffer not looked at this pass. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;
		bool first = 1;

		/* Emit each run of set bits as "a" or "a-b", comma separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			/* Last set bit of the run that starts at bit_a. */
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/* Continuation writes accumulate; offset 0 replaces. */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL on the read path. */
	bitmap_free(tmp_bitmap);
	return err;
}
1478
55610500 1479#else /* CONFIG_PROC_SYSCTL */
1da177e4 1480
8d65af78 1481int proc_dostring(struct ctl_table *table, int write,
32927393 1482 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1483{
1484 return -ENOSYS;
1485}
1486
a2071573
JH
1487int proc_dobool(struct ctl_table *table, int write,
1488 void *buffer, size_t *lenp, loff_t *ppos)
1489{
1490 return -ENOSYS;
1491}
1492
f461d2dc 1493int proc_dointvec(struct ctl_table *table, int write,
32927393 1494 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1495{
1496 return -ENOSYS;
1497}
1498
1499int proc_douintvec(struct ctl_table *table, int write,
32927393 1500 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1501{
1502 return -ENOSYS;
1503}
1504
1505int proc_dointvec_minmax(struct ctl_table *table, int write,
32927393 1506 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1507{
1508 return -ENOSYS;
1509}
1510
1511int proc_douintvec_minmax(struct ctl_table *table, int write,
32927393 1512 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1513{
1514 return -ENOSYS;
9f977fb7
OP
1515}
1516
cb944413
ED
1517int proc_dou8vec_minmax(struct ctl_table *table, int write,
1518 void *buffer, size_t *lenp, loff_t *ppos)
1519{
1520 return -ENOSYS;
1521}
1522
f461d2dc 1523int proc_dointvec_jiffies(struct ctl_table *table, int write,
32927393 1524 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1525{
1526 return -ENOSYS;
1527}
1da177e4 1528
f461d2dc 1529int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
32927393 1530 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1531{
1532 return -ENOSYS;
1533}
1534
f461d2dc 1535int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
32927393 1536 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1537{
1538 return -ENOSYS;
1539}
1540
f461d2dc 1541int proc_doulongvec_minmax(struct ctl_table *table, int write,
32927393 1542 void *buffer, size_t *lenp, loff_t *ppos)
e7d316a0
SAK
1543{
1544 return -ENOSYS;
1545}
1546
f461d2dc 1547int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
32927393 1548 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1549{
32927393 1550 return -ENOSYS;
1da177e4
LT
1551}
1552
f461d2dc 1553int proc_do_large_bitmap(struct ctl_table *table, int write,
32927393 1554 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1555{
1556 return -ENOSYS;
1557}
1558
f461d2dc
CH
1559#endif /* CONFIG_PROC_SYSCTL */
1560
1561#if defined(CONFIG_SYSCTL)
1562int proc_do_static_key(struct ctl_table *table, int write,
32927393 1563 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1564{
f461d2dc
CH
1565 struct static_key *key = (struct static_key *)table->data;
1566 static DEFINE_MUTEX(static_key_mutex);
1567 int val, ret;
1568 struct ctl_table tmp = {
1569 .data = &val,
1570 .maxlen = sizeof(val),
1571 .mode = table->mode,
1572 .extra1 = SYSCTL_ZERO,
1573 .extra2 = SYSCTL_ONE,
1574 };
1575
1576 if (write && !capable(CAP_SYS_ADMIN))
1577 return -EPERM;
1578
1579 mutex_lock(&static_key_mutex);
1580 val = static_key_enabled(key);
1581 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1582 if (write && !ret) {
1583 if (val)
1584 static_key_enable(key);
1585 else
1586 static_key_disable(key);
1587 }
1588 mutex_unlock(&static_key_mutex);
1589 return ret;
1da177e4
LT
1590}
1591
f461d2dc 1592static struct ctl_table kern_table[] = {
b7cc6ec7 1593#ifdef CONFIG_NUMA_BALANCING
f461d2dc
CH
1594 {
1595 .procname = "numa_balancing",
1596 .data = NULL, /* filled in by handler */
1597 .maxlen = sizeof(unsigned int),
1598 .mode = 0644,
1599 .proc_handler = sysctl_numa_balancing,
1600 .extra1 = SYSCTL_ZERO,
c574bbe9 1601 .extra2 = SYSCTL_FOUR,
f461d2dc
CH
1602 },
1603#endif /* CONFIG_NUMA_BALANCING */
f461d2dc
CH
1604 {
1605 .procname = "panic",
1606 .data = &panic_timeout,
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
1609 .proc_handler = proc_dointvec,
1610 },
f461d2dc
CH
1611#ifdef CONFIG_PROC_SYSCTL
1612 {
1613 .procname = "tainted",
1614 .maxlen = sizeof(long),
1615 .mode = 0644,
1616 .proc_handler = proc_taint,
1617 },
1618 {
1619 .procname = "sysctl_writes_strict",
1620 .data = &sysctl_writes_strict,
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
1623 .proc_handler = proc_dointvec_minmax,
78e36f3b 1624 .extra1 = SYSCTL_NEG_ONE,
f461d2dc
CH
1625 .extra2 = SYSCTL_ONE,
1626 },
f461d2dc
CH
1627#endif
1628 {
1629 .procname = "print-fatal-signals",
1630 .data = &print_fatal_signals,
1631 .maxlen = sizeof(int),
1632 .mode = 0644,
1633 .proc_handler = proc_dointvec,
1634 },
1635#ifdef CONFIG_SPARC
1636 {
1637 .procname = "reboot-cmd",
1638 .data = reboot_command,
1639 .maxlen = 256,
1640 .mode = 0644,
1641 .proc_handler = proc_dostring,
1642 },
1643 {
1644 .procname = "stop-a",
1645 .data = &stop_a_enabled,
1646 .maxlen = sizeof (int),
1647 .mode = 0644,
1648 .proc_handler = proc_dointvec,
1649 },
1650 {
1651 .procname = "scons-poweroff",
1652 .data = &scons_pwroff,
1653 .maxlen = sizeof (int),
1654 .mode = 0644,
1655 .proc_handler = proc_dointvec,
1656 },
1657#endif
1658#ifdef CONFIG_SPARC64
1659 {
1660 .procname = "tsb-ratio",
1661 .data = &sysctl_tsb_ratio,
1662 .maxlen = sizeof (int),
1663 .mode = 0644,
1664 .proc_handler = proc_dointvec,
1665 },
1666#endif
1667#ifdef CONFIG_PARISC
1668 {
1669 .procname = "soft-power",
1670 .data = &pwrsw_enabled,
1671 .maxlen = sizeof (int),
1672 .mode = 0644,
1673 .proc_handler = proc_dointvec,
1674 },
1675#endif
1676#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1677 {
1678 .procname = "unaligned-trap",
1679 .data = &unaligned_enabled,
1680 .maxlen = sizeof (int),
1681 .mode = 0644,
1682 .proc_handler = proc_dointvec,
1683 },
f461d2dc
CH
1684#endif
1685#ifdef CONFIG_STACK_TRACER
1686 {
1687 .procname = "stack_tracer_enabled",
1688 .data = &stack_tracer_enabled,
1689 .maxlen = sizeof(int),
1690 .mode = 0644,
1691 .proc_handler = stack_trace_sysctl,
1692 },
1693#endif
1694#ifdef CONFIG_TRACING
1695 {
1696 .procname = "ftrace_dump_on_oops",
1697 .data = &ftrace_dump_on_oops,
1698 .maxlen = sizeof(int),
1699 .mode = 0644,
1700 .proc_handler = proc_dointvec,
1701 },
1702 {
1703 .procname = "traceoff_on_warning",
1704 .data = &__disable_trace_on_warning,
1705 .maxlen = sizeof(__disable_trace_on_warning),
1706 .mode = 0644,
1707 .proc_handler = proc_dointvec,
1708 },
1709 {
1710 .procname = "tracepoint_printk",
1711 .data = &tracepoint_printk,
1712 .maxlen = sizeof(tracepoint_printk),
1713 .mode = 0644,
1714 .proc_handler = tracepoint_printk_sysctl,
1715 },
1716#endif
f461d2dc
CH
1717#ifdef CONFIG_MODULES
1718 {
1719 .procname = "modprobe",
1720 .data = &modprobe_path,
1721 .maxlen = KMOD_PATH_LEN,
1722 .mode = 0644,
1723 .proc_handler = proc_dostring,
1724 },
1725 {
1726 .procname = "modules_disabled",
1727 .data = &modules_disabled,
1728 .maxlen = sizeof(int),
1729 .mode = 0644,
1730 /* only handle a transition from default "0" to "1" */
1731 .proc_handler = proc_dointvec_minmax,
1732 .extra1 = SYSCTL_ONE,
1733 .extra2 = SYSCTL_ONE,
1734 },
1735#endif
1736#ifdef CONFIG_UEVENT_HELPER
1737 {
1738 .procname = "hotplug",
1739 .data = &uevent_helper,
1740 .maxlen = UEVENT_HELPER_PATH_LEN,
1741 .mode = 0644,
1742 .proc_handler = proc_dostring,
1743 },
1744#endif
f461d2dc
CH
1745#ifdef CONFIG_MAGIC_SYSRQ
1746 {
1747 .procname = "sysrq",
1748 .data = NULL,
1749 .maxlen = sizeof (int),
1750 .mode = 0644,
1751 .proc_handler = sysrq_sysctl_handler,
1752 },
1753#endif
1754#ifdef CONFIG_PROC_SYSCTL
1755 {
1756 .procname = "cad_pid",
1757 .data = NULL,
1758 .maxlen = sizeof (int),
1759 .mode = 0600,
1760 .proc_handler = proc_do_cad_pid,
1761 },
1762#endif
1763 {
1764 .procname = "threads-max",
1765 .data = NULL,
1766 .maxlen = sizeof(int),
1767 .mode = 0644,
1768 .proc_handler = sysctl_max_threads,
1769 },
f461d2dc
CH
1770 {
1771 .procname = "usermodehelper",
1772 .mode = 0555,
1773 .child = usermodehelper_table,
1774 },
f461d2dc
CH
1775 {
1776 .procname = "overflowuid",
1777 .data = &overflowuid,
1778 .maxlen = sizeof(int),
1779 .mode = 0644,
1780 .proc_handler = proc_dointvec_minmax,
2452dcb9 1781 .extra1 = SYSCTL_ZERO,
54771613 1782 .extra2 = SYSCTL_MAXOLDUID,
f461d2dc
CH
1783 },
1784 {
1785 .procname = "overflowgid",
1786 .data = &overflowgid,
1787 .maxlen = sizeof(int),
1788 .mode = 0644,
1789 .proc_handler = proc_dointvec_minmax,
2452dcb9 1790 .extra1 = SYSCTL_ZERO,
54771613 1791 .extra2 = SYSCTL_MAXOLDUID,
f461d2dc
CH
1792 },
1793#ifdef CONFIG_S390
1794 {
1795 .procname = "userprocess_debug",
1796 .data = &show_unhandled_signals,
1797 .maxlen = sizeof(int),
1798 .mode = 0644,
1799 .proc_handler = proc_dointvec,
1800 },
1801#endif
1802 {
1803 .procname = "pid_max",
1804 .data = &pid_max,
1805 .maxlen = sizeof (int),
1806 .mode = 0644,
1807 .proc_handler = proc_dointvec_minmax,
1808 .extra1 = &pid_max_min,
1809 .extra2 = &pid_max_max,
1810 },
1811 {
1812 .procname = "panic_on_oops",
1813 .data = &panic_on_oops,
1814 .maxlen = sizeof(int),
1815 .mode = 0644,
1816 .proc_handler = proc_dointvec,
1817 },
1818 {
1819 .procname = "panic_print",
1820 .data = &panic_print,
1821 .maxlen = sizeof(unsigned long),
1822 .mode = 0644,
1823 .proc_handler = proc_doulongvec_minmax,
1824 },
f461d2dc
CH
1825 {
1826 .procname = "ngroups_max",
f628867d 1827 .data = (void *)&ngroups_max,
f461d2dc
CH
1828 .maxlen = sizeof (int),
1829 .mode = 0444,
1830 .proc_handler = proc_dointvec,
1831 },
1832 {
1833 .procname = "cap_last_cap",
1834 .data = (void *)&cap_last_cap,
1835 .maxlen = sizeof(int),
1836 .mode = 0444,
1837 .proc_handler = proc_dointvec,
1838 },
f461d2dc
CH
1839#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1840 {
1841 .procname = "unknown_nmi_panic",
1842 .data = &unknown_nmi_panic,
1843 .maxlen = sizeof (int),
1844 .mode = 0644,
1845 .proc_handler = proc_dointvec,
1846 },
1847#endif
61d9b56a 1848
cb8e59cc
LT
1849#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1850 defined(CONFIG_DEBUG_STACKOVERFLOW)
f461d2dc 1851 {
cb8e59cc
LT
1852 .procname = "panic_on_stackoverflow",
1853 .data = &sysctl_panic_on_stackoverflow,
f461d2dc
CH
1854 .maxlen = sizeof(int),
1855 .mode = 0644,
1856 .proc_handler = proc_dointvec,
1857 },
cb8e59cc
LT
1858#endif
1859#if defined(CONFIG_X86)
f461d2dc 1860 {
cb8e59cc
LT
1861 .procname = "panic_on_unrecovered_nmi",
1862 .data = &panic_on_unrecovered_nmi,
f461d2dc
CH
1863 .maxlen = sizeof(int),
1864 .mode = 0644,
1865 .proc_handler = proc_dointvec,
1866 },
f461d2dc 1867 {
cb8e59cc
LT
1868 .procname = "panic_on_io_nmi",
1869 .data = &panic_on_io_nmi,
f461d2dc
CH
1870 .maxlen = sizeof(int),
1871 .mode = 0644,
1872 .proc_handler = proc_dointvec,
1873 },
f461d2dc
CH
1874 {
1875 .procname = "bootloader_type",
1876 .data = &bootloader_type,
1877 .maxlen = sizeof (int),
1878 .mode = 0444,
1879 .proc_handler = proc_dointvec,
1880 },
1881 {
1882 .procname = "bootloader_version",
1883 .data = &bootloader_version,
1884 .maxlen = sizeof (int),
1885 .mode = 0444,
1886 .proc_handler = proc_dointvec,
1887 },
1888 {
1889 .procname = "io_delay_type",
1890 .data = &io_delay_type,
1891 .maxlen = sizeof(int),
1892 .mode = 0644,
1893 .proc_handler = proc_dointvec,
1894 },
1895#endif
1896#if defined(CONFIG_MMU)
1897 {
1898 .procname = "randomize_va_space",
1899 .data = &randomize_va_space,
1900 .maxlen = sizeof(int),
1901 .mode = 0644,
1902 .proc_handler = proc_dointvec,
1903 },
1904#endif
1905#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1906 {
1907 .procname = "spin_retry",
1908 .data = &spin_retry,
1909 .maxlen = sizeof (int),
1910 .mode = 0644,
1911 .proc_handler = proc_dointvec,
1912 },
1913#endif
1914#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1915 {
1916 .procname = "acpi_video_flags",
1917 .data = &acpi_realmode_flags,
1918 .maxlen = sizeof (unsigned long),
1919 .mode = 0644,
1920 .proc_handler = proc_doulongvec_minmax,
1921 },
1922#endif
1923#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1924 {
1925 .procname = "ignore-unaligned-usertrap",
1926 .data = &no_unaligned_warning,
1927 .maxlen = sizeof (int),
1928 .mode = 0644,
1929 .proc_handler = proc_dointvec,
1930 },
1931#endif
1932#ifdef CONFIG_IA64
1933 {
1934 .procname = "unaligned-dump-stack",
1935 .data = &unaligned_dump_stack,
1936 .maxlen = sizeof (int),
1937 .mode = 0644,
1938 .proc_handler = proc_dointvec,
1939 },
1940#endif
f461d2dc
CH
1941#ifdef CONFIG_RT_MUTEXES
1942 {
1943 .procname = "max_lock_depth",
1944 .data = &max_lock_depth,
1945 .maxlen = sizeof(int),
1946 .mode = 0644,
1947 .proc_handler = proc_dointvec,
1948 },
1949#endif
f461d2dc
CH
1950#ifdef CONFIG_KEYS
1951 {
1952 .procname = "keys",
1953 .mode = 0555,
1954 .child = key_sysctls,
1955 },
1956#endif
1957#ifdef CONFIG_PERF_EVENTS
1958 /*
1959 * User-space scripts rely on the existence of this file
1960 * as a feature check for perf_events being enabled.
1961 *
1962 * So it's an ABI, do not remove!
1963 */
1964 {
1965 .procname = "perf_event_paranoid",
1966 .data = &sysctl_perf_event_paranoid,
1967 .maxlen = sizeof(sysctl_perf_event_paranoid),
1968 .mode = 0644,
1969 .proc_handler = proc_dointvec,
1970 },
1971 {
1972 .procname = "perf_event_mlock_kb",
1973 .data = &sysctl_perf_event_mlock,
1974 .maxlen = sizeof(sysctl_perf_event_mlock),
1975 .mode = 0644,
1976 .proc_handler = proc_dointvec,
1977 },
1978 {
1979 .procname = "perf_event_max_sample_rate",
1980 .data = &sysctl_perf_event_sample_rate,
1981 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1982 .mode = 0644,
1983 .proc_handler = perf_proc_update_handler,
1984 .extra1 = SYSCTL_ONE,
1985 },
1986 {
1987 .procname = "perf_cpu_time_max_percent",
1988 .data = &sysctl_perf_cpu_time_max_percent,
1989 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1990 .mode = 0644,
1991 .proc_handler = perf_cpu_time_max_percent_handler,
1992 .extra1 = SYSCTL_ZERO,
78e36f3b 1993 .extra2 = SYSCTL_ONE_HUNDRED,
f461d2dc
CH
1994 },
1995 {
1996 .procname = "perf_event_max_stack",
1997 .data = &sysctl_perf_event_max_stack,
1998 .maxlen = sizeof(sysctl_perf_event_max_stack),
1999 .mode = 0644,
2000 .proc_handler = perf_event_max_stack_handler,
2001 .extra1 = SYSCTL_ZERO,
d73840ec 2002 .extra2 = (void *)&six_hundred_forty_kb,
f461d2dc
CH
2003 },
2004 {
2005 .procname = "perf_event_max_contexts_per_stack",
2006 .data = &sysctl_perf_event_max_contexts_per_stack,
2007 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
2008 .mode = 0644,
2009 .proc_handler = perf_event_max_stack_handler,
2010 .extra1 = SYSCTL_ZERO,
78e36f3b 2011 .extra2 = SYSCTL_ONE_THOUSAND,
f461d2dc
CH
2012 },
2013#endif
2014 {
2015 .procname = "panic_on_warn",
2016 .data = &panic_on_warn,
2017 .maxlen = sizeof(int),
2018 .mode = 0644,
2019 .proc_handler = proc_dointvec_minmax,
2020 .extra1 = SYSCTL_ZERO,
2021 .extra2 = SYSCTL_ONE,
2022 },
f461d2dc
CH
2023#if defined(CONFIG_TREE_RCU)
2024 {
2025 .procname = "panic_on_rcu_stall",
2026 .data = &sysctl_panic_on_rcu_stall,
2027 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
2028 .mode = 0644,
2029 .proc_handler = proc_dointvec_minmax,
2030 .extra1 = SYSCTL_ZERO,
2031 .extra2 = SYSCTL_ONE,
2032 },
2033#endif
dfe56404 2034#if defined(CONFIG_TREE_RCU)
2035 {
2036 .procname = "max_rcu_stall_to_panic",
2037 .data = &sysctl_max_rcu_stall_to_panic,
2038 .maxlen = sizeof(sysctl_max_rcu_stall_to_panic),
2039 .mode = 0644,
2040 .proc_handler = proc_dointvec_minmax,
2041 .extra1 = SYSCTL_ONE,
2042 .extra2 = SYSCTL_INT_MAX,
2043 },
f461d2dc
CH
2044#endif
2045 { }
2046};
1da177e4 2047
f461d2dc
CH
2048static struct ctl_table vm_table[] = {
2049 {
2050 .procname = "overcommit_memory",
2051 .data = &sysctl_overcommit_memory,
2052 .maxlen = sizeof(sysctl_overcommit_memory),
2053 .mode = 0644,
56f3547b 2054 .proc_handler = overcommit_policy_handler,
f461d2dc 2055 .extra1 = SYSCTL_ZERO,
78e36f3b 2056 .extra2 = SYSCTL_TWO,
f461d2dc 2057 },
f461d2dc
CH
2058 {
2059 .procname = "overcommit_ratio",
2060 .data = &sysctl_overcommit_ratio,
2061 .maxlen = sizeof(sysctl_overcommit_ratio),
2062 .mode = 0644,
2063 .proc_handler = overcommit_ratio_handler,
2064 },
2065 {
2066 .procname = "overcommit_kbytes",
2067 .data = &sysctl_overcommit_kbytes,
2068 .maxlen = sizeof(sysctl_overcommit_kbytes),
2069 .mode = 0644,
2070 .proc_handler = overcommit_kbytes_handler,
2071 },
2072 {
2073 .procname = "page-cluster",
2074 .data = &page_cluster,
2075 .maxlen = sizeof(int),
2076 .mode = 0644,
2077 .proc_handler = proc_dointvec_minmax,
2078 .extra1 = SYSCTL_ZERO,
2079 },
f461d2dc
CH
2080 {
2081 .procname = "dirtytime_expire_seconds",
2082 .data = &dirtytime_expire_interval,
2083 .maxlen = sizeof(dirtytime_expire_interval),
2084 .mode = 0644,
2085 .proc_handler = dirtytime_interval_handler,
2086 .extra1 = SYSCTL_ZERO,
2087 },
2088 {
2089 .procname = "swappiness",
2090 .data = &vm_swappiness,
2091 .maxlen = sizeof(vm_swappiness),
2092 .mode = 0644,
2093 .proc_handler = proc_dointvec_minmax,
2094 .extra1 = SYSCTL_ZERO,
78e36f3b 2095 .extra2 = SYSCTL_TWO_HUNDRED,
f461d2dc
CH
2096 },
2097#ifdef CONFIG_HUGETLB_PAGE
2098 {
2099 .procname = "nr_hugepages",
2100 .data = NULL,
2101 .maxlen = sizeof(unsigned long),
2102 .mode = 0644,
2103 .proc_handler = hugetlb_sysctl_handler,
2104 },
2105#ifdef CONFIG_NUMA
2106 {
2107 .procname = "nr_hugepages_mempolicy",
2108 .data = NULL,
2109 .maxlen = sizeof(unsigned long),
2110 .mode = 0644,
2111 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
2112 },
2113 {
2114 .procname = "numa_stat",
2115 .data = &sysctl_vm_numa_stat,
2116 .maxlen = sizeof(int),
2117 .mode = 0644,
2118 .proc_handler = sysctl_vm_numa_stat_handler,
2119 .extra1 = SYSCTL_ZERO,
2120 .extra2 = SYSCTL_ONE,
2121 },
2122#endif
2123 {
2124 .procname = "hugetlb_shm_group",
2125 .data = &sysctl_hugetlb_shm_group,
2126 .maxlen = sizeof(gid_t),
2127 .mode = 0644,
2128 .proc_handler = proc_dointvec,
2129 },
2130 {
2131 .procname = "nr_overcommit_hugepages",
2132 .data = NULL,
2133 .maxlen = sizeof(unsigned long),
2134 .mode = 0644,
2135 .proc_handler = hugetlb_overcommit_handler,
2136 },
2137#endif
2138 {
2139 .procname = "lowmem_reserve_ratio",
2140 .data = &sysctl_lowmem_reserve_ratio,
2141 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
2142 .mode = 0644,
2143 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
2144 },
2145 {
2146 .procname = "drop_caches",
2147 .data = &sysctl_drop_caches,
2148 .maxlen = sizeof(int),
2149 .mode = 0200,
2150 .proc_handler = drop_caches_sysctl_handler,
2151 .extra1 = SYSCTL_ONE,
78e36f3b 2152 .extra2 = SYSCTL_FOUR,
f461d2dc
CH
2153 },
2154#ifdef CONFIG_COMPACTION
2155 {
2156 .procname = "compact_memory",
ef498438 2157 .data = NULL,
f461d2dc
CH
2158 .maxlen = sizeof(int),
2159 .mode = 0200,
2160 .proc_handler = sysctl_compaction_handler,
2161 },
facdaa91
NG
2162 {
2163 .procname = "compaction_proactiveness",
2164 .data = &sysctl_compaction_proactiveness,
d34c0a75 2165 .maxlen = sizeof(sysctl_compaction_proactiveness),
facdaa91 2166 .mode = 0644,
65d759c8 2167 .proc_handler = compaction_proactiveness_sysctl_handler,
facdaa91 2168 .extra1 = SYSCTL_ZERO,
78e36f3b 2169 .extra2 = SYSCTL_ONE_HUNDRED,
facdaa91 2170 },
f461d2dc
CH
2171 {
2172 .procname = "extfrag_threshold",
2173 .data = &sysctl_extfrag_threshold,
2174 .maxlen = sizeof(int),
2175 .mode = 0644,
2176 .proc_handler = proc_dointvec_minmax,
2452dcb9 2177 .extra1 = SYSCTL_ZERO,
d73840ec 2178 .extra2 = (void *)&max_extfrag_threshold,
f461d2dc
CH
2179 },
2180 {
2181 .procname = "compact_unevictable_allowed",
2182 .data = &sysctl_compact_unevictable_allowed,
2183 .maxlen = sizeof(int),
2184 .mode = 0644,
2185 .proc_handler = proc_dointvec_minmax_warn_RT_change,
2186 .extra1 = SYSCTL_ZERO,
2187 .extra2 = SYSCTL_ONE,
2188 },
1da177e4 2189
f461d2dc
CH
2190#endif /* CONFIG_COMPACTION */
2191 {
2192 .procname = "min_free_kbytes",
2193 .data = &min_free_kbytes,
2194 .maxlen = sizeof(min_free_kbytes),
2195 .mode = 0644,
2196 .proc_handler = min_free_kbytes_sysctl_handler,
2197 .extra1 = SYSCTL_ZERO,
2198 },
2199 {
2200 .procname = "watermark_boost_factor",
2201 .data = &watermark_boost_factor,
2202 .maxlen = sizeof(watermark_boost_factor),
2203 .mode = 0644,
2204 .proc_handler = proc_dointvec_minmax,
2205 .extra1 = SYSCTL_ZERO,
2206 },
2207 {
2208 .procname = "watermark_scale_factor",
2209 .data = &watermark_scale_factor,
2210 .maxlen = sizeof(watermark_scale_factor),
2211 .mode = 0644,
2212 .proc_handler = watermark_scale_factor_sysctl_handler,
2213 .extra1 = SYSCTL_ONE,
78e36f3b 2214 .extra2 = SYSCTL_THREE_THOUSAND,
f461d2dc
CH
2215 },
2216 {
74f44822
MG
2217 .procname = "percpu_pagelist_high_fraction",
2218 .data = &percpu_pagelist_high_fraction,
2219 .maxlen = sizeof(percpu_pagelist_high_fraction),
f461d2dc 2220 .mode = 0644,
74f44822 2221 .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
f461d2dc
CH
2222 .extra1 = SYSCTL_ZERO,
2223 },
5ef64cc8
LT
2224 {
2225 .procname = "page_lock_unfairness",
2226 .data = &sysctl_page_lock_unfairness,
2227 .maxlen = sizeof(sysctl_page_lock_unfairness),
2228 .mode = 0644,
2229 .proc_handler = proc_dointvec_minmax,
2230 .extra1 = SYSCTL_ZERO,
2231 },
f461d2dc
CH
2232#ifdef CONFIG_MMU
2233 {
2234 .procname = "max_map_count",
2235 .data = &sysctl_max_map_count,
2236 .maxlen = sizeof(sysctl_max_map_count),
2237 .mode = 0644,
2238 .proc_handler = proc_dointvec_minmax,
2239 .extra1 = SYSCTL_ZERO,
2240 },
2241#else
2242 {
2243 .procname = "nr_trim_pages",
2244 .data = &sysctl_nr_trim_pages,
2245 .maxlen = sizeof(sysctl_nr_trim_pages),
2246 .mode = 0644,
2247 .proc_handler = proc_dointvec_minmax,
2248 .extra1 = SYSCTL_ZERO,
2249 },
2250#endif
f461d2dc
CH
2251 {
2252 .procname = "vfs_cache_pressure",
2253 .data = &sysctl_vfs_cache_pressure,
2254 .maxlen = sizeof(sysctl_vfs_cache_pressure),
2255 .mode = 0644,
3b3376f2 2256 .proc_handler = proc_dointvec_minmax,
f461d2dc
CH
2257 .extra1 = SYSCTL_ZERO,
2258 },
2259#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2260 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2261 {
2262 .procname = "legacy_va_layout",
2263 .data = &sysctl_legacy_va_layout,
2264 .maxlen = sizeof(sysctl_legacy_va_layout),
2265 .mode = 0644,
3b3376f2 2266 .proc_handler = proc_dointvec_minmax,
f461d2dc
CH
2267 .extra1 = SYSCTL_ZERO,
2268 },
2269#endif
2270#ifdef CONFIG_NUMA
2271 {
2272 .procname = "zone_reclaim_mode",
2273 .data = &node_reclaim_mode,
2274 .maxlen = sizeof(node_reclaim_mode),
2275 .mode = 0644,
3b3376f2 2276 .proc_handler = proc_dointvec_minmax,
f461d2dc
CH
2277 .extra1 = SYSCTL_ZERO,
2278 },
2279 {
2280 .procname = "min_unmapped_ratio",
2281 .data = &sysctl_min_unmapped_ratio,
2282 .maxlen = sizeof(sysctl_min_unmapped_ratio),
2283 .mode = 0644,
2284 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
2285 .extra1 = SYSCTL_ZERO,
78e36f3b 2286 .extra2 = SYSCTL_ONE_HUNDRED,
f461d2dc
CH
2287 },
2288 {
2289 .procname = "min_slab_ratio",
2290 .data = &sysctl_min_slab_ratio,
2291 .maxlen = sizeof(sysctl_min_slab_ratio),
2292 .mode = 0644,
2293 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
2294 .extra1 = SYSCTL_ZERO,
78e36f3b 2295 .extra2 = SYSCTL_ONE_HUNDRED,
f461d2dc
CH
2296 },
2297#endif
2298#ifdef CONFIG_SMP
2299 {
2300 .procname = "stat_interval",
2301 .data = &sysctl_stat_interval,
2302 .maxlen = sizeof(sysctl_stat_interval),
2303 .mode = 0644,
2304 .proc_handler = proc_dointvec_jiffies,
2305 },
2306 {
2307 .procname = "stat_refresh",
2308 .data = NULL,
2309 .maxlen = 0,
2310 .mode = 0600,
2311 .proc_handler = vmstat_refresh,
2312 },
2313#endif
2314#ifdef CONFIG_MMU
2315 {
2316 .procname = "mmap_min_addr",
2317 .data = &dac_mmap_min_addr,
2318 .maxlen = sizeof(unsigned long),
2319 .mode = 0644,
2320 .proc_handler = mmap_min_addr_handler,
2321 },
2322#endif
2323#ifdef CONFIG_NUMA
2324 {
2325 .procname = "numa_zonelist_order",
2326 .data = &numa_zonelist_order,
2327 .maxlen = NUMA_ZONELIST_ORDER_LEN,
2328 .mode = 0644,
2329 .proc_handler = numa_zonelist_order_handler,
2330 },
2331#endif
2332#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2333 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2334 {
2335 .procname = "vdso_enabled",
2336#ifdef CONFIG_X86_32
2337 .data = &vdso32_enabled,
2338 .maxlen = sizeof(vdso32_enabled),
2339#else
2340 .data = &vdso_enabled,
2341 .maxlen = sizeof(vdso_enabled),
2342#endif
2343 .mode = 0644,
2344 .proc_handler = proc_dointvec,
2345 .extra1 = SYSCTL_ZERO,
2346 },
2347#endif
f461d2dc
CH
2348#ifdef CONFIG_MEMORY_FAILURE
2349 {
2350 .procname = "memory_failure_early_kill",
2351 .data = &sysctl_memory_failure_early_kill,
2352 .maxlen = sizeof(sysctl_memory_failure_early_kill),
2353 .mode = 0644,
2354 .proc_handler = proc_dointvec_minmax,
2355 .extra1 = SYSCTL_ZERO,
2356 .extra2 = SYSCTL_ONE,
2357 },
2358 {
2359 .procname = "memory_failure_recovery",
2360 .data = &sysctl_memory_failure_recovery,
2361 .maxlen = sizeof(sysctl_memory_failure_recovery),
2362 .mode = 0644,
2363 .proc_handler = proc_dointvec_minmax,
2364 .extra1 = SYSCTL_ZERO,
2365 .extra2 = SYSCTL_ONE,
2366 },
2367#endif
2368 {
2369 .procname = "user_reserve_kbytes",
2370 .data = &sysctl_user_reserve_kbytes,
2371 .maxlen = sizeof(sysctl_user_reserve_kbytes),
2372 .mode = 0644,
2373 .proc_handler = proc_doulongvec_minmax,
2374 },
2375 {
2376 .procname = "admin_reserve_kbytes",
2377 .data = &sysctl_admin_reserve_kbytes,
2378 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
2379 .mode = 0644,
2380 .proc_handler = proc_doulongvec_minmax,
2381 },
2382#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2383 {
2384 .procname = "mmap_rnd_bits",
2385 .data = &mmap_rnd_bits,
2386 .maxlen = sizeof(mmap_rnd_bits),
2387 .mode = 0600,
2388 .proc_handler = proc_dointvec_minmax,
2389 .extra1 = (void *)&mmap_rnd_bits_min,
2390 .extra2 = (void *)&mmap_rnd_bits_max,
2391 },
2392#endif
2393#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2394 {
2395 .procname = "mmap_rnd_compat_bits",
2396 .data = &mmap_rnd_compat_bits,
2397 .maxlen = sizeof(mmap_rnd_compat_bits),
2398 .mode = 0600,
2399 .proc_handler = proc_dointvec_minmax,
2400 .extra1 = (void *)&mmap_rnd_compat_bits_min,
2401 .extra2 = (void *)&mmap_rnd_compat_bits_max,
2402 },
2403#endif
2404#ifdef CONFIG_USERFAULTFD
2405 {
2406 .procname = "unprivileged_userfaultfd",
2407 .data = &sysctl_unprivileged_userfaultfd,
2408 .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
2409 .mode = 0644,
2410 .proc_handler = proc_dointvec_minmax,
2411 .extra1 = SYSCTL_ZERO,
2412 .extra2 = SYSCTL_ONE,
2413 },
2414#endif
2415 { }
2416};
1da177e4 2417
f461d2dc
CH
2418static struct ctl_table debug_table[] = {
2419#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2420 {
2421 .procname = "exception-trace",
2422 .data = &show_unhandled_signals,
2423 .maxlen = sizeof(int),
2424 .mode = 0644,
2425 .proc_handler = proc_dointvec
2426 },
f461d2dc
CH
2427#endif
2428 { }
2429};
1da177e4 2430
f461d2dc
CH
2431static struct ctl_table dev_table[] = {
2432 { }
2433};
1da177e4 2434
51cb8dfc
LC
/* Pair each base name (kernel, vm, debug, dev) with the table that backs it. */
DECLARE_SYSCTL_BASE(kernel, kern_table);
DECLARE_SYSCTL_BASE(vm, vm_table);
DECLARE_SYSCTL_BASE(debug, debug_table);
DECLARE_SYSCTL_BASE(dev, dev_table);
1da177e4 2439
d8c0418a 2440int __init sysctl_init_bases(void)
492ecee8 2441{
51cb8dfc
LC
2442 register_sysctl_base(kernel);
2443 register_sysctl_base(vm);
51cb8dfc
LC
2444 register_sysctl_base(debug);
2445 register_sysctl_base(dev);
492ecee8 2446
f461d2dc 2447 return 0;
492ecee8 2448}
f461d2dc 2449#endif /* CONFIG_SYSCTL */
1da177e4
LT
/*
 * No sense putting this after each symbol definition, twice,
 * exception granted :-)
 *
 * The proc handler exports are therefore grouped in one place, after the
 * CONFIG_SYSCTL section has been closed.  proc_douintvec_minmax is the
 * only GPL-only export in this list.
 */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);