]> git.ipfire.org Git - thirdparty/linux.git/blob - kernel/sysctl.c
Merge tag 'char-misc-5.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
[thirdparty/linux.git] / kernel / sysctl.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * sysctl.c: General linux system control interface
4 *
5 * Begun 24 March 1995, Stephen Tweedie
6 * Added /proc support, Dec 1995
7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10 * Dynamic registration fixes, Stephen Tweedie.
11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13 * Horn.
14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17 * Wendling.
18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
19 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
20 */
21
22 #include <linux/module.h>
23 #include <linux/aio.h>
24 #include <linux/mm.h>
25 #include <linux/swap.h>
26 #include <linux/slab.h>
27 #include <linux/sysctl.h>
28 #include <linux/bitmap.h>
29 #include <linux/signal.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/fs.h>
36 #include <linux/init.h>
37 #include <linux/kernel.h>
38 #include <linux/kobject.h>
39 #include <linux/net.h>
40 #include <linux/sysrq.h>
41 #include <linux/highuid.h>
42 #include <linux/writeback.h>
43 #include <linux/ratelimit.h>
44 #include <linux/compaction.h>
45 #include <linux/hugetlb.h>
46 #include <linux/initrd.h>
47 #include <linux/key.h>
48 #include <linux/times.h>
49 #include <linux/limits.h>
50 #include <linux/dcache.h>
51 #include <linux/dnotify.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/kprobes.h>
60 #include <linux/pipe_fs_i.h>
61 #include <linux/oom.h>
62 #include <linux/kmod.h>
63 #include <linux/capability.h>
64 #include <linux/binfmts.h>
65 #include <linux/sched/sysctl.h>
66 #include <linux/sched/coredump.h>
67 #include <linux/kexec.h>
68 #include <linux/bpf.h>
69 #include <linux/mount.h>
70 #include <linux/userfaultfd_k.h>
71
72 #include "../lib/kstrtox.h"
73
74 #include <linux/uaccess.h>
75 #include <asm/processor.h>
76
77 #ifdef CONFIG_X86
78 #include <asm/nmi.h>
79 #include <asm/stacktrace.h>
80 #include <asm/io.h>
81 #endif
82 #ifdef CONFIG_SPARC
83 #include <asm/setup.h>
84 #endif
85 #ifdef CONFIG_BSD_PROCESS_ACCT
86 #include <linux/acct.h>
87 #endif
88 #ifdef CONFIG_RT_MUTEXES
89 #include <linux/rtmutex.h>
90 #endif
91 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
92 #include <linux/lockdep.h>
93 #endif
94 #ifdef CONFIG_CHR_DEV_SG
95 #include <scsi/sg.h>
96 #endif
97 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
98 #include <linux/stackleak.h>
99 #endif
100 #ifdef CONFIG_LOCKUP_DETECTOR
101 #include <linux/nmi.h>
102 #endif
103
104 #if defined(CONFIG_SYSCTL)
105
106 /*
 * External variables not in a header file.
 *
 * They are declared by hand here so the tables in this file can take their
 * addresses. Within the visible part of kern_table, core_uses_pid,
 * core_pattern, core_pipe_limit, pid_max (with pid_max_min/pid_max_max as
 * its bounds) and latencytop_enabled are referenced; users of the remaining
 * names are outside this view.
 */
107 extern int suid_dumpable;
108 #ifdef CONFIG_COREDUMP
109 extern int core_uses_pid;
110 extern char core_pattern[];
111 extern unsigned int core_pipe_limit;
112 #endif /* CONFIG_COREDUMP */
113 extern int pid_max;
114 extern int pid_max_min, pid_max_max; /* .extra1/.extra2 of the "pid_max" entry */
115 extern int percpu_pagelist_fraction;
116 extern int latencytop_enabled;
117 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
118 #ifndef CONFIG_MMU
119 extern int sysctl_nr_trim_pages;
120 #endif /* !CONFIG_MMU */
121
122 /*
 * Constants used for minimum and maximum.
 *
 * Table entries below point .extra1 (minimum) and .extra2 (maximum) at
 * these objects, so the bounds have to exist as addressable variables.
 */
123 #ifdef CONFIG_LOCKUP_DETECTOR
124 static int sixty = 60; /* upper bound of "watchdog_thresh" */
125 #endif
126
127 static int __maybe_unused neg_one = -1;
128
129 static int zero; /* static storage: implicitly 0 */
130 static int __maybe_unused one = 1;
131 static int __maybe_unused two = 2;
132 static int __maybe_unused four = 4;
133 static unsigned long zero_ul; /* static storage: implicitly 0 */
134 static unsigned long one_ul = 1;
135 static unsigned long long_max = LONG_MAX;
136 static int one_hundred = 100;
137 static int one_thousand = 1000;
138 #ifdef CONFIG_PRINTK
139 static int ten_thousand = 10000; /* upper bound of "printk_delay" */
140 #endif
141 #ifdef CONFIG_PERF_EVENTS
142 static int six_hundred_forty_kb = 640 * 1024; /* upper bound of "perf_event_max_stack" */
143 #endif
144
145 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
146 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
147
148 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
149 static int maxolduid = 65535;
150 static int minolduid; /* static storage: implicitly 0 */
151
152 static int ngroups_max = NGROUPS_MAX; /* published read-only (0444) as "ngroups_max" */
153 static const int cap_last_cap = CAP_LAST_CAP; /* published read-only (0444); const is cast away at the table entry */
154
155 /*
156 * Upper bound (.extra2) for the hung_task_timeout_secs and
157 * hung_task_check_interval_secs entries, both handled by proc_dohung_task_timeout_secs
158 */
159 #ifdef CONFIG_DETECT_HUNG_TASK
160 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
161 #endif
162
163 #ifdef CONFIG_INOTIFY_USER
164 #include <linux/inotify.h>
165 #endif
/*
 * Architecture-specific variables that kern_table publishes; each one is
 * declared under the same condition that guards its table entry.
 */
166 #ifdef CONFIG_SPARC /* NOTE(review): empty conditional, nothing is declared here */
167 #endif
168
169 #ifdef __hppa__
170 extern int pwrsw_enabled; /* backs "soft-power" */
171 #endif
172
173 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
174 extern int unaligned_enabled; /* backs "unaligned-trap" */
175 #endif
176
177 #ifdef CONFIG_IA64
178 extern int unaligned_dump_stack; /* backs "unaligned-dump-stack" */
179 #endif
180
181 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
182 extern int no_unaligned_warning; /* backs "ignore-unaligned-usertrap" */
183 #endif
184
185 #ifdef CONFIG_PROC_SYSCTL
186
187 /**
188 * enum sysctl_writes_mode - supported sysctl write modes
189 *
190 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
191 * to be written, and multiple writes on the same sysctl file descriptor
192 * will rewrite the sysctl value, regardless of file position. No warning
193 * is issued when the initial position is not 0.
194 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
195 * not 0.
196 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
197 * file position 0 and the value must be fully contained in the buffer
198 * sent to the write syscall. If dealing with strings, respect the file
199 * position, but restrict this to the max length of the buffer; anything
200 * past the max length will be ignored. Multiple writes will append
201 * to the buffer.
202 *
203 * These write modes control how the current file position affects the behavior
204 * of updating sysctl values through the proc interface on each write.
205 */
206 enum sysctl_writes_mode {
207 SYSCTL_WRITES_LEGACY = -1,
208 SYSCTL_WRITES_WARN = 0,
209 SYSCTL_WRITES_STRICT = 1,
210 };
211
/*
 * Active write mode; starts out strict. Published as the
 * "sysctl_writes_strict" entry of kern_table, which bounds it with
 * neg_one/one, i.e. the numeric range of the enum above.
 */
212 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
213
/* Forward declarations: handlers of the "cad_pid" and "tainted" entries. */
214 static int proc_do_cad_pid(struct ctl_table *table, int write,
215 void __user *buffer, size_t *lenp, loff_t *ppos);
216 static int proc_taint(struct ctl_table *table, int write,
217 void __user *buffer, size_t *lenp, loff_t *ppos);
218 #endif /* CONFIG_PROC_SYSCTL */
219
/*
 * Forward declarations of file-local proc handlers, needed because the
 * tables that reference them appear before their definitions.
 */
220 #ifdef CONFIG_PRINTK
/* Handler of the "dmesg_restrict" and "kptr_restrict" entries. */
221 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
222 void __user *buffer, size_t *lenp, loff_t *ppos);
223 #endif
224
/* NOTE(review): no user of this handler is visible in this part of the file. */
225 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
226 void __user *buffer, size_t *lenp, loff_t *ppos);
227 #ifdef CONFIG_COREDUMP
/* Handler of the "core_pattern" entry. */
228 static int proc_dostring_coredump(struct ctl_table *table, int write,
229 void __user *buffer, size_t *lenp, loff_t *ppos);
230 #endif
/* NOTE(review): no user of this handler is visible in this part of the file. */
231 static int proc_dopipe_max_size(struct ctl_table *table, int write,
232 void __user *buffer, size_t *lenp, loff_t *ppos);
233 #ifdef CONFIG_BPF_SYSCALL
/* Handler of the "bpf_stats_enabled" entry. */
234 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
235 void __user *buffer, size_t *lenp,
236 loff_t *ppos);
237 #endif
238
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * Value behind the "sysrq" entry of kern_table.
 * Note: the sysrq code keeps its own private copy.
 */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * proc handler of the "sysrq" entry.
 *
 * The request is first handed to proc_dointvec(). If that succeeds and the
 * request was a write, the resulting __sysrq_enabled value is passed on to
 * sysrq_toggle_support().
 *
 * Returns the error reported by proc_dointvec(), or 0 on success.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp,
				loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/* Reads and failed requests leave the sysrq state untouched. */
	if (!ret && write)
		sysrq_toggle_support(__sysrq_enabled);

	return ret;
}

#endif /* CONFIG_MAGIC_SYSRQ */
260
/*
 * Tables referenced before (or without) a definition in this part of the
 * file. The five static ones are the children of sysctl_base_table below.
 */
261 static struct ctl_table kern_table[];
262 static struct ctl_table vm_table[];
263 static struct ctl_table fs_table[];
264 static struct ctl_table debug_table[];
265 static struct ctl_table dev_table[];
266 extern struct ctl_table random_table[]; /* child of the "random" entry in kern_table */
267 #ifdef CONFIG_EPOLL
268 extern struct ctl_table epoll_table[];
269 #endif
270
271 #ifdef CONFIG_FW_LOADER_USER_HELPER
272 extern struct ctl_table firmware_config_table[]; /* child of "firmware_config" in kern_table */
273 #endif
274
275 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
276 int sysctl_legacy_va_layout; /* defined here (not extern, not static); users are outside this view */
277 #endif
278
279 /* The default sysctl tables: */
280
281 static struct ctl_table sysctl_base_table[] = {
282 {
283 .procname = "kernel",
284 .mode = 0555,
285 .child = kern_table,
286 },
287 {
288 .procname = "vm",
289 .mode = 0555,
290 .child = vm_table,
291 },
292 {
293 .procname = "fs",
294 .mode = 0555,
295 .child = fs_table,
296 },
297 {
298 .procname = "debug",
299 .mode = 0555,
300 .child = debug_table,
301 },
302 {
303 .procname = "dev",
304 .mode = 0555,
305 .child = dev_table,
306 },
307 { }
308 };
309
/*
 * Bounds for the scheduler debug entries of kern_table, referenced through
 * .extra1 (minimum) and .extra2 (maximum).
 */
310 #ifdef CONFIG_SCHED_DEBUG
/* The granularity pair bounds both sched_min_granularity_ns and sched_latency_ns. */
311 static int min_sched_granularity_ns = 100000; /* 100 usecs */
312 static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
/* Bounds of sched_wakeup_granularity_ns. */
313 static int min_wakeup_granularity_ns; /* 0 usecs */
314 static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
315 #ifdef CONFIG_SMP
/* Bounds of sched_tunable_scaling: first enumerator up to the one before _END. */
316 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
317 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
318 #endif /* CONFIG_SMP */
319 #endif /* CONFIG_SCHED_DEBUG */
320
321 #ifdef CONFIG_COMPACTION
/* NOTE(review): presumably bounds of an extfrag_threshold entry; its user is outside this view. */
322 static int min_extfrag_threshold; /* static storage: implicitly 0 */
323 static int max_extfrag_threshold = 1000;
324 #endif /* CONFIG_COMPACTION */
325
326 static struct ctl_table kern_table[] = {
327 {
328 .procname = "sched_child_runs_first",
329 .data = &sysctl_sched_child_runs_first,
330 .maxlen = sizeof(unsigned int),
331 .mode = 0644,
332 .proc_handler = proc_dointvec,
333 },
334 #ifdef CONFIG_SCHED_DEBUG
335 {
336 .procname = "sched_min_granularity_ns",
337 .data = &sysctl_sched_min_granularity,
338 .maxlen = sizeof(unsigned int),
339 .mode = 0644,
340 .proc_handler = sched_proc_update_handler,
341 .extra1 = &min_sched_granularity_ns,
342 .extra2 = &max_sched_granularity_ns,
343 },
344 {
345 .procname = "sched_latency_ns",
346 .data = &sysctl_sched_latency,
347 .maxlen = sizeof(unsigned int),
348 .mode = 0644,
349 .proc_handler = sched_proc_update_handler,
350 .extra1 = &min_sched_granularity_ns,
351 .extra2 = &max_sched_granularity_ns,
352 },
353 {
354 .procname = "sched_wakeup_granularity_ns",
355 .data = &sysctl_sched_wakeup_granularity,
356 .maxlen = sizeof(unsigned int),
357 .mode = 0644,
358 .proc_handler = sched_proc_update_handler,
359 .extra1 = &min_wakeup_granularity_ns,
360 .extra2 = &max_wakeup_granularity_ns,
361 },
362 #ifdef CONFIG_SMP
363 {
364 .procname = "sched_tunable_scaling",
365 .data = &sysctl_sched_tunable_scaling,
366 .maxlen = sizeof(enum sched_tunable_scaling),
367 .mode = 0644,
368 .proc_handler = sched_proc_update_handler,
369 .extra1 = &min_sched_tunable_scaling,
370 .extra2 = &max_sched_tunable_scaling,
371 },
372 {
373 .procname = "sched_migration_cost_ns",
374 .data = &sysctl_sched_migration_cost,
375 .maxlen = sizeof(unsigned int),
376 .mode = 0644,
377 .proc_handler = proc_dointvec,
378 },
379 {
380 .procname = "sched_nr_migrate",
381 .data = &sysctl_sched_nr_migrate,
382 .maxlen = sizeof(unsigned int),
383 .mode = 0644,
384 .proc_handler = proc_dointvec,
385 },
386 #ifdef CONFIG_SCHEDSTATS
387 {
388 .procname = "sched_schedstats",
389 .data = NULL,
390 .maxlen = sizeof(unsigned int),
391 .mode = 0644,
392 .proc_handler = sysctl_schedstats,
393 .extra1 = &zero,
394 .extra2 = &one,
395 },
396 #endif /* CONFIG_SCHEDSTATS */
397 #endif /* CONFIG_SMP */
398 #ifdef CONFIG_NUMA_BALANCING
399 {
400 .procname = "numa_balancing_scan_delay_ms",
401 .data = &sysctl_numa_balancing_scan_delay,
402 .maxlen = sizeof(unsigned int),
403 .mode = 0644,
404 .proc_handler = proc_dointvec,
405 },
406 {
407 .procname = "numa_balancing_scan_period_min_ms",
408 .data = &sysctl_numa_balancing_scan_period_min,
409 .maxlen = sizeof(unsigned int),
410 .mode = 0644,
411 .proc_handler = proc_dointvec,
412 },
413 {
414 .procname = "numa_balancing_scan_period_max_ms",
415 .data = &sysctl_numa_balancing_scan_period_max,
416 .maxlen = sizeof(unsigned int),
417 .mode = 0644,
418 .proc_handler = proc_dointvec,
419 },
420 {
421 .procname = "numa_balancing_scan_size_mb",
422 .data = &sysctl_numa_balancing_scan_size,
423 .maxlen = sizeof(unsigned int),
424 .mode = 0644,
425 .proc_handler = proc_dointvec_minmax,
426 .extra1 = &one,
427 },
428 {
429 .procname = "numa_balancing",
430 .data = NULL, /* filled in by handler */
431 .maxlen = sizeof(unsigned int),
432 .mode = 0644,
433 .proc_handler = sysctl_numa_balancing,
434 .extra1 = &zero,
435 .extra2 = &one,
436 },
437 #endif /* CONFIG_NUMA_BALANCING */
438 #endif /* CONFIG_SCHED_DEBUG */
439 {
440 .procname = "sched_rt_period_us",
441 .data = &sysctl_sched_rt_period,
442 .maxlen = sizeof(unsigned int),
443 .mode = 0644,
444 .proc_handler = sched_rt_handler,
445 },
446 {
447 .procname = "sched_rt_runtime_us",
448 .data = &sysctl_sched_rt_runtime,
449 .maxlen = sizeof(int),
450 .mode = 0644,
451 .proc_handler = sched_rt_handler,
452 },
453 {
454 .procname = "sched_rr_timeslice_ms",
455 .data = &sysctl_sched_rr_timeslice,
456 .maxlen = sizeof(int),
457 .mode = 0644,
458 .proc_handler = sched_rr_handler,
459 },
460 #ifdef CONFIG_SCHED_AUTOGROUP
461 {
462 .procname = "sched_autogroup_enabled",
463 .data = &sysctl_sched_autogroup_enabled,
464 .maxlen = sizeof(unsigned int),
465 .mode = 0644,
466 .proc_handler = proc_dointvec_minmax,
467 .extra1 = &zero,
468 .extra2 = &one,
469 },
470 #endif
471 #ifdef CONFIG_CFS_BANDWIDTH
472 {
473 .procname = "sched_cfs_bandwidth_slice_us",
474 .data = &sysctl_sched_cfs_bandwidth_slice,
475 .maxlen = sizeof(unsigned int),
476 .mode = 0644,
477 .proc_handler = proc_dointvec_minmax,
478 .extra1 = &one,
479 },
480 #endif
481 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
482 {
483 .procname = "sched_energy_aware",
484 .data = &sysctl_sched_energy_aware,
485 .maxlen = sizeof(unsigned int),
486 .mode = 0644,
487 .proc_handler = sched_energy_aware_handler,
488 .extra1 = &zero,
489 .extra2 = &one,
490 },
491 #endif
492 #ifdef CONFIG_PROVE_LOCKING
493 {
494 .procname = "prove_locking",
495 .data = &prove_locking,
496 .maxlen = sizeof(int),
497 .mode = 0644,
498 .proc_handler = proc_dointvec,
499 },
500 #endif
501 #ifdef CONFIG_LOCK_STAT
502 {
503 .procname = "lock_stat",
504 .data = &lock_stat,
505 .maxlen = sizeof(int),
506 .mode = 0644,
507 .proc_handler = proc_dointvec,
508 },
509 #endif
510 {
511 .procname = "panic",
512 .data = &panic_timeout,
513 .maxlen = sizeof(int),
514 .mode = 0644,
515 .proc_handler = proc_dointvec,
516 },
517 #ifdef CONFIG_COREDUMP
518 {
519 .procname = "core_uses_pid",
520 .data = &core_uses_pid,
521 .maxlen = sizeof(int),
522 .mode = 0644,
523 .proc_handler = proc_dointvec,
524 },
525 {
526 .procname = "core_pattern",
527 .data = core_pattern,
528 .maxlen = CORENAME_MAX_SIZE,
529 .mode = 0644,
530 .proc_handler = proc_dostring_coredump,
531 },
532 {
533 .procname = "core_pipe_limit",
534 .data = &core_pipe_limit,
535 .maxlen = sizeof(unsigned int),
536 .mode = 0644,
537 .proc_handler = proc_dointvec,
538 },
539 #endif
540 #ifdef CONFIG_PROC_SYSCTL
541 {
542 .procname = "tainted",
543 .maxlen = sizeof(long),
544 .mode = 0644,
545 .proc_handler = proc_taint,
546 },
547 {
548 .procname = "sysctl_writes_strict",
549 .data = &sysctl_writes_strict,
550 .maxlen = sizeof(int),
551 .mode = 0644,
552 .proc_handler = proc_dointvec_minmax,
553 .extra1 = &neg_one,
554 .extra2 = &one,
555 },
556 #endif
557 #ifdef CONFIG_LATENCYTOP
558 {
559 .procname = "latencytop",
560 .data = &latencytop_enabled,
561 .maxlen = sizeof(int),
562 .mode = 0644,
563 .proc_handler = sysctl_latencytop,
564 },
565 #endif
566 #ifdef CONFIG_BLK_DEV_INITRD
567 {
568 .procname = "real-root-dev",
569 .data = &real_root_dev,
570 .maxlen = sizeof(int),
571 .mode = 0644,
572 .proc_handler = proc_dointvec,
573 },
574 #endif
575 {
576 .procname = "print-fatal-signals",
577 .data = &print_fatal_signals,
578 .maxlen = sizeof(int),
579 .mode = 0644,
580 .proc_handler = proc_dointvec,
581 },
582 #ifdef CONFIG_SPARC
583 {
584 .procname = "reboot-cmd",
585 .data = reboot_command,
586 .maxlen = 256,
587 .mode = 0644,
588 .proc_handler = proc_dostring,
589 },
590 {
591 .procname = "stop-a",
592 .data = &stop_a_enabled,
593 .maxlen = sizeof (int),
594 .mode = 0644,
595 .proc_handler = proc_dointvec,
596 },
597 {
598 .procname = "scons-poweroff",
599 .data = &scons_pwroff,
600 .maxlen = sizeof (int),
601 .mode = 0644,
602 .proc_handler = proc_dointvec,
603 },
604 #endif
605 #ifdef CONFIG_SPARC64
606 {
607 .procname = "tsb-ratio",
608 .data = &sysctl_tsb_ratio,
609 .maxlen = sizeof (int),
610 .mode = 0644,
611 .proc_handler = proc_dointvec,
612 },
613 #endif
614 #ifdef __hppa__
615 {
616 .procname = "soft-power",
617 .data = &pwrsw_enabled,
618 .maxlen = sizeof (int),
619 .mode = 0644,
620 .proc_handler = proc_dointvec,
621 },
622 #endif
623 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
624 {
625 .procname = "unaligned-trap",
626 .data = &unaligned_enabled,
627 .maxlen = sizeof (int),
628 .mode = 0644,
629 .proc_handler = proc_dointvec,
630 },
631 #endif
632 {
633 .procname = "ctrl-alt-del",
634 .data = &C_A_D,
635 .maxlen = sizeof(int),
636 .mode = 0644,
637 .proc_handler = proc_dointvec,
638 },
639 #ifdef CONFIG_FUNCTION_TRACER
640 {
641 .procname = "ftrace_enabled",
642 .data = &ftrace_enabled,
643 .maxlen = sizeof(int),
644 .mode = 0644,
645 .proc_handler = ftrace_enable_sysctl,
646 },
647 #endif
648 #ifdef CONFIG_STACK_TRACER
649 {
650 .procname = "stack_tracer_enabled",
651 .data = &stack_tracer_enabled,
652 .maxlen = sizeof(int),
653 .mode = 0644,
654 .proc_handler = stack_trace_sysctl,
655 },
656 #endif
657 #ifdef CONFIG_TRACING
658 {
659 .procname = "ftrace_dump_on_oops",
660 .data = &ftrace_dump_on_oops,
661 .maxlen = sizeof(int),
662 .mode = 0644,
663 .proc_handler = proc_dointvec,
664 },
665 {
666 .procname = "traceoff_on_warning",
667 .data = &__disable_trace_on_warning,
668 .maxlen = sizeof(__disable_trace_on_warning),
669 .mode = 0644,
670 .proc_handler = proc_dointvec,
671 },
672 {
673 .procname = "tracepoint_printk",
674 .data = &tracepoint_printk,
675 .maxlen = sizeof(tracepoint_printk),
676 .mode = 0644,
677 .proc_handler = tracepoint_printk_sysctl,
678 },
679 #endif
680 #ifdef CONFIG_KEXEC_CORE
681 {
682 .procname = "kexec_load_disabled",
683 .data = &kexec_load_disabled,
684 .maxlen = sizeof(int),
685 .mode = 0644,
686 /* only handle a transition from default "0" to "1" */
687 .proc_handler = proc_dointvec_minmax,
688 .extra1 = &one,
689 .extra2 = &one,
690 },
691 #endif
692 #ifdef CONFIG_MODULES
693 {
694 .procname = "modprobe",
695 .data = &modprobe_path,
696 .maxlen = KMOD_PATH_LEN,
697 .mode = 0644,
698 .proc_handler = proc_dostring,
699 },
700 {
701 .procname = "modules_disabled",
702 .data = &modules_disabled,
703 .maxlen = sizeof(int),
704 .mode = 0644,
705 /* only handle a transition from default "0" to "1" */
706 .proc_handler = proc_dointvec_minmax,
707 .extra1 = &one,
708 .extra2 = &one,
709 },
710 #endif
711 #ifdef CONFIG_UEVENT_HELPER
712 {
713 .procname = "hotplug",
714 .data = &uevent_helper,
715 .maxlen = UEVENT_HELPER_PATH_LEN,
716 .mode = 0644,
717 .proc_handler = proc_dostring,
718 },
719 #endif
720 #ifdef CONFIG_CHR_DEV_SG
721 {
722 .procname = "sg-big-buff",
723 .data = &sg_big_buff,
724 .maxlen = sizeof (int),
725 .mode = 0444,
726 .proc_handler = proc_dointvec,
727 },
728 #endif
729 #ifdef CONFIG_BSD_PROCESS_ACCT
730 {
731 .procname = "acct",
732 .data = &acct_parm,
733 .maxlen = 3*sizeof(int),
734 .mode = 0644,
735 .proc_handler = proc_dointvec,
736 },
737 #endif
738 #ifdef CONFIG_MAGIC_SYSRQ
739 {
740 .procname = "sysrq",
741 .data = &__sysrq_enabled,
742 .maxlen = sizeof (int),
743 .mode = 0644,
744 .proc_handler = sysrq_sysctl_handler,
745 },
746 #endif
747 #ifdef CONFIG_PROC_SYSCTL
748 {
749 .procname = "cad_pid",
750 .data = NULL,
751 .maxlen = sizeof (int),
752 .mode = 0600,
753 .proc_handler = proc_do_cad_pid,
754 },
755 #endif
756 {
757 .procname = "threads-max",
758 .data = NULL,
759 .maxlen = sizeof(int),
760 .mode = 0644,
761 .proc_handler = sysctl_max_threads,
762 },
763 {
764 .procname = "random",
765 .mode = 0555,
766 .child = random_table,
767 },
768 {
769 .procname = "usermodehelper",
770 .mode = 0555,
771 .child = usermodehelper_table,
772 },
773 #ifdef CONFIG_FW_LOADER_USER_HELPER
774 {
775 .procname = "firmware_config",
776 .mode = 0555,
777 .child = firmware_config_table,
778 },
779 #endif
780 {
781 .procname = "overflowuid",
782 .data = &overflowuid,
783 .maxlen = sizeof(int),
784 .mode = 0644,
785 .proc_handler = proc_dointvec_minmax,
786 .extra1 = &minolduid,
787 .extra2 = &maxolduid,
788 },
789 {
790 .procname = "overflowgid",
791 .data = &overflowgid,
792 .maxlen = sizeof(int),
793 .mode = 0644,
794 .proc_handler = proc_dointvec_minmax,
795 .extra1 = &minolduid,
796 .extra2 = &maxolduid,
797 },
798 #ifdef CONFIG_S390
799 #ifdef CONFIG_MATHEMU
800 {
801 .procname = "ieee_emulation_warnings",
802 .data = &sysctl_ieee_emulation_warnings,
803 .maxlen = sizeof(int),
804 .mode = 0644,
805 .proc_handler = proc_dointvec,
806 },
807 #endif
808 {
809 .procname = "userprocess_debug",
810 .data = &show_unhandled_signals,
811 .maxlen = sizeof(int),
812 .mode = 0644,
813 .proc_handler = proc_dointvec,
814 },
815 #endif
816 {
817 .procname = "pid_max",
818 .data = &pid_max,
819 .maxlen = sizeof (int),
820 .mode = 0644,
821 .proc_handler = proc_dointvec_minmax,
822 .extra1 = &pid_max_min,
823 .extra2 = &pid_max_max,
824 },
825 {
826 .procname = "panic_on_oops",
827 .data = &panic_on_oops,
828 .maxlen = sizeof(int),
829 .mode = 0644,
830 .proc_handler = proc_dointvec,
831 },
832 {
833 .procname = "panic_print",
834 .data = &panic_print,
835 .maxlen = sizeof(unsigned long),
836 .mode = 0644,
837 .proc_handler = proc_doulongvec_minmax,
838 },
839 #if defined CONFIG_PRINTK
840 {
841 .procname = "printk",
842 .data = &console_loglevel,
843 .maxlen = 4*sizeof(int),
844 .mode = 0644,
845 .proc_handler = proc_dointvec,
846 },
847 {
848 .procname = "printk_ratelimit",
849 .data = &printk_ratelimit_state.interval,
850 .maxlen = sizeof(int),
851 .mode = 0644,
852 .proc_handler = proc_dointvec_jiffies,
853 },
854 {
855 .procname = "printk_ratelimit_burst",
856 .data = &printk_ratelimit_state.burst,
857 .maxlen = sizeof(int),
858 .mode = 0644,
859 .proc_handler = proc_dointvec,
860 },
861 {
862 .procname = "printk_delay",
863 .data = &printk_delay_msec,
864 .maxlen = sizeof(int),
865 .mode = 0644,
866 .proc_handler = proc_dointvec_minmax,
867 .extra1 = &zero,
868 .extra2 = &ten_thousand,
869 },
870 {
871 .procname = "printk_devkmsg",
872 .data = devkmsg_log_str,
873 .maxlen = DEVKMSG_STR_MAX_SIZE,
874 .mode = 0644,
875 .proc_handler = devkmsg_sysctl_set_loglvl,
876 },
877 {
878 .procname = "dmesg_restrict",
879 .data = &dmesg_restrict,
880 .maxlen = sizeof(int),
881 .mode = 0644,
882 .proc_handler = proc_dointvec_minmax_sysadmin,
883 .extra1 = &zero,
884 .extra2 = &one,
885 },
886 {
887 .procname = "kptr_restrict",
888 .data = &kptr_restrict,
889 .maxlen = sizeof(int),
890 .mode = 0644,
891 .proc_handler = proc_dointvec_minmax_sysadmin,
892 .extra1 = &zero,
893 .extra2 = &two,
894 },
895 #endif
896 {
897 .procname = "ngroups_max",
898 .data = &ngroups_max,
899 .maxlen = sizeof (int),
900 .mode = 0444,
901 .proc_handler = proc_dointvec,
902 },
903 {
904 .procname = "cap_last_cap",
905 .data = (void *)&cap_last_cap,
906 .maxlen = sizeof(int),
907 .mode = 0444,
908 .proc_handler = proc_dointvec,
909 },
910 #if defined(CONFIG_LOCKUP_DETECTOR)
911 {
912 .procname = "watchdog",
913 .data = &watchdog_user_enabled,
914 .maxlen = sizeof(int),
915 .mode = 0644,
916 .proc_handler = proc_watchdog,
917 .extra1 = &zero,
918 .extra2 = &one,
919 },
920 {
921 .procname = "watchdog_thresh",
922 .data = &watchdog_thresh,
923 .maxlen = sizeof(int),
924 .mode = 0644,
925 .proc_handler = proc_watchdog_thresh,
926 .extra1 = &zero,
927 .extra2 = &sixty,
928 },
929 {
930 .procname = "nmi_watchdog",
931 .data = &nmi_watchdog_user_enabled,
932 .maxlen = sizeof(int),
933 .mode = NMI_WATCHDOG_SYSCTL_PERM,
934 .proc_handler = proc_nmi_watchdog,
935 .extra1 = &zero,
936 .extra2 = &one,
937 },
938 {
939 .procname = "watchdog_cpumask",
940 .data = &watchdog_cpumask_bits,
941 .maxlen = NR_CPUS,
942 .mode = 0644,
943 .proc_handler = proc_watchdog_cpumask,
944 },
945 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
946 {
947 .procname = "soft_watchdog",
948 .data = &soft_watchdog_user_enabled,
949 .maxlen = sizeof(int),
950 .mode = 0644,
951 .proc_handler = proc_soft_watchdog,
952 .extra1 = &zero,
953 .extra2 = &one,
954 },
955 {
956 .procname = "softlockup_panic",
957 .data = &softlockup_panic,
958 .maxlen = sizeof(int),
959 .mode = 0644,
960 .proc_handler = proc_dointvec_minmax,
961 .extra1 = &zero,
962 .extra2 = &one,
963 },
964 #ifdef CONFIG_SMP
965 {
966 .procname = "softlockup_all_cpu_backtrace",
967 .data = &sysctl_softlockup_all_cpu_backtrace,
968 .maxlen = sizeof(int),
969 .mode = 0644,
970 .proc_handler = proc_dointvec_minmax,
971 .extra1 = &zero,
972 .extra2 = &one,
973 },
974 #endif /* CONFIG_SMP */
975 #endif
976 #ifdef CONFIG_HARDLOCKUP_DETECTOR
977 {
978 .procname = "hardlockup_panic",
979 .data = &hardlockup_panic,
980 .maxlen = sizeof(int),
981 .mode = 0644,
982 .proc_handler = proc_dointvec_minmax,
983 .extra1 = &zero,
984 .extra2 = &one,
985 },
986 #ifdef CONFIG_SMP
987 {
988 .procname = "hardlockup_all_cpu_backtrace",
989 .data = &sysctl_hardlockup_all_cpu_backtrace,
990 .maxlen = sizeof(int),
991 .mode = 0644,
992 .proc_handler = proc_dointvec_minmax,
993 .extra1 = &zero,
994 .extra2 = &one,
995 },
996 #endif /* CONFIG_SMP */
997 #endif
998 #endif
999
1000 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1001 {
1002 .procname = "unknown_nmi_panic",
1003 .data = &unknown_nmi_panic,
1004 .maxlen = sizeof (int),
1005 .mode = 0644,
1006 .proc_handler = proc_dointvec,
1007 },
1008 #endif
1009 #if defined(CONFIG_X86)
1010 {
1011 .procname = "panic_on_unrecovered_nmi",
1012 .data = &panic_on_unrecovered_nmi,
1013 .maxlen = sizeof(int),
1014 .mode = 0644,
1015 .proc_handler = proc_dointvec,
1016 },
1017 {
1018 .procname = "panic_on_io_nmi",
1019 .data = &panic_on_io_nmi,
1020 .maxlen = sizeof(int),
1021 .mode = 0644,
1022 .proc_handler = proc_dointvec,
1023 },
1024 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1025 {
1026 .procname = "panic_on_stackoverflow",
1027 .data = &sysctl_panic_on_stackoverflow,
1028 .maxlen = sizeof(int),
1029 .mode = 0644,
1030 .proc_handler = proc_dointvec,
1031 },
1032 #endif
1033 {
1034 .procname = "bootloader_type",
1035 .data = &bootloader_type,
1036 .maxlen = sizeof (int),
1037 .mode = 0444,
1038 .proc_handler = proc_dointvec,
1039 },
1040 {
1041 .procname = "bootloader_version",
1042 .data = &bootloader_version,
1043 .maxlen = sizeof (int),
1044 .mode = 0444,
1045 .proc_handler = proc_dointvec,
1046 },
1047 {
1048 .procname = "io_delay_type",
1049 .data = &io_delay_type,
1050 .maxlen = sizeof(int),
1051 .mode = 0644,
1052 .proc_handler = proc_dointvec,
1053 },
1054 #endif
1055 #if defined(CONFIG_MMU)
1056 {
1057 .procname = "randomize_va_space",
1058 .data = &randomize_va_space,
1059 .maxlen = sizeof(int),
1060 .mode = 0644,
1061 .proc_handler = proc_dointvec,
1062 },
1063 #endif
1064 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1065 {
1066 .procname = "spin_retry",
1067 .data = &spin_retry,
1068 .maxlen = sizeof (int),
1069 .mode = 0644,
1070 .proc_handler = proc_dointvec,
1071 },
1072 #endif
1073 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1074 {
1075 .procname = "acpi_video_flags",
1076 .data = &acpi_realmode_flags,
1077 .maxlen = sizeof (unsigned long),
1078 .mode = 0644,
1079 .proc_handler = proc_doulongvec_minmax,
1080 },
1081 #endif
1082 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1083 {
1084 .procname = "ignore-unaligned-usertrap",
1085 .data = &no_unaligned_warning,
1086 .maxlen = sizeof (int),
1087 .mode = 0644,
1088 .proc_handler = proc_dointvec,
1089 },
1090 #endif
1091 #ifdef CONFIG_IA64
1092 {
1093 .procname = "unaligned-dump-stack",
1094 .data = &unaligned_dump_stack,
1095 .maxlen = sizeof (int),
1096 .mode = 0644,
1097 .proc_handler = proc_dointvec,
1098 },
1099 #endif
1100 #ifdef CONFIG_DETECT_HUNG_TASK
1101 {
1102 .procname = "hung_task_panic",
1103 .data = &sysctl_hung_task_panic,
1104 .maxlen = sizeof(int),
1105 .mode = 0644,
1106 .proc_handler = proc_dointvec_minmax,
1107 .extra1 = &zero,
1108 .extra2 = &one,
1109 },
1110 {
1111 .procname = "hung_task_check_count",
1112 .data = &sysctl_hung_task_check_count,
1113 .maxlen = sizeof(int),
1114 .mode = 0644,
1115 .proc_handler = proc_dointvec_minmax,
1116 .extra1 = &zero,
1117 },
1118 {
1119 .procname = "hung_task_timeout_secs",
1120 .data = &sysctl_hung_task_timeout_secs,
1121 .maxlen = sizeof(unsigned long),
1122 .mode = 0644,
1123 .proc_handler = proc_dohung_task_timeout_secs,
1124 .extra2 = &hung_task_timeout_max,
1125 },
1126 {
1127 .procname = "hung_task_check_interval_secs",
1128 .data = &sysctl_hung_task_check_interval_secs,
1129 .maxlen = sizeof(unsigned long),
1130 .mode = 0644,
1131 .proc_handler = proc_dohung_task_timeout_secs,
1132 .extra2 = &hung_task_timeout_max,
1133 },
1134 {
1135 .procname = "hung_task_warnings",
1136 .data = &sysctl_hung_task_warnings,
1137 .maxlen = sizeof(int),
1138 .mode = 0644,
1139 .proc_handler = proc_dointvec_minmax,
1140 .extra1 = &neg_one,
1141 },
1142 #endif
1143 #ifdef CONFIG_RT_MUTEXES
1144 {
1145 .procname = "max_lock_depth",
1146 .data = &max_lock_depth,
1147 .maxlen = sizeof(int),
1148 .mode = 0644,
1149 .proc_handler = proc_dointvec,
1150 },
1151 #endif
1152 {
1153 .procname = "poweroff_cmd",
1154 .data = &poweroff_cmd,
1155 .maxlen = POWEROFF_CMD_PATH_LEN,
1156 .mode = 0644,
1157 .proc_handler = proc_dostring,
1158 },
1159 #ifdef CONFIG_KEYS
1160 {
1161 .procname = "keys",
1162 .mode = 0555,
1163 .child = key_sysctls,
1164 },
1165 #endif
1166 #ifdef CONFIG_PERF_EVENTS
1167 /*
1168 * User-space scripts rely on the existence of this file
1169 * as a feature check for perf_events being enabled.
1170 *
1171 * So it's an ABI, do not remove!
1172 */
1173 {
1174 .procname = "perf_event_paranoid",
1175 .data = &sysctl_perf_event_paranoid,
1176 .maxlen = sizeof(sysctl_perf_event_paranoid),
1177 .mode = 0644,
1178 .proc_handler = proc_dointvec,
1179 },
1180 {
1181 .procname = "perf_event_mlock_kb",
1182 .data = &sysctl_perf_event_mlock,
1183 .maxlen = sizeof(sysctl_perf_event_mlock),
1184 .mode = 0644,
1185 .proc_handler = proc_dointvec,
1186 },
1187 {
1188 .procname = "perf_event_max_sample_rate",
1189 .data = &sysctl_perf_event_sample_rate,
1190 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1191 .mode = 0644,
1192 .proc_handler = perf_proc_update_handler,
1193 .extra1 = &one,
1194 },
1195 {
1196 .procname = "perf_cpu_time_max_percent",
1197 .data = &sysctl_perf_cpu_time_max_percent,
1198 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1199 .mode = 0644,
1200 .proc_handler = perf_cpu_time_max_percent_handler,
1201 .extra1 = &zero,
1202 .extra2 = &one_hundred,
1203 },
1204 {
1205 .procname = "perf_event_max_stack",
1206 .data = &sysctl_perf_event_max_stack,
1207 .maxlen = sizeof(sysctl_perf_event_max_stack),
1208 .mode = 0644,
1209 .proc_handler = perf_event_max_stack_handler,
1210 .extra1 = &zero,
1211 .extra2 = &six_hundred_forty_kb,
1212 },
1213 {
1214 .procname = "perf_event_max_contexts_per_stack",
1215 .data = &sysctl_perf_event_max_contexts_per_stack,
1216 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
1217 .mode = 0644,
1218 .proc_handler = perf_event_max_stack_handler,
1219 .extra1 = &zero,
1220 .extra2 = &one_thousand,
1221 },
1222 #endif
1223 {
1224 .procname = "panic_on_warn",
1225 .data = &panic_on_warn,
1226 .maxlen = sizeof(int),
1227 .mode = 0644,
1228 .proc_handler = proc_dointvec_minmax,
1229 .extra1 = &zero,
1230 .extra2 = &one,
1231 },
1232 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1233 {
1234 .procname = "timer_migration",
1235 .data = &sysctl_timer_migration,
1236 .maxlen = sizeof(unsigned int),
1237 .mode = 0644,
1238 .proc_handler = timer_migration_handler,
1239 .extra1 = &zero,
1240 .extra2 = &one,
1241 },
1242 #endif
1243 #ifdef CONFIG_BPF_SYSCALL
1244 {
1245 .procname = "unprivileged_bpf_disabled",
1246 .data = &sysctl_unprivileged_bpf_disabled,
1247 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
1248 .mode = 0644,
1249 /* only handle a transition from default "0" to "1" */
1250 .proc_handler = proc_dointvec_minmax,
1251 .extra1 = &one,
1252 .extra2 = &one,
1253 },
1254 {
1255 .procname = "bpf_stats_enabled",
1256 .data = &sysctl_bpf_stats_enabled,
1257 .maxlen = sizeof(sysctl_bpf_stats_enabled),
1258 .mode = 0644,
1259 .proc_handler = proc_dointvec_minmax_bpf_stats,
1260 .extra1 = &zero,
1261 .extra2 = &one,
1262 },
1263 #endif
1264 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1265 {
1266 .procname = "panic_on_rcu_stall",
1267 .data = &sysctl_panic_on_rcu_stall,
1268 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
1269 .mode = 0644,
1270 .proc_handler = proc_dointvec_minmax,
1271 .extra1 = &zero,
1272 .extra2 = &one,
1273 },
1274 #endif
1275 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1276 {
1277 .procname = "stack_erasing",
1278 .data = NULL,
1279 .maxlen = sizeof(int),
1280 .mode = 0600,
1281 .proc_handler = stack_erasing_sysctl,
1282 .extra1 = &zero,
1283 .extra2 = &one,
1284 },
1285 #endif
1286 { }
1287 };
1288
1289 static struct ctl_table vm_table[] = {
1290 {
1291 .procname = "overcommit_memory",
1292 .data = &sysctl_overcommit_memory,
1293 .maxlen = sizeof(sysctl_overcommit_memory),
1294 .mode = 0644,
1295 .proc_handler = proc_dointvec_minmax,
1296 .extra1 = &zero,
1297 .extra2 = &two,
1298 },
1299 {
1300 .procname = "panic_on_oom",
1301 .data = &sysctl_panic_on_oom,
1302 .maxlen = sizeof(sysctl_panic_on_oom),
1303 .mode = 0644,
1304 .proc_handler = proc_dointvec_minmax,
1305 .extra1 = &zero,
1306 .extra2 = &two,
1307 },
1308 {
1309 .procname = "oom_kill_allocating_task",
1310 .data = &sysctl_oom_kill_allocating_task,
1311 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
1312 .mode = 0644,
1313 .proc_handler = proc_dointvec,
1314 },
1315 {
1316 .procname = "oom_dump_tasks",
1317 .data = &sysctl_oom_dump_tasks,
1318 .maxlen = sizeof(sysctl_oom_dump_tasks),
1319 .mode = 0644,
1320 .proc_handler = proc_dointvec,
1321 },
1322 {
1323 .procname = "overcommit_ratio",
1324 .data = &sysctl_overcommit_ratio,
1325 .maxlen = sizeof(sysctl_overcommit_ratio),
1326 .mode = 0644,
1327 .proc_handler = overcommit_ratio_handler,
1328 },
1329 {
1330 .procname = "overcommit_kbytes",
1331 .data = &sysctl_overcommit_kbytes,
1332 .maxlen = sizeof(sysctl_overcommit_kbytes),
1333 .mode = 0644,
1334 .proc_handler = overcommit_kbytes_handler,
1335 },
1336 {
1337 .procname = "page-cluster",
1338 .data = &page_cluster,
1339 .maxlen = sizeof(int),
1340 .mode = 0644,
1341 .proc_handler = proc_dointvec_minmax,
1342 .extra1 = &zero,
1343 },
1344 {
1345 .procname = "dirty_background_ratio",
1346 .data = &dirty_background_ratio,
1347 .maxlen = sizeof(dirty_background_ratio),
1348 .mode = 0644,
1349 .proc_handler = dirty_background_ratio_handler,
1350 .extra1 = &zero,
1351 .extra2 = &one_hundred,
1352 },
1353 {
1354 .procname = "dirty_background_bytes",
1355 .data = &dirty_background_bytes,
1356 .maxlen = sizeof(dirty_background_bytes),
1357 .mode = 0644,
1358 .proc_handler = dirty_background_bytes_handler,
1359 .extra1 = &one_ul,
1360 },
1361 {
1362 .procname = "dirty_ratio",
1363 .data = &vm_dirty_ratio,
1364 .maxlen = sizeof(vm_dirty_ratio),
1365 .mode = 0644,
1366 .proc_handler = dirty_ratio_handler,
1367 .extra1 = &zero,
1368 .extra2 = &one_hundred,
1369 },
1370 {
1371 .procname = "dirty_bytes",
1372 .data = &vm_dirty_bytes,
1373 .maxlen = sizeof(vm_dirty_bytes),
1374 .mode = 0644,
1375 .proc_handler = dirty_bytes_handler,
1376 .extra1 = &dirty_bytes_min,
1377 },
1378 {
1379 .procname = "dirty_writeback_centisecs",
1380 .data = &dirty_writeback_interval,
1381 .maxlen = sizeof(dirty_writeback_interval),
1382 .mode = 0644,
1383 .proc_handler = dirty_writeback_centisecs_handler,
1384 },
1385 {
1386 .procname = "dirty_expire_centisecs",
1387 .data = &dirty_expire_interval,
1388 .maxlen = sizeof(dirty_expire_interval),
1389 .mode = 0644,
1390 .proc_handler = proc_dointvec_minmax,
1391 .extra1 = &zero,
1392 },
1393 {
1394 .procname = "dirtytime_expire_seconds",
1395 .data = &dirtytime_expire_interval,
1396 .maxlen = sizeof(dirtytime_expire_interval),
1397 .mode = 0644,
1398 .proc_handler = dirtytime_interval_handler,
1399 .extra1 = &zero,
1400 },
1401 {
1402 .procname = "swappiness",
1403 .data = &vm_swappiness,
1404 .maxlen = sizeof(vm_swappiness),
1405 .mode = 0644,
1406 .proc_handler = proc_dointvec_minmax,
1407 .extra1 = &zero,
1408 .extra2 = &one_hundred,
1409 },
1410 #ifdef CONFIG_HUGETLB_PAGE
1411 {
1412 .procname = "nr_hugepages",
1413 .data = NULL,
1414 .maxlen = sizeof(unsigned long),
1415 .mode = 0644,
1416 .proc_handler = hugetlb_sysctl_handler,
1417 },
1418 #ifdef CONFIG_NUMA
1419 {
1420 .procname = "nr_hugepages_mempolicy",
1421 .data = NULL,
1422 .maxlen = sizeof(unsigned long),
1423 .mode = 0644,
1424 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1425 },
1426 {
1427 .procname = "numa_stat",
1428 .data = &sysctl_vm_numa_stat,
1429 .maxlen = sizeof(int),
1430 .mode = 0644,
1431 .proc_handler = sysctl_vm_numa_stat_handler,
1432 .extra1 = &zero,
1433 .extra2 = &one,
1434 },
1435 #endif
1436 {
1437 .procname = "hugetlb_shm_group",
1438 .data = &sysctl_hugetlb_shm_group,
1439 .maxlen = sizeof(gid_t),
1440 .mode = 0644,
1441 .proc_handler = proc_dointvec,
1442 },
1443 {
1444 .procname = "nr_overcommit_hugepages",
1445 .data = NULL,
1446 .maxlen = sizeof(unsigned long),
1447 .mode = 0644,
1448 .proc_handler = hugetlb_overcommit_handler,
1449 },
1450 #endif
1451 {
1452 .procname = "lowmem_reserve_ratio",
1453 .data = &sysctl_lowmem_reserve_ratio,
1454 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1455 .mode = 0644,
1456 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1457 },
1458 {
1459 .procname = "drop_caches",
1460 .data = &sysctl_drop_caches,
1461 .maxlen = sizeof(int),
1462 .mode = 0644,
1463 .proc_handler = drop_caches_sysctl_handler,
1464 .extra1 = &one,
1465 .extra2 = &four,
1466 },
1467 #ifdef CONFIG_COMPACTION
1468 {
1469 .procname = "compact_memory",
1470 .data = &sysctl_compact_memory,
1471 .maxlen = sizeof(int),
1472 .mode = 0200,
1473 .proc_handler = sysctl_compaction_handler,
1474 },
1475 {
1476 .procname = "extfrag_threshold",
1477 .data = &sysctl_extfrag_threshold,
1478 .maxlen = sizeof(int),
1479 .mode = 0644,
1480 .proc_handler = proc_dointvec_minmax,
1481 .extra1 = &min_extfrag_threshold,
1482 .extra2 = &max_extfrag_threshold,
1483 },
1484 {
1485 .procname = "compact_unevictable_allowed",
1486 .data = &sysctl_compact_unevictable_allowed,
1487 .maxlen = sizeof(int),
1488 .mode = 0644,
1489 .proc_handler = proc_dointvec,
1490 .extra1 = &zero,
1491 .extra2 = &one,
1492 },
1493
1494 #endif /* CONFIG_COMPACTION */
1495 {
1496 .procname = "min_free_kbytes",
1497 .data = &min_free_kbytes,
1498 .maxlen = sizeof(min_free_kbytes),
1499 .mode = 0644,
1500 .proc_handler = min_free_kbytes_sysctl_handler,
1501 .extra1 = &zero,
1502 },
1503 {
1504 .procname = "watermark_boost_factor",
1505 .data = &watermark_boost_factor,
1506 .maxlen = sizeof(watermark_boost_factor),
1507 .mode = 0644,
1508 .proc_handler = watermark_boost_factor_sysctl_handler,
1509 .extra1 = &zero,
1510 },
1511 {
1512 .procname = "watermark_scale_factor",
1513 .data = &watermark_scale_factor,
1514 .maxlen = sizeof(watermark_scale_factor),
1515 .mode = 0644,
1516 .proc_handler = watermark_scale_factor_sysctl_handler,
1517 .extra1 = &one,
1518 .extra2 = &one_thousand,
1519 },
1520 {
1521 .procname = "percpu_pagelist_fraction",
1522 .data = &percpu_pagelist_fraction,
1523 .maxlen = sizeof(percpu_pagelist_fraction),
1524 .mode = 0644,
1525 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1526 .extra1 = &zero,
1527 },
1528 #ifdef CONFIG_MMU
1529 {
1530 .procname = "max_map_count",
1531 .data = &sysctl_max_map_count,
1532 .maxlen = sizeof(sysctl_max_map_count),
1533 .mode = 0644,
1534 .proc_handler = proc_dointvec_minmax,
1535 .extra1 = &zero,
1536 },
1537 #else
1538 {
1539 .procname = "nr_trim_pages",
1540 .data = &sysctl_nr_trim_pages,
1541 .maxlen = sizeof(sysctl_nr_trim_pages),
1542 .mode = 0644,
1543 .proc_handler = proc_dointvec_minmax,
1544 .extra1 = &zero,
1545 },
1546 #endif
1547 {
1548 .procname = "laptop_mode",
1549 .data = &laptop_mode,
1550 .maxlen = sizeof(laptop_mode),
1551 .mode = 0644,
1552 .proc_handler = proc_dointvec_jiffies,
1553 },
1554 {
1555 .procname = "block_dump",
1556 .data = &block_dump,
1557 .maxlen = sizeof(block_dump),
1558 .mode = 0644,
1559 .proc_handler = proc_dointvec,
1560 .extra1 = &zero,
1561 },
1562 {
1563 .procname = "vfs_cache_pressure",
1564 .data = &sysctl_vfs_cache_pressure,
1565 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1566 .mode = 0644,
1567 .proc_handler = proc_dointvec,
1568 .extra1 = &zero,
1569 },
1570 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1571 {
1572 .procname = "legacy_va_layout",
1573 .data = &sysctl_legacy_va_layout,
1574 .maxlen = sizeof(sysctl_legacy_va_layout),
1575 .mode = 0644,
1576 .proc_handler = proc_dointvec,
1577 .extra1 = &zero,
1578 },
1579 #endif
1580 #ifdef CONFIG_NUMA
1581 {
1582 .procname = "zone_reclaim_mode",
1583 .data = &node_reclaim_mode,
1584 .maxlen = sizeof(node_reclaim_mode),
1585 .mode = 0644,
1586 .proc_handler = proc_dointvec,
1587 .extra1 = &zero,
1588 },
1589 {
1590 .procname = "min_unmapped_ratio",
1591 .data = &sysctl_min_unmapped_ratio,
1592 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1593 .mode = 0644,
1594 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1595 .extra1 = &zero,
1596 .extra2 = &one_hundred,
1597 },
1598 {
1599 .procname = "min_slab_ratio",
1600 .data = &sysctl_min_slab_ratio,
1601 .maxlen = sizeof(sysctl_min_slab_ratio),
1602 .mode = 0644,
1603 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1604 .extra1 = &zero,
1605 .extra2 = &one_hundred,
1606 },
1607 #endif
1608 #ifdef CONFIG_SMP
1609 {
1610 .procname = "stat_interval",
1611 .data = &sysctl_stat_interval,
1612 .maxlen = sizeof(sysctl_stat_interval),
1613 .mode = 0644,
1614 .proc_handler = proc_dointvec_jiffies,
1615 },
1616 {
1617 .procname = "stat_refresh",
1618 .data = NULL,
1619 .maxlen = 0,
1620 .mode = 0600,
1621 .proc_handler = vmstat_refresh,
1622 },
1623 #endif
1624 #ifdef CONFIG_MMU
1625 {
1626 .procname = "mmap_min_addr",
1627 .data = &dac_mmap_min_addr,
1628 .maxlen = sizeof(unsigned long),
1629 .mode = 0644,
1630 .proc_handler = mmap_min_addr_handler,
1631 },
1632 #endif
1633 #ifdef CONFIG_NUMA
1634 {
1635 .procname = "numa_zonelist_order",
1636 .data = &numa_zonelist_order,
1637 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1638 .mode = 0644,
1639 .proc_handler = numa_zonelist_order_handler,
1640 },
1641 #endif
1642 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1643 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1644 {
1645 .procname = "vdso_enabled",
1646 #ifdef CONFIG_X86_32
1647 .data = &vdso32_enabled,
1648 .maxlen = sizeof(vdso32_enabled),
1649 #else
1650 .data = &vdso_enabled,
1651 .maxlen = sizeof(vdso_enabled),
1652 #endif
1653 .mode = 0644,
1654 .proc_handler = proc_dointvec,
1655 .extra1 = &zero,
1656 },
1657 #endif
1658 #ifdef CONFIG_HIGHMEM
1659 {
1660 .procname = "highmem_is_dirtyable",
1661 .data = &vm_highmem_is_dirtyable,
1662 .maxlen = sizeof(vm_highmem_is_dirtyable),
1663 .mode = 0644,
1664 .proc_handler = proc_dointvec_minmax,
1665 .extra1 = &zero,
1666 .extra2 = &one,
1667 },
1668 #endif
1669 #ifdef CONFIG_MEMORY_FAILURE
1670 {
1671 .procname = "memory_failure_early_kill",
1672 .data = &sysctl_memory_failure_early_kill,
1673 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1674 .mode = 0644,
1675 .proc_handler = proc_dointvec_minmax,
1676 .extra1 = &zero,
1677 .extra2 = &one,
1678 },
1679 {
1680 .procname = "memory_failure_recovery",
1681 .data = &sysctl_memory_failure_recovery,
1682 .maxlen = sizeof(sysctl_memory_failure_recovery),
1683 .mode = 0644,
1684 .proc_handler = proc_dointvec_minmax,
1685 .extra1 = &zero,
1686 .extra2 = &one,
1687 },
1688 #endif
1689 {
1690 .procname = "user_reserve_kbytes",
1691 .data = &sysctl_user_reserve_kbytes,
1692 .maxlen = sizeof(sysctl_user_reserve_kbytes),
1693 .mode = 0644,
1694 .proc_handler = proc_doulongvec_minmax,
1695 },
1696 {
1697 .procname = "admin_reserve_kbytes",
1698 .data = &sysctl_admin_reserve_kbytes,
1699 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
1700 .mode = 0644,
1701 .proc_handler = proc_doulongvec_minmax,
1702 },
1703 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1704 {
1705 .procname = "mmap_rnd_bits",
1706 .data = &mmap_rnd_bits,
1707 .maxlen = sizeof(mmap_rnd_bits),
1708 .mode = 0600,
1709 .proc_handler = proc_dointvec_minmax,
1710 .extra1 = (void *)&mmap_rnd_bits_min,
1711 .extra2 = (void *)&mmap_rnd_bits_max,
1712 },
1713 #endif
1714 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1715 {
1716 .procname = "mmap_rnd_compat_bits",
1717 .data = &mmap_rnd_compat_bits,
1718 .maxlen = sizeof(mmap_rnd_compat_bits),
1719 .mode = 0600,
1720 .proc_handler = proc_dointvec_minmax,
1721 .extra1 = (void *)&mmap_rnd_compat_bits_min,
1722 .extra2 = (void *)&mmap_rnd_compat_bits_max,
1723 },
1724 #endif
1725 #ifdef CONFIG_USERFAULTFD
1726 {
1727 .procname = "unprivileged_userfaultfd",
1728 .data = &sysctl_unprivileged_userfaultfd,
1729 .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
1730 .mode = 0644,
1731 .proc_handler = proc_dointvec_minmax,
1732 .extra1 = &zero,
1733 .extra2 = &one,
1734 },
1735 #endif
1736 { }
1737 };
1738
1739 static struct ctl_table fs_table[] = {
1740 {
1741 .procname = "inode-nr",
1742 .data = &inodes_stat,
1743 .maxlen = 2*sizeof(long),
1744 .mode = 0444,
1745 .proc_handler = proc_nr_inodes,
1746 },
1747 {
1748 .procname = "inode-state",
1749 .data = &inodes_stat,
1750 .maxlen = 7*sizeof(long),
1751 .mode = 0444,
1752 .proc_handler = proc_nr_inodes,
1753 },
1754 {
1755 .procname = "file-nr",
1756 .data = &files_stat,
1757 .maxlen = sizeof(files_stat),
1758 .mode = 0444,
1759 .proc_handler = proc_nr_files,
1760 },
1761 {
1762 .procname = "file-max",
1763 .data = &files_stat.max_files,
1764 .maxlen = sizeof(files_stat.max_files),
1765 .mode = 0644,
1766 .proc_handler = proc_doulongvec_minmax,
1767 .extra1 = &zero_ul,
1768 .extra2 = &long_max,
1769 },
1770 {
1771 .procname = "nr_open",
1772 .data = &sysctl_nr_open,
1773 .maxlen = sizeof(unsigned int),
1774 .mode = 0644,
1775 .proc_handler = proc_dointvec_minmax,
1776 .extra1 = &sysctl_nr_open_min,
1777 .extra2 = &sysctl_nr_open_max,
1778 },
1779 {
1780 .procname = "dentry-state",
1781 .data = &dentry_stat,
1782 .maxlen = 6*sizeof(long),
1783 .mode = 0444,
1784 .proc_handler = proc_nr_dentry,
1785 },
1786 {
1787 .procname = "overflowuid",
1788 .data = &fs_overflowuid,
1789 .maxlen = sizeof(int),
1790 .mode = 0644,
1791 .proc_handler = proc_dointvec_minmax,
1792 .extra1 = &minolduid,
1793 .extra2 = &maxolduid,
1794 },
1795 {
1796 .procname = "overflowgid",
1797 .data = &fs_overflowgid,
1798 .maxlen = sizeof(int),
1799 .mode = 0644,
1800 .proc_handler = proc_dointvec_minmax,
1801 .extra1 = &minolduid,
1802 .extra2 = &maxolduid,
1803 },
1804 #ifdef CONFIG_FILE_LOCKING
1805 {
1806 .procname = "leases-enable",
1807 .data = &leases_enable,
1808 .maxlen = sizeof(int),
1809 .mode = 0644,
1810 .proc_handler = proc_dointvec,
1811 },
1812 #endif
1813 #ifdef CONFIG_DNOTIFY
1814 {
1815 .procname = "dir-notify-enable",
1816 .data = &dir_notify_enable,
1817 .maxlen = sizeof(int),
1818 .mode = 0644,
1819 .proc_handler = proc_dointvec,
1820 },
1821 #endif
1822 #ifdef CONFIG_MMU
1823 #ifdef CONFIG_FILE_LOCKING
1824 {
1825 .procname = "lease-break-time",
1826 .data = &lease_break_time,
1827 .maxlen = sizeof(int),
1828 .mode = 0644,
1829 .proc_handler = proc_dointvec,
1830 },
1831 #endif
1832 #ifdef CONFIG_AIO
1833 {
1834 .procname = "aio-nr",
1835 .data = &aio_nr,
1836 .maxlen = sizeof(aio_nr),
1837 .mode = 0444,
1838 .proc_handler = proc_doulongvec_minmax,
1839 },
1840 {
1841 .procname = "aio-max-nr",
1842 .data = &aio_max_nr,
1843 .maxlen = sizeof(aio_max_nr),
1844 .mode = 0644,
1845 .proc_handler = proc_doulongvec_minmax,
1846 },
1847 #endif /* CONFIG_AIO */
1848 #ifdef CONFIG_INOTIFY_USER
1849 {
1850 .procname = "inotify",
1851 .mode = 0555,
1852 .child = inotify_table,
1853 },
1854 #endif
1855 #ifdef CONFIG_EPOLL
1856 {
1857 .procname = "epoll",
1858 .mode = 0555,
1859 .child = epoll_table,
1860 },
1861 #endif
1862 #endif
1863 {
1864 .procname = "protected_symlinks",
1865 .data = &sysctl_protected_symlinks,
1866 .maxlen = sizeof(int),
1867 .mode = 0600,
1868 .proc_handler = proc_dointvec_minmax,
1869 .extra1 = &zero,
1870 .extra2 = &one,
1871 },
1872 {
1873 .procname = "protected_hardlinks",
1874 .data = &sysctl_protected_hardlinks,
1875 .maxlen = sizeof(int),
1876 .mode = 0600,
1877 .proc_handler = proc_dointvec_minmax,
1878 .extra1 = &zero,
1879 .extra2 = &one,
1880 },
1881 {
1882 .procname = "protected_fifos",
1883 .data = &sysctl_protected_fifos,
1884 .maxlen = sizeof(int),
1885 .mode = 0600,
1886 .proc_handler = proc_dointvec_minmax,
1887 .extra1 = &zero,
1888 .extra2 = &two,
1889 },
1890 {
1891 .procname = "protected_regular",
1892 .data = &sysctl_protected_regular,
1893 .maxlen = sizeof(int),
1894 .mode = 0600,
1895 .proc_handler = proc_dointvec_minmax,
1896 .extra1 = &zero,
1897 .extra2 = &two,
1898 },
1899 {
1900 .procname = "suid_dumpable",
1901 .data = &suid_dumpable,
1902 .maxlen = sizeof(int),
1903 .mode = 0644,
1904 .proc_handler = proc_dointvec_minmax_coredump,
1905 .extra1 = &zero,
1906 .extra2 = &two,
1907 },
1908 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1909 {
1910 .procname = "binfmt_misc",
1911 .mode = 0555,
1912 .child = sysctl_mount_point,
1913 },
1914 #endif
1915 {
1916 .procname = "pipe-max-size",
1917 .data = &pipe_max_size,
1918 .maxlen = sizeof(pipe_max_size),
1919 .mode = 0644,
1920 .proc_handler = proc_dopipe_max_size,
1921 },
1922 {
1923 .procname = "pipe-user-pages-hard",
1924 .data = &pipe_user_pages_hard,
1925 .maxlen = sizeof(pipe_user_pages_hard),
1926 .mode = 0644,
1927 .proc_handler = proc_doulongvec_minmax,
1928 },
1929 {
1930 .procname = "pipe-user-pages-soft",
1931 .data = &pipe_user_pages_soft,
1932 .maxlen = sizeof(pipe_user_pages_soft),
1933 .mode = 0644,
1934 .proc_handler = proc_doulongvec_minmax,
1935 },
1936 {
1937 .procname = "mount-max",
1938 .data = &sysctl_mount_max,
1939 .maxlen = sizeof(unsigned int),
1940 .mode = 0644,
1941 .proc_handler = proc_dointvec_minmax,
1942 .extra1 = &one,
1943 },
1944 { }
1945 };
1946
1947 static struct ctl_table debug_table[] = {
1948 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1949 {
1950 .procname = "exception-trace",
1951 .data = &show_unhandled_signals,
1952 .maxlen = sizeof(int),
1953 .mode = 0644,
1954 .proc_handler = proc_dointvec
1955 },
1956 #endif
1957 #if defined(CONFIG_OPTPROBES)
1958 {
1959 .procname = "kprobes-optimization",
1960 .data = &sysctl_kprobes_optimization,
1961 .maxlen = sizeof(int),
1962 .mode = 0644,
1963 .proc_handler = proc_kprobes_optimization_handler,
1964 .extra1 = &zero,
1965 .extra2 = &one,
1966 },
1967 #endif
1968 { }
1969 };
1970
1971 static struct ctl_table dev_table[] = {
1972 { }
1973 };
1974
1975 int __init sysctl_init(void)
1976 {
1977 struct ctl_table_header *hdr;
1978
1979 hdr = register_sysctl_table(sysctl_base_table);
1980 kmemleak_not_leak(hdr);
1981 return 0;
1982 }
1983
1984 #endif /* CONFIG_SYSCTL */
1985
1986 /*
1987 * /proc/sys support
1988 */
1989
1990 #ifdef CONFIG_PROC_SYSCTL
1991
1992 static int _proc_do_string(char *data, int maxlen, int write,
1993 char __user *buffer,
1994 size_t *lenp, loff_t *ppos)
1995 {
1996 size_t len;
1997 char __user *p;
1998 char c;
1999
2000 if (!data || !maxlen || !*lenp) {
2001 *lenp = 0;
2002 return 0;
2003 }
2004
2005 if (write) {
2006 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
2007 /* Only continue writes not past the end of buffer. */
2008 len = strlen(data);
2009 if (len > maxlen - 1)
2010 len = maxlen - 1;
2011
2012 if (*ppos > len)
2013 return 0;
2014 len = *ppos;
2015 } else {
2016 /* Start writing from beginning of buffer. */
2017 len = 0;
2018 }
2019
2020 *ppos += *lenp;
2021 p = buffer;
2022 while ((p - buffer) < *lenp && len < maxlen - 1) {
2023 if (get_user(c, p++))
2024 return -EFAULT;
2025 if (c == 0 || c == '\n')
2026 break;
2027 data[len++] = c;
2028 }
2029 data[len] = 0;
2030 } else {
2031 len = strlen(data);
2032 if (len > maxlen)
2033 len = maxlen;
2034
2035 if (*ppos > len) {
2036 *lenp = 0;
2037 return 0;
2038 }
2039
2040 data += *ppos;
2041 len -= *ppos;
2042
2043 if (len > *lenp)
2044 len = *lenp;
2045 if (len)
2046 if (copy_to_user(buffer, data, len))
2047 return -EFAULT;
2048 if (len < *lenp) {
2049 if (put_user('\n', buffer + len))
2050 return -EFAULT;
2051 len++;
2052 }
2053 *lenp = len;
2054 *ppos += len;
2055 }
2056 return 0;
2057 }
2058
2059 static void warn_sysctl_write(struct ctl_table *table)
2060 {
2061 pr_warn_once("%s wrote to %s when file position was not 0!\n"
2062 "This will not be supported in the future. To silence this\n"
2063 "warning, set kernel.sysctl_writes_strict = -1\n",
2064 current->comm, table->procname);
2065 }
2066
2067 /**
2068 * proc_first_pos_non_zero_ignore - check if first position is allowed
2069 * @ppos: file position
2070 * @table: the sysctl table
2071 *
2072 * Returns true if the first position is non-zero and the sysctl_writes_strict
2073 * mode indicates this is not allowed for numeric input types. String proc
2074 * handlers can ignore the return value.
2075 */
2076 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2077 struct ctl_table *table)
2078 {
2079 if (!*ppos)
2080 return false;
2081
2082 switch (sysctl_writes_strict) {
2083 case SYSCTL_WRITES_STRICT:
2084 return true;
2085 case SYSCTL_WRITES_WARN:
2086 warn_sysctl_write(table);
2087 return false;
2088 default:
2089 return false;
2090 }
2091 }
2092
2093 /**
2094 * proc_dostring - read a string sysctl
2095 * @table: the sysctl table
2096 * @write: %TRUE if this is a write to the sysctl file
2097 * @buffer: the user buffer
2098 * @lenp: the size of the user buffer
2099 * @ppos: file position
2100 *
2101 * Reads/writes a string from/to the user buffer. If the kernel
2102 * buffer provided is not large enough to hold the string, the
2103 * string is truncated. The copied string is %NULL-terminated.
2104 * If the string is being read by the user process, it is copied
2105 * and a newline '\n' is added. It is truncated if the buffer is
2106 * not large enough.
2107 *
2108 * Returns 0 on success.
2109 */
2110 int proc_dostring(struct ctl_table *table, int write,
2111 void __user *buffer, size_t *lenp, loff_t *ppos)
2112 {
2113 if (write)
2114 proc_first_pos_non_zero_ignore(ppos, table);
2115
2116 return _proc_do_string((char *)(table->data), table->maxlen, write,
2117 (char __user *)buffer, lenp, ppos);
2118 }
2119
/*
 * Move *buf to wherever skip_spaces() lands and return how many
 * characters the pointer advanced.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;
	char *end = skip_spaces(start);

	*buf = end;
	return end - start;
}
2128
/*
 * Advance *buf past a run of the character @v, decrementing *size for each
 * character consumed.  Stops at the first other character or when *size
 * reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
2138
2139 /**
2140 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
2141 * fail on overflow
2142 *
2143 * @cp: kernel buffer containing the string to parse
2144 * @endp: pointer to store the trailing characters
2145 * @base: the base to use
2146 * @res: where the parsed integer will be stored
2147 *
2148 * In case of success 0 is returned and @res will contain the parsed integer,
2149 * @endp will hold any trailing characters.
2150 * This function will fail the parse on overflow. If there wasn't an overflow
2151 * the function will defer the decision what characters count as invalid to the
2152 * caller.
2153 */
2154 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
2155 unsigned long *res)
2156 {
2157 unsigned long long result;
2158 unsigned int rv;
2159
2160 cp = _parse_integer_fixup_radix(cp, &base);
2161 rv = _parse_integer(cp, base, &result);
2162 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
2163 return -ERANGE;
2164
2165 cp += rv;
2166
2167 if (endp)
2168 *endp = (char *)cp;
2169
2170 *res = (unsigned long)result;
2171 return 0;
2172 }
2173
2174 #define TMPBUFLEN 22
2175 /**
2176 * proc_get_long - reads an ASCII formatted integer from a user buffer
2177 *
2178 * @buf: a kernel buffer
2179 * @size: size of the kernel buffer
2180 * @val: this is where the number will be stored
2181 * @neg: set to %TRUE if number is negative
2182 * @perm_tr: a vector which contains the allowed trailers
2183 * @perm_tr_len: size of the perm_tr vector
2184 * @tr: pointer to store the trailer character
2185 *
2186 * In case of success %0 is returned and @buf and @size are updated with
2187 * the amount of bytes read. If @tr is non-NULL and a trailing
2188 * character exists (size is non-zero after returning from this
2189 * function), @tr is updated with the trailing character.
2190 */
2191 static int proc_get_long(char **buf, size_t *size,
2192 unsigned long *val, bool *neg,
2193 const char *perm_tr, unsigned perm_tr_len, char *tr)
2194 {
2195 int len;
2196 char *p, tmp[TMPBUFLEN];
2197
2198 if (!*size)
2199 return -EINVAL;
2200
2201 len = *size;
2202 if (len > TMPBUFLEN - 1)
2203 len = TMPBUFLEN - 1;
2204
2205 memcpy(tmp, *buf, len);
2206
2207 tmp[len] = 0;
2208 p = tmp;
2209 if (*p == '-' && *size > 1) {
2210 *neg = true;
2211 p++;
2212 } else
2213 *neg = false;
2214 if (!isdigit(*p))
2215 return -EINVAL;
2216
2217 if (strtoul_lenient(p, &p, 0, val))
2218 return -EINVAL;
2219
2220 len = p - tmp;
2221
2222 /* We don't know if the next char is whitespace thus we may accept
2223 * invalid integers (e.g. 1234...a) or two integers instead of one
2224 * (e.g. 123...1). So lets not allow such large numbers. */
2225 if (len == TMPBUFLEN - 1)
2226 return -EINVAL;
2227
2228 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2229 return -EINVAL;
2230
2231 if (tr && (len < *size))
2232 *tr = *p;
2233
2234 *buf += len;
2235 *size -= len;
2236
2237 return 0;
2238 }
2239
2240 /**
2241 * proc_put_long - converts an integer to a decimal ASCII formatted string
2242 *
2243 * @buf: the user buffer
2244 * @size: the size of the user buffer
2245 * @val: the integer to be converted
2246 * @neg: sign of the number, %TRUE for negative
2247 *
2248 * In case of success %0 is returned and @buf and @size are updated with
2249 * the amount of bytes written.
2250 */
2251 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2252 bool neg)
2253 {
2254 int len;
2255 char tmp[TMPBUFLEN], *p = tmp;
2256
2257 sprintf(p, "%s%lu", neg ? "-" : "", val);
2258 len = strlen(tmp);
2259 if (len > *size)
2260 len = *size;
2261 if (copy_to_user(*buf, tmp, len))
2262 return -EFAULT;
2263 *size -= len;
2264 *buf += len;
2265 return 0;
2266 }
2267 #undef TMPBUFLEN
2268
2269 static int proc_put_char(void __user **buf, size_t *size, char c)
2270 {
2271 if (*size) {
2272 char __user **buffer = (char __user **)buf;
2273 if (put_user(c, *buffer))
2274 return -EFAULT;
2275 (*size)--, (*buffer)++;
2276 *buf = *buffer;
2277 }
2278 return 0;
2279 }
2280
/*
 * Default conversion between the sign/magnitude pair used by the parsing
 * helpers and a stored int.
 *
 * @write non-zero: turn (*negp, *lvalp) into *valp; returns -EINVAL if the
 * magnitude does not fit (more than INT_MAX, or more than INT_MAX + 1 for a
 * negative number).
 * @write zero: split *valp into *negp and *lvalp.
 * @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long limit;

	if (!write) {
		int cur = *valp;

		*negp = cur < 0;
		/* negate as unsigned so INT_MIN is handled without overflow */
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	/* The negative range reaches one further than the positive one. */
	limit = (unsigned long) INT_MAX;
	if (*negp)
		limit++;
	if (*lvalp > limit)
		return -EINVAL;

	*valp = *negp ? -*lvalp : *lvalp;
	return 0;
}
2307
/*
 * Default conversion between an unsigned long scratch value and a stored
 * unsigned int.
 *
 * @write non-zero: store *lvalp in *valp, or return -EINVAL if it exceeds
 * UINT_MAX.  @write zero: widen *valp into *lvalp.  @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2322
2323 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2324
2325 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2326 int write, void __user *buffer,
2327 size_t *lenp, loff_t *ppos,
2328 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2329 int write, void *data),
2330 void *data)
2331 {
2332 int *i, vleft, first = 1, err = 0;
2333 size_t left;
2334 char *kbuf = NULL, *p;
2335
2336 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2337 *lenp = 0;
2338 return 0;
2339 }
2340
2341 i = (int *) tbl_data;
2342 vleft = table->maxlen / sizeof(*i);
2343 left = *lenp;
2344
2345 if (!conv)
2346 conv = do_proc_dointvec_conv;
2347
2348 if (write) {
2349 if (proc_first_pos_non_zero_ignore(ppos, table))
2350 goto out;
2351
2352 if (left > PAGE_SIZE - 1)
2353 left = PAGE_SIZE - 1;
2354 p = kbuf = memdup_user_nul(buffer, left);
2355 if (IS_ERR(kbuf))
2356 return PTR_ERR(kbuf);
2357 }
2358
2359 for (; left && vleft--; i++, first=0) {
2360 unsigned long lval;
2361 bool neg;
2362
2363 if (write) {
2364 left -= proc_skip_spaces(&p);
2365
2366 if (!left)
2367 break;
2368 err = proc_get_long(&p, &left, &lval, &neg,
2369 proc_wspace_sep,
2370 sizeof(proc_wspace_sep), NULL);
2371 if (err)
2372 break;
2373 if (conv(&neg, &lval, i, 1, data)) {
2374 err = -EINVAL;
2375 break;
2376 }
2377 } else {
2378 if (conv(&neg, &lval, i, 0, data)) {
2379 err = -EINVAL;
2380 break;
2381 }
2382 if (!first)
2383 err = proc_put_char(&buffer, &left, '\t');
2384 if (err)
2385 break;
2386 err = proc_put_long(&buffer, &left, lval, neg);
2387 if (err)
2388 break;
2389 }
2390 }
2391
2392 if (!write && !first && left && !err)
2393 err = proc_put_char(&buffer, &left, '\n');
2394 if (write && !err && left)
2395 left -= proc_skip_spaces(&p);
2396 if (write) {
2397 kfree(kbuf);
2398 if (first)
2399 return err ? : -EINVAL;
2400 }
2401 *lenp -= left;
2402 out:
2403 *ppos += *lenp;
2404 return err;
2405 }
2406
2407 static int do_proc_dointvec(struct ctl_table *table, int write,
2408 void __user *buffer, size_t *lenp, loff_t *ppos,
2409 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2410 int write, void *data),
2411 void *data)
2412 {
2413 return __do_proc_dointvec(table->data, table, write,
2414 buffer, lenp, ppos, conv, data);
2415 }
2416
2417 static int do_proc_douintvec_w(unsigned int *tbl_data,
2418 struct ctl_table *table,
2419 void __user *buffer,
2420 size_t *lenp, loff_t *ppos,
2421 int (*conv)(unsigned long *lvalp,
2422 unsigned int *valp,
2423 int write, void *data),
2424 void *data)
2425 {
2426 unsigned long lval;
2427 int err = 0;
2428 size_t left;
2429 bool neg;
2430 char *kbuf = NULL, *p;
2431
2432 left = *lenp;
2433
2434 if (proc_first_pos_non_zero_ignore(ppos, table))
2435 goto bail_early;
2436
2437 if (left > PAGE_SIZE - 1)
2438 left = PAGE_SIZE - 1;
2439
2440 p = kbuf = memdup_user_nul(buffer, left);
2441 if (IS_ERR(kbuf))
2442 return -EINVAL;
2443
2444 left -= proc_skip_spaces(&p);
2445 if (!left) {
2446 err = -EINVAL;
2447 goto out_free;
2448 }
2449
2450 err = proc_get_long(&p, &left, &lval, &neg,
2451 proc_wspace_sep,
2452 sizeof(proc_wspace_sep), NULL);
2453 if (err || neg) {
2454 err = -EINVAL;
2455 goto out_free;
2456 }
2457
2458 if (conv(&lval, tbl_data, 1, data)) {
2459 err = -EINVAL;
2460 goto out_free;
2461 }
2462
2463 if (!err && left)
2464 left -= proc_skip_spaces(&p);
2465
2466 out_free:
2467 kfree(kbuf);
2468 if (err)
2469 return -EINVAL;
2470
2471 return 0;
2472
2473 /* This is in keeping with old __do_proc_dointvec() */
2474 bail_early:
2475 *ppos += *lenp;
2476 return err;
2477 }
2478
2479 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2480 size_t *lenp, loff_t *ppos,
2481 int (*conv)(unsigned long *lvalp,
2482 unsigned int *valp,
2483 int write, void *data),
2484 void *data)
2485 {
2486 unsigned long lval;
2487 int err = 0;
2488 size_t left;
2489
2490 left = *lenp;
2491
2492 if (conv(&lval, tbl_data, 0, data)) {
2493 err = -EINVAL;
2494 goto out;
2495 }
2496
2497 err = proc_put_long(&buffer, &left, lval, false);
2498 if (err || !left)
2499 goto out;
2500
2501 err = proc_put_char(&buffer, &left, '\n');
2502
2503 out:
2504 *lenp -= left;
2505 *ppos += *lenp;
2506
2507 return err;
2508 }
2509
2510 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2511 int write, void __user *buffer,
2512 size_t *lenp, loff_t *ppos,
2513 int (*conv)(unsigned long *lvalp,
2514 unsigned int *valp,
2515 int write, void *data),
2516 void *data)
2517 {
2518 unsigned int *i, vleft;
2519
2520 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2521 *lenp = 0;
2522 return 0;
2523 }
2524
2525 i = (unsigned int *) tbl_data;
2526 vleft = table->maxlen / sizeof(*i);
2527
2528 /*
2529 * Arrays are not supported, keep this simple. *Do not* add
2530 * support for them.
2531 */
2532 if (vleft != 1) {
2533 *lenp = 0;
2534 return -EINVAL;
2535 }
2536
2537 if (!conv)
2538 conv = do_proc_douintvec_conv;
2539
2540 if (write)
2541 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2542 conv, data);
2543 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2544 }
2545
2546 static int do_proc_douintvec(struct ctl_table *table, int write,
2547 void __user *buffer, size_t *lenp, loff_t *ppos,
2548 int (*conv)(unsigned long *lvalp,
2549 unsigned int *valp,
2550 int write, void *data),
2551 void *data)
2552 {
2553 return __do_proc_douintvec(table->data, table, write,
2554 buffer, lenp, ppos, conv, data);
2555 }
2556
2557 /**
2558 * proc_dointvec - read a vector of integers
2559 * @table: the sysctl table
2560 * @write: %TRUE if this is a write to the sysctl file
2561 * @buffer: the user buffer
2562 * @lenp: the size of the user buffer
2563 * @ppos: file position
2564 *
2565 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2566 * values from/to the user buffer, treated as an ASCII string.
2567 *
2568 * Returns 0 on success.
2569 */
2570 int proc_dointvec(struct ctl_table *table, int write,
2571 void __user *buffer, size_t *lenp, loff_t *ppos)
2572 {
2573 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2574 }
2575
2576 /**
2577 * proc_douintvec - read a vector of unsigned integers
2578 * @table: the sysctl table
2579 * @write: %TRUE if this is a write to the sysctl file
2580 * @buffer: the user buffer
2581 * @lenp: the size of the user buffer
2582 * @ppos: file position
2583 *
2584 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2585 * values from/to the user buffer, treated as an ASCII string.
2586 *
2587 * Returns 0 on success.
2588 */
2589 int proc_douintvec(struct ctl_table *table, int write,
2590 void __user *buffer, size_t *lenp, loff_t *ppos)
2591 {
2592 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2593 do_proc_douintvec_conv, NULL);
2594 }
2595
2596 /*
2597 * Taint values can only be increased
2598 * This means we can safely use a temporary.
2599 */
2600 static int proc_taint(struct ctl_table *table, int write,
2601 void __user *buffer, size_t *lenp, loff_t *ppos)
2602 {
2603 struct ctl_table t;
2604 unsigned long tmptaint = get_taint();
2605 int err;
2606
2607 if (write && !capable(CAP_SYS_ADMIN))
2608 return -EPERM;
2609
2610 t = *table;
2611 t.data = &tmptaint;
2612 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2613 if (err < 0)
2614 return err;
2615
2616 if (write) {
2617 /*
2618 * Poor man's atomic or. Not worth adding a primitive
2619 * to everyone's atomic.h for this
2620 */
2621 int i;
2622 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2623 if ((tmptaint >> i) & 1)
2624 add_taint(i, LOCKDEP_STILL_OK);
2625 }
2626 }
2627
2628 return err;
2629 }
2630
2631 #ifdef CONFIG_PRINTK
2632 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2633 void __user *buffer, size_t *lenp, loff_t *ppos)
2634 {
2635 if (write && !capable(CAP_SYS_ADMIN))
2636 return -EPERM;
2637
2638 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2639 }
2640 #endif
2641
/**
 * struct do_proc_dointvec_minmax_conv_param - range limits for proc_dointvec_minmax()
 * @min: pointer to the smallest value accepted on write, or NULL for no lower bound
 * @max: pointer to the largest value accepted on write, or NULL for no upper bound
 *
 * An instance of this structure is passed as the opaque data argument of
 * do_proc_dointvec_minmax_conv(), which uses it to range check values
 * written through the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min, *max;
};
2655
2656 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2657 int *valp,
2658 int write, void *data)
2659 {
2660 int tmp, ret;
2661 struct do_proc_dointvec_minmax_conv_param *param = data;
2662 /*
2663 * If writing, first do so via a temporary local int so we can
2664 * bounds-check it before touching *valp.
2665 */
2666 int *ip = write ? &tmp : valp;
2667
2668 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
2669 if (ret)
2670 return ret;
2671
2672 if (write) {
2673 if ((param->min && *param->min > tmp) ||
2674 (param->max && *param->max < tmp))
2675 return -EINVAL;
2676 *valp = tmp;
2677 }
2678
2679 return 0;
2680 }
2681
2682 /**
2683 * proc_dointvec_minmax - read a vector of integers with min/max values
2684 * @table: the sysctl table
2685 * @write: %TRUE if this is a write to the sysctl file
2686 * @buffer: the user buffer
2687 * @lenp: the size of the user buffer
2688 * @ppos: file position
2689 *
2690 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2691 * values from/to the user buffer, treated as an ASCII string.
2692 *
2693 * This routine will ensure the values are within the range specified by
2694 * table->extra1 (min) and table->extra2 (max).
2695 *
2696 * Returns 0 on success or -EINVAL on write when the range check fails.
2697 */
2698 int proc_dointvec_minmax(struct ctl_table *table, int write,
2699 void __user *buffer, size_t *lenp, loff_t *ppos)
2700 {
2701 struct do_proc_dointvec_minmax_conv_param param = {
2702 .min = (int *) table->extra1,
2703 .max = (int *) table->extra2,
2704 };
2705 return do_proc_dointvec(table, write, buffer, lenp, ppos,
2706 do_proc_dointvec_minmax_conv, &param);
2707 }
2708
/**
 * struct do_proc_douintvec_minmax_conv_param - range limits for proc_douintvec_minmax()
 * @min: pointer to the smallest value accepted on write, or NULL for no lower bound
 * @max: pointer to the largest value accepted on write, or NULL for no upper bound
 *
 * An instance of this structure is passed as the opaque data argument of
 * do_proc_douintvec_minmax_conv(), which uses it to range check values
 * written through the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min, *max;
};
2722
2723 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
2724 unsigned int *valp,
2725 int write, void *data)
2726 {
2727 int ret;
2728 unsigned int tmp;
2729 struct do_proc_douintvec_minmax_conv_param *param = data;
2730 /* write via temporary local uint for bounds-checking */
2731 unsigned int *up = write ? &tmp : valp;
2732
2733 ret = do_proc_douintvec_conv(lvalp, up, write, data);
2734 if (ret)
2735 return ret;
2736
2737 if (write) {
2738 if ((param->min && *param->min > tmp) ||
2739 (param->max && *param->max < tmp))
2740 return -ERANGE;
2741
2742 *valp = tmp;
2743 }
2744
2745 return 0;
2746 }
2747
2748 /**
2749 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2750 * @table: the sysctl table
2751 * @write: %TRUE if this is a write to the sysctl file
2752 * @buffer: the user buffer
2753 * @lenp: the size of the user buffer
2754 * @ppos: file position
2755 *
2756 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2757 * values from/to the user buffer, treated as an ASCII string. Negative
2758 * strings are not allowed.
2759 *
2760 * This routine will ensure the values are within the range specified by
2761 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2762 * check for UINT_MAX to avoid having to support wrap around uses from
2763 * userspace.
2764 *
2765 * Returns 0 on success or -ERANGE on write when the range check fails.
2766 */
2767 int proc_douintvec_minmax(struct ctl_table *table, int write,
2768 void __user *buffer, size_t *lenp, loff_t *ppos)
2769 {
2770 struct do_proc_douintvec_minmax_conv_param param = {
2771 .min = (unsigned int *) table->extra1,
2772 .max = (unsigned int *) table->extra2,
2773 };
2774 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2775 do_proc_douintvec_minmax_conv, &param);
2776 }
2777
2778 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2779 unsigned int *valp,
2780 int write, void *data)
2781 {
2782 if (write) {
2783 unsigned int val;
2784
2785 val = round_pipe_size(*lvalp);
2786 if (val == 0)
2787 return -EINVAL;
2788
2789 *valp = val;
2790 } else {
2791 unsigned int val = *valp;
2792 *lvalp = (unsigned long) val;
2793 }
2794
2795 return 0;
2796 }
2797
2798 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2799 void __user *buffer, size_t *lenp, loff_t *ppos)
2800 {
2801 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2802 do_proc_dopipe_max_size_conv, NULL);
2803 }
2804
/*
 * validate_coredump_safety - warn about an unsafe core dump configuration
 *
 * With CONFIG_COREDUMP enabled, logs a warning when suid_dumpable equals
 * SUID_DUMP_ROOT while core_pattern starts with neither '/' nor '|'.
 * The function only warns; it changes no setting.  Without
 * CONFIG_COREDUMP it does nothing.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	/* An absolute path or a pipe handler is considered safe. */
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2818
2819 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2820 void __user *buffer, size_t *lenp, loff_t *ppos)
2821 {
2822 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2823 if (!error)
2824 validate_coredump_safety();
2825 return error;
2826 }
2827
2828 #ifdef CONFIG_COREDUMP
2829 static int proc_dostring_coredump(struct ctl_table *table, int write,
2830 void __user *buffer, size_t *lenp, loff_t *ppos)
2831 {
2832 int error = proc_dostring(table, write, buffer, lenp, ppos);
2833 if (!error)
2834 validate_coredump_safety();
2835 return error;
2836 }
2837 #endif
2838
2839 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2840 void __user *buffer,
2841 size_t *lenp, loff_t *ppos,
2842 unsigned long convmul,
2843 unsigned long convdiv)
2844 {
2845 unsigned long *i, *min, *max;
2846 int vleft, first = 1, err = 0;
2847 size_t left;
2848 char *kbuf = NULL, *p;
2849
2850 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2851 *lenp = 0;
2852 return 0;
2853 }
2854
2855 i = (unsigned long *) data;
2856 min = (unsigned long *) table->extra1;
2857 max = (unsigned long *) table->extra2;
2858 vleft = table->maxlen / sizeof(unsigned long);
2859 left = *lenp;
2860
2861 if (write) {
2862 if (proc_first_pos_non_zero_ignore(ppos, table))
2863 goto out;
2864
2865 if (left > PAGE_SIZE - 1)
2866 left = PAGE_SIZE - 1;
2867 p = kbuf = memdup_user_nul(buffer, left);
2868 if (IS_ERR(kbuf))
2869 return PTR_ERR(kbuf);
2870 }
2871
2872 for (; left && vleft--; i++, first = 0) {
2873 unsigned long val;
2874
2875 if (write) {
2876 bool neg;
2877
2878 left -= proc_skip_spaces(&p);
2879 if (!left)
2880 break;
2881
2882 err = proc_get_long(&p, &left, &val, &neg,
2883 proc_wspace_sep,
2884 sizeof(proc_wspace_sep), NULL);
2885 if (err)
2886 break;
2887 if (neg)
2888 continue;
2889 val = convmul * val / convdiv;
2890 if ((min && val < *min) || (max && val > *max)) {
2891 err = -EINVAL;
2892 break;
2893 }
2894 *i = val;
2895 } else {
2896 val = convdiv * (*i) / convmul;
2897 if (!first) {
2898 err = proc_put_char(&buffer, &left, '\t');
2899 if (err)
2900 break;
2901 }
2902 err = proc_put_long(&buffer, &left, val, false);
2903 if (err)
2904 break;
2905 }
2906 }
2907
2908 if (!write && !first && left && !err)
2909 err = proc_put_char(&buffer, &left, '\n');
2910 if (write && !err)
2911 left -= proc_skip_spaces(&p);
2912 if (write) {
2913 kfree(kbuf);
2914 if (first)
2915 return err ? : -EINVAL;
2916 }
2917 *lenp -= left;
2918 out:
2919 *ppos += *lenp;
2920 return err;
2921 }
2922
2923 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2924 void __user *buffer,
2925 size_t *lenp, loff_t *ppos,
2926 unsigned long convmul,
2927 unsigned long convdiv)
2928 {
2929 return __do_proc_doulongvec_minmax(table->data, table, write,
2930 buffer, lenp, ppos, convmul, convdiv);
2931 }
2932
2933 /**
2934 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2935 * @table: the sysctl table
2936 * @write: %TRUE if this is a write to the sysctl file
2937 * @buffer: the user buffer
2938 * @lenp: the size of the user buffer
2939 * @ppos: file position
2940 *
2941 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2942 * values from/to the user buffer, treated as an ASCII string.
2943 *
2944 * This routine will ensure the values are within the range specified by
2945 * table->extra1 (min) and table->extra2 (max).
2946 *
2947 * Returns 0 on success.
2948 */
2949 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2950 void __user *buffer, size_t *lenp, loff_t *ppos)
2951 {
2952 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2953 }
2954
2955 /**
2956 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2957 * @table: the sysctl table
2958 * @write: %TRUE if this is a write to the sysctl file
2959 * @buffer: the user buffer
2960 * @lenp: the size of the user buffer
2961 * @ppos: file position
2962 *
2963 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2964 * values from/to the user buffer, treated as an ASCII string. The values
2965 * are treated as milliseconds, and converted to jiffies when they are stored.
2966 *
2967 * This routine will ensure the values are within the range specified by
2968 * table->extra1 (min) and table->extra2 (max).
2969 *
2970 * Returns 0 on success.
2971 */
2972 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2973 void __user *buffer,
2974 size_t *lenp, loff_t *ppos)
2975 {
2976 return do_proc_doulongvec_minmax(table, write, buffer,
2977 lenp, ppos, HZ, 1000l);
2978 }
2979
2980
2981 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2982 int *valp,
2983 int write, void *data)
2984 {
2985 if (write) {
2986 if (*lvalp > INT_MAX / HZ)
2987 return 1;
2988 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2989 } else {
2990 int val = *valp;
2991 unsigned long lval;
2992 if (val < 0) {
2993 *negp = true;
2994 lval = -(unsigned long)val;
2995 } else {
2996 *negp = false;
2997 lval = (unsigned long)val;
2998 }
2999 *lvalp = lval / HZ;
3000 }
3001 return 0;
3002 }
3003
3004 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
3005 int *valp,
3006 int write, void *data)
3007 {
3008 if (write) {
3009 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
3010 return 1;
3011 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
3012 } else {
3013 int val = *valp;
3014 unsigned long lval;
3015 if (val < 0) {
3016 *negp = true;
3017 lval = -(unsigned long)val;
3018 } else {
3019 *negp = false;
3020 lval = (unsigned long)val;
3021 }
3022 *lvalp = jiffies_to_clock_t(lval);
3023 }
3024 return 0;
3025 }
3026
/*
 * do_proc_dointvec_ms_jiffies_conv - convert between milliseconds and jiffies
 * @negp: sign of the user-visible value
 * @lvalp: magnitude of the user-visible value, in milliseconds
 * @valp: the kernel variable, in jiffies
 * @write: non-zero to convert milliseconds to jiffies, zero for the reverse
 * @data: unused
 *
 * Writes go through msecs_to_jiffies(), reads through jiffies_to_msecs().
 *
 * Returns 0 on success, or 1 when the converted jiffies value is larger
 * than INT_MAX.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int jiffies_val;

	if (write) {
		unsigned long jif;

		magnitude = *negp ? -*lvalp : *lvalp;
		jif = msecs_to_jiffies(magnitude);
		if (jif > INT_MAX)
			return 1;

		*valp = (int)jif;
		return 0;
	}

	/* Split the stored value into sign and magnitude before scaling. */
	jiffies_val = *valp;
	*negp = jiffies_val < 0;
	if (jiffies_val < 0)
		magnitude = -(unsigned long)jiffies_val;
	else
		magnitude = (unsigned long)jiffies_val;

	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
3051
3052 /**
3053 * proc_dointvec_jiffies - read a vector of integers as seconds
3054 * @table: the sysctl table
3055 * @write: %TRUE if this is a write to the sysctl file
3056 * @buffer: the user buffer
3057 * @lenp: the size of the user buffer
3058 * @ppos: file position
3059 *
3060 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3061 * values from/to the user buffer, treated as an ASCII string.
3062 * The values read are assumed to be in seconds, and are converted into
3063 * jiffies.
3064 *
3065 * Returns 0 on success.
3066 */
3067 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3068 void __user *buffer, size_t *lenp, loff_t *ppos)
3069 {
3070 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3071 do_proc_dointvec_jiffies_conv,NULL);
3072 }
3073
3074 /**
3075 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3076 * @table: the sysctl table
3077 * @write: %TRUE if this is a write to the sysctl file
3078 * @buffer: the user buffer
3079 * @lenp: the size of the user buffer
3080 * @ppos: pointer to the file position
3081 *
3082 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3083 * values from/to the user buffer, treated as an ASCII string.
3084 * The values read are assumed to be in 1/USER_HZ seconds, and
3085 * are converted into jiffies.
3086 *
3087 * Returns 0 on success.
3088 */
3089 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3090 void __user *buffer, size_t *lenp, loff_t *ppos)
3091 {
3092 return do_proc_dointvec(table,write,buffer,lenp,ppos,
3093 do_proc_dointvec_userhz_jiffies_conv,NULL);
3094 }
3095
3096 /**
3097 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3098 * @table: the sysctl table
3099 * @write: %TRUE if this is a write to the sysctl file
3100 * @buffer: the user buffer
3101 * @lenp: the size of the user buffer
3102 * @ppos: file position
3103 * @ppos: the current position in the file
3104 *
3105 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3106 * values from/to the user buffer, treated as an ASCII string.
3107 * The values read are assumed to be in 1/1000 seconds, and
3108 * are converted into jiffies.
3109 *
3110 * Returns 0 on success.
3111 */
3112 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3113 void __user *buffer, size_t *lenp, loff_t *ppos)
3114 {
3115 return do_proc_dointvec(table, write, buffer, lenp, ppos,
3116 do_proc_dointvec_ms_jiffies_conv, NULL);
3117 }
3118
3119 static int proc_do_cad_pid(struct ctl_table *table, int write,
3120 void __user *buffer, size_t *lenp, loff_t *ppos)
3121 {
3122 struct pid *new_pid;
3123 pid_t tmp;
3124 int r;
3125
3126 tmp = pid_vnr(cad_pid);
3127
3128 r = __do_proc_dointvec(&tmp, table, write, buffer,
3129 lenp, ppos, NULL, NULL);
3130 if (r || !write)
3131 return r;
3132
3133 new_pid = find_get_pid(tmp);
3134 if (!new_pid)
3135 return -ESRCH;
3136
3137 put_pid(xchg(&cad_pid, new_pid));
3138 return 0;
3139 }
3140
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * table->data holds a pointer to the bitmap pointer, and table->maxlen
 * holds the bitmap length in bits.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. A write at file
 * position 0 replaces the bitmap with the given input; a write at a
 * non-zero position ORs the given bits into the existing bitmap.
 *
 * At most PAGE_SIZE - 1 bytes of input are parsed per call. When the
 * input is longer, parsing stops before a number that may have been cut
 * off and the unparsed remainder is reported back through @lenp.
 *
 * On error the bitmap, @lenp and @ppos are left untouched.
 *
 * Returns 0 on success.
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	bool first = 1;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/*
	 * Character sets handed to proc_get_long() together with &c:
	 * tr_a for the first number of an entry, tr_b for the number that
	 * follows a '-'.
	 * NOTE(review): presumably the characters allowed to terminate the
	 * number, with the one found returned in c -- confirm against
	 * proc_get_long().
	 */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to transfer, or a read that does not start at offset 0. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *kbuf, *p;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		p = kbuf = memdup_user_nul(buffer, left);
		if (IS_ERR(kbuf))
			return PTR_ERR(kbuf);

		/*
		 * Parse into a scratch bitmap so that the real one is only
		 * modified once the whole chunk has been accepted.
		 */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap) {
			kfree(kbuf);
			return -ENOMEM;
		}
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					    sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the range val_a-val_a. */
			val_b = val_a;
			/* Step over the character that ended the number. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						    &neg, tr_b, sizeof(tr_b),
						    &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				/* Step over the character that ended the number. */
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			first = 0;
			proc_skip_char(&p, &left, '\n');
		}
		kfree(kbuf);
		/* Report the bytes beyond this pass as not consumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;

		while (left) {
			/* Find the next run of set bits: [bit_a, bit_b]. */
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first) {
				err = proc_put_char(&buffer, &left, ',');
				if (err)
					break;
			}
			err = proc_put_long(&buffer, &left, bit_a, false);
			if (err)
				break;
			/* Runs longer than one bit are printed as "a-b". */
			if (bit_a != bit_b) {
				err = proc_put_char(&buffer, &left, '-');
				if (err)
					break;
				err = proc_put_long(&buffer, &left, bit_b, false);
				if (err)
					break;
			}

			first = 0; bit_b++;
		}
		if (!err)
			err = proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/*
			 * A non-zero position means a write is continuing at
			 * an offset, so merge into the bitmap instead of
			 * replacing it.
			 */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL on the read path. */
	bitmap_free(tmp_bitmap);
	return err;
}
3305
3306 #else /* CONFIG_PROC_SYSCTL */
3307
/*
 * Stub handlers for the #else branch of CONFIG_PROC_SYSCTL.  Each
 * handler keeps its regular prototype, so code referring to it still
 * builds, but does no work and reports -ENOSYS.
 */

/* Stub: string handler. */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: int vector handler. */
int proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: unsigned int handler. */
int proc_douintvec(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: range-checked int vector handler. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: range-checked unsigned int handler. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: int vector handler with seconds <-> jiffies conversion. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: int vector handler with USER_HZ ticks <-> jiffies conversion. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: int vector handler with milliseconds <-> jiffies conversion. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: range-checked unsigned long vector handler. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: range-checked unsigned long vector handler, ms <-> jiffies. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

/* Stub: large bitmap handler. */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
3374
3375 #endif /* CONFIG_PROC_SYSCTL */
3376
3377 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
3378 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
3379 void __user *buffer, size_t *lenp,
3380 loff_t *ppos)
3381 {
3382 int ret, bpf_stats = *(int *)table->data;
3383 struct ctl_table tmp = *table;
3384
3385 if (write && !capable(CAP_SYS_ADMIN))
3386 return -EPERM;
3387
3388 tmp.data = &bpf_stats;
3389 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3390 if (write && !ret) {
3391 *(int *)table->data = bpf_stats;
3392 if (bpf_stats)
3393 static_branch_enable(&bpf_stats_enabled_key);
3394 else
3395 static_branch_disable(&bpf_stats_enabled_key);
3396 }
3397 return ret;
3398 }
3399 #endif
/*
 * The handlers exported below exist in two variants selected by
 * CONFIG_PROC_SYSCTL (the -ENOSYS stubs being one of them).  Placing an
 * export after each definition would mean writing every export twice,
 * so as an exception to the usual convention they are all listed once
 * here :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);