]> git.ipfire.org Git - people/arne_f/kernel.git/blob - kernel/sysctl.c
timekeeping: Repair ktime_get_coarse*() granularity
[people/arne_f/kernel.git] / kernel / sysctl.c
1 /*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69 #include <linux/pipe_fs_i.h>
70
71 #include <linux/uaccess.h>
72 #include <asm/processor.h>
73
74 #ifdef CONFIG_X86
75 #include <asm/nmi.h>
76 #include <asm/stacktrace.h>
77 #include <asm/io.h>
78 #endif
79 #ifdef CONFIG_SPARC
80 #include <asm/setup.h>
81 #endif
82 #ifdef CONFIG_BSD_PROCESS_ACCT
83 #include <linux/acct.h>
84 #endif
85 #ifdef CONFIG_RT_MUTEXES
86 #include <linux/rtmutex.h>
87 #endif
88 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
89 #include <linux/lockdep.h>
90 #endif
91 #ifdef CONFIG_CHR_DEV_SG
92 #include <scsi/sg.h>
93 #endif
94
95 #ifdef CONFIG_LOCKUP_DETECTOR
96 #include <linux/nmi.h>
97 #endif
98
99 #if defined(CONFIG_SYSCTL)
100
/*
 * Externally defined variables for which no header file provides a
 * declaration.
 */
extern int suid_dumpable;
#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif /* CONFIG_COREDUMP */
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int percpu_pagelist_fraction;
extern int latencytop_enabled;
extern unsigned int sysctl_nr_open_min;
extern unsigned int sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif /* !CONFIG_MMU */
116
117 /* Constants used for minimum and maximum */
118 #ifdef CONFIG_LOCKUP_DETECTOR
119 static int sixty = 60;
120 #endif
121
122 static int __maybe_unused neg_one = -1;
123
124 static int zero;
125 static int __maybe_unused one = 1;
126 static int __maybe_unused two = 2;
127 static int __maybe_unused four = 4;
128 static unsigned long zero_ul;
129 static unsigned long one_ul = 1;
130 static unsigned long long_max = LONG_MAX;
131 static int one_hundred = 100;
132 static int one_thousand = 1000;
133 #ifdef CONFIG_PRINTK
134 static int ten_thousand = 10000;
135 #endif
136 #ifdef CONFIG_PERF_EVENTS
137 static int six_hundred_forty_kb = 640 * 1024;
138 #endif
139
140 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
141 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
142
143 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
144 static int maxolduid = 65535;
145 static int minolduid;
146
147 static int ngroups_max = NGROUPS_MAX;
148 static const int cap_last_cap = CAP_LAST_CAP;
149
150 /*
151 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
152 * and hung_task_check_interval_secs
153 */
154 #ifdef CONFIG_DETECT_HUNG_TASK
155 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
156 #endif
157
158 #ifdef CONFIG_INOTIFY_USER
159 #include <linux/inotify.h>
160 #endif
161 #ifdef CONFIG_SPARC
162 #endif
163
164 #ifdef __hppa__
165 extern int pwrsw_enabled;
166 #endif
167
168 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
169 extern int unaligned_enabled;
170 #endif
171
172 #ifdef CONFIG_IA64
173 extern int unaligned_dump_stack;
174 #endif
175
176 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
177 extern int no_unaligned_warning;
178 #endif
179
180 #ifdef CONFIG_PROC_SYSCTL
181
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *	sysctl value; repeated writes through the same file descriptor
 *	overwrite the value no matter what the file position is, and a
 *	non-zero initial position triggers no warning.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, except that a warning
 *	is issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *	position 0, with the whole value contained in the buffer handed to
 *	the write syscall. String entries honour the file position, limited
 *	to the maximum length of the buffer: anything past the maximum length
 *	is ignored, and multiple writes append to the buffer.
 *
 * The modes determine how the current file position influences updates of
 * sysctl values made through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};
206
207 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
208
209 static int proc_do_cad_pid(struct ctl_table *table, int write,
210 void __user *buffer, size_t *lenp, loff_t *ppos);
211 static int proc_taint(struct ctl_table *table, int write,
212 void __user *buffer, size_t *lenp, loff_t *ppos);
213 #endif
214
215 #ifdef CONFIG_PRINTK
216 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
217 void __user *buffer, size_t *lenp, loff_t *ppos);
218 #endif
219
220 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
221 void __user *buffer, size_t *lenp, loff_t *ppos);
222 #ifdef CONFIG_COREDUMP
223 static int proc_dostring_coredump(struct ctl_table *table, int write,
224 void __user *buffer, size_t *lenp, loff_t *ppos);
225 #endif
226 static int proc_dopipe_max_size(struct ctl_table *table, int write,
227 void __user *buffer, size_t *lenp, loff_t *ppos);
228
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * Handler of the "sysrq" entry: lets proc_dointvec() do the actual read or
 * write, and after a successful write hands the value of __sysrq_enabled to
 * sysrq_toggle_support().
 *
 * Returns the error reported by proc_dointvec(), or 0 on success.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp,
				loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/* Only a write that went through may change the sysrq state */
	if (!ret && write)
		sysrq_toggle_support(__sysrq_enabled);

	return ret;
}

#endif /* CONFIG_MAGIC_SYSRQ */
250
251 static struct ctl_table kern_table[];
252 static struct ctl_table vm_table[];
253 static struct ctl_table fs_table[];
254 static struct ctl_table debug_table[];
255 static struct ctl_table dev_table[];
256 extern struct ctl_table random_table[];
257 #ifdef CONFIG_EPOLL
258 extern struct ctl_table epoll_table[];
259 #endif
260
261 #ifdef CONFIG_FW_LOADER_USER_HELPER
262 extern struct ctl_table firmware_config_table[];
263 #endif
264
265 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
266 int sysctl_legacy_va_layout;
267 #endif
268
269 /* The default sysctl tables: */
270
271 static struct ctl_table sysctl_base_table[] = {
272 {
273 .procname = "kernel",
274 .mode = 0555,
275 .child = kern_table,
276 },
277 {
278 .procname = "vm",
279 .mode = 0555,
280 .child = vm_table,
281 },
282 {
283 .procname = "fs",
284 .mode = 0555,
285 .child = fs_table,
286 },
287 {
288 .procname = "debug",
289 .mode = 0555,
290 .child = debug_table,
291 },
292 {
293 .procname = "dev",
294 .mode = 0555,
295 .child = dev_table,
296 },
297 { }
298 };
299
/* Bounds for the scheduler entries of kern_table */
#ifdef CONFIG_SCHED_DEBUG
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#ifdef CONFIG_SMP
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END - 1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif /* CONFIG_COMPACTION */
315
316 static struct ctl_table kern_table[] = {
317 {
318 .procname = "sched_child_runs_first",
319 .data = &sysctl_sched_child_runs_first,
320 .maxlen = sizeof(unsigned int),
321 .mode = 0644,
322 .proc_handler = proc_dointvec,
323 },
324 #ifdef CONFIG_SCHED_DEBUG
325 {
326 .procname = "sched_min_granularity_ns",
327 .data = &sysctl_sched_min_granularity,
328 .maxlen = sizeof(unsigned int),
329 .mode = 0644,
330 .proc_handler = sched_proc_update_handler,
331 .extra1 = &min_sched_granularity_ns,
332 .extra2 = &max_sched_granularity_ns,
333 },
334 {
335 .procname = "sched_latency_ns",
336 .data = &sysctl_sched_latency,
337 .maxlen = sizeof(unsigned int),
338 .mode = 0644,
339 .proc_handler = sched_proc_update_handler,
340 .extra1 = &min_sched_granularity_ns,
341 .extra2 = &max_sched_granularity_ns,
342 },
343 {
344 .procname = "sched_wakeup_granularity_ns",
345 .data = &sysctl_sched_wakeup_granularity,
346 .maxlen = sizeof(unsigned int),
347 .mode = 0644,
348 .proc_handler = sched_proc_update_handler,
349 .extra1 = &min_wakeup_granularity_ns,
350 .extra2 = &max_wakeup_granularity_ns,
351 },
352 #ifdef CONFIG_SMP
353 {
354 .procname = "sched_tunable_scaling",
355 .data = &sysctl_sched_tunable_scaling,
356 .maxlen = sizeof(enum sched_tunable_scaling),
357 .mode = 0644,
358 .proc_handler = sched_proc_update_handler,
359 .extra1 = &min_sched_tunable_scaling,
360 .extra2 = &max_sched_tunable_scaling,
361 },
362 {
363 .procname = "sched_migration_cost_ns",
364 .data = &sysctl_sched_migration_cost,
365 .maxlen = sizeof(unsigned int),
366 .mode = 0644,
367 .proc_handler = proc_dointvec,
368 },
369 {
370 .procname = "sched_nr_migrate",
371 .data = &sysctl_sched_nr_migrate,
372 .maxlen = sizeof(unsigned int),
373 .mode = 0644,
374 .proc_handler = proc_dointvec,
375 },
376 #ifdef CONFIG_SCHEDSTATS
377 {
378 .procname = "sched_schedstats",
379 .data = NULL,
380 .maxlen = sizeof(unsigned int),
381 .mode = 0644,
382 .proc_handler = sysctl_schedstats,
383 .extra1 = &zero,
384 .extra2 = &one,
385 },
386 #endif /* CONFIG_SCHEDSTATS */
387 #endif /* CONFIG_SMP */
388 #ifdef CONFIG_NUMA_BALANCING
389 {
390 .procname = "numa_balancing_scan_delay_ms",
391 .data = &sysctl_numa_balancing_scan_delay,
392 .maxlen = sizeof(unsigned int),
393 .mode = 0644,
394 .proc_handler = proc_dointvec,
395 },
396 {
397 .procname = "numa_balancing_scan_period_min_ms",
398 .data = &sysctl_numa_balancing_scan_period_min,
399 .maxlen = sizeof(unsigned int),
400 .mode = 0644,
401 .proc_handler = proc_dointvec,
402 },
403 {
404 .procname = "numa_balancing_scan_period_max_ms",
405 .data = &sysctl_numa_balancing_scan_period_max,
406 .maxlen = sizeof(unsigned int),
407 .mode = 0644,
408 .proc_handler = proc_dointvec,
409 },
410 {
411 .procname = "numa_balancing_scan_size_mb",
412 .data = &sysctl_numa_balancing_scan_size,
413 .maxlen = sizeof(unsigned int),
414 .mode = 0644,
415 .proc_handler = proc_dointvec_minmax,
416 .extra1 = &one,
417 },
418 {
419 .procname = "numa_balancing",
420 .data = NULL, /* filled in by handler */
421 .maxlen = sizeof(unsigned int),
422 .mode = 0644,
423 .proc_handler = sysctl_numa_balancing,
424 .extra1 = &zero,
425 .extra2 = &one,
426 },
427 #endif /* CONFIG_NUMA_BALANCING */
428 #endif /* CONFIG_SCHED_DEBUG */
429 {
430 .procname = "sched_rt_period_us",
431 .data = &sysctl_sched_rt_period,
432 .maxlen = sizeof(unsigned int),
433 .mode = 0644,
434 .proc_handler = sched_rt_handler,
435 },
436 {
437 .procname = "sched_rt_runtime_us",
438 .data = &sysctl_sched_rt_runtime,
439 .maxlen = sizeof(int),
440 .mode = 0644,
441 .proc_handler = sched_rt_handler,
442 },
443 {
444 .procname = "sched_rr_timeslice_ms",
445 .data = &sysctl_sched_rr_timeslice,
446 .maxlen = sizeof(int),
447 .mode = 0644,
448 .proc_handler = sched_rr_handler,
449 },
450 #ifdef CONFIG_SCHED_AUTOGROUP
451 {
452 .procname = "sched_autogroup_enabled",
453 .data = &sysctl_sched_autogroup_enabled,
454 .maxlen = sizeof(unsigned int),
455 .mode = 0644,
456 .proc_handler = proc_dointvec_minmax,
457 .extra1 = &zero,
458 .extra2 = &one,
459 },
460 #endif
461 #ifdef CONFIG_CFS_BANDWIDTH
462 {
463 .procname = "sched_cfs_bandwidth_slice_us",
464 .data = &sysctl_sched_cfs_bandwidth_slice,
465 .maxlen = sizeof(unsigned int),
466 .mode = 0644,
467 .proc_handler = proc_dointvec_minmax,
468 .extra1 = &one,
469 },
470 #endif
471 #ifdef CONFIG_PROVE_LOCKING
472 {
473 .procname = "prove_locking",
474 .data = &prove_locking,
475 .maxlen = sizeof(int),
476 .mode = 0644,
477 .proc_handler = proc_dointvec,
478 },
479 #endif
480 #ifdef CONFIG_LOCK_STAT
481 {
482 .procname = "lock_stat",
483 .data = &lock_stat,
484 .maxlen = sizeof(int),
485 .mode = 0644,
486 .proc_handler = proc_dointvec,
487 },
488 #endif
489 {
490 .procname = "panic",
491 .data = &panic_timeout,
492 .maxlen = sizeof(int),
493 .mode = 0644,
494 .proc_handler = proc_dointvec,
495 },
496 #ifdef CONFIG_COREDUMP
497 {
498 .procname = "core_uses_pid",
499 .data = &core_uses_pid,
500 .maxlen = sizeof(int),
501 .mode = 0644,
502 .proc_handler = proc_dointvec,
503 },
504 {
505 .procname = "core_pattern",
506 .data = core_pattern,
507 .maxlen = CORENAME_MAX_SIZE,
508 .mode = 0644,
509 .proc_handler = proc_dostring_coredump,
510 },
511 {
512 .procname = "core_pipe_limit",
513 .data = &core_pipe_limit,
514 .maxlen = sizeof(unsigned int),
515 .mode = 0644,
516 .proc_handler = proc_dointvec,
517 },
518 #endif
519 #ifdef CONFIG_PROC_SYSCTL
520 {
521 .procname = "tainted",
522 .maxlen = sizeof(long),
523 .mode = 0644,
524 .proc_handler = proc_taint,
525 },
526 {
527 .procname = "sysctl_writes_strict",
528 .data = &sysctl_writes_strict,
529 .maxlen = sizeof(int),
530 .mode = 0644,
531 .proc_handler = proc_dointvec_minmax,
532 .extra1 = &neg_one,
533 .extra2 = &one,
534 },
535 #endif
536 #ifdef CONFIG_LATENCYTOP
537 {
538 .procname = "latencytop",
539 .data = &latencytop_enabled,
540 .maxlen = sizeof(int),
541 .mode = 0644,
542 .proc_handler = sysctl_latencytop,
543 },
544 #endif
545 #ifdef CONFIG_BLK_DEV_INITRD
546 {
547 .procname = "real-root-dev",
548 .data = &real_root_dev,
549 .maxlen = sizeof(int),
550 .mode = 0644,
551 .proc_handler = proc_dointvec,
552 },
553 #endif
554 {
555 .procname = "print-fatal-signals",
556 .data = &print_fatal_signals,
557 .maxlen = sizeof(int),
558 .mode = 0644,
559 .proc_handler = proc_dointvec,
560 },
561 #ifdef CONFIG_SPARC
562 {
563 .procname = "reboot-cmd",
564 .data = reboot_command,
565 .maxlen = 256,
566 .mode = 0644,
567 .proc_handler = proc_dostring,
568 },
569 {
570 .procname = "stop-a",
571 .data = &stop_a_enabled,
572 .maxlen = sizeof (int),
573 .mode = 0644,
574 .proc_handler = proc_dointvec,
575 },
576 {
577 .procname = "scons-poweroff",
578 .data = &scons_pwroff,
579 .maxlen = sizeof (int),
580 .mode = 0644,
581 .proc_handler = proc_dointvec,
582 },
583 #endif
584 #ifdef CONFIG_SPARC64
585 {
586 .procname = "tsb-ratio",
587 .data = &sysctl_tsb_ratio,
588 .maxlen = sizeof (int),
589 .mode = 0644,
590 .proc_handler = proc_dointvec,
591 },
592 #endif
593 #ifdef __hppa__
594 {
595 .procname = "soft-power",
596 .data = &pwrsw_enabled,
597 .maxlen = sizeof (int),
598 .mode = 0644,
599 .proc_handler = proc_dointvec,
600 },
601 #endif
602 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
603 {
604 .procname = "unaligned-trap",
605 .data = &unaligned_enabled,
606 .maxlen = sizeof (int),
607 .mode = 0644,
608 .proc_handler = proc_dointvec,
609 },
610 #endif
611 {
612 .procname = "ctrl-alt-del",
613 .data = &C_A_D,
614 .maxlen = sizeof(int),
615 .mode = 0644,
616 .proc_handler = proc_dointvec,
617 },
618 #ifdef CONFIG_FUNCTION_TRACER
619 {
620 .procname = "ftrace_enabled",
621 .data = &ftrace_enabled,
622 .maxlen = sizeof(int),
623 .mode = 0644,
624 .proc_handler = ftrace_enable_sysctl,
625 },
626 #endif
627 #ifdef CONFIG_STACK_TRACER
628 {
629 .procname = "stack_tracer_enabled",
630 .data = &stack_tracer_enabled,
631 .maxlen = sizeof(int),
632 .mode = 0644,
633 .proc_handler = stack_trace_sysctl,
634 },
635 #endif
636 #ifdef CONFIG_TRACING
637 {
638 .procname = "ftrace_dump_on_oops",
639 .data = &ftrace_dump_on_oops,
640 .maxlen = sizeof(int),
641 .mode = 0644,
642 .proc_handler = proc_dointvec,
643 },
644 {
645 .procname = "traceoff_on_warning",
646 .data = &__disable_trace_on_warning,
647 .maxlen = sizeof(__disable_trace_on_warning),
648 .mode = 0644,
649 .proc_handler = proc_dointvec,
650 },
651 {
652 .procname = "tracepoint_printk",
653 .data = &tracepoint_printk,
654 .maxlen = sizeof(tracepoint_printk),
655 .mode = 0644,
656 .proc_handler = tracepoint_printk_sysctl,
657 },
658 #endif
659 #ifdef CONFIG_KEXEC_CORE
660 {
661 .procname = "kexec_load_disabled",
662 .data = &kexec_load_disabled,
663 .maxlen = sizeof(int),
664 .mode = 0644,
665 /* only handle a transition from default "0" to "1" */
666 .proc_handler = proc_dointvec_minmax,
667 .extra1 = &one,
668 .extra2 = &one,
669 },
670 #endif
671 #ifdef CONFIG_MODULES
672 {
673 .procname = "modprobe",
674 .data = &modprobe_path,
675 .maxlen = KMOD_PATH_LEN,
676 .mode = 0644,
677 .proc_handler = proc_dostring,
678 },
679 {
680 .procname = "modules_disabled",
681 .data = &modules_disabled,
682 .maxlen = sizeof(int),
683 .mode = 0644,
684 /* only handle a transition from default "0" to "1" */
685 .proc_handler = proc_dointvec_minmax,
686 .extra1 = &one,
687 .extra2 = &one,
688 },
689 #endif
690 #ifdef CONFIG_UEVENT_HELPER
691 {
692 .procname = "hotplug",
693 .data = &uevent_helper,
694 .maxlen = UEVENT_HELPER_PATH_LEN,
695 .mode = 0644,
696 .proc_handler = proc_dostring,
697 },
698 #endif
699 #ifdef CONFIG_CHR_DEV_SG
700 {
701 .procname = "sg-big-buff",
702 .data = &sg_big_buff,
703 .maxlen = sizeof (int),
704 .mode = 0444,
705 .proc_handler = proc_dointvec,
706 },
707 #endif
708 #ifdef CONFIG_BSD_PROCESS_ACCT
709 {
710 .procname = "acct",
711 .data = &acct_parm,
712 .maxlen = 3*sizeof(int),
713 .mode = 0644,
714 .proc_handler = proc_dointvec,
715 },
716 #endif
717 #ifdef CONFIG_MAGIC_SYSRQ
718 {
719 .procname = "sysrq",
720 .data = &__sysrq_enabled,
721 .maxlen = sizeof (int),
722 .mode = 0644,
723 .proc_handler = sysrq_sysctl_handler,
724 },
725 #endif
726 #ifdef CONFIG_PROC_SYSCTL
727 {
728 .procname = "cad_pid",
729 .data = NULL,
730 .maxlen = sizeof (int),
731 .mode = 0600,
732 .proc_handler = proc_do_cad_pid,
733 },
734 #endif
735 {
736 .procname = "threads-max",
737 .data = NULL,
738 .maxlen = sizeof(int),
739 .mode = 0644,
740 .proc_handler = sysctl_max_threads,
741 },
742 {
743 .procname = "random",
744 .mode = 0555,
745 .child = random_table,
746 },
747 {
748 .procname = "usermodehelper",
749 .mode = 0555,
750 .child = usermodehelper_table,
751 },
752 #ifdef CONFIG_FW_LOADER_USER_HELPER
753 {
754 .procname = "firmware_config",
755 .mode = 0555,
756 .child = firmware_config_table,
757 },
758 #endif
759 {
760 .procname = "overflowuid",
761 .data = &overflowuid,
762 .maxlen = sizeof(int),
763 .mode = 0644,
764 .proc_handler = proc_dointvec_minmax,
765 .extra1 = &minolduid,
766 .extra2 = &maxolduid,
767 },
768 {
769 .procname = "overflowgid",
770 .data = &overflowgid,
771 .maxlen = sizeof(int),
772 .mode = 0644,
773 .proc_handler = proc_dointvec_minmax,
774 .extra1 = &minolduid,
775 .extra2 = &maxolduid,
776 },
777 #ifdef CONFIG_S390
778 #ifdef CONFIG_MATHEMU
779 {
780 .procname = "ieee_emulation_warnings",
781 .data = &sysctl_ieee_emulation_warnings,
782 .maxlen = sizeof(int),
783 .mode = 0644,
784 .proc_handler = proc_dointvec,
785 },
786 #endif
787 {
788 .procname = "userprocess_debug",
789 .data = &show_unhandled_signals,
790 .maxlen = sizeof(int),
791 .mode = 0644,
792 .proc_handler = proc_dointvec,
793 },
794 #endif
795 {
796 .procname = "pid_max",
797 .data = &pid_max,
798 .maxlen = sizeof (int),
799 .mode = 0644,
800 .proc_handler = proc_dointvec_minmax,
801 .extra1 = &pid_max_min,
802 .extra2 = &pid_max_max,
803 },
804 {
805 .procname = "panic_on_oops",
806 .data = &panic_on_oops,
807 .maxlen = sizeof(int),
808 .mode = 0644,
809 .proc_handler = proc_dointvec,
810 },
811 #if defined CONFIG_PRINTK
812 {
813 .procname = "printk",
814 .data = &console_loglevel,
815 .maxlen = 4*sizeof(int),
816 .mode = 0644,
817 .proc_handler = proc_dointvec,
818 },
819 {
820 .procname = "printk_ratelimit",
821 .data = &printk_ratelimit_state.interval,
822 .maxlen = sizeof(int),
823 .mode = 0644,
824 .proc_handler = proc_dointvec_jiffies,
825 },
826 {
827 .procname = "printk_ratelimit_burst",
828 .data = &printk_ratelimit_state.burst,
829 .maxlen = sizeof(int),
830 .mode = 0644,
831 .proc_handler = proc_dointvec,
832 },
833 {
834 .procname = "printk_delay",
835 .data = &printk_delay_msec,
836 .maxlen = sizeof(int),
837 .mode = 0644,
838 .proc_handler = proc_dointvec_minmax,
839 .extra1 = &zero,
840 .extra2 = &ten_thousand,
841 },
842 {
843 .procname = "printk_devkmsg",
844 .data = devkmsg_log_str,
845 .maxlen = DEVKMSG_STR_MAX_SIZE,
846 .mode = 0644,
847 .proc_handler = devkmsg_sysctl_set_loglvl,
848 },
849 {
850 .procname = "dmesg_restrict",
851 .data = &dmesg_restrict,
852 .maxlen = sizeof(int),
853 .mode = 0644,
854 .proc_handler = proc_dointvec_minmax_sysadmin,
855 .extra1 = &zero,
856 .extra2 = &one,
857 },
858 {
859 .procname = "kptr_restrict",
860 .data = &kptr_restrict,
861 .maxlen = sizeof(int),
862 .mode = 0644,
863 .proc_handler = proc_dointvec_minmax_sysadmin,
864 .extra1 = &zero,
865 .extra2 = &two,
866 },
867 #endif
868 {
869 .procname = "ngroups_max",
870 .data = &ngroups_max,
871 .maxlen = sizeof (int),
872 .mode = 0444,
873 .proc_handler = proc_dointvec,
874 },
875 {
876 .procname = "cap_last_cap",
877 .data = (void *)&cap_last_cap,
878 .maxlen = sizeof(int),
879 .mode = 0444,
880 .proc_handler = proc_dointvec,
881 },
882 #if defined(CONFIG_LOCKUP_DETECTOR)
883 {
884 .procname = "watchdog",
885 .data = &watchdog_user_enabled,
886 .maxlen = sizeof(int),
887 .mode = 0644,
888 .proc_handler = proc_watchdog,
889 .extra1 = &zero,
890 .extra2 = &one,
891 },
892 {
893 .procname = "watchdog_thresh",
894 .data = &watchdog_thresh,
895 .maxlen = sizeof(int),
896 .mode = 0644,
897 .proc_handler = proc_watchdog_thresh,
898 .extra1 = &zero,
899 .extra2 = &sixty,
900 },
901 {
902 .procname = "nmi_watchdog",
903 .data = &nmi_watchdog_user_enabled,
904 .maxlen = sizeof(int),
905 .mode = NMI_WATCHDOG_SYSCTL_PERM,
906 .proc_handler = proc_nmi_watchdog,
907 .extra1 = &zero,
908 .extra2 = &one,
909 },
910 {
911 .procname = "watchdog_cpumask",
912 .data = &watchdog_cpumask_bits,
913 .maxlen = NR_CPUS,
914 .mode = 0644,
915 .proc_handler = proc_watchdog_cpumask,
916 },
917 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
918 {
919 .procname = "soft_watchdog",
920 .data = &soft_watchdog_user_enabled,
921 .maxlen = sizeof(int),
922 .mode = 0644,
923 .proc_handler = proc_soft_watchdog,
924 .extra1 = &zero,
925 .extra2 = &one,
926 },
927 {
928 .procname = "softlockup_panic",
929 .data = &softlockup_panic,
930 .maxlen = sizeof(int),
931 .mode = 0644,
932 .proc_handler = proc_dointvec_minmax,
933 .extra1 = &zero,
934 .extra2 = &one,
935 },
936 #ifdef CONFIG_SMP
937 {
938 .procname = "softlockup_all_cpu_backtrace",
939 .data = &sysctl_softlockup_all_cpu_backtrace,
940 .maxlen = sizeof(int),
941 .mode = 0644,
942 .proc_handler = proc_dointvec_minmax,
943 .extra1 = &zero,
944 .extra2 = &one,
945 },
946 #endif /* CONFIG_SMP */
947 #endif
948 #ifdef CONFIG_HARDLOCKUP_DETECTOR
949 {
950 .procname = "hardlockup_panic",
951 .data = &hardlockup_panic,
952 .maxlen = sizeof(int),
953 .mode = 0644,
954 .proc_handler = proc_dointvec_minmax,
955 .extra1 = &zero,
956 .extra2 = &one,
957 },
958 #ifdef CONFIG_SMP
959 {
960 .procname = "hardlockup_all_cpu_backtrace",
961 .data = &sysctl_hardlockup_all_cpu_backtrace,
962 .maxlen = sizeof(int),
963 .mode = 0644,
964 .proc_handler = proc_dointvec_minmax,
965 .extra1 = &zero,
966 .extra2 = &one,
967 },
968 #endif /* CONFIG_SMP */
969 #endif
970 #endif
971
972 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
973 {
974 .procname = "unknown_nmi_panic",
975 .data = &unknown_nmi_panic,
976 .maxlen = sizeof (int),
977 .mode = 0644,
978 .proc_handler = proc_dointvec,
979 },
980 #endif
981 #if defined(CONFIG_X86)
982 {
983 .procname = "panic_on_unrecovered_nmi",
984 .data = &panic_on_unrecovered_nmi,
985 .maxlen = sizeof(int),
986 .mode = 0644,
987 .proc_handler = proc_dointvec,
988 },
989 {
990 .procname = "panic_on_io_nmi",
991 .data = &panic_on_io_nmi,
992 .maxlen = sizeof(int),
993 .mode = 0644,
994 .proc_handler = proc_dointvec,
995 },
996 #ifdef CONFIG_DEBUG_STACKOVERFLOW
997 {
998 .procname = "panic_on_stackoverflow",
999 .data = &sysctl_panic_on_stackoverflow,
1000 .maxlen = sizeof(int),
1001 .mode = 0644,
1002 .proc_handler = proc_dointvec,
1003 },
1004 #endif
1005 {
1006 .procname = "bootloader_type",
1007 .data = &bootloader_type,
1008 .maxlen = sizeof (int),
1009 .mode = 0444,
1010 .proc_handler = proc_dointvec,
1011 },
1012 {
1013 .procname = "bootloader_version",
1014 .data = &bootloader_version,
1015 .maxlen = sizeof (int),
1016 .mode = 0444,
1017 .proc_handler = proc_dointvec,
1018 },
1019 {
1020 .procname = "io_delay_type",
1021 .data = &io_delay_type,
1022 .maxlen = sizeof(int),
1023 .mode = 0644,
1024 .proc_handler = proc_dointvec,
1025 },
1026 #endif
1027 #if defined(CONFIG_MMU)
1028 {
1029 .procname = "randomize_va_space",
1030 .data = &randomize_va_space,
1031 .maxlen = sizeof(int),
1032 .mode = 0644,
1033 .proc_handler = proc_dointvec,
1034 },
1035 #endif
1036 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1037 {
1038 .procname = "spin_retry",
1039 .data = &spin_retry,
1040 .maxlen = sizeof (int),
1041 .mode = 0644,
1042 .proc_handler = proc_dointvec,
1043 },
1044 #endif
1045 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1046 {
1047 .procname = "acpi_video_flags",
1048 .data = &acpi_realmode_flags,
1049 .maxlen = sizeof (unsigned long),
1050 .mode = 0644,
1051 .proc_handler = proc_doulongvec_minmax,
1052 },
1053 #endif
1054 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1055 {
1056 .procname = "ignore-unaligned-usertrap",
1057 .data = &no_unaligned_warning,
1058 .maxlen = sizeof (int),
1059 .mode = 0644,
1060 .proc_handler = proc_dointvec,
1061 },
1062 #endif
1063 #ifdef CONFIG_IA64
1064 {
1065 .procname = "unaligned-dump-stack",
1066 .data = &unaligned_dump_stack,
1067 .maxlen = sizeof (int),
1068 .mode = 0644,
1069 .proc_handler = proc_dointvec,
1070 },
1071 #endif
1072 #ifdef CONFIG_DETECT_HUNG_TASK
1073 {
1074 .procname = "hung_task_panic",
1075 .data = &sysctl_hung_task_panic,
1076 .maxlen = sizeof(int),
1077 .mode = 0644,
1078 .proc_handler = proc_dointvec_minmax,
1079 .extra1 = &zero,
1080 .extra2 = &one,
1081 },
1082 {
1083 .procname = "hung_task_check_count",
1084 .data = &sysctl_hung_task_check_count,
1085 .maxlen = sizeof(int),
1086 .mode = 0644,
1087 .proc_handler = proc_dointvec_minmax,
1088 .extra1 = &zero,
1089 },
1090 {
1091 .procname = "hung_task_timeout_secs",
1092 .data = &sysctl_hung_task_timeout_secs,
1093 .maxlen = sizeof(unsigned long),
1094 .mode = 0644,
1095 .proc_handler = proc_dohung_task_timeout_secs,
1096 .extra2 = &hung_task_timeout_max,
1097 },
1098 {
1099 .procname = "hung_task_check_interval_secs",
1100 .data = &sysctl_hung_task_check_interval_secs,
1101 .maxlen = sizeof(unsigned long),
1102 .mode = 0644,
1103 .proc_handler = proc_dohung_task_timeout_secs,
1104 .extra2 = &hung_task_timeout_max,
1105 },
1106 {
1107 .procname = "hung_task_warnings",
1108 .data = &sysctl_hung_task_warnings,
1109 .maxlen = sizeof(int),
1110 .mode = 0644,
1111 .proc_handler = proc_dointvec_minmax,
1112 .extra1 = &neg_one,
1113 },
1114 #endif
1115 #ifdef CONFIG_RT_MUTEXES
1116 {
1117 .procname = "max_lock_depth",
1118 .data = &max_lock_depth,
1119 .maxlen = sizeof(int),
1120 .mode = 0644,
1121 .proc_handler = proc_dointvec,
1122 },
1123 #endif
1124 {
1125 .procname = "poweroff_cmd",
1126 .data = &poweroff_cmd,
1127 .maxlen = POWEROFF_CMD_PATH_LEN,
1128 .mode = 0644,
1129 .proc_handler = proc_dostring,
1130 },
1131 #ifdef CONFIG_KEYS
1132 {
1133 .procname = "keys",
1134 .mode = 0555,
1135 .child = key_sysctls,
1136 },
1137 #endif
1138 #ifdef CONFIG_PERF_EVENTS
1139 /*
1140 * User-space scripts rely on the existence of this file
1141 * as a feature check for perf_events being enabled.
1142 *
1143 * So it's an ABI, do not remove!
1144 */
1145 {
1146 .procname = "perf_event_paranoid",
1147 .data = &sysctl_perf_event_paranoid,
1148 .maxlen = sizeof(sysctl_perf_event_paranoid),
1149 .mode = 0644,
1150 .proc_handler = proc_dointvec,
1151 },
1152 {
1153 .procname = "perf_event_mlock_kb",
1154 .data = &sysctl_perf_event_mlock,
1155 .maxlen = sizeof(sysctl_perf_event_mlock),
1156 .mode = 0644,
1157 .proc_handler = proc_dointvec,
1158 },
1159 {
1160 .procname = "perf_event_max_sample_rate",
1161 .data = &sysctl_perf_event_sample_rate,
1162 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1163 .mode = 0644,
1164 .proc_handler = perf_proc_update_handler,
1165 .extra1 = &one,
1166 },
1167 {
1168 .procname = "perf_cpu_time_max_percent",
1169 .data = &sysctl_perf_cpu_time_max_percent,
1170 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1171 .mode = 0644,
1172 .proc_handler = perf_cpu_time_max_percent_handler,
1173 .extra1 = &zero,
1174 .extra2 = &one_hundred,
1175 },
1176 {
1177 .procname = "perf_event_max_stack",
1178 .data = &sysctl_perf_event_max_stack,
1179 .maxlen = sizeof(sysctl_perf_event_max_stack),
1180 .mode = 0644,
1181 .proc_handler = perf_event_max_stack_handler,
1182 .extra1 = &zero,
1183 .extra2 = &six_hundred_forty_kb,
1184 },
1185 {
1186 .procname = "perf_event_max_contexts_per_stack",
1187 .data = &sysctl_perf_event_max_contexts_per_stack,
1188 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
1189 .mode = 0644,
1190 .proc_handler = perf_event_max_stack_handler,
1191 .extra1 = &zero,
1192 .extra2 = &one_thousand,
1193 },
1194 #endif
1195 {
1196 .procname = "panic_on_warn",
1197 .data = &panic_on_warn,
1198 .maxlen = sizeof(int),
1199 .mode = 0644,
1200 .proc_handler = proc_dointvec_minmax,
1201 .extra1 = &zero,
1202 .extra2 = &one,
1203 },
1204 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1205 {
1206 .procname = "timer_migration",
1207 .data = &sysctl_timer_migration,
1208 .maxlen = sizeof(unsigned int),
1209 .mode = 0644,
1210 .proc_handler = timer_migration_handler,
1211 .extra1 = &zero,
1212 .extra2 = &one,
1213 },
1214 #endif
1215 #ifdef CONFIG_BPF_SYSCALL
1216 {
1217 .procname = "unprivileged_bpf_disabled",
1218 .data = &sysctl_unprivileged_bpf_disabled,
1219 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
1220 .mode = 0644,
1221 /* only handle a transition from default "0" to "1" */
1222 .proc_handler = proc_dointvec_minmax,
1223 .extra1 = &one,
1224 .extra2 = &one,
1225 },
1226 #endif
1227 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1228 {
1229 .procname = "panic_on_rcu_stall",
1230 .data = &sysctl_panic_on_rcu_stall,
1231 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
1232 .mode = 0644,
1233 .proc_handler = proc_dointvec_minmax,
1234 .extra1 = &zero,
1235 .extra2 = &one,
1236 },
1237 #endif
1238 { }
1239 };
1240
1241 static struct ctl_table vm_table[] = {
1242 {
1243 .procname = "overcommit_memory",
1244 .data = &sysctl_overcommit_memory,
1245 .maxlen = sizeof(sysctl_overcommit_memory),
1246 .mode = 0644,
1247 .proc_handler = proc_dointvec_minmax,
1248 .extra1 = &zero,
1249 .extra2 = &two,
1250 },
1251 {
1252 .procname = "panic_on_oom",
1253 .data = &sysctl_panic_on_oom,
1254 .maxlen = sizeof(sysctl_panic_on_oom),
1255 .mode = 0644,
1256 .proc_handler = proc_dointvec_minmax,
1257 .extra1 = &zero,
1258 .extra2 = &two,
1259 },
1260 {
1261 .procname = "oom_kill_allocating_task",
1262 .data = &sysctl_oom_kill_allocating_task,
1263 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
1264 .mode = 0644,
1265 .proc_handler = proc_dointvec,
1266 },
1267 {
1268 .procname = "oom_dump_tasks",
1269 .data = &sysctl_oom_dump_tasks,
1270 .maxlen = sizeof(sysctl_oom_dump_tasks),
1271 .mode = 0644,
1272 .proc_handler = proc_dointvec,
1273 },
1274 {
1275 .procname = "overcommit_ratio",
1276 .data = &sysctl_overcommit_ratio,
1277 .maxlen = sizeof(sysctl_overcommit_ratio),
1278 .mode = 0644,
1279 .proc_handler = overcommit_ratio_handler,
1280 },
1281 {
1282 .procname = "overcommit_kbytes",
1283 .data = &sysctl_overcommit_kbytes,
1284 .maxlen = sizeof(sysctl_overcommit_kbytes),
1285 .mode = 0644,
1286 .proc_handler = overcommit_kbytes_handler,
1287 },
1288 {
1289 .procname = "page-cluster",
1290 .data = &page_cluster,
1291 .maxlen = sizeof(int),
1292 .mode = 0644,
1293 .proc_handler = proc_dointvec_minmax,
1294 .extra1 = &zero,
1295 },
1296 {
1297 .procname = "dirty_background_ratio",
1298 .data = &dirty_background_ratio,
1299 .maxlen = sizeof(dirty_background_ratio),
1300 .mode = 0644,
1301 .proc_handler = dirty_background_ratio_handler,
1302 .extra1 = &zero,
1303 .extra2 = &one_hundred,
1304 },
1305 {
1306 .procname = "dirty_background_bytes",
1307 .data = &dirty_background_bytes,
1308 .maxlen = sizeof(dirty_background_bytes),
1309 .mode = 0644,
1310 .proc_handler = dirty_background_bytes_handler,
1311 .extra1 = &one_ul,
1312 },
1313 {
1314 .procname = "dirty_ratio",
1315 .data = &vm_dirty_ratio,
1316 .maxlen = sizeof(vm_dirty_ratio),
1317 .mode = 0644,
1318 .proc_handler = dirty_ratio_handler,
1319 .extra1 = &zero,
1320 .extra2 = &one_hundred,
1321 },
1322 {
1323 .procname = "dirty_bytes",
1324 .data = &vm_dirty_bytes,
1325 .maxlen = sizeof(vm_dirty_bytes),
1326 .mode = 0644,
1327 .proc_handler = dirty_bytes_handler,
1328 .extra1 = &dirty_bytes_min,
1329 },
1330 {
1331 .procname = "dirty_writeback_centisecs",
1332 .data = &dirty_writeback_interval,
1333 .maxlen = sizeof(dirty_writeback_interval),
1334 .mode = 0644,
1335 .proc_handler = dirty_writeback_centisecs_handler,
1336 },
1337 {
1338 .procname = "dirty_expire_centisecs",
1339 .data = &dirty_expire_interval,
1340 .maxlen = sizeof(dirty_expire_interval),
1341 .mode = 0644,
1342 .proc_handler = proc_dointvec_minmax,
1343 .extra1 = &zero,
1344 },
1345 {
1346 .procname = "dirtytime_expire_seconds",
1347 .data = &dirtytime_expire_interval,
1348 .maxlen = sizeof(dirtytime_expire_interval),
1349 .mode = 0644,
1350 .proc_handler = dirtytime_interval_handler,
1351 .extra1 = &zero,
1352 },
1353 {
1354 .procname = "swappiness",
1355 .data = &vm_swappiness,
1356 .maxlen = sizeof(vm_swappiness),
1357 .mode = 0644,
1358 .proc_handler = proc_dointvec_minmax,
1359 .extra1 = &zero,
1360 .extra2 = &one_hundred,
1361 },
1362 #ifdef CONFIG_HUGETLB_PAGE
1363 {
1364 .procname = "nr_hugepages",
1365 .data = NULL,
1366 .maxlen = sizeof(unsigned long),
1367 .mode = 0644,
1368 .proc_handler = hugetlb_sysctl_handler,
1369 },
1370 #ifdef CONFIG_NUMA
1371 {
1372 .procname = "nr_hugepages_mempolicy",
1373 .data = NULL,
1374 .maxlen = sizeof(unsigned long),
1375 .mode = 0644,
1376 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1377 },
1378 {
1379 .procname = "numa_stat",
1380 .data = &sysctl_vm_numa_stat,
1381 .maxlen = sizeof(int),
1382 .mode = 0644,
1383 .proc_handler = sysctl_vm_numa_stat_handler,
1384 .extra1 = &zero,
1385 .extra2 = &one,
1386 },
1387 #endif
1388 {
1389 .procname = "hugetlb_shm_group",
1390 .data = &sysctl_hugetlb_shm_group,
1391 .maxlen = sizeof(gid_t),
1392 .mode = 0644,
1393 .proc_handler = proc_dointvec,
1394 },
1395 {
1396 .procname = "nr_overcommit_hugepages",
1397 .data = NULL,
1398 .maxlen = sizeof(unsigned long),
1399 .mode = 0644,
1400 .proc_handler = hugetlb_overcommit_handler,
1401 },
1402 #endif
1403 {
1404 .procname = "lowmem_reserve_ratio",
1405 .data = &sysctl_lowmem_reserve_ratio,
1406 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1407 .mode = 0644,
1408 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1409 },
1410 {
1411 .procname = "drop_caches",
1412 .data = &sysctl_drop_caches,
1413 .maxlen = sizeof(int),
1414 .mode = 0644,
1415 .proc_handler = drop_caches_sysctl_handler,
1416 .extra1 = &one,
1417 .extra2 = &four,
1418 },
1419 #ifdef CONFIG_COMPACTION
1420 {
1421 .procname = "compact_memory",
1422 .data = &sysctl_compact_memory,
1423 .maxlen = sizeof(int),
1424 .mode = 0200,
1425 .proc_handler = sysctl_compaction_handler,
1426 },
1427 {
1428 .procname = "extfrag_threshold",
1429 .data = &sysctl_extfrag_threshold,
1430 .maxlen = sizeof(int),
1431 .mode = 0644,
1432 .proc_handler = sysctl_extfrag_handler,
1433 .extra1 = &min_extfrag_threshold,
1434 .extra2 = &max_extfrag_threshold,
1435 },
1436 {
1437 .procname = "compact_unevictable_allowed",
1438 .data = &sysctl_compact_unevictable_allowed,
1439 .maxlen = sizeof(int),
1440 .mode = 0644,
1441 .proc_handler = proc_dointvec,
1442 .extra1 = &zero,
1443 .extra2 = &one,
1444 },
1445
1446 #endif /* CONFIG_COMPACTION */
1447 {
1448 .procname = "min_free_kbytes",
1449 .data = &min_free_kbytes,
1450 .maxlen = sizeof(min_free_kbytes),
1451 .mode = 0644,
1452 .proc_handler = min_free_kbytes_sysctl_handler,
1453 .extra1 = &zero,
1454 },
1455 {
1456 .procname = "watermark_scale_factor",
1457 .data = &watermark_scale_factor,
1458 .maxlen = sizeof(watermark_scale_factor),
1459 .mode = 0644,
1460 .proc_handler = watermark_scale_factor_sysctl_handler,
1461 .extra1 = &one,
1462 .extra2 = &one_thousand,
1463 },
1464 {
1465 .procname = "percpu_pagelist_fraction",
1466 .data = &percpu_pagelist_fraction,
1467 .maxlen = sizeof(percpu_pagelist_fraction),
1468 .mode = 0644,
1469 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1470 .extra1 = &zero,
1471 },
1472 #ifdef CONFIG_MMU
1473 {
1474 .procname = "max_map_count",
1475 .data = &sysctl_max_map_count,
1476 .maxlen = sizeof(sysctl_max_map_count),
1477 .mode = 0644,
1478 .proc_handler = proc_dointvec_minmax,
1479 .extra1 = &zero,
1480 },
1481 #else
1482 {
1483 .procname = "nr_trim_pages",
1484 .data = &sysctl_nr_trim_pages,
1485 .maxlen = sizeof(sysctl_nr_trim_pages),
1486 .mode = 0644,
1487 .proc_handler = proc_dointvec_minmax,
1488 .extra1 = &zero,
1489 },
1490 #endif
1491 {
1492 .procname = "laptop_mode",
1493 .data = &laptop_mode,
1494 .maxlen = sizeof(laptop_mode),
1495 .mode = 0644,
1496 .proc_handler = proc_dointvec_jiffies,
1497 },
1498 {
1499 .procname = "block_dump",
1500 .data = &block_dump,
1501 .maxlen = sizeof(block_dump),
1502 .mode = 0644,
1503 .proc_handler = proc_dointvec,
1504 .extra1 = &zero,
1505 },
1506 {
1507 .procname = "vfs_cache_pressure",
1508 .data = &sysctl_vfs_cache_pressure,
1509 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1510 .mode = 0644,
1511 .proc_handler = proc_dointvec,
1512 .extra1 = &zero,
1513 },
1514 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1515 {
1516 .procname = "legacy_va_layout",
1517 .data = &sysctl_legacy_va_layout,
1518 .maxlen = sizeof(sysctl_legacy_va_layout),
1519 .mode = 0644,
1520 .proc_handler = proc_dointvec,
1521 .extra1 = &zero,
1522 },
1523 #endif
1524 #ifdef CONFIG_NUMA
1525 {
1526 .procname = "zone_reclaim_mode",
1527 .data = &node_reclaim_mode,
1528 .maxlen = sizeof(node_reclaim_mode),
1529 .mode = 0644,
1530 .proc_handler = proc_dointvec,
1531 .extra1 = &zero,
1532 },
1533 {
1534 .procname = "min_unmapped_ratio",
1535 .data = &sysctl_min_unmapped_ratio,
1536 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1537 .mode = 0644,
1538 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1539 .extra1 = &zero,
1540 .extra2 = &one_hundred,
1541 },
1542 {
1543 .procname = "min_slab_ratio",
1544 .data = &sysctl_min_slab_ratio,
1545 .maxlen = sizeof(sysctl_min_slab_ratio),
1546 .mode = 0644,
1547 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1548 .extra1 = &zero,
1549 .extra2 = &one_hundred,
1550 },
1551 #endif
1552 #ifdef CONFIG_SMP
1553 {
1554 .procname = "stat_interval",
1555 .data = &sysctl_stat_interval,
1556 .maxlen = sizeof(sysctl_stat_interval),
1557 .mode = 0644,
1558 .proc_handler = proc_dointvec_jiffies,
1559 },
1560 {
1561 .procname = "stat_refresh",
1562 .data = NULL,
1563 .maxlen = 0,
1564 .mode = 0600,
1565 .proc_handler = vmstat_refresh,
1566 },
1567 #endif
1568 #ifdef CONFIG_MMU
1569 {
1570 .procname = "mmap_min_addr",
1571 .data = &dac_mmap_min_addr,
1572 .maxlen = sizeof(unsigned long),
1573 .mode = 0644,
1574 .proc_handler = mmap_min_addr_handler,
1575 },
1576 #endif
1577 #ifdef CONFIG_NUMA
1578 {
1579 .procname = "numa_zonelist_order",
1580 .data = &numa_zonelist_order,
1581 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1582 .mode = 0644,
1583 .proc_handler = numa_zonelist_order_handler,
1584 },
1585 #endif
1586 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1587 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1588 {
1589 .procname = "vdso_enabled",
1590 #ifdef CONFIG_X86_32
1591 .data = &vdso32_enabled,
1592 .maxlen = sizeof(vdso32_enabled),
1593 #else
1594 .data = &vdso_enabled,
1595 .maxlen = sizeof(vdso_enabled),
1596 #endif
1597 .mode = 0644,
1598 .proc_handler = proc_dointvec,
1599 .extra1 = &zero,
1600 },
1601 #endif
1602 #ifdef CONFIG_HIGHMEM
1603 {
1604 .procname = "highmem_is_dirtyable",
1605 .data = &vm_highmem_is_dirtyable,
1606 .maxlen = sizeof(vm_highmem_is_dirtyable),
1607 .mode = 0644,
1608 .proc_handler = proc_dointvec_minmax,
1609 .extra1 = &zero,
1610 .extra2 = &one,
1611 },
1612 #endif
1613 #ifdef CONFIG_MEMORY_FAILURE
1614 {
1615 .procname = "memory_failure_early_kill",
1616 .data = &sysctl_memory_failure_early_kill,
1617 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1618 .mode = 0644,
1619 .proc_handler = proc_dointvec_minmax,
1620 .extra1 = &zero,
1621 .extra2 = &one,
1622 },
1623 {
1624 .procname = "memory_failure_recovery",
1625 .data = &sysctl_memory_failure_recovery,
1626 .maxlen = sizeof(sysctl_memory_failure_recovery),
1627 .mode = 0644,
1628 .proc_handler = proc_dointvec_minmax,
1629 .extra1 = &zero,
1630 .extra2 = &one,
1631 },
1632 #endif
1633 {
1634 .procname = "user_reserve_kbytes",
1635 .data = &sysctl_user_reserve_kbytes,
1636 .maxlen = sizeof(sysctl_user_reserve_kbytes),
1637 .mode = 0644,
1638 .proc_handler = proc_doulongvec_minmax,
1639 },
1640 {
1641 .procname = "admin_reserve_kbytes",
1642 .data = &sysctl_admin_reserve_kbytes,
1643 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
1644 .mode = 0644,
1645 .proc_handler = proc_doulongvec_minmax,
1646 },
1647 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1648 {
1649 .procname = "mmap_rnd_bits",
1650 .data = &mmap_rnd_bits,
1651 .maxlen = sizeof(mmap_rnd_bits),
1652 .mode = 0600,
1653 .proc_handler = proc_dointvec_minmax,
1654 .extra1 = (void *)&mmap_rnd_bits_min,
1655 .extra2 = (void *)&mmap_rnd_bits_max,
1656 },
1657 #endif
1658 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1659 {
1660 .procname = "mmap_rnd_compat_bits",
1661 .data = &mmap_rnd_compat_bits,
1662 .maxlen = sizeof(mmap_rnd_compat_bits),
1663 .mode = 0600,
1664 .proc_handler = proc_dointvec_minmax,
1665 .extra1 = (void *)&mmap_rnd_compat_bits_min,
1666 .extra2 = (void *)&mmap_rnd_compat_bits_max,
1667 },
1668 #endif
1669 { }
1670 };
1671
/*
 * Sysctl entries for the filesystem tunables (presumably hooked up as the
 * "fs" directory by sysctl_base_table, which is defined elsewhere in this
 * file).
 *
 * Entries with mode 0444 are read-only statistics; entries that set
 * ->child instead of ->data/->proc_handler are sub-directories.  The table
 * is terminated by an empty entry.
 */
static struct ctl_table fs_table[] = {
	{
		/* First two longs of inodes_stat. */
		.procname = "inode-nr",
		.data = &inodes_stat,
		.maxlen = 2*sizeof(long),
		.mode = 0444,
		.proc_handler = proc_nr_inodes,
	},
	{
		/* Same backing object as "inode-nr", exposed as seven longs. */
		.procname = "inode-state",
		.data = &inodes_stat,
		.maxlen = 7*sizeof(long),
		.mode = 0444,
		.proc_handler = proc_nr_inodes,
	},
	{
		.procname = "file-nr",
		.data = &files_stat,
		.maxlen = sizeof(files_stat),
		.mode = 0444,
		.proc_handler = proc_nr_files,
	},
	{
		/* Writable member of files_stat, bounded by zero_ul..long_max. */
		.procname = "file-max",
		.data = &files_stat.max_files,
		.maxlen = sizeof(files_stat.max_files),
		.mode = 0644,
		.proc_handler = proc_doulongvec_minmax,
		.extra1 = &zero_ul,
		.extra2 = &long_max,
	},
	{
		.procname = "nr_open",
		.data = &sysctl_nr_open,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &sysctl_nr_open_min,
		.extra2 = &sysctl_nr_open_max,
	},
	{
		.procname = "dentry-state",
		.data = &dentry_stat,
		.maxlen = 6*sizeof(long),
		.mode = 0444,
		.proc_handler = proc_nr_dentry,
	},
	{
		.procname = "overflowuid",
		.data = &fs_overflowuid,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &minolduid,
		.extra2 = &maxolduid,
	},
	{
		/* Shares the minolduid/maxolduid bounds with "overflowuid". */
		.procname = "overflowgid",
		.data = &fs_overflowgid,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &minolduid,
		.extra2 = &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.procname = "leases-enable",
		.data = &leases_enable,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.procname = "dir-notify-enable",
		.data = &dir_notify_enable,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#endif
/* Everything up to the matching #endif is only built with an MMU. */
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.procname = "lease-break-time",
		.data = &lease_break_time,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	{
		/* Read-only counter. */
		.procname = "aio-nr",
		.data = &aio_nr,
		.maxlen = sizeof(aio_nr),
		.mode = 0444,
		.proc_handler = proc_doulongvec_minmax,
	},
	{
		.procname = "aio-max-nr",
		.data = &aio_max_nr,
		.maxlen = sizeof(aio_max_nr),
		.mode = 0644,
		.proc_handler = proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	{
		/* Sub-directory populated from inotify_table. */
		.procname = "inotify",
		.mode = 0555,
		.child = inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	{
		/* Sub-directory populated from epoll_table. */
		.procname = "epoll",
		.mode = 0555,
		.child = epoll_table,
	},
#endif
#endif /* CONFIG_MMU */
	{
		.procname = "protected_symlinks",
		.data = &sysctl_protected_symlinks,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &one,
	},
	{
		.procname = "protected_hardlinks",
		.data = &sysctl_protected_hardlinks,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &one,
	},
	{
		.procname = "protected_fifos",
		.data = &sysctl_protected_fifos,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &two,
	},
	{
		.procname = "protected_regular",
		.data = &sysctl_protected_regular,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &two,
	},
	{
		.procname = "suid_dumpable",
		.data = &suid_dumpable,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax_coredump,
		.extra1 = &zero,
		.extra2 = &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	{
		/* Directory placeholder backed by sysctl_mount_point. */
		.procname = "binfmt_misc",
		.mode = 0555,
		.child = sysctl_mount_point,
	},
#endif
	{
		.procname = "pipe-max-size",
		.data = &pipe_max_size,
		.maxlen = sizeof(pipe_max_size),
		.mode = 0644,
		.proc_handler = proc_dopipe_max_size,
	},
	{
		.procname = "pipe-user-pages-hard",
		.data = &pipe_user_pages_hard,
		.maxlen = sizeof(pipe_user_pages_hard),
		.mode = 0644,
		.proc_handler = proc_doulongvec_minmax,
	},
	{
		.procname = "pipe-user-pages-soft",
		.data = &pipe_user_pages_soft,
		.maxlen = sizeof(pipe_user_pages_soft),
		.mode = 0644,
		.proc_handler = proc_doulongvec_minmax,
	},
	{
		/* Lower bound only (&one); no upper bound is configured. */
		.procname = "mount-max",
		.data = &sysctl_mount_max,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &one,
	},
	{ }
};
1879
1880 static struct ctl_table debug_table[] = {
1881 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1882 {
1883 .procname = "exception-trace",
1884 .data = &show_unhandled_signals,
1885 .maxlen = sizeof(int),
1886 .mode = 0644,
1887 .proc_handler = proc_dointvec
1888 },
1889 #endif
1890 #if defined(CONFIG_OPTPROBES)
1891 {
1892 .procname = "kprobes-optimization",
1893 .data = &sysctl_kprobes_optimization,
1894 .maxlen = sizeof(int),
1895 .mode = 0644,
1896 .proc_handler = proc_kprobes_optimization_handler,
1897 .extra1 = &zero,
1898 .extra2 = &one,
1899 },
1900 #endif
1901 { }
1902 };
1903
1904 static struct ctl_table dev_table[] = {
1905 { }
1906 };
1907
1908 int __init sysctl_init(void)
1909 {
1910 struct ctl_table_header *hdr;
1911
1912 hdr = register_sysctl_table(sysctl_base_table);
1913 kmemleak_not_leak(hdr);
1914 return 0;
1915 }
1916
1917 #endif /* CONFIG_SYSCTL */
1918
1919 /*
1920 * /proc/sys support
1921 */
1922
1923 #ifdef CONFIG_PROC_SYSCTL
1924
1925 static int _proc_do_string(char *data, int maxlen, int write,
1926 char __user *buffer,
1927 size_t *lenp, loff_t *ppos)
1928 {
1929 size_t len;
1930 char __user *p;
1931 char c;
1932
1933 if (!data || !maxlen || !*lenp) {
1934 *lenp = 0;
1935 return 0;
1936 }
1937
1938 if (write) {
1939 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1940 /* Only continue writes not past the end of buffer. */
1941 len = strlen(data);
1942 if (len > maxlen - 1)
1943 len = maxlen - 1;
1944
1945 if (*ppos > len)
1946 return 0;
1947 len = *ppos;
1948 } else {
1949 /* Start writing from beginning of buffer. */
1950 len = 0;
1951 }
1952
1953 *ppos += *lenp;
1954 p = buffer;
1955 while ((p - buffer) < *lenp && len < maxlen - 1) {
1956 if (get_user(c, p++))
1957 return -EFAULT;
1958 if (c == 0 || c == '\n')
1959 break;
1960 data[len++] = c;
1961 }
1962 data[len] = 0;
1963 } else {
1964 len = strlen(data);
1965 if (len > maxlen)
1966 len = maxlen;
1967
1968 if (*ppos > len) {
1969 *lenp = 0;
1970 return 0;
1971 }
1972
1973 data += *ppos;
1974 len -= *ppos;
1975
1976 if (len > *lenp)
1977 len = *lenp;
1978 if (len)
1979 if (copy_to_user(buffer, data, len))
1980 return -EFAULT;
1981 if (len < *lenp) {
1982 if (put_user('\n', buffer + len))
1983 return -EFAULT;
1984 len++;
1985 }
1986 *lenp = len;
1987 *ppos += len;
1988 }
1989 return 0;
1990 }
1991
1992 static void warn_sysctl_write(struct ctl_table *table)
1993 {
1994 pr_warn_once("%s wrote to %s when file position was not 0!\n"
1995 "This will not be supported in the future. To silence this\n"
1996 "warning, set kernel.sysctl_writes_strict = -1\n",
1997 current->comm, table->procname);
1998 }
1999
2000 /**
2001 * proc_first_pos_non_zero_ignore - check if first position is allowed
2002 * @ppos: file position
2003 * @table: the sysctl table
2004 *
2005 * Returns true if the first position is non-zero and the sysctl_writes_strict
2006 * mode indicates this is not allowed for numeric input types. String proc
2007 * handlers can ignore the return value.
2008 */
2009 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2010 struct ctl_table *table)
2011 {
2012 if (!*ppos)
2013 return false;
2014
2015 switch (sysctl_writes_strict) {
2016 case SYSCTL_WRITES_STRICT:
2017 return true;
2018 case SYSCTL_WRITES_WARN:
2019 warn_sysctl_write(table);
2020 return false;
2021 default:
2022 return false;
2023 }
2024 }
2025
2026 /**
2027 * proc_dostring - read a string sysctl
2028 * @table: the sysctl table
2029 * @write: %TRUE if this is a write to the sysctl file
2030 * @buffer: the user buffer
2031 * @lenp: the size of the user buffer
2032 * @ppos: file position
2033 *
2034 * Reads/writes a string from/to the user buffer. If the kernel
2035 * buffer provided is not large enough to hold the string, the
2036 * string is truncated. The copied string is %NULL-terminated.
2037 * If the string is being read by the user process, it is copied
2038 * and a newline '\n' is added. It is truncated if the buffer is
2039 * not large enough.
2040 *
2041 * Returns 0 on success.
2042 */
2043 int proc_dostring(struct ctl_table *table, int write,
2044 void __user *buffer, size_t *lenp, loff_t *ppos)
2045 {
2046 if (write)
2047 proc_first_pos_non_zero_ignore(ppos, table);
2048
2049 return _proc_do_string((char *)(table->data), table->maxlen, write,
2050 (char __user *)buffer, lenp, ppos);
2051 }
2052
/*
 * Advance *buf to the position returned by skip_spaces() and return the
 * number of characters that were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);

	return *buf - start;
}
2061
/*
 * Step over a run of the character @v at the start of *buf, looking at no
 * more than *size bytes.  *buf is advanced and *size reduced by the number
 * of characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
2071
2072 #define TMPBUFLEN 22
2073 /**
2074 * proc_get_long - reads an ASCII formatted integer from a user buffer
2075 *
2076 * @buf: a kernel buffer
2077 * @size: size of the kernel buffer
2078 * @val: this is where the number will be stored
2079 * @neg: set to %TRUE if number is negative
2080 * @perm_tr: a vector which contains the allowed trailers
2081 * @perm_tr_len: size of the perm_tr vector
2082 * @tr: pointer to store the trailer character
2083 *
2084 * In case of success %0 is returned and @buf and @size are updated with
2085 * the amount of bytes read. If @tr is non-NULL and a trailing
2086 * character exists (size is non-zero after returning from this
2087 * function), @tr is updated with the trailing character.
2088 */
2089 static int proc_get_long(char **buf, size_t *size,
2090 unsigned long *val, bool *neg,
2091 const char *perm_tr, unsigned perm_tr_len, char *tr)
2092 {
2093 int len;
2094 char *p, tmp[TMPBUFLEN];
2095
2096 if (!*size)
2097 return -EINVAL;
2098
2099 len = *size;
2100 if (len > TMPBUFLEN - 1)
2101 len = TMPBUFLEN - 1;
2102
2103 memcpy(tmp, *buf, len);
2104
2105 tmp[len] = 0;
2106 p = tmp;
2107 if (*p == '-' && *size > 1) {
2108 *neg = true;
2109 p++;
2110 } else
2111 *neg = false;
2112 if (!isdigit(*p))
2113 return -EINVAL;
2114
2115 *val = simple_strtoul(p, &p, 0);
2116
2117 len = p - tmp;
2118
2119 /* We don't know if the next char is whitespace thus we may accept
2120 * invalid integers (e.g. 1234...a) or two integers instead of one
2121 * (e.g. 123...1). So lets not allow such large numbers. */
2122 if (len == TMPBUFLEN - 1)
2123 return -EINVAL;
2124
2125 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2126 return -EINVAL;
2127
2128 if (tr && (len < *size))
2129 *tr = *p;
2130
2131 *buf += len;
2132 *size -= len;
2133
2134 return 0;
2135 }
2136
2137 /**
2138 * proc_put_long - converts an integer to a decimal ASCII formatted string
2139 *
2140 * @buf: the user buffer
2141 * @size: the size of the user buffer
2142 * @val: the integer to be converted
2143 * @neg: sign of the number, %TRUE for negative
2144 *
2145 * In case of success %0 is returned and @buf and @size are updated with
2146 * the amount of bytes written.
2147 */
2148 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2149 bool neg)
2150 {
2151 int len;
2152 char tmp[TMPBUFLEN], *p = tmp;
2153
2154 sprintf(p, "%s%lu", neg ? "-" : "", val);
2155 len = strlen(tmp);
2156 if (len > *size)
2157 len = *size;
2158 if (copy_to_user(*buf, tmp, len))
2159 return -EFAULT;
2160 *size -= len;
2161 *buf += len;
2162 return 0;
2163 }
2164 #undef TMPBUFLEN
2165
2166 static int proc_put_char(void __user **buf, size_t *size, char c)
2167 {
2168 if (*size) {
2169 char __user **buffer = (char __user **)buf;
2170 if (put_user(c, *buffer))
2171 return -EFAULT;
2172 (*size)--, (*buffer)++;
2173 *buf = *buffer;
2174 }
2175 return 0;
2176 }
2177
/*
 * Default conversion between the sign/magnitude pair used by the parser
 * (@negp, @lvalp) and an int (@valp).
 *
 * @write != 0: store the parsed number into *valp; -EINVAL if it does not
 *              fit into an int.
 * @write == 0: split *valp into sign and magnitude.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int val = *valp;

		*negp = val < 0;
		*lvalp = *negp ? -(unsigned long)val : (unsigned long)val;
		return 0;
	}

	/* A negative number may have a magnitude one larger than INT_MAX. */
	if (*lvalp > (unsigned long) INT_MAX + (*negp ? 1 : 0))
		return -EINVAL;

	*valp = *negp ? -*lvalp : *lvalp;

	return 0;
}
2204
/*
 * Default conversion between the parser's unsigned long (@lvalp) and an
 * unsigned int (@valp).
 *
 * @write != 0: store *lvalp into *valp; -EINVAL if it exceeds UINT_MAX.
 * @write == 0: widen *valp into *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;

	return 0;
}
2219
/*
 * Characters accepted directly after a number by the integer parsers
 * below.  Kept as a plain char array (no terminating NUL) so that
 * sizeof(proc_wspace_sep) is exactly the number of separators.
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2221
/*
 * __do_proc_dointvec - read or write a vector of ints as ASCII text
 * @tbl_data: first int of the vector
 * @table:    sysctl entry; ->maxlen gives the vector size in bytes
 * @write:    non-zero for a write to the sysctl, zero for a read
 * @buffer:   user buffer
 * @lenp:     in: size of @buffer; out: bytes consumed or produced
 * @ppos:     file position, advanced by the final *lenp
 * @conv:     conversion callback; do_proc_dointvec_conv() when NULL
 * @data:     opaque argument handed through to @conv
 *
 * On a write, whitespace-separated numbers are parsed from at most
 * PAGE_SIZE - 1 bytes of input and stored one per vector element.  On a
 * read, the elements are printed separated by tabs and followed by a
 * newline when there is room.
 *
 * Returns 0 on success or a negative errno.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
			      int write, void __user *buffer,
			      size_t *lenp, loff_t *ppos,
			      int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
					  int write, void *data),
			      void *data)
{
	int *i, vleft, first = 1, err = 0;
	size_t left;
	char *kbuf = NULL, *p;

	/* Nothing to do; this also ends a read that continues at *ppos != 0. */
	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	/* Number of ints in the vector. */
	vleft = table->maxlen / sizeof(*i);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		/*
		 * A write at a non-zero position that must be ignored is
		 * reported as fully consumed without being parsed.
		 */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		/* NUL-terminated kernel copy of the user input. */
		p = kbuf = memdup_user_nul(buffer, left);
		if (IS_ERR(kbuf))
			return PTR_ERR(kbuf);
	}

	/* One iteration per vector element; 'first' is cleared after one pass. */
	for (; left && vleft--; i++, first=0) {
		unsigned long lval;
		bool neg;

		if (write) {
			left -= proc_skip_spaces(&p);

			if (!left)
				break;
			err = proc_get_long(&p, &left, &lval, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			if (conv(&neg, &lval, i, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, i, 0, data)) {
				err = -EINVAL;
				break;
			}
			/* Tab-separate every value but the first. */
			if (!first)
				err = proc_put_char(&buffer, &left, '\t');
			if (err)
				break;
			err = proc_put_long(&buffer, &left, lval, neg);
			if (err)
				break;
		}
	}

	/* Finish a non-empty read with a newline if there is room left. */
	if (!write && !first && left && !err)
		err = proc_put_char(&buffer, &left, '\n');
	/* Count trailing whitespace of a successful write as consumed. */
	if (write && !err && left)
		left -= proc_skip_spaces(&p);
	if (write) {
		kfree(kbuf);
		/* No value was stored at all: report the error, or -EINVAL. */
		if (first)
			return err ? : -EINVAL;
	}
	/* Report only the bytes actually consumed or produced. */
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
2303
2304 static int do_proc_dointvec(struct ctl_table *table, int write,
2305 void __user *buffer, size_t *lenp, loff_t *ppos,
2306 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2307 int write, void *data),
2308 void *data)
2309 {
2310 return __do_proc_dointvec(table->data, table, write,
2311 buffer, lenp, ppos, conv, data);
2312 }
2313
2314 static int do_proc_douintvec_w(unsigned int *tbl_data,
2315 struct ctl_table *table,
2316 void __user *buffer,
2317 size_t *lenp, loff_t *ppos,
2318 int (*conv)(unsigned long *lvalp,
2319 unsigned int *valp,
2320 int write, void *data),
2321 void *data)
2322 {
2323 unsigned long lval;
2324 int err = 0;
2325 size_t left;
2326 bool neg;
2327 char *kbuf = NULL, *p;
2328
2329 left = *lenp;
2330
2331 if (proc_first_pos_non_zero_ignore(ppos, table))
2332 goto bail_early;
2333
2334 if (left > PAGE_SIZE - 1)
2335 left = PAGE_SIZE - 1;
2336
2337 p = kbuf = memdup_user_nul(buffer, left);
2338 if (IS_ERR(kbuf))
2339 return -EINVAL;
2340
2341 left -= proc_skip_spaces(&p);
2342 if (!left) {
2343 err = -EINVAL;
2344 goto out_free;
2345 }
2346
2347 err = proc_get_long(&p, &left, &lval, &neg,
2348 proc_wspace_sep,
2349 sizeof(proc_wspace_sep), NULL);
2350 if (err || neg) {
2351 err = -EINVAL;
2352 goto out_free;
2353 }
2354
2355 if (conv(&lval, tbl_data, 1, data)) {
2356 err = -EINVAL;
2357 goto out_free;
2358 }
2359
2360 if (!err && left)
2361 left -= proc_skip_spaces(&p);
2362
2363 out_free:
2364 kfree(kbuf);
2365 if (err)
2366 return -EINVAL;
2367
2368 return 0;
2369
2370 /* This is in keeping with old __do_proc_dointvec() */
2371 bail_early:
2372 *ppos += *lenp;
2373 return err;
2374 }
2375
2376 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2377 size_t *lenp, loff_t *ppos,
2378 int (*conv)(unsigned long *lvalp,
2379 unsigned int *valp,
2380 int write, void *data),
2381 void *data)
2382 {
2383 unsigned long lval;
2384 int err = 0;
2385 size_t left;
2386
2387 left = *lenp;
2388
2389 if (conv(&lval, tbl_data, 0, data)) {
2390 err = -EINVAL;
2391 goto out;
2392 }
2393
2394 err = proc_put_long(&buffer, &left, lval, false);
2395 if (err || !left)
2396 goto out;
2397
2398 err = proc_put_char(&buffer, &left, '\n');
2399
2400 out:
2401 *lenp -= left;
2402 *ppos += *lenp;
2403
2404 return err;
2405 }
2406
2407 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2408 int write, void __user *buffer,
2409 size_t *lenp, loff_t *ppos,
2410 int (*conv)(unsigned long *lvalp,
2411 unsigned int *valp,
2412 int write, void *data),
2413 void *data)
2414 {
2415 unsigned int *i, vleft;
2416
2417 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2418 *lenp = 0;
2419 return 0;
2420 }
2421
2422 i = (unsigned int *) tbl_data;
2423 vleft = table->maxlen / sizeof(*i);
2424
2425 /*
2426 * Arrays are not supported, keep this simple. *Do not* add
2427 * support for them.
2428 */
2429 if (vleft != 1) {
2430 *lenp = 0;
2431 return -EINVAL;
2432 }
2433
2434 if (!conv)
2435 conv = do_proc_douintvec_conv;
2436
2437 if (write)
2438 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2439 conv, data);
2440 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2441 }
2442
2443 static int do_proc_douintvec(struct ctl_table *table, int write,
2444 void __user *buffer, size_t *lenp, loff_t *ppos,
2445 int (*conv)(unsigned long *lvalp,
2446 unsigned int *valp,
2447 int write, void *data),
2448 void *data)
2449 {
2450 return __do_proc_douintvec(table->data, table, write,
2451 buffer, lenp, ppos, conv, data);
2452 }
2453
2454 /**
2455 * proc_dointvec - read a vector of integers
2456 * @table: the sysctl table
2457 * @write: %TRUE if this is a write to the sysctl file
2458 * @buffer: the user buffer
2459 * @lenp: the size of the user buffer
2460 * @ppos: file position
2461 *
2462 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2463 * values from/to the user buffer, treated as an ASCII string.
2464 *
2465 * Returns 0 on success.
2466 */
2467 int proc_dointvec(struct ctl_table *table, int write,
2468 void __user *buffer, size_t *lenp, loff_t *ppos)
2469 {
2470 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2471 }
2472
2473 /**
2474 * proc_douintvec - read a vector of unsigned integers
2475 * @table: the sysctl table
2476 * @write: %TRUE if this is a write to the sysctl file
2477 * @buffer: the user buffer
2478 * @lenp: the size of the user buffer
2479 * @ppos: file position
2480 *
2481 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2482 * values from/to the user buffer, treated as an ASCII string.
2483 *
2484 * Returns 0 on success.
2485 */
2486 int proc_douintvec(struct ctl_table *table, int write,
2487 void __user *buffer, size_t *lenp, loff_t *ppos)
2488 {
2489 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2490 do_proc_douintvec_conv, NULL);
2491 }
2492
2493 /*
2494 * Taint values can only be increased
2495 * This means we can safely use a temporary.
2496 */
2497 static int proc_taint(struct ctl_table *table, int write,
2498 void __user *buffer, size_t *lenp, loff_t *ppos)
2499 {
2500 struct ctl_table t;
2501 unsigned long tmptaint = get_taint();
2502 int err;
2503
2504 if (write && !capable(CAP_SYS_ADMIN))
2505 return -EPERM;
2506
2507 t = *table;
2508 t.data = &tmptaint;
2509 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2510 if (err < 0)
2511 return err;
2512
2513 if (write) {
2514 /*
2515 * Poor man's atomic or. Not worth adding a primitive
2516 * to everyone's atomic.h for this
2517 */
2518 int i;
2519 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2520 if ((tmptaint >> i) & 1)
2521 add_taint(i, LOCKDEP_STILL_OK);
2522 }
2523 }
2524
2525 return err;
2526 }
2527
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() variant whose writes are restricted to callers
 * holding CAP_SYS_ADMIN; reads are passed straight through.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	/* The capability is only checked for writes. */
	bool permitted = !write || capable(CAP_SYS_ADMIN);

	if (!permitted)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
2538
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback for proc_dointvec_minmax().
 *
 * On a write, the sign (*@negp) and magnitude (*@lvalp) are combined
 * into an int, which must be representable and must lie within the
 * optional bounds in @data (a struct do_proc_dointvec_minmax_conv_param;
 * a NULL bound is not checked).  -EINVAL is returned and *@valp is left
 * alone if either check fails.
 *
 * On a read, *@valp is split back into sign and magnitude.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *range = data;
	unsigned long limit;
	int parsed;

	if (!write) {
		int cur = *valp;
		bool negative = cur < 0;

		*negp = negative;
		*lvalp = negative ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	/* A negative value may have a magnitude one larger than INT_MAX. */
	limit = (unsigned long) INT_MAX;
	if (*negp)
		limit += 1;
	if (*lvalp > limit)
		return -EINVAL;

	parsed = *negp ? -*lvalp : *lvalp;

	if (range->min && *range->min > parsed)
		return -EINVAL;
	if (range->max && *range->max < parsed)
		return -EINVAL;

	*valp = parsed;
	return 0;
}
2585
2586 /**
2587 * proc_dointvec_minmax - read a vector of integers with min/max values
2588 * @table: the sysctl table
2589 * @write: %TRUE if this is a write to the sysctl file
2590 * @buffer: the user buffer
2591 * @lenp: the size of the user buffer
2592 * @ppos: file position
2593 *
2594 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2595 * values from/to the user buffer, treated as an ASCII string.
2596 *
2597 * This routine will ensure the values are within the range specified by
2598 * table->extra1 (min) and table->extra2 (max).
2599 *
2600 * Returns 0 on success or -EINVAL on write when the range check fails.
2601 */
2602 int proc_dointvec_minmax(struct ctl_table *table, int write,
2603 void __user *buffer, size_t *lenp, loff_t *ppos)
2604 {
2605 struct do_proc_dointvec_minmax_conv_param param = {
2606 .min = (int *) table->extra1,
2607 .max = (int *) table->extra2,
2608 };
2609 return do_proc_dointvec(table, write, buffer, lenp, ppos,
2610 do_proc_dointvec_minmax_conv, &param);
2611 }
2612
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback for proc_douintvec_minmax().
 *
 * On a write, *@lvalp must fit in an unsigned int (-EINVAL otherwise)
 * and must lie within the optional bounds in @data, a struct
 * do_proc_douintvec_minmax_conv_param (-ERANGE otherwise; a NULL bound
 * is not checked).  *@valp is only updated when both checks pass.
 *
 * On a read, *@valp is widened into *@lvalp.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *range = data;
	unsigned int narrowed;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;
	narrowed = *lvalp;

	if (range->min && *range->min > narrowed)
		return -ERANGE;
	if (range->max && *range->max < narrowed)
		return -ERANGE;

	*valp = narrowed;
	return 0;
}
2651
2652 /**
2653 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2654 * @table: the sysctl table
2655 * @write: %TRUE if this is a write to the sysctl file
2656 * @buffer: the user buffer
2657 * @lenp: the size of the user buffer
2658 * @ppos: file position
2659 *
2660 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2661 * values from/to the user buffer, treated as an ASCII string. Negative
2662 * strings are not allowed.
2663 *
2664 * This routine will ensure the values are within the range specified by
2665 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2666 * check for UINT_MAX to avoid having to support wrap around uses from
2667 * userspace.
2668 *
2669 * Returns 0 on success or -ERANGE on write when the range check fails.
2670 */
2671 int proc_douintvec_minmax(struct ctl_table *table, int write,
2672 void __user *buffer, size_t *lenp, loff_t *ppos)
2673 {
2674 struct do_proc_douintvec_minmax_conv_param param = {
2675 .min = (unsigned int *) table->extra1,
2676 .max = (unsigned int *) table->extra2,
2677 };
2678 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2679 do_proc_douintvec_minmax_conv, &param);
2680 }
2681
/*
 * Conversion callback for proc_dopipe_max_size().
 *
 * On a write, the requested size is passed through round_pipe_size();
 * a result of 0 is treated as invalid and reported as -EINVAL without
 * touching *@valp.  On a read, *@valp is widened into *@lvalp.
 * @data is unused.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	unsigned int rounded;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	rounded = round_pipe_size(*lvalp);
	if (rounded == 0)
		return -EINVAL;

	*valp = rounded;
	return 0;
}
2701
2702 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2703 void __user *buffer, size_t *lenp, loff_t *ppos)
2704 {
2705 return do_proc_douintvec(table, write, buffer, lenp, ppos,
2706 do_proc_dopipe_max_size_conv, NULL);
2707 }
2708
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern starts
 * with neither '/' (absolute path) nor '|' (pipe handler).  Only logs;
 * nothing is changed.  Compiles to an empty function without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	char lead = core_pattern[0];

	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (lead == '/' || lead == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2722
2723 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2724 void __user *buffer, size_t *lenp, loff_t *ppos)
2725 {
2726 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2727 if (!error)
2728 validate_coredump_safety();
2729 return error;
2730 }
2731
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the core dump
 * configuration via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
#endif
2742
2743 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2744 void __user *buffer,
2745 size_t *lenp, loff_t *ppos,
2746 unsigned long convmul,
2747 unsigned long convdiv)
2748 {
2749 unsigned long *i, *min, *max;
2750 int vleft, first = 1, err = 0;
2751 size_t left;
2752 char *kbuf = NULL, *p;
2753
2754 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2755 *lenp = 0;
2756 return 0;
2757 }
2758
2759 i = (unsigned long *) data;
2760 min = (unsigned long *) table->extra1;
2761 max = (unsigned long *) table->extra2;
2762 vleft = table->maxlen / sizeof(unsigned long);
2763 left = *lenp;
2764
2765 if (write) {
2766 if (proc_first_pos_non_zero_ignore(ppos, table))
2767 goto out;
2768
2769 if (left > PAGE_SIZE - 1)
2770 left = PAGE_SIZE - 1;
2771 p = kbuf = memdup_user_nul(buffer, left);
2772 if (IS_ERR(kbuf))
2773 return PTR_ERR(kbuf);
2774 }
2775
2776 for (; left && vleft--; i++, first = 0) {
2777 unsigned long val;
2778
2779 if (write) {
2780 bool neg;
2781
2782 left -= proc_skip_spaces(&p);
2783 if (!left)
2784 break;
2785
2786 err = proc_get_long(&p, &left, &val, &neg,
2787 proc_wspace_sep,
2788 sizeof(proc_wspace_sep), NULL);
2789 if (err)
2790 break;
2791 if (neg)
2792 continue;
2793 val = convmul * val / convdiv;
2794 if ((min && val < *min) || (max && val > *max)) {
2795 err = -EINVAL;
2796 break;
2797 }
2798 *i = val;
2799 } else {
2800 val = convdiv * (*i) / convmul;
2801 if (!first) {
2802 err = proc_put_char(&buffer, &left, '\t');
2803 if (err)
2804 break;
2805 }
2806 err = proc_put_long(&buffer, &left, val, false);
2807 if (err)
2808 break;
2809 }
2810 }
2811
2812 if (!write && !first && left && !err)
2813 err = proc_put_char(&buffer, &left, '\n');
2814 if (write && !err)
2815 left -= proc_skip_spaces(&p);
2816 if (write) {
2817 kfree(kbuf);
2818 if (first)
2819 return err ? : -EINVAL;
2820 }
2821 *lenp -= left;
2822 out:
2823 *ppos += *lenp;
2824 return err;
2825 }
2826
2827 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2828 void __user *buffer,
2829 size_t *lenp, loff_t *ppos,
2830 unsigned long convmul,
2831 unsigned long convdiv)
2832 {
2833 return __do_proc_doulongvec_minmax(table->data, table, write,
2834 buffer, lenp, ppos, convmul, convdiv);
2835 }
2836
2837 /**
2838 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2839 * @table: the sysctl table
2840 * @write: %TRUE if this is a write to the sysctl file
2841 * @buffer: the user buffer
2842 * @lenp: the size of the user buffer
2843 * @ppos: file position
2844 *
2845 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2846 * values from/to the user buffer, treated as an ASCII string.
2847 *
2848 * This routine will ensure the values are within the range specified by
2849 * table->extra1 (min) and table->extra2 (max).
2850 *
2851 * Returns 0 on success.
2852 */
2853 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2854 void __user *buffer, size_t *lenp, loff_t *ppos)
2855 {
2856 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2857 }
2858
2859 /**
2860 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2861 * @table: the sysctl table
2862 * @write: %TRUE if this is a write to the sysctl file
2863 * @buffer: the user buffer
2864 * @lenp: the size of the user buffer
2865 * @ppos: file position
2866 *
2867 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2868 * values from/to the user buffer, treated as an ASCII string. The values
2869 * are treated as milliseconds, and converted to jiffies when they are stored.
2870 *
2871 * This routine will ensure the values are within the range specified by
2872 * table->extra1 (min) and table->extra2 (max).
2873 *
2874 * Returns 0 on success.
2875 */
2876 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2877 void __user *buffer,
2878 size_t *lenp, loff_t *ppos)
2879 {
2880 return do_proc_doulongvec_minmax(table, write, buffer,
2881 lenp, ppos, HZ, 1000l);
2882 }
2883
2884
2885 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2886 int *valp,
2887 int write, void *data)
2888 {
2889 if (write) {
2890 if (*lvalp > INT_MAX / HZ)
2891 return 1;
2892 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2893 } else {
2894 int val = *valp;
2895 unsigned long lval;
2896 if (val < 0) {
2897 *negp = true;
2898 lval = -(unsigned long)val;
2899 } else {
2900 *negp = false;
2901 lval = (unsigned long)val;
2902 }
2903 *lvalp = lval / HZ;
2904 }
2905 return 0;
2906 }
2907
2908 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2909 int *valp,
2910 int write, void *data)
2911 {
2912 if (write) {
2913 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2914 return 1;
2915 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2916 } else {
2917 int val = *valp;
2918 unsigned long lval;
2919 if (val < 0) {
2920 *negp = true;
2921 lval = -(unsigned long)val;
2922 } else {
2923 *negp = false;
2924 lval = (unsigned long)val;
2925 }
2926 *lvalp = jiffies_to_clock_t(lval);
2927 }
2928 return 0;
2929 }
2930
/*
 * Conversion callback for proc_dointvec_ms_jiffies(): user-visible
 * milliseconds <-> stored jiffies.
 *
 * On a write, the signed value goes through msecs_to_jiffies(); a result
 * above INT_MAX is refused by returning 1.  On a read, *@valp is split
 * into sign and magnitude and the magnitude goes through
 * jiffies_to_msecs().  @data is unused.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	bool negative;
	int cur;

	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
		return 0;
	}

	cur = *valp;
	negative = cur < 0;
	magnitude = negative ? -(unsigned long)cur : (unsigned long)cur;

	*negp = negative;
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
2955
2956 /**
2957 * proc_dointvec_jiffies - read a vector of integers as seconds
2958 * @table: the sysctl table
2959 * @write: %TRUE if this is a write to the sysctl file
2960 * @buffer: the user buffer
2961 * @lenp: the size of the user buffer
2962 * @ppos: file position
2963 *
2964 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2965 * values from/to the user buffer, treated as an ASCII string.
2966 * The values read are assumed to be in seconds, and are converted into
2967 * jiffies.
2968 *
2969 * Returns 0 on success.
2970 */
2971 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2972 void __user *buffer, size_t *lenp, loff_t *ppos)
2973 {
2974 return do_proc_dointvec(table,write,buffer,lenp,ppos,
2975 do_proc_dointvec_jiffies_conv,NULL);
2976 }
2977
2978 /**
2979 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2980 * @table: the sysctl table
2981 * @write: %TRUE if this is a write to the sysctl file
2982 * @buffer: the user buffer
2983 * @lenp: the size of the user buffer
2984 * @ppos: pointer to the file position
2985 *
2986 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2987 * values from/to the user buffer, treated as an ASCII string.
2988 * The values read are assumed to be in 1/USER_HZ seconds, and
2989 * are converted into jiffies.
2990 *
2991 * Returns 0 on success.
2992 */
2993 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2994 void __user *buffer, size_t *lenp, loff_t *ppos)
2995 {
2996 return do_proc_dointvec(table,write,buffer,lenp,ppos,
2997 do_proc_dointvec_userhz_jiffies_conv,NULL);
2998 }
2999
3000 /**
3001 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3002 * @table: the sysctl table
3003 * @write: %TRUE if this is a write to the sysctl file
3004 * @buffer: the user buffer
3005 * @lenp: the size of the user buffer
3006 * @ppos: file position
3007 * @ppos: the current position in the file
3008 *
3009 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3010 * values from/to the user buffer, treated as an ASCII string.
3011 * The values read are assumed to be in 1/1000 seconds, and
3012 * are converted into jiffies.
3013 *
3014 * Returns 0 on success.
3015 */
3016 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3017 void __user *buffer, size_t *lenp, loff_t *ppos)
3018 {
3019 return do_proc_dointvec(table, write, buffer, lenp, ppos,
3020 do_proc_dointvec_ms_jiffies_conv, NULL);
3021 }
3022
3023 static int proc_do_cad_pid(struct ctl_table *table, int write,
3024 void __user *buffer, size_t *lenp, loff_t *ppos)
3025 {
3026 struct pid *new_pid;
3027 pid_t tmp;
3028 int r;
3029
3030 tmp = pid_vnr(cad_pid);
3031
3032 r = __do_proc_dointvec(&tmp, table, write, buffer,
3033 lenp, ppos, NULL, NULL);
3034 if (r || !write)
3035 return r;
3036
3037 new_pid = find_get_pid(tmp);
3038 if (!new_pid)
3039 return -ESRCH;
3040
3041 put_pid(xchg(&cad_pid, new_pid));
3042 return 0;
3043 }
3044
3045 /**
3046 * proc_do_large_bitmap - read/write from/to a large bitmap
3047 * @table: the sysctl table
3048 * @write: %TRUE if this is a write to the sysctl file
3049 * @buffer: the user buffer
3050 * @lenp: the size of the user buffer
3051 * @ppos: file position
3052 *
3053 * The bitmap is stored at table->data and the bitmap length (in bits)
3054 * in table->maxlen.
3055 *
3056 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3057 * large bitmaps may be represented in a compact manner. Writing into
3058 * the file will clear the bitmap then update it with the given input.
3059 *
3060 * Returns 0 on success.
3061 */
3062 int proc_do_large_bitmap(struct ctl_table *table, int write,
3063 void __user *buffer, size_t *lenp, loff_t *ppos)
3064 {
3065 int err = 0;
3066 bool first = 1;
3067 size_t left = *lenp;
3068 unsigned long bitmap_len = table->maxlen;
3069 unsigned long *bitmap = *(unsigned long **) table->data;
3070 unsigned long *tmp_bitmap = NULL;
3071 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3072
3073 if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3074 *lenp = 0;
3075 return 0;
3076 }
3077
3078 if (write) {
3079 char *kbuf, *p;
3080
3081 if (left > PAGE_SIZE - 1)
3082 left = PAGE_SIZE - 1;
3083
3084 p = kbuf = memdup_user_nul(buffer, left);
3085 if (IS_ERR(kbuf))
3086 return PTR_ERR(kbuf);
3087
3088 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3089 sizeof(unsigned long),
3090 GFP_KERNEL);
3091 if (!tmp_bitmap) {
3092 kfree(kbuf);
3093 return -ENOMEM;
3094 }
3095 proc_skip_char(&p, &left, '\n');
3096 while (!err && left) {
3097 unsigned long val_a, val_b;
3098 bool neg;
3099
3100 err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3101 sizeof(tr_a), &c);
3102 if (err)
3103 break;
3104 if (val_a >= bitmap_len || neg) {
3105 err = -EINVAL;
3106 break;
3107 }
3108
3109 val_b = val_a;
3110 if (left) {
3111 p++;
3112 left--;
3113 }
3114
3115 if (c == '-') {
3116 err = proc_get_long(&p, &left, &val_b,
3117 &neg, tr_b, sizeof(tr_b),
3118 &c);
3119 if (err)
3120 break;
3121 if (val_b >= bitmap_len || neg ||
3122 val_a > val_b) {
3123 err = -EINVAL;
3124 break;
3125 }
3126 if (left) {
3127 p++;
3128 left--;
3129 }
3130 }
3131
3132 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3133 first = 0;
3134 proc_skip_char(&p, &left, '\n');
3135 }
3136 kfree(kbuf);
3137 } else {
3138 unsigned long bit_a, bit_b = 0;
3139
3140 while (left) {
3141 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3142 if (bit_a >= bitmap_len)
3143 break;
3144 bit_b = find_next_zero_bit(bitmap, bitmap_len,
3145 bit_a + 1) - 1;
3146
3147 if (!first) {
3148 err = proc_put_char(&buffer, &left, ',');
3149 if (err)
3150 break;
3151 }
3152 err = proc_put_long(&buffer, &left, bit_a, false);
3153 if (err)
3154 break;
3155 if (bit_a != bit_b) {
3156 err = proc_put_char(&buffer, &left, '-');
3157 if (err)
3158 break;
3159 err = proc_put_long(&buffer, &left, bit_b, false);
3160 if (err)
3161 break;
3162 }
3163
3164 first = 0; bit_b++;
3165 }
3166 if (!err)
3167 err = proc_put_char(&buffer, &left, '\n');
3168 }
3169
3170 if (!err) {
3171 if (write) {
3172 if (*ppos)
3173 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3174 else
3175 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3176 }
3177 *lenp -= left;
3178 *ppos += *lenp;
3179 }
3180
3181 kfree(tmp_bitmap);
3182 return err;
3183 }
3184
3185 #else /* CONFIG_PROC_SYSCTL */
3186
3187 int proc_dostring(struct ctl_table *table, int write,
3188 void __user *buffer, size_t *lenp, loff_t *ppos)
3189 {
3190 return -ENOSYS;
3191 }
3192
3193 int proc_dointvec(struct ctl_table *table, int write,
3194 void __user *buffer, size_t *lenp, loff_t *ppos)
3195 {
3196 return -ENOSYS;
3197 }
3198
3199 int proc_douintvec(struct ctl_table *table, int write,
3200 void __user *buffer, size_t *lenp, loff_t *ppos)
3201 {
3202 return -ENOSYS;
3203 }
3204
3205 int proc_dointvec_minmax(struct ctl_table *table, int write,
3206 void __user *buffer, size_t *lenp, loff_t *ppos)
3207 {
3208 return -ENOSYS;
3209 }
3210
3211 int proc_douintvec_minmax(struct ctl_table *table, int write,
3212 void __user *buffer, size_t *lenp, loff_t *ppos)
3213 {
3214 return -ENOSYS;
3215 }
3216
3217 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3218 void __user *buffer, size_t *lenp, loff_t *ppos)
3219 {
3220 return -ENOSYS;
3221 }
3222
3223 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3224 void __user *buffer, size_t *lenp, loff_t *ppos)
3225 {
3226 return -ENOSYS;
3227 }
3228
3229 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3230 void __user *buffer, size_t *lenp, loff_t *ppos)
3231 {
3232 return -ENOSYS;
3233 }
3234
3235 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3236 void __user *buffer, size_t *lenp, loff_t *ppos)
3237 {
3238 return -ENOSYS;
3239 }
3240
3241 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3242 void __user *buffer,
3243 size_t *lenp, loff_t *ppos)
3244 {
3245 return -ENOSYS;
3246 }
3247
3248
3249 #endif /* CONFIG_PROC_SYSCTL */
3250
/*
 * The handlers above are each defined twice (real implementation and
 * !CONFIG_PROC_SYSCTL stub), so the exports are collected here once
 * instead of following every definition -- exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);