]> git.ipfire.org Git - thirdparty/chrony.git/blob - sys_linux.c
conf: rework allow/deny parser
[thirdparty/chrony.git] / sys_linux.c
1 /*
2 chronyd/chronyc - Programs for keeping computer clocks accurate.
3
4 **********************************************************************
5 * Copyright (C) Richard P. Curnow 1997-2003
6 * Copyright (C) John G. Hasler 2009
7 * Copyright (C) Miroslav Lichvar 2009-2012, 2014-2018
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 *
22 **********************************************************************
23
24 =======================================================================
25
26 This is the module specific to the Linux operating system.
27
28 */
29
30 #include "config.h"
31
32 #include "sysincl.h"
33
34 #include <sys/utsname.h>
35
36 #if defined(FEAT_PHC) || defined(HAVE_LINUX_TIMESTAMPING)
37 #include <linux/ptp_clock.h>
38 #endif
39
40 #ifdef FEAT_SCFILTER
41 #include <sys/prctl.h>
42 #include <seccomp.h>
43 #include <termios.h>
44 #ifdef FEAT_PPS
45 #include <linux/pps.h>
46 #endif
47 #ifdef FEAT_RTC
48 #include <linux/rtc.h>
49 #endif
50 #ifdef HAVE_LINUX_TIMESTAMPING
51 #include <linux/sockios.h>
52 #endif
53 #endif
54
55 #ifdef FEAT_PRIVDROP
56 #include <sys/prctl.h>
57 #include <sys/capability.h>
58 #endif
59
60 #include "sys_linux.h"
61 #include "sys_timex.h"
62 #include "conf.h"
63 #include "local.h"
64 #include "logging.h"
65 #include "privops.h"
66 #include "util.h"
67
/* Scale factor converting a frequency in ppm to the units of the timex freq
   field (1 << 16 units per ppm).  The expansion is fully parenthesised so
   that the macro behaves as a single operand in any expression context. */
#define FREQ_SCALE ((double)(1 << 16))

/* Fallback definitions of adjtimex() mode bits, used only when the system
   headers do not provide them */
#ifndef ADJ_SETOFFSET
#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
#endif
#ifndef ADJ_NANO
#define ADJ_NANO 0x2000 /* select nanosecond resolution */
#endif
78
/* System tick value with no compensation applied */
static int nominal_tick;

/* Difference between the nominal tick and the tick currently set in the
   kernel (nominal_tick - tick) */
static int current_delta_tick;

/* Largest bias of 'tick' away from 'nominal_tick' that is allowed
   (sys_adjtimex() in the kernel bounds this to 10%) */
static int max_tick_bias;

/* Value of the kernel USER_HZ constant, as an integer ... */
static int hz;
/* ... and the same value in double precision for use in arithmetic */
static double dhz;

/* Non-zero if adjtimex() can be used to step the clock */
static int have_setoffset;

/* Assumed rate at which the kernel updates the effective frequency and
   tick values */
static int tick_update_hz;
99
100 /* ================================================== */
101
/* Round a double to the nearest integer, with halfway cases going away from
   zero.  Half is added to (or subtracted from) the value according to its
   sign and the result is then converted to long. */
static inline long
our_round(double x)
{
  long rounded = (x > 0.0) ? x + 0.5 : x - 0.5;

  return rounded;
}
114
115 /* ================================================== */
/* Step the clock by the negated offset using ADJ_SETOFFSET.  A positive
   offset means the clock is currently fast of true time, i.e. the clock is
   stepped backwards.  Returns 1 on success and 0 if the adjustment failed. */

static int
apply_step_offset(double offset)
{
  const double step = -offset;
  struct timex txc;

  txc.modes = ADJ_SETOFFSET | ADJ_NANO;

  /* Split the step into whole seconds and a nanosecond part (ADJ_NANO
     selects nanosecond resolution for the tv_usec field) */
  txc.time.tv_sec = step;
  txc.time.tv_usec = 1.0e9 * (step - txc.time.tv_sec);

  /* Borrow one second if the sub-second part came out negative */
  if (txc.time.tv_usec < 0) {
    txc.time.tv_usec += 1000000000;
    txc.time.tv_sec -= 1;
  }

  return SYS_Timex_Adjust(&txc, 1) < 0 ? 0 : 1;
}
136
137 /* ================================================== */
138 /* This call sets the Linux kernel frequency to a given value in parts
139 per million relative to the nominal running frequency. Nominal is taken to
140 be tick=10000, freq=0 (for a USER_HZ==100 system, other values otherwise).
141 The convention is that this is called with a positive argument if the local
142 clock runs fast when uncompensated. */
143
144 static double
145 set_frequency(double freq_ppm)
146 {
147 struct timex txc;
148 long required_tick;
149 double required_freq;
150 int required_delta_tick;
151
152 required_delta_tick = our_round(freq_ppm / dhz);
153
154 /* Older kernels (pre-2.6.18) don't apply the frequency offset exactly as
155 set by adjtimex() and a scaling constant (that depends on the internal
156 kernel HZ constant) would be needed to compensate for the error. Because
157 chronyd is closed loop it doesn't matter much if we don't scale the
158 required frequency, but we want to prevent thrashing between two states
159 when the system's frequency error is close to a multiple of USER_HZ. With
160 USER_HZ <= 250, the maximum frequency adjustment of 500 ppm overlaps at
161 least two ticks and we can stick to the current tick if it's next to the
162 required tick. */
163 if (hz <= 250 && (required_delta_tick + 1 == current_delta_tick ||
164 required_delta_tick - 1 == current_delta_tick)) {
165 required_delta_tick = current_delta_tick;
166 }
167
168 required_freq = -(freq_ppm - dhz * required_delta_tick);
169 required_tick = nominal_tick - required_delta_tick;
170
171 txc.modes = ADJ_TICK | ADJ_FREQUENCY;
172 txc.freq = required_freq * FREQ_SCALE;
173 txc.tick = required_tick;
174
175 SYS_Timex_Adjust(&txc, 0);
176
177 current_delta_tick = required_delta_tick;
178
179 return dhz * current_delta_tick - txc.freq / FREQ_SCALE;
180 }
181
182 /* ================================================== */
183 /* Read the ppm frequency from the kernel */
184
185 static double
186 read_frequency(void)
187 {
188 struct timex txc;
189
190 txc.modes = 0;
191
192 SYS_Timex_Adjust(&txc, 0);
193
194 current_delta_tick = nominal_tick - txc.tick;
195
196 return dhz * current_delta_tick - txc.freq / FREQ_SCALE;
197 }
198
199 /* ================================================== */
200
/* Guess USER_HZ from the value of txc.tick found when chronyd starts.
 * The only credible values are 100 (Linux/x86) or powers of 2.  The bounds
 * checking in the kernel's adjtimex system call limits tick to within
 * +/- 10% of the nominal value 1e6/USER_HZ.  If no candidate matches, a
 * fatal error is logged. */

static int
guess_hz(void)
{
  struct timex txc;
  int shift, tick;

  txc.modes = 0;
  SYS_Timex_Adjust(&txc, 0);
  tick = txc.tick;

  /* Handle the hz=100 case before trying the powers of two */
  if (tick >= 9000 && tick <= 11000)
    return 100;

  /* Candidates 16 .. 32768 should be a wide enough range */
  for (shift = 4; shift < 16; shift++) {
    int candidate_hz = 1 << shift;
    double nominal = 1.0e6 / (double) candidate_hz;
    int lower = (int)(0.5 + nominal*2.0/3.0);
    int upper = (int)(0.5 + nominal*4.0/3.0);

    if (tick > lower && tick <= upper)
      return candidate_hz;
  }

  /* Nothing matched, give up */
  LOG_FATAL("Can't determine hz from tick %d", tick);

  return 0;
}
238
239 /* ================================================== */
240
/* Return the value reported by sysconf(_SC_CLK_TCK), or 0 if it is not
   available or is smaller than 1 */
static int
get_hz(void)
{
#ifdef _SC_CLK_TCK
  int clk_tck = sysconf(_SC_CLK_TCK);

  return clk_tck < 1 ? 0 : clk_tck;
#else
  return 0;
#endif
}
255
256 /* ================================================== */
257
/* Compare two kernel versions given as (major, minor, patch) triples.
   The result is negative, zero or positive if the first version is older
   than, equal to or newer than the second one; its value is the difference
   of the first pair of components that are not equal. */
static int
kernelvercmp(int major1, int minor1, int patch1,
             int major2, int minor2, int patch2)
{
  int diff = major1 - major2;

  if (diff == 0)
    diff = minor1 - minor2;
  if (diff == 0)
    diff = patch1 - patch2;

  return diff;
}
268
269 /* ================================================== */
270
/* Parse the release string reported by uname() into major, minor and patch
   numbers.  The patch number is 0 if the release string does not contain
   one.  A fatal error is logged if uname() fails or if fewer than two
   numbers can be parsed. */
static void
get_kernel_version(int *major, int *minor, int *patch)
{
  struct utsname uts;
  int fields;

  if (uname(&uts) < 0) {
    LOG_FATAL("uname() failed");
  }

  /* The patch component is optional in the release string */
  *patch = 0;

  fields = sscanf(uts.release, "%d.%d.%d", major, minor, patch);
  if (fields < 2) {
    LOG_FATAL("Could not parse kernel version");
  }
}
283
284 /* ================================================== */
285
286 /* Compute the scaling to use on any frequency we set, according to
287 the vintage of the Linux kernel being used. */
288
289 static void
290 get_version_specific_details(void)
291 {
292 int major, minor, patch;
293
294 hz = get_hz();
295
296 if (!hz)
297 hz = guess_hz();
298
299 dhz = (double) hz;
300 nominal_tick = (1000000L + (hz/2))/hz; /* Mirror declaration in kernel */
301 max_tick_bias = nominal_tick / 10;
302
303 /* In modern kernels the frequency of the clock is updated immediately in the
304 adjtimex() system call. Assume a maximum delay of 10 microseconds. */
305 tick_update_hz = 100000;
306
307 get_kernel_version(&major, &minor, &patch);
308 DEBUG_LOG("Linux kernel major=%d minor=%d patch=%d", major, minor, patch);
309
310 if (kernelvercmp(major, minor, patch, 2, 2, 0) < 0) {
311 LOG_FATAL("Kernel version not supported, sorry.");
312 }
313
314 if (kernelvercmp(major, minor, patch, 2, 6, 27) >= 0 &&
315 kernelvercmp(major, minor, patch, 2, 6, 33) < 0) {
316 /* In tickless kernels before 2.6.33 the frequency is updated in
317 a half-second interval */
318 tick_update_hz = 2;
319 } else if (kernelvercmp(major, minor, patch, 4, 19, 0) < 0) {
320 /* In kernels before 4.19 the frequency is updated only on internal ticks
321 (CONFIG_HZ). As their rate cannot be reliably detected from the user
322 space, and it may not even be constant (CONFIG_NO_HZ - aka tickless),
323 assume the lowest commonly used constant rate */
324 tick_update_hz = 100;
325 }
326
327 /* ADJ_SETOFFSET support */
328 if (kernelvercmp(major, minor, patch, 2, 6, 39) < 0) {
329 have_setoffset = 0;
330 } else {
331 have_setoffset = 1;
332 }
333
334 DEBUG_LOG("hz=%d nominal_tick=%d max_tick_bias=%d tick_update_hz=%d",
335 hz, nominal_tick, max_tick_bias, tick_update_hz);
336 }
337
338 /* ================================================== */
339
/* Cancel any adjtime() offset remaining in the kernel by requesting a
   single-shot offset of zero */
static void
reset_adjtime_offset(void)
{
  struct timex txc;

  txc.offset = 0;
  txc.modes = ADJ_OFFSET_SINGLESHOT;

  SYS_Timex_Adjust(&txc, 0);
}
351
352 /* ================================================== */
353
/* Check whether the kernel really supports the ADJ_SETOFFSET mode.
   Returns 1 if it does and 0 otherwise. */
static int
test_step_offset(void)
{
  struct timex txc;

  /* The kernel doesn't return an error on an unknown mode, so the only way
     to verify the support seems to be to zero maxerror and check that it is
     reset to a maximum after ADJ_SETOFFSET */

  txc.maxerror = 0;
  txc.modes = MOD_MAXERROR;

  if (SYS_Timex_Adjust(&txc, 1) < 0)
    return 0;
  if (txc.maxerror != 0)
    return 0;

  /* Step by zero, which leaves the time unchanged */
  txc.time.tv_sec = 0;
  txc.time.tv_usec = 0;
  txc.modes = ADJ_SETOFFSET | ADJ_NANO;

  if (SYS_Timex_Adjust(&txc, 1) < 0)
    return 0;
  if (txc.maxerror < 100000)
    return 0;

  return 1;
}
379
380 /* ================================================== */
381
/* Log a warning if CAP_SYS_TIME is not supported or not present in the
   capability bounding set.  This does nothing when built without
   FEAT_PRIVDROP or when CAP_IS_SUPPORTED is not defined. */
static void
report_time_adjust_blockers(void)
{
#if defined(FEAT_PRIVDROP) && defined(CAP_IS_SUPPORTED)
  if (!CAP_IS_SUPPORTED(CAP_SYS_TIME) || !cap_get_bound(CAP_SYS_TIME)) {
    LOG(LOGS_WARN, "CAP_SYS_TIME not present");
  }
#endif
}
391
392 /* ================================================== */
393 /* Initialisation code for this module */
394
395 void
396 SYS_Linux_Initialise(void)
397 {
398 get_version_specific_details();
399
400 report_time_adjust_blockers();
401
402 reset_adjtime_offset();
403
404 if (have_setoffset && !test_step_offset()) {
405 LOG(LOGS_INFO, "adjtimex() doesn't support ADJ_SETOFFSET");
406 have_setoffset = 0;
407 }
408
409 SYS_Timex_InitialiseWithFunctions(1.0e6 * max_tick_bias / nominal_tick,
410 1.0 / tick_update_hz,
411 read_frequency, set_frequency,
412 have_setoffset ? apply_step_offset : NULL,
413 0.0, 0.0, NULL, NULL);
414 }
415
416 /* ================================================== */
/* Finalise this module.  There is no Linux-specific state to release; the
   call is passed on to the generic timex driver. */

void
SYS_Linux_Finalise(void)
{
  SYS_Timex_Finalise();
}
424
425 /* ================================================== */
426
427 #ifdef FEAT_PRIVDROP
428 void
429 SYS_Linux_DropRoot(uid_t uid, gid_t gid, SYS_ProcessContext context, int clock_control)
430 {
431 char cap_text[256];
432 cap_t cap;
433
434 if (prctl(PR_SET_KEEPCAPS, 1)) {
435 LOG_FATAL("prctl() failed");
436 }
437
438 UTI_DropRoot(uid, gid);
439
440 /* Keep CAP_NET_BIND_SERVICE if the NTP server sockets may need to be bound
441 to a privileged port.
442 Keep CAP_NET_RAW if an NTP socket may need to be bound to a device on
443 kernels before 5.7.
444 Keep CAP_SYS_TIME if the clock control is enabled. */
445 if (snprintf(cap_text, sizeof (cap_text), "%s %s %s",
446 (CNF_GetNTPPort() > 0 && CNF_GetNTPPort() < 1024) ?
447 "cap_net_bind_service=ep" : "",
448 (CNF_GetBindNtpInterface() || CNF_GetBindAcquisitionInterface()) &&
449 !SYS_Linux_CheckKernelVersion(5, 7) ? "cap_net_raw=ep" : "",
450 clock_control ? "cap_sys_time=ep" : "") >= sizeof (cap_text))
451 assert(0);
452
453 /* Helpers don't need any capabilities */
454 if (context != SYS_MAIN_PROCESS)
455 cap_text[0] = '\0';
456
457 if ((cap = cap_from_text(cap_text)) == NULL) {
458 LOG_FATAL("cap_from_text() failed");
459 }
460
461 if (cap_set_proc(cap)) {
462 LOG_FATAL("cap_set_proc() failed");
463 }
464
465 cap_free(cap);
466 }
467 #endif
468
469 /* ================================================== */
470
471 #ifdef FEAT_SCFILTER
/* Verify that the configuration can be used with the system call filter.
   A fatal error is logged if the mailonchange directive is enabled. */
static void
check_seccomp_applicability(void)
{
  int enabled;
  double threshold;
  char *user;

  CNF_GetMailOnChange(&enabled, &threshold, &user);

  if (!enabled)
    return;

  LOG_FATAL("mailonchange directive cannot be used with -F enabled");
}
483
484 /* ================================================== */
485
486 void
487 SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
488 {
489 const int allowed[] = {
490 /* Clock */
491 SCMP_SYS(adjtimex),
492 SCMP_SYS(clock_adjtime),
493 #ifdef __NR_clock_adjtime64
494 SCMP_SYS(clock_adjtime64),
495 #endif
496 SCMP_SYS(clock_gettime),
497 #ifdef __NR_clock_gettime64
498 SCMP_SYS(clock_gettime64),
499 #endif
500 SCMP_SYS(gettimeofday),
501 SCMP_SYS(settimeofday),
502 SCMP_SYS(time),
503
504 /* Process */
505 SCMP_SYS(clone),
506 #ifdef __NR_clone3
507 SCMP_SYS(clone3),
508 #endif
509 SCMP_SYS(exit),
510 SCMP_SYS(exit_group),
511 SCMP_SYS(getpid),
512 SCMP_SYS(getrlimit),
513 SCMP_SYS(getuid),
514 SCMP_SYS(getuid32),
515 SCMP_SYS(rt_sigaction),
516 SCMP_SYS(rt_sigreturn),
517 SCMP_SYS(rt_sigprocmask),
518 SCMP_SYS(set_tid_address),
519 SCMP_SYS(sigreturn),
520 SCMP_SYS(wait4),
521 SCMP_SYS(waitpid),
522
523 /* Memory */
524 SCMP_SYS(brk),
525 SCMP_SYS(madvise),
526 SCMP_SYS(mmap),
527 SCMP_SYS(mmap2),
528 SCMP_SYS(mprotect),
529 SCMP_SYS(mremap),
530 SCMP_SYS(munmap),
531 SCMP_SYS(shmdt),
532
533 /* Filesystem */
534 SCMP_SYS(_llseek),
535 SCMP_SYS(access),
536 SCMP_SYS(chmod),
537 SCMP_SYS(chown),
538 SCMP_SYS(chown32),
539 SCMP_SYS(faccessat),
540 SCMP_SYS(fchmodat),
541 SCMP_SYS(fchownat),
542 SCMP_SYS(fstat),
543 SCMP_SYS(fstat64),
544 SCMP_SYS(fstatat64),
545 SCMP_SYS(getdents),
546 SCMP_SYS(getdents64),
547 SCMP_SYS(lseek),
548 SCMP_SYS(lstat),
549 SCMP_SYS(lstat64),
550 SCMP_SYS(newfstatat),
551 SCMP_SYS(readlink),
552 SCMP_SYS(readlinkat),
553 SCMP_SYS(rename),
554 SCMP_SYS(renameat),
555 #ifdef __NR_renameat2
556 SCMP_SYS(renameat2),
557 #endif
558 SCMP_SYS(stat),
559 SCMP_SYS(stat64),
560 SCMP_SYS(statfs),
561 SCMP_SYS(statfs64),
562 #ifdef __NR_statx
563 SCMP_SYS(statx),
564 #endif
565 SCMP_SYS(unlink),
566 SCMP_SYS(unlinkat),
567
568 /* Socket */
569 SCMP_SYS(accept),
570 SCMP_SYS(bind),
571 SCMP_SYS(connect),
572 SCMP_SYS(getsockname),
573 SCMP_SYS(getsockopt),
574 SCMP_SYS(recv),
575 SCMP_SYS(recvfrom),
576 SCMP_SYS(recvmmsg),
577 #ifdef __NR_recvmmsg_time64
578 SCMP_SYS(recvmmsg_time64),
579 #endif
580 SCMP_SYS(recvmsg),
581 SCMP_SYS(send),
582 SCMP_SYS(sendmmsg),
583 SCMP_SYS(sendmsg),
584 SCMP_SYS(sendto),
585 SCMP_SYS(shutdown),
586 /* TODO: check socketcall arguments */
587 SCMP_SYS(socketcall),
588
589 /* General I/O */
590 SCMP_SYS(_newselect),
591 SCMP_SYS(close),
592 SCMP_SYS(open),
593 SCMP_SYS(openat),
594 SCMP_SYS(pipe),
595 SCMP_SYS(pipe2),
596 SCMP_SYS(poll),
597 SCMP_SYS(ppoll),
598 #ifdef __NR_ppoll_time64
599 SCMP_SYS(ppoll_time64),
600 #endif
601 SCMP_SYS(pread64),
602 SCMP_SYS(pselect6),
603 #ifdef __NR_pselect6_time64
604 SCMP_SYS(pselect6_time64),
605 #endif
606 SCMP_SYS(read),
607 SCMP_SYS(futex),
608 #ifdef __NR_futex_time64
609 SCMP_SYS(futex_time64),
610 #endif
611 SCMP_SYS(select),
612 SCMP_SYS(set_robust_list),
613 SCMP_SYS(write),
614
615 /* Miscellaneous */
616 SCMP_SYS(getrandom),
617 SCMP_SYS(sysinfo),
618 SCMP_SYS(uname),
619 };
620
621 const int denied_any[] = {
622 SCMP_SYS(execve),
623 #ifdef __NR_execveat
624 SCMP_SYS(execveat),
625 #endif
626 SCMP_SYS(fork),
627 SCMP_SYS(ptrace),
628 SCMP_SYS(vfork),
629 };
630
631 const int denied_ntske[] = {
632 SCMP_SYS(ioctl),
633 SCMP_SYS(setsockopt),
634 SCMP_SYS(socket),
635 };
636
637 const int socket_domains[] = {
638 AF_NETLINK, AF_UNIX, AF_INET,
639 #ifdef FEAT_IPV6
640 AF_INET6,
641 #endif
642 };
643
644 const static int socket_options[][2] = {
645 { SOL_IP, IP_PKTINFO }, { SOL_IP, IP_FREEBIND }, { SOL_IP, IP_TOS },
646 #ifdef FEAT_IPV6
647 { SOL_IPV6, IPV6_V6ONLY }, { SOL_IPV6, IPV6_RECVPKTINFO },
648 #endif
649 #ifdef SO_BINDTODEVICE
650 { SOL_SOCKET, SO_BINDTODEVICE },
651 #endif
652 { SOL_SOCKET, SO_BROADCAST }, { SOL_SOCKET, SO_REUSEADDR },
653 #ifdef SO_REUSEPORT
654 { SOL_SOCKET, SO_REUSEPORT },
655 #endif
656 { SOL_SOCKET, SO_TIMESTAMP }, { SOL_SOCKET, SO_TIMESTAMPNS },
657 #ifdef HAVE_LINUX_TIMESTAMPING
658 { SOL_SOCKET, SO_SELECT_ERR_QUEUE }, { SOL_SOCKET, SO_TIMESTAMPING },
659 #endif
660 };
661
662 const static int fcntls[] = { F_GETFD, F_SETFD, F_GETFL, F_SETFL };
663
664 const static unsigned long ioctls[] = {
665 FIONREAD, TCGETS,
666 #if defined(FEAT_PHC) || defined(HAVE_LINUX_TIMESTAMPING)
667 PTP_EXTTS_REQUEST, PTP_SYS_OFFSET,
668 #ifdef PTP_PIN_SETFUNC
669 PTP_PIN_SETFUNC,
670 #endif
671 #ifdef PTP_SYS_OFFSET_EXTENDED
672 PTP_SYS_OFFSET_EXTENDED,
673 #endif
674 #ifdef PTP_SYS_OFFSET_PRECISE
675 PTP_SYS_OFFSET_PRECISE,
676 #endif
677 #endif
678 #ifdef FEAT_PPS
679 PPS_FETCH,
680 #endif
681 #ifdef FEAT_RTC
682 RTC_RD_TIME, RTC_SET_TIME, RTC_UIE_ON, RTC_UIE_OFF,
683 #endif
684 #ifdef HAVE_LINUX_TIMESTAMPING
685 SIOCETHTOOL,
686 #endif
687 };
688
689 unsigned int default_action, deny_action;
690 scmp_filter_ctx *ctx;
691 int i;
692
693 /* Sign of the level determines the deny action (kill or SIGSYS).
694 At level 1, selected syscalls are allowed, others are denied.
695 At level 2, selected syscalls are denied, others are allowed. */
696
697 deny_action = level > 0 ? SCMP_ACT_KILL : SCMP_ACT_TRAP;
698 if (level < 0)
699 level = -level;
700
701 switch (level) {
702 case 1:
703 default_action = deny_action;
704 break;
705 case 2:
706 default_action = SCMP_ACT_ALLOW;
707 break;
708 default:
709 LOG_FATAL("Unsupported filter level");
710 }
711
712 if (context == SYS_MAIN_PROCESS) {
713 /* Check if the chronyd configuration is supported */
714 check_seccomp_applicability();
715
716 /* At level 1, start a helper process which will not have a seccomp filter.
717 It will be used for getaddrinfo(), for which it is difficult to maintain
718 a list of required system calls (with glibc it depends on what NSS
719 modules are installed and enabled on the system). */
720 if (default_action != SCMP_ACT_ALLOW)
721 PRV_StartHelper();
722 }
723
724 ctx = seccomp_init(default_action);
725 if (ctx == NULL)
726 LOG_FATAL("Failed to initialize seccomp");
727
728 if (default_action != SCMP_ACT_ALLOW) {
729 for (i = 0; i < sizeof (allowed) / sizeof (*allowed); i++) {
730 if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, allowed[i], 0) < 0)
731 goto add_failed;
732 }
733 } else {
734 for (i = 0; i < sizeof (denied_any) / sizeof (*denied_any); i++) {
735 if (seccomp_rule_add(ctx, deny_action, denied_any[i], 0) < 0)
736 goto add_failed;
737 }
738
739 if (context == SYS_NTSKE_HELPER) {
740 for (i = 0; i < sizeof (denied_ntske) / sizeof (*denied_ntske); i++) {
741 if (seccomp_rule_add(ctx, deny_action, denied_ntske[i], 0) < 0)
742 goto add_failed;
743 }
744 }
745 }
746
747 if (default_action != SCMP_ACT_ALLOW && context == SYS_MAIN_PROCESS) {
748 /* Allow opening sockets in selected domains */
749 for (i = 0; i < sizeof (socket_domains) / sizeof (*socket_domains); i++) {
750 if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(socket), 1,
751 SCMP_A0(SCMP_CMP_EQ, socket_domains[i])) < 0)
752 goto add_failed;
753 }
754
755 /* Allow selected socket options */
756 for (i = 0; i < sizeof (socket_options) / sizeof (*socket_options); i++) {
757 if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(setsockopt), 3,
758 SCMP_A1(SCMP_CMP_EQ, socket_options[i][0]),
759 SCMP_A2(SCMP_CMP_EQ, socket_options[i][1]),
760 SCMP_A4(SCMP_CMP_LE, sizeof (int))) < 0)
761 goto add_failed;
762 }
763
764 /* Allow selected fcntl calls */
765 for (i = 0; i < sizeof (fcntls) / sizeof (*fcntls); i++) {
766 if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(fcntl), 1,
767 SCMP_A1(SCMP_CMP_EQ, fcntls[i])) < 0 ||
768 seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(fcntl64), 1,
769 SCMP_A1(SCMP_CMP_EQ, fcntls[i])) < 0)
770 goto add_failed;
771 }
772
773 /* Allow selected ioctls */
774 for (i = 0; i < sizeof (ioctls) / sizeof (*ioctls); i++) {
775 if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(ioctl), 1,
776 SCMP_A1(SCMP_CMP_EQ, ioctls[i])) < 0)
777 goto add_failed;
778 }
779 }
780
781 if (seccomp_load(ctx) < 0)
782 LOG_FATAL("Failed to load seccomp rules");
783
784 LOG(context == SYS_MAIN_PROCESS ? LOGS_INFO : LOGS_DEBUG,
785 "Loaded seccomp filter (level %d)", level);
786 seccomp_release(ctx);
787 return;
788
789 add_failed:
790 LOG_FATAL("Failed to add seccomp rules");
791 }
792 #endif
793
794 /* ================================================== */
795
/* Return non-zero if the version of the running kernel is at least
   req_major.req_minor (the required patch level is taken as 0) */
int
SYS_Linux_CheckKernelVersion(int req_major, int req_minor)
{
  int major, minor, patch;

  get_kernel_version(&major, &minor, &patch);

  return kernelvercmp(major, minor, patch, req_major, req_minor, 0) >= 0;
}
805
806 /* ================================================== */
807
808 #if defined(FEAT_PHC) || defined(HAVE_LINUX_TIMESTAMPING)
809
810 #define PHC_READINGS 10
811
812 static int
813 process_phc_readings(struct timespec ts[][3], int n, double precision,
814 struct timespec *phc_ts, struct timespec *sys_ts, double *err)
815 {
816 double min_delay = 0.0, delays[PTP_MAX_SAMPLES], phc_sum, sys_sum, sys_prec;
817 int i, combined;
818
819 if (n > PTP_MAX_SAMPLES)
820 return 0;
821
822 for (i = 0; i < n; i++) {
823 delays[i] = UTI_DiffTimespecsToDouble(&ts[i][2], &ts[i][0]);
824
825 if (delays[i] < 0.0) {
826 /* Step in the middle of a PHC reading? */
827 DEBUG_LOG("Bad PTP_SYS_OFFSET sample delay=%e", delays[i]);
828 return 0;
829 }
830
831 if (!i || delays[i] < min_delay)
832 min_delay = delays[i];
833 }
834
835 sys_prec = LCL_GetSysPrecisionAsQuantum();
836
837 /* Combine best readings */
838 for (i = combined = 0, phc_sum = sys_sum = 0.0; i < n; i++) {
839 if (delays[i] > min_delay + MAX(sys_prec, precision))
840 continue;
841
842 phc_sum += UTI_DiffTimespecsToDouble(&ts[i][1], &ts[0][1]);
843 sys_sum += UTI_DiffTimespecsToDouble(&ts[i][0], &ts[0][0]) + delays[i] / 2.0;
844 combined++;
845 }
846
847 assert(combined);
848
849 UTI_AddDoubleToTimespec(&ts[0][1], phc_sum / combined, phc_ts);
850 UTI_AddDoubleToTimespec(&ts[0][0], sys_sum / combined, sys_ts);
851 *err = MAX(min_delay / 2.0, precision);
852
853 return 1;
854 }
855
856 /* ================================================== */
857
858 static int
859 get_phc_sample(int phc_fd, double precision, struct timespec *phc_ts,
860 struct timespec *sys_ts, double *err)
861 {
862 struct timespec ts[PHC_READINGS][3];
863 struct ptp_sys_offset sys_off;
864 int i;
865
866 /* Silence valgrind */
867 memset(&sys_off, 0, sizeof (sys_off));
868
869 sys_off.n_samples = PHC_READINGS;
870
871 if (ioctl(phc_fd, PTP_SYS_OFFSET, &sys_off)) {
872 DEBUG_LOG("ioctl(%s) failed : %s", "PTP_SYS_OFFSET", strerror(errno));
873 return 0;
874 }
875
876 for (i = 0; i < PHC_READINGS; i++) {
877 ts[i][0].tv_sec = sys_off.ts[i * 2].sec;
878 ts[i][0].tv_nsec = sys_off.ts[i * 2].nsec;
879 ts[i][1].tv_sec = sys_off.ts[i * 2 + 1].sec;
880 ts[i][1].tv_nsec = sys_off.ts[i * 2 + 1].nsec;
881 ts[i][2].tv_sec = sys_off.ts[i * 2 + 2].sec;
882 ts[i][2].tv_nsec = sys_off.ts[i * 2 + 2].nsec;
883 }
884
885 return process_phc_readings(ts, PHC_READINGS, precision, phc_ts, sys_ts, err);
886 }
887
888 /* ================================================== */
889
/* Get a sample of the PHC using the PTP_SYS_OFFSET_EXTENDED ioctl.
   PHC_READINGS readings are requested and passed to process_phc_readings(),
   whose result is returned.  0 is returned if the ioctl fails, or if the
   ioctl was not defined at build time. */
static int
get_extended_phc_sample(int phc_fd, double precision, struct timespec *phc_ts,
                        struct timespec *sys_ts, double *err)
{
#ifdef PTP_SYS_OFFSET_EXTENDED
  struct timespec ts[PHC_READINGS][3];
  struct ptp_sys_offset_extended sys_off;
  int i, j;

  /* Silence valgrind */
  memset(&sys_off, 0, sizeof (sys_off));

  sys_off.n_samples = PHC_READINGS;

  if (ioctl(phc_fd, PTP_SYS_OFFSET_EXTENDED, &sys_off) != 0) {
    DEBUG_LOG("ioctl(%s) failed : %s", "PTP_SYS_OFFSET_EXTENDED", strerror(errno));
    return 0;
  }

  /* Here each reading has its own triplet of timestamps */
  for (i = 0; i < PHC_READINGS; i++) {
    for (j = 0; j < 3; j++) {
      ts[i][j].tv_sec = sys_off.ts[i][j].sec;
      ts[i][j].tv_nsec = sys_off.ts[i][j].nsec;
    }
  }

  return process_phc_readings(ts, PHC_READINGS, precision, phc_ts, sys_ts, err);
#else
  return 0;
#endif
}
923
924 /* ================================================== */
925
/* Get a sample of the PHC using the PTP_SYS_OFFSET_PRECISE ioctl.  The
   device and system realtime timestamps returned by the ioctl are stored
   in *phc_ts and *sys_ts, and *err is set to the larger of the system
   precision and the specified precision.  Returns 1 on success and 0 if the
   ioctl fails or was not defined at build time. */
static int
get_precise_phc_sample(int phc_fd, double precision, struct timespec *phc_ts,
                       struct timespec *sys_ts, double *err)
{
#ifdef PTP_SYS_OFFSET_PRECISE
  struct ptp_sys_offset_precise sys_off;

  /* Silence valgrind */
  memset(&sys_off, 0, sizeof (sys_off));

  if (ioctl(phc_fd, PTP_SYS_OFFSET_PRECISE, &sys_off) != 0) {
    DEBUG_LOG("ioctl(%s) failed : %s", "PTP_SYS_OFFSET_PRECISE",
              strerror(errno));
    return 0;
  }

  sys_ts->tv_sec = sys_off.sys_realtime.sec;
  sys_ts->tv_nsec = sys_off.sys_realtime.nsec;
  phc_ts->tv_sec = sys_off.device.sec;
  phc_ts->tv_nsec = sys_off.device.nsec;
  *err = MAX(LCL_GetSysPrecisionAsQuantum(), precision);

  return 1;
#else
  return 0;
#endif
}
953
954 /* ================================================== */
955
956 int
957 SYS_Linux_OpenPHC(const char *path, int phc_index)
958 {
959 struct ptp_clock_caps caps;
960 char phc_path[64];
961 int phc_fd;
962
963 if (!path) {
964 if (snprintf(phc_path, sizeof (phc_path), "/dev/ptp%d", phc_index) >= sizeof (phc_path))
965 return -1;
966 path = phc_path;
967 }
968
969 phc_fd = open(path, O_RDONLY);
970 if (phc_fd < 0) {
971 LOG(LOGS_ERR, "Could not open %s : %s", path, strerror(errno));
972 return -1;
973 }
974
975 /* Make sure it is a PHC */
976 if (ioctl(phc_fd, PTP_CLOCK_GETCAPS, &caps)) {
977 LOG(LOGS_ERR, "ioctl(%s) failed : %s", "PTP_CLOCK_GETCAPS", strerror(errno));
978 close(phc_fd);
979 return -1;
980 }
981
982 UTI_FdSetCloexec(phc_fd);
983
984 return phc_fd;
985 }
986
987 /* ================================================== */
988
/* Get a sample of the PHC.

   *reading_mode selects the method: 1 = PTP_SYS_OFFSET, 2 = precise
   (cross timestamping), 3 = extended.  If it is 0, the methods are tried
   in the order precise, extended, basic and *reading_mode is set to the
   first one that succeeds.  The precise method is skipped if nocrossts
   is non-zero.

   Returns 1 if a sample was obtained and 0 otherwise. */
int
SYS_Linux_GetPHCSample(int fd, int nocrossts, double precision, int *reading_mode,
                       struct timespec *phc_ts, struct timespec *sys_ts, double *err)
{
  if ((*reading_mode == 2 || *reading_mode == 0) && !nocrossts &&
      get_precise_phc_sample(fd, precision, phc_ts, sys_ts, err)) {
    *reading_mode = 2;
    return 1;
  }

  if ((*reading_mode == 3 || *reading_mode == 0) &&
      get_extended_phc_sample(fd, precision, phc_ts, sys_ts, err)) {
    *reading_mode = 3;
    return 1;
  }

  if ((*reading_mode == 1 || *reading_mode == 0) &&
      get_phc_sample(fd, precision, phc_ts, sys_ts, err)) {
    *reading_mode = 1;
    return 1;
  }

  return 0;
}
1008
1009 /* ================================================== */
1010
1011 int
1012 SYS_Linux_SetPHCExtTimestamping(int fd, int pin, int channel,
1013 int rising, int falling, int enable)
1014 {
1015 struct ptp_extts_request extts_req;
1016 #ifdef PTP_PIN_SETFUNC
1017 struct ptp_pin_desc pin_desc;
1018
1019 memset(&pin_desc, 0, sizeof (pin_desc));
1020 pin_desc.index = pin;
1021 pin_desc.func = enable ? PTP_PF_EXTTS : PTP_PF_NONE;
1022 pin_desc.chan = channel;
1023
1024 if (ioctl(fd, PTP_PIN_SETFUNC, &pin_desc)) {
1025 DEBUG_LOG("ioctl(%s) failed : %s", "PTP_PIN_SETFUNC", strerror(errno));
1026 return 0;
1027 }
1028 #else
1029 DEBUG_LOG("Missing PTP_PIN_SETFUNC");
1030 return 0;
1031 #endif
1032
1033 memset(&extts_req, 0, sizeof (extts_req));
1034 extts_req.index = channel;
1035 extts_req.flags = (enable ? PTP_ENABLE_FEATURE : 0) |
1036 (rising ? PTP_RISING_EDGE : 0) |
1037 (falling ? PTP_FALLING_EDGE : 0);
1038
1039 if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_req)) {
1040 DEBUG_LOG("ioctl(%s) failed : %s", "PTP_EXTTS_REQUEST", strerror(errno));
1041 return 0;
1042 }
1043
1044 return 1;
1045 }
1046
1047 /* ================================================== */
1048
1049 int
1050 SYS_Linux_ReadPHCExtTimestamp(int fd, struct timespec *phc_ts, int *channel)
1051 {
1052 struct ptp_extts_event extts_event;
1053
1054 if (read(fd, &extts_event, sizeof (extts_event)) != sizeof (extts_event)) {
1055 DEBUG_LOG("Could not read PHC extts event");
1056 return 0;
1057 }
1058
1059 phc_ts->tv_sec = extts_event.t.sec;
1060 phc_ts->tv_nsec = extts_event.t.nsec;
1061 *channel = extts_event.index;
1062
1063 return 1;
1064 }
1065
1066 #endif