2 chronyd/chronyc - Programs for keeping computer clocks accurate.
4 **********************************************************************
5 * Copyright (C) Richard P. Curnow 1997-2003
6 * Copyright (C) Miroslav Lichvar 2011, 2016-2017
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of version 2 of the GNU General Public License as
10 * published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 **********************************************************************
23 =======================================================================
25 Regression algorithms.
/* RGR_WeightedRegression: fit a straight line y = b0 + b1*x to n data
   points by weighted least squares.  Each point's contribution is
   divided by w[i], so a LARGER weight marks a LESS certain point.
   NOTE(review): this extract is garbled — statements are split across
   physical lines and interior lines of the original are missing (the
   embedded numbering jumps, e.g. 59 -> 77), so the declarations of
   P, Q, W, u, ui, aa, diff and the loop headers around the statements
   below are not visible here.  Code left byte-identical. */
40 RGR_WeightedRegression
41 (double *x
, /* independent variable */
42 double *y
, /* measured data */
43 double *w
, /* weightings (large => data
46 int n
, /* number of data points */
48 /* And now the results */
50 double *b0
, /* estimated y axis intercept */
51 double *b1
, /* estimated slope */
52 double *s2
, /* estimated variance of data points */
54 double *sb0
, /* estimated standard deviation of
56 double *sb1
/* estimated standard deviation of
59 /* Could add correlation stuff later if required */
77 /* Calculate statistics from data */
/* Accumulate Q, the weighted sum y[i]*ui/w[i] — presumably inside a
   loop over i whose header is not visible in this extract. */
82 Q
+= y
[i
] * ui
/ w
[i
];
/* Intercept from the weighted mean once the slope *b1 is known. */
87 *b0
= (P
/ W
) - (*b1
) * u
;
/* Accumulate *s2 as the weighted sum of squared residuals. */
91 diff
= y
[i
] - *b0
- *b1
*x
[i
];
92 *s2
+= diff
*diff
/ w
[i
];
/* Standard deviation of the intercept estimate; 'aa' presumably
   carries the slope-dependent term — TODO confirm against the full
   source. */
99 *sb0
= sqrt(*s2
/ W
+ aa
* aa
);
101 *s2
*= (n
/ W
); /* Giving weighted average of variances */
104 /* ================================================== */
105 /* Get the coefficient to multiply the standard deviation by, to get a
106 particular size of confidence interval (assuming a t-distribution) */
/* RGR_GetTCoef: coefficient to multiply a standard deviation by to
   obtain a confidence-interval half-width, assuming a t-distribution.
   The table holds the 99.95% quantiles for dof = 1..40 (see comment
   below).  NOTE(review): the table-lookup branch (original lines
   ~121-124) is missing from this extract; only the out-of-table
   fallback return is visible.  Code left byte-identical. */
109 RGR_GetTCoef(int dof
)
111 /* Assuming now the 99.95% quantile */
112 static const float coefs
[] =
113 { 636.6, 31.6, 12.92, 8.61, 6.869,
114 5.959, 5.408, 5.041, 4.781, 4.587,
115 4.437, 4.318, 4.221, 4.140, 4.073,
116 4.015, 3.965, 3.922, 3.883, 3.850,
117 3.819, 3.792, 3.768, 3.745, 3.725,
118 3.707, 3.690, 3.674, 3.659, 3.646,
119 3.633, 3.622, 3.611, 3.601, 3.591,
120 3.582, 3.574, 3.566, 3.558, 3.551};
/* Fallback coefficient for dof beyond the table. */
125 return 3.5; /* Until I can be bothered to do something better */
129 /* ================================================== */
130 /* Get 90% quantile of chi-square distribution */
/* RGR_GetChi2Coef: 90% quantile of the chi-square distribution; the
   table covers dof = 1..64.  NOTE(review): the array's closing brace
   and the table-lookup branch are missing from this extract; only the
   out-of-table linear fallback is visible.  Code left byte-identical. */
133 RGR_GetChi2Coef(int dof
)
135 static const float coefs
[] = {
136 2.706, 4.605, 6.251, 7.779, 9.236, 10.645, 12.017, 13.362,
137 14.684, 15.987, 17.275, 18.549, 19.812, 21.064, 22.307, 23.542,
138 24.769, 25.989, 27.204, 28.412, 29.615, 30.813, 32.007, 33.196,
139 34.382, 35.563, 36.741, 37.916, 39.087, 40.256, 41.422, 42.585,
140 43.745, 44.903, 46.059, 47.212, 48.363, 49.513, 50.660, 51.805,
141 52.949, 54.090, 55.230, 56.369, 57.505, 58.641, 59.774, 60.907,
142 62.038, 63.167, 64.295, 65.422, 66.548, 67.673, 68.796, 69.919,
143 71.040, 72.160, 73.279, 74.397, 75.514, 76.630, 77.745, 78.860
/* Linear approximation for dof beyond the table. */
149 return 1.2 * dof
; /* Until I can be bothered to do something better */
153 /* ================================================== */
154 /* Critical value for number of runs of residuals with same sign.
155 5% critical region for now. */
/* critical_runs[n]: 5% critical value for the number of runs of
   same-signed residuals among n residuals (see comment above in the
   file).  A zero entry means the test is not applicable for that n.
   NOTE(review): the initializer's closing "};" is missing from this
   extract.  Code left byte-identical. */
157 static char critical_runs
[] = {
158 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
159 3, 3, 4, 4, 5, 5, 5, 6, 6, 7,
160 7, 7, 8, 8, 9, 9, 9, 10, 10, 11,
161 11, 11, 12, 12, 13, 13, 14, 14, 14, 15,
162 15, 16, 16, 17, 17, 18, 18, 18, 19, 19,
163 20, 20, 21, 21, 21, 22, 22, 23, 23, 24,
164 24, 25, 25, 26, 26, 26, 27, 27, 28, 28,
165 29, 29, 30, 30, 30, 31, 31, 32, 32, 33,
166 33, 34, 34, 35, 35, 35, 36, 36, 37, 37,
167 38, 38, 39, 39, 40, 40, 40, 41, 41, 42,
168 42, 43, 43, 44, 44, 45, 45, 46, 46, 46,
169 47, 47, 48, 48, 49, 49, 50, 50, 51, 51,
170 52, 52, 52, 53, 53, 54, 54, 55, 55, 56
173 /* ================================================== */
/* n_runs_from_residuals: count maximal runs of same-signed residuals
   in resid[0..n-1] — the statistic for the runs (randomness) test.
   NOTE(review): the return type, the declarations of i and the run
   counter, the counter update and the return statement are missing
   from this extract; only the loop header and the same-sign
   comparison are visible.  Code left byte-identical. */
176 n_runs_from_residuals(double *resid
, int n
)
/* Consecutive residuals with the same strict sign extend the current
   run; the else-branch (new run) is not visible in this extract. */
182 for (i
=1; i
<n
; i
++) {
183 if (((resid
[i
-1] < 0.0) && (resid
[i
] < 0.0)) ||
184 ((resid
[i
-1] > 0.0) && (resid
[i
] > 0.0))) {
194 /* ================================================== */
195 /* Return a boolean indicating whether we had enough points for
/* RGR_FindBestRegression: weighted least-squares fit that repeatedly
   drops the oldest samples until the residuals pass the runs test
   (using critical_runs[]), subject to keeping at least
   MIN_SAMPLES_FOR_REGRESS and min_samples points.  Returns (per the
   comment above in the file) a boolean indicating whether there were
   enough points.  Extra samples at negative indices (m of them) are
   used only for computing residuals for the runs test.
   NOTE(review): this extract is garbled and missing many interior
   lines (the embedded numbering jumps, e.g. 245 -> 253), including
   the early-return body, the accumulation loops' interiors, the
   sample-dropping loop, and the final assignments of *b0/*b1/*sb1/
   *new_start/*n_runs/*dof.  Code left byte-identical. */
199 RGR_FindBestRegression
200 (double *x
, /* independent variable */
201 double *y
, /* measured data */
202 double *w
, /* weightings (large => data
205 int n
, /* number of data points */
206 int m
, /* number of extra samples in x and y arrays
207 (negative index) which can be used to
209 int min_samples
, /* minimum number of samples to be kept after
210 changing the starting index to pass the runs
213 /* And now the results */
215 double *b0
, /* estimated y axis intercept */
216 double *b1
, /* estimated slope */
217 double *s2
, /* estimated variance of data points */
219 double *sb0
, /* estimated standard deviation of
221 double *sb1
, /* estimated standard deviation of
224 int *new_start
, /* the new starting index to make the
225 residuals pass the two tests */
227 int *n_runs
, /* number of runs amongst the residuals */
229 int *dof
/* degrees of freedom in statistics (needed
230 to get confidence intervals later) */
234 double P
, Q
, U
, V
, W
; /* total */
235 double resid
[MAX_POINTS
* REGRESS_RUNS_RATIO
];
237 double a
, b
, u
, ui
, aa
;
239 int start
, resid_start
, nruns
, npoints
;
/* Guard: inputs must fit the fixed-size buffers; the second assert
   ensures every possible runs-test length indexes critical_runs[]
   in bounds. */
242 assert(n
<= MAX_POINTS
&& m
>= 0);
243 assert(n
* REGRESS_RUNS_RATIO
< sizeof (critical_runs
) / sizeof (critical_runs
[0]));
/* Too few points for a regression at all (body not visible). */
245 if (n
< MIN_SAMPLES_FOR_REGRESS
) {
253 for (i
=start
; i
<n
; i
++) {
261 for (i
=start
; i
<n
; i
++) {
/* Weighted accumulation of Q (loop interior partly missing). */
264 Q
+= y
[i
] * ui
/ w
[i
];
/* Intercept from weighted means given slope b. */
269 a
= (P
/ W
) - (b
* u
);
271 /* Get residuals also for the extra samples before start */
272 resid_start
= n
- (n
- start
) * REGRESS_RUNS_RATIO
;
/* Clamp so we never reach before the m available extra samples
   (the clamping statement itself is not visible in this extract). */
273 if (resid_start
< -m
)
276 for (i
=resid_start
; i
<n
; i
++) {
277 resid
[i
- resid_start
] = y
[i
] - a
- b
*x
[i
];
280 /* Count number of runs */
281 nruns
= n_runs_from_residuals(resid
, n
- resid_start
);
/* Accept this fit if the runs test passes, or if we cannot drop any
   more points without going below the minimums. */
283 if (nruns
> critical_runs
[n
- resid_start
] ||
284 n
- start
<= MIN_SAMPLES_FOR_REGRESS
||
285 n
- start
<= min_samples
) {
286 if (start
!= resid_start
) {
287 /* Ignore extra samples in returned nruns */
288 nruns
= n_runs_from_residuals(resid
+ (start
- resid_start
), n
- start
);
292 /* Try dropping one sample at a time until the runs test passes. */
298 /* Work out statistics from full dataset */
/* ss: weighted sum of squared residuals over the accepted window
   (its declaration/zeroing is not visible in this extract). */
303 for (i
=start
; i
<n
; i
++) {
304 ss
+= resid
[i
- resid_start
]*resid
[i
- resid_start
] / w
[i
];
/* Unbiased variance estimate: npoints - 2 degrees of freedom for a
   two-parameter (intercept + slope) fit. */
308 ss
/= (double)(npoints
- 2);
311 *sb0
= sqrt((ss
/ W
) + (aa
* aa
));
312 *s2
= ss
* (double) npoints
/ W
;
322 /* ================================================== */
/* Swap a and b through the caller-scoped 'temp' variable.
   Wrapped in do { } while (0) so the macro behaves as a single
   statement after an unbraced 'if'/'else' (CERT PRE10-C); the
   original three bare statements would silently split apart there.
   Callers must still declare a compatible 'temp'. */
#define EXCH(a,b) do { temp=(a); (a)=(b); (b)=temp; } while (0)
326 /* ================================================== */
327 /* Find the index'th biggest element in the array x of n elements.
328 flags is an array where a 1 indicates that the corresponding entry
329 in x is known to be sorted into its correct position and a 0
330 indicates that the corresponding entry is not sorted. However, if
331 flags[m] = flags[n] = 1 with m<n, then x[m] must be <= x[n] and for
332 all i with m<i<n, x[m] <= x[i] <= x[n]. In practice, this means
333 flags[] has to be the result of a previous call to this routine
334 with the same array x, and is used to remember which parts of the
335 x[] array we have already sorted.
337 The approach used is a cut-down quicksort, where we only bother to
338 keep sorting the partition that contains the index we are after.
339 The approach comes from Numerical Recipes in C (ISBN
/* find_ordered_entry_with_flags: return the index'th smallest element
   of x[0..n-1] using a cut-down quicksort that only keeps partitioning
   the side containing 'index' (see the block comment above this
   function).  flags[] records which positions are already in their
   final sorted place, so repeated calls on the same array reuse work.
   NOTE(review): the return type, local declarations (u, v, l, r, piv,
   pivind, temp), the already-sorted fast path, and several partition/
   recursion lines are missing from this extract.  Code left
   byte-identical. */
343 find_ordered_entry_with_flags(double *x
, int n
, int index
, char *flags
)
352 /* If this bit of the array is already sorted, simple! */
357 /* Find subrange to look at */
/* Widen [u, v] to the nearest positions already known sorted. */
359 while (u
> 0 && !flags
[u
]) u
--;
362 while (v
< (n
-1) && !flags
[v
]) v
++;
/* The subrange endpoints now bound the target and can be marked. */
370 flags
[v
] = flags
[u
] = 1;
/* Median-of-range pivot selection, moved to the front via EXCH. */
373 pivind
= (u
+ v
) >> 1;
374 EXCH(x
[u
], x
[pivind
]);
375 piv
= x
[u
]; /* New value */
/* Standard partition scans: l right over elements < pivot,
   r left over elements > pivot. */
379 while (l
< v
&& x
[l
] < piv
) l
++;
380 while (x
[r
] > piv
) r
--;
387 flags
[r
] = 1; /* Pivot now in correct place */
/* Recurse/iterate only into the partition holding 'index'
   (branch bodies not visible in this extract). */
390 } else if (index
< r
) {
392 } else if (index
> r
) {
399 /* ================================================== */
402 /* Not used, but this is how it can be done */
/* find_ordered_entry: convenience wrapper — find the index'th smallest
   element of x[] with a fresh (all-zero) flags array, i.e. no reuse of
   earlier partial-sort work.  Marked in the file as "Not used, but
   this is how it can be done".
   NOTE(review): return type and closing brace are missing from this
   extract.  Code left byte-identical. */
404 find_ordered_entry(double *x
, int n
, int index
)
406 char flags
[MAX_POINTS
];
408 memset(flags
, 0, n
* sizeof (flags
[0]));
409 return find_ordered_entry_with_flags(x
, n
, index
, flags
);
413 /* ================================================== */
414 /* Find the median entry of an array x[] with n elements. */
/* find_median: median of x[0..n-1] via order-statistic selection.
   For odd n a single selection suffices; for even n it averages the
   two middle elements, reusing flags[] between the two selections so
   the second call is cheap.  NOTE(review): the return type, the
   computation of k (presumably n/2 — TODO confirm), and the odd/even
   branch structure are missing from this extract.  Code left
   byte-identical. */
417 find_median(double *x
, int n
)
420 char flags
[MAX_POINTS
];
422 memset(flags
, 0, n
* sizeof (flags
[0]));
/* Odd-count case: the single middle element. */
425 return find_ordered_entry_with_flags(x
, n
, k
, flags
);
/* Even-count case: mean of the two middle elements; the second
   selection reuses the flags set by the first. */
427 return 0.5 * (find_ordered_entry_with_flags(x
, n
, k
, flags
) +
428 find_ordered_entry_with_flags(x
, n
, k
-1, flags
));
432 /* ================================================== */
/* RGR_FindMedian: public median entry point.  Copies the input into a
   scratch buffer first because find_median()'s selection partially
   reorders its argument — the caller's array is left untouched.
   NOTE(review): return type and closing brace are missing from this
   extract.  Code left byte-identical. */
435 RGR_FindMedian(double *x
, int n
)
437 double tmp
[MAX_POINTS
];
439 assert(n
> 0 && n
<= MAX_POINTS
);
440 memcpy(tmp
, x
, n
* sizeof (tmp
[0]));
442 return find_median(tmp
, n
);
445 /* ================================================== */
446 /* This function evaluates the equation
448 \sum_{i=0}^{n-1} x_i sign(y_i - a - b x_i)
450 and chooses the value of a that minimises the absolute value of the
451 result. (See pp703-704 of Numerical Recipes in C). */
/* eval_robust_residual (name inferred from the call sites later in
   this file — the definition's name/return-type line is missing from
   this extract): for a fixed slope b, choose the intercept a as the
   median of y - b*x and evaluate
       sum_i x_i sign(y_i - a - b x_i)
   per the comment above this function (Numerical Recipes pp703-704).
   Results are returned through *aa (intercept) and *rr (equation
   value).  NOTE(review): the sign-accumulation branches and final
   stores are missing from this extract.  Code left byte-identical. */
455 (double *x
, /* The independent points */
456 double *y
, /* The dependent points */
457 int n
, /* Number of points */
458 double b
, /* Slope */
459 double *aa
, /* Intercept giving smallest absolute
460 value for the above equation */
461 double *rr
/* Corresponding value of equation */
466 double d
[MAX_POINTS
];
/* d[i] = y[i] - b*x[i]; its median is the minimising intercept. */
468 for (i
=0; i
<n
; i
++) {
469 d
[i
] = y
[i
] - b
* x
[i
];
472 a
= find_median(d
, n
);
/* Accumulate the signed sum over residuals del (positive-case branch
   not visible in this extract). */
475 for (i
=0; i
<n
; i
++) {
476 del
= y
[i
] - a
- b
* x
[i
];
479 } else if (del
< 0.0) {
488 /* ================================================== */
489 /* This routine performs a 'robust' regression, i.e. one which has low
490 susceptibility to outliers amongst the data. If one thinks of a
491 normal (least squares) linear regression in 2D being analogous to
492 the arithmetic mean in 1D, this algorithm in 2D is roughly
493 analogous to the median in 1D. This algorithm seems to work quite
494 well until the number of outliers is approximately half the number
497 The return value is a status indicating whether there were enough
498 data points to run the routine or not. */
/* RGR_FindBestRobustRegression: robust (median-based) straight-line
   fit with low sensitivity to outliers — roughly the 2D analogue of
   the 1D median (see the comment above this function).  Strategy:
   least-squares fit for a starting slope estimate, then bracket and
   bisect on b until eval_robust_residual changes sign within
   tolerance tol; the oldest points are stripped while the residuals
   fail the runs test.  Returns a status indicating whether there were
   enough points.
   NOTE(review): this extract is missing many interior lines (the
   embedded numbering jumps, e.g. 532 -> 538), including the n < 2 /
   n == 2 branch structure, the least-squares accumulation interiors,
   the bracket-expansion logic, the bisection branch bodies, and the
   final result stores.  Code left byte-identical. */
501 RGR_FindBestRobustRegression
502 (double *x
, /* The independent axis points */
503 double *y
, /* The dependent axis points (which
504 may contain outliers). */
505 int n
, /* The number of points */
506 double tol
, /* The tolerance required in
507 determining the value of b1 */
508 double *b0
, /* The estimated Y-axis intercept */
509 double *b1
, /* The estimated slope */
510 int *n_runs
, /* The number of runs of residuals */
511 int *best_start
/* The best starting index */
518 double P
, U
, V
, W
, X
;
519 double resid
, resids
[MAX_POINTS
];
520 double blo
, bhi
, bmid
, rlo
, rhi
, rmid
;
522 double mx
, dx
, my
, dy
;
525 assert(n
<= MAX_POINTS
);
530 /* Just a straight line fit (we need this for the manual mode) */
/* Two-point case: slope and intercept follow directly. */
531 *b1
= (y
[1] - y
[0]) / (x
[1] - x
[0]);
532 *b0
= y
[0] - (*b1
) * x
[0];
538 /* else at least 3 points, apply normal algorithm */
542 /* Loop to strip oldest points that cause the regression residuals
543 to fail the number of runs test */
546 n_points
= n
- start
;
548 /* Use standard least squares regression to get starting estimate */
551 for (i
=start
; i
<n
; i
++) {
/* Unweighted fit: total weight is simply the point count. */
556 W
= (double) n_points
;
562 for (i
=start
; i
<n
; i
++) {
573 for (i
=start
; i
<n
; i
++) {
574 resid
= y
[i
] - a
- b
* x
[i
];
578 /* Need to expand range of b to get a root in the interval.
579 Estimate standard deviation of b and expand range about b based
587 /* Give up if the interval is too large */
594 /* We don't want 'a' yet */
/* Evaluate the robust residual equation at both bracket ends. */
595 eval_robust_residual(x
+ start
, y
+ start
, n_points
, blo
, &a
, &rlo
);
596 eval_robust_residual(x
+ start
, y
+ start
, n_points
, bhi
, &a
, &rhi
);
598 } while (rlo
* rhi
>= 0.0); /* fn vals have same sign or one is zero,
599 i.e. root not in interval (rlo, rhi). */
601 /* OK, so the root for b lies in (blo, bhi). Start bisecting */
603 bmid
= 0.5 * (blo
+ bhi
);
/* Terminate when the midpoint is no longer strictly interior —
   i.e. floating-point resolution is exhausted (branch body not
   visible in this extract). */
604 if (!(blo
< bmid
&& bmid
< bhi
))
606 eval_robust_residual(x
+ start
, y
+ start
, n_points
, bmid
, &a
, &rmid
);
/* Keep the half-interval in which the sign change lies. */
609 } else if (rmid
* rlo
> 0.0) {
612 } else if (rmid
* rhi
> 0.0) {
618 } while (bhi
- blo
> tol
);
623 /* Number of runs test, but not if we're already down to the
624 minimum number of points */
625 if (n_points
== MIN_SAMPLES_FOR_REGRESS
) {
/* Residuals of the converged robust fit, for the runs test. */
629 for (i
=start
; i
<n
; i
++) {
630 resids
[i
] = y
[i
] - a
- bmid
* x
[i
];
633 nruns
= n_runs_from_residuals(resids
+ start
, n_points
);
/* Runs test failed: presumably advance start and retry — the loop
   tail is not visible in this extract. */
635 if (nruns
> critical_runs
[n_points
]) {
650 /* ================================================== */
651 /* This routine performs linear regression with two independent variables.
652 It returns non-zero status if there were enough data points and there
/* Linear regression with two independent variables (per the comment
   above this function): accumulate the standard sums, then solve the
   normal equations by determinants, with U the numerator for one
   slope and V = (presumably) V1 + V2 + V3 the common denominator —
   TODO confirm against the full source.  Returns non-zero status only
   when enough points exist and the solution is numerically stable.
   NOTE(review): the function's name/return-type line, some parameter
   lines, the Sx1/Sx2/Sy accumulation lines inside the loop, the
   computation of V, and the returned slope assignment are missing
   from this extract.  Code left byte-identical. */
657 (double *x1
, /* first independent variable */
658 double *x2
, /* second independent variable */
659 double *y
, /* measured data */
661 int n
, /* number of data points */
664 double *b2
/* estimated second slope */
665 /* other values are not needed yet */
668 double Sx1
, Sx2
, Sx1x1
, Sx1x2
, Sx2x2
, Sx1y
, Sx2y
, Sy
;
669 double U
, V
, V1
, V2
, V3
;
/* Zero all accumulators before the summation pass. */
675 Sx1
= Sx2
= Sx1x1
= Sx1x2
= Sx2x2
= Sx1y
= Sx2y
= Sy
= 0.0;
677 for (i
= 0; i
< n
; i
++) {
680 Sx1x1
+= x1
[i
] * x1
[i
];
681 Sx1x2
+= x1
[i
] * x2
[i
];
682 Sx2x2
+= x2
[i
] * x2
[i
];
683 Sx1y
+= x1
[i
] * y
[i
];
684 Sx2y
+= x2
[i
] * y
[i
];
/* Numerator determinant for the slope solution. */
688 U
= n
* (Sx1x2
* Sx1y
- Sx1x1
* Sx2y
) +
689 Sx1
* Sx1
* Sx2y
- Sx1
* Sx2
* Sx1y
+
690 Sy
* (Sx2
* Sx1x1
- Sx1
* Sx1x2
);
/* Denominator determinant, split into three terms (their combination
   into V is not visible in this extract). */
692 V1
= n
* (Sx1x2
* Sx1x2
- Sx1x1
* Sx2x2
);
693 V2
= Sx1
* Sx1
* Sx2x2
+ Sx2
* Sx2
* Sx1x1
;
694 V3
= -2.0 * Sx1
* Sx2
* Sx1x2
;
697 /* Check if there is a (numerically stable) solution */
698 if (fabs(V
) * 1.0e10
<= -V1
+ V2
+ fabs(V3
))