]> git.ipfire.org Git - collecty.git/blame - src/collecty/daemon.py
latency: Don't show full packet loss in background
[collecty.git] / src / collecty / daemon.py
CommitLineData
f37913e8 1#!/usr/bin/python3
73db5226
MT
2###############################################################################
3# #
4# collecty - A system statistics collection daemon for IPFire #
5# Copyright (C) 2012 IPFire development team #
6# #
7# This program is free software: you can redistribute it and/or modify #
8# it under the terms of the GNU General Public License as published by #
9# the Free Software Foundation, either version 3 of the License, or #
10# (at your option) any later version. #
11# #
12# This program is distributed in the hope that it will be useful, #
13# but WITHOUT ANY WARRANTY; without even the implied warranty of #
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15# GNU General Public License for more details. #
16# #
17# You should have received a copy of the GNU General Public License #
18# along with this program. If not, see <http://www.gnu.org/licenses/>. #
19# #
20###############################################################################
21
49c1b8fd
MT
22import datetime
23import multiprocessing
682b512d 24import os
f37913e8 25import queue
72364063 26import rrdtool
73db5226 27import signal
72364063
MT
28import threading
29import time
73db5226 30
f37913e8
MT
31from . import bus
32from . import plugins
73db5226 33
f37913e8
MT
34from .constants import *
35from .i18n import _
73db5226
MT
36
37import logging
38log = logging.getLogger("collecty")
39
class Collecty(object):
    """
    The collecty daemon main object.

    Owns the plugin instances, a pool of worker threads that execute the
    collect operations, a timer queue that decides when each plugin is due,
    and a write queue that batches collected data to disk.
    """

    # The default interval, when all data is written to disk.
    SUBMIT_INTERVAL = 300

    # How long (in seconds) the main loop sleeps when no timer event was due.
    HEARTBEAT = 1

    def __init__(self, debug=False):
        self.debug = debug

        # Reset timezone to UTC
        # rrdtool is reading that from the environment
        os.environ["TZ"] = "UTC"

        # Enable debug logging when running in debug mode
        if self.debug:
            log.setLevel(logging.DEBUG)

        self.plugins = []

        # Indicates whether this process should be running or not.
        self.running = True

        # The write queue holds all collected pieces of data which
        # will be written to disk later.
        self.write_queue = WriteQueue(self, self.SUBMIT_INTERVAL)

        # Create worker threads
        self.worker_threads = self.create_worker_threads()

        self._timer_queue = queue.PriorityQueue()
        self._worker_queue = queue.Queue()

        # Create a thread that connects to dbus and processes requests we
        # get from there.
        self.bus = bus.Bus(self)

        # Add all plugins
        for plugin in plugins.get():
            self.add_plugin(plugin)

        log.debug(_("Collecty successfully initialized with %s plugins") \
            % len(self.plugins))

    def add_plugin(self, plugin_class):
        """
        Instantiate plugin_class and register the plugin with this daemon.

        A failing plugin is logged and skipped so that one broken plugin
        does not prevent the daemon from starting.
        """
        # Try initialising a new plugin. If that fails, we will log the
        # error and try to go on.
        try:
            plugin = plugin_class(self)
        # Catch Exception instead of a bare except so that SystemExit and
        # KeyboardInterrupt are not silently swallowed here.
        except Exception:
            log.critical(_("Plugin %s could not be initialised") % plugin_class, exc_info=True)
            return

        self.plugins.append(plugin)

    @property
    def templates(self):
        """
        Yields all graph templates of all registered plugins.
        """
        for plugin in self.plugins:
            for template in plugin.templates:
                yield template

    def run(self):
        """
        Main loop of the daemon: dispatches due plugins to the worker
        queue until shutdown() is called, then tears everything down.
        """
        # Register signal handlers.
        self.register_signal_handler()

        # Cannot do anything if no plugins have been initialised
        if not self.plugins:
            log.critical(_("No plugins have been initialised"))
            return

        # Start the bus
        self.bus.start()

        # Initialise the timer queue
        self.initialise_timer_queue()

        # Start worker threads
        for w in self.worker_threads:
            w.start()

        # Run the write queue thread
        self.write_queue.start()

        # Regularly submit all data to disk.
        while self.running:
            try:
                # Try processing one event from the queue. If that succeeded
                # we will retry immediately.
                if self.process_timer_queue():
                    continue

                # Otherwise we will sleep for a bit
                time.sleep(self.HEARTBEAT)

                # Log warnings if the worker queue is filling up
                queue_size = self._worker_queue.qsize()
                if queue_size >= 5:
                    log.warning(_("Worker queue is filling up with %s events") % queue_size)

            except KeyboardInterrupt:
                self.shutdown()
                break

        # Wait until all worker threads are finished
        for w in self.worker_threads:
            w.join()

        # Stop the bus thread
        self.bus.shutdown()

        # Write all collected data to disk before ending the main thread
        self.write_queue.shutdown()

        log.debug(_("Main thread exited"))

    def shutdown(self):
        """
        Request an orderly shutdown; idempotent.
        """
        if not self.running:
            return

        log.info(_("Received shutdown signal"))
        self.running = False

        # Propagating shutdown to all threads.
        for w in self.worker_threads:
            w.shutdown()

    def register_signal_handler(self):
        """
        Install handlers for SIGTERM/SIGINT (shutdown) and SIGUSR1 (commit).
        """
        for s in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            log.debug(_("Registering signal %d") % s)

            signal.signal(s, self.signal_handler)

    def signal_handler(self, sig, *args, **kwargs):
        log.info(_("Caught signal %d") % sig)

        if sig in (signal.SIGTERM, signal.SIGINT):
            # Shutdown this application.
            self.shutdown()

        elif sig == signal.SIGUSR1:
            # Commit all data.
            self.write_queue.commit()

    def get_plugin_from_template(self, template_name):
        """
        Returns the plugin providing a template with the given name,
        or None when no plugin matches.
        """
        for plugin in self.plugins:
            # Generator avoids building a throwaway list just for membership.
            if template_name not in (t.name for t in plugin.templates):
                continue

            return plugin

    def generate_graph(self, template_name, *args, **kwargs):
        """
        Renders the graph for template_name via its owning plugin.

        Raises RuntimeError when no plugin provides that template.
        """
        plugin = self.get_plugin_from_template(template_name)
        if not plugin:
            raise RuntimeError("Could not find template %s" % template_name)

        return plugin.generate_graph(template_name, *args, **kwargs)

    def create_worker_threads(self, num=None):
        """
        Creates a number of worker threads
        """
        # If no number of threads is given, we will create as many as we have
        # active processor cores but never less than two.
        if num is None:
            num = max(multiprocessing.cpu_count(), 2)

        worker_threads = []

        # "i" instead of "id" to avoid shadowing the builtin.
        for i in range(num):
            worker_thread = WorkerThread(self, i)
            worker_threads.append(worker_thread)

        return worker_threads

    def initialise_timer_queue(self):
        """
        Seeds the timer queue with one timer per plugin; all deadlines
        start at "now" so every plugin is collected once at startup.
        """
        for p in self.plugins:
            timer = PluginTimer(p)

            self._timer_queue.put(timer)

    def process_timer_queue(self):
        """
        Pops the timer that is due first; if it is due, enqueues its plugin
        for collection and returns the timer, otherwise returns None.
        The timer is always put back into the queue.
        """
        # Take the item from the timer queue that is to be due first
        timer = self._timer_queue.get()

        try:
            # If the timer event is to be executed, we will put the plugin
            # into the worker queue and reset the timer
            if timer.is_due():
                self._worker_queue.put(timer.plugin)
                timer.reset_deadline()

                return timer
        finally:
            # Put the timer back into the timer queue.
            self._timer_queue.put(timer)
234
235
class WorkerThread(threading.Thread):
    """
    A daemon thread that takes plugins from the shared worker queue and
    runs their collect operation.
    """

    # Timeout (seconds) for each blocking get on the worker queue; lets the
    # thread periodically re-check its running flag.
    HEARTBEAT = 2.5

    def __init__(self, collecty, id):
        threading.Thread.__init__(self)
        self.daemon = True

        self.log = logging.getLogger("collecty.worker")
        self.log.propagate = 1

        self.collecty = collecty
        self.id = id

        self.log.debug(_("Worker thread %s has been initialised") % self.id)

    @property
    def queue(self):
        """
        The queue this thread is getting events from
        """
        return self.collecty._worker_queue

    def run(self):
        self.log.debug(_("Worker thread %s has been started") % self.id)
        self.running = True

        while self.running:
            try:
                plugin = self.queue.get(block=True, timeout=self.HEARTBEAT)

            # If the queue has been empty we just retry
            except queue.Empty:
                continue

            # Execute the collect operation for this plugin.
            # An unhandled exception from a plugin must not kill this
            # worker thread, so it is logged and the thread carries on.
            try:
                plugin.collect()
            except Exception:
                self.log.error(_("Unhandled exception in %s.collect()") % plugin,
                    exc_info=True)

        self.log.debug(_("Worker thread %s has been terminated") % self.id)

    def shutdown(self):
        # run() observes this flag within at most HEARTBEAT seconds.
        self.running = False
277
72364063
MT
278
class WriteQueue(threading.Thread):
    """
    A daemon thread that buffers collected results and periodically flushes
    them to their RRD databases, grouped by file.
    """

    def __init__(self, collecty, submit_interval):
        threading.Thread.__init__(self)
        self.daemon = True

        self.collecty = collecty

        self.log = logging.getLogger("collecty.queue")
        self.log.propagate = 1

        self.timer = plugins.Timer(submit_interval)
        self._queue = queue.PriorityQueue()

        self.log.debug(_("Initialised write queue"))

    def run(self):
        self.log.debug(_("Write queue process started"))
        self.running = True

        while self.running:
            # Reset the timer.
            self.timer.reset()

            # Wait until the timer has successfully elapsed.
            if self.timer.wait():
                self.commit()

        # Final flush so no buffered data is lost on shutdown.
        self.commit()
        self.log.debug(_("Write queue process stopped"))

    def shutdown(self):
        self.running = False
        self.timer.cancel()

        # Wait until all data has been written.
        self.join()

    def add(self, object, time, data):
        """
        Queues one measurement (data at time) for object's RRD file.
        """
        result = QueueObject(object.file, time, data)
        self._queue.put(result)

    def commit(self):
        """
        Flushes the read data to disk.
        """
        # There is nothing to do if the queue is empty
        if self._queue.empty():
            self.log.debug(_("No data to commit"))
            return

        time_start = time.time()

        self.log.debug(_("Submitting data to the databases..."))

        # Get all objects from the queue and group them by the RRD file
        # to commit them all at once. Named "batches" so the per-file
        # loop variable below does not shadow the dict.
        batches = {}
        while not self._queue.empty():
            result = self._queue.get()
            batches.setdefault(result.file, []).append(result)

        # Write the collected data to disk
        for filename, batch in batches.items():
            self._commit_file(filename, batch)

        duration = time.time() - time_start
        self.log.debug(_("Emptied write queue in %.2fs") % duration)

    def _commit_file(self, filename, results):
        """
        Writes one batch of results into a single RRD database.
        """
        self.log.debug(_("Committing %(counter)s entries to %(filename)s") \
            % { "counter" : len(results), "filename" : filename })

        for result in results:
            self.log.debug(" %s: %s" % (result.time, result.data))

        try:
            rrdtool.update(filename, *["%s" % r for r in results])

        # Catch operational errors like unreadable/unwritable RRD databases
        # or those where the format has changed. The collected data will be lost.
        except rrdtool.OperationalError as e:
            self.log.critical(_("Could not update RRD database %s: %s") \
                % (filename, e))
72364063
MT
366
367
class QueueObject(object):
    """
    One measurement waiting in the write queue.

    Carries the target RRD file, the sample timestamp and the raw data
    string. Instances order by timestamp so a PriorityQueue releases the
    oldest sample first.
    """

    def __init__(self, file, time, data):
        self.file = file
        self.time = time
        self.data = data

    def __str__(self):
        # rrdtool expects "<epoch seconds>:<data>".
        timestamp = self.time.strftime("%s")
        return "%s:%s" % (timestamp, self.data)

    def __lt__(self, other):
        # Earlier samples sort first.
        return other.time > self.time
49c1b8fd
MT
379
380
class PluginTimer(object):
    """
    Schedules the next collection run for one plugin.

    The deadline starts at "now", so a freshly created timer is due
    immediately and every plugin gets collected right after startup.
    Timers order by deadline for use in a PriorityQueue.
    """

    def __init__(self, plugin):
        self.plugin = plugin
        self.deadline = datetime.datetime.utcnow()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.deadline)

    def __lt__(self, other):
        # The timer with the earlier deadline comes first.
        return self.deadline < other.deadline

    def reset_deadline(self):
        """
        Pushes the deadline forward by the plugin's collection interval.
        """
        interval = datetime.timedelta(seconds=self.plugin.interval)
        self.deadline = datetime.datetime.utcnow() + interval

    def is_due(self):
        """
        Returns True once the deadline has been reached.
        """
        return self.deadline <= datetime.datetime.utcnow()