]> git.ipfire.org Git - collecty.git/blame - src/collecty/daemon.py
latency: Improve logging when host could not be reached
[collecty.git] / src / collecty / daemon.py
CommitLineData
f37913e8 1#!/usr/bin/python3
73db5226
MT
2###############################################################################
3# #
4# collecty - A system statistics collection daemon for IPFire #
5# Copyright (C) 2012 IPFire development team #
6# #
7# This program is free software: you can redistribute it and/or modify #
8# it under the terms of the GNU General Public License as published by #
9# the Free Software Foundation, either version 3 of the License, or #
10# (at your option) any later version. #
11# #
12# This program is distributed in the hope that it will be useful, #
13# but WITHOUT ANY WARRANTY; without even the implied warranty of #
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15# GNU General Public License for more details. #
16# #
17# You should have received a copy of the GNU General Public License #
18# along with this program. If not, see <http://www.gnu.org/licenses/>. #
19# #
20###############################################################################
21
49c1b8fd
MT
22import datetime
23import multiprocessing
f37913e8 24import queue
72364063 25import rrdtool
73db5226 26import signal
72364063
MT
27import threading
28import time
73db5226 29
f37913e8
MT
30from . import bus
31from . import plugins
73db5226 32
f37913e8
MT
33from .constants import *
34from .i18n import _
73db5226
MT
35
36import logging
37log = logging.getLogger("collecty")
38
class Collecty(object):
	"""
	The main daemon class.

	Ties together the plugin timers, the worker threads that execute
	the collect operations, the write queue that flushes collected data
	to the RRD databases, and the D-Bus service.
	"""

	# The default interval, when all data is written to disk.
	SUBMIT_INTERVAL = 300

	# How long (in seconds) the main loop sleeps between two passes
	# over the timer queue.
	HEARTBEAT = 1

	def __init__(self, debug=False):
		self.debug = debug

		# Enable debug logging when running in debug mode
		if self.debug:
			log.setLevel(logging.DEBUG)

		self.plugins = []

		# Indicates whether this process should be running or not.
		self.running = True

		# The write queue holds all collected pieces of data which
		# will be written to disk later.
		self.write_queue = WriteQueue(self, self.SUBMIT_INTERVAL)

		# Create worker threads
		self.worker_threads = self.create_worker_threads()

		# Timers are ordered by their deadline; due plugins are handed
		# to the workers through the worker queue.
		self._timer_queue = queue.PriorityQueue()
		self._worker_queue = queue.Queue()

		# Create a thread that connects to dbus and processes requests we
		# get from there.
		self.bus = bus.Bus(self)

		# Add all plugins
		for plugin in plugins.get():
			self.add_plugin(plugin)

		log.debug(_("Collecty successfully initialized with %s plugins") \
			% len(self.plugins))

	def add_plugin(self, plugin_class):
		# Try initialising a new plugin. If that fails, we will log the
		# error and try to go on.
		try:
			plugin = plugin_class(self)

		# Catch Exception instead of using a bare except so that
		# SystemExit and KeyboardInterrupt still propagate.
		except Exception:
			log.critical(_("Plugin %s could not be initialised") % plugin_class, exc_info=True)
			return

		self.plugins.append(plugin)

	@property
	def templates(self):
		"""
		Yields all graph templates of all registered plugins.
		"""
		for plugin in self.plugins:
			for template in plugin.templates:
				yield template

	def run(self):
		"""
		The main loop of the daemon.

		Starts the bus, the worker threads and the write queue, then
		dispatches due plugin timers until shutdown() is called.
		"""
		# Register signal handlers.
		self.register_signal_handler()

		# Cannot do anything if no plugins have been initialised
		if not self.plugins:
			log.critical(_("No plugins have been initialised"))
			return

		# Start the bus
		self.bus.start()

		# Initialise the timer queue
		self.initialise_timer_queue()

		# Start worker threads
		for w in self.worker_threads:
			w.start()

		# Run the write queue thread
		self.write_queue.start()

		# Regularly submit all data to disk.
		while self.running:
			try:
				# Try processing one event from the queue. If that succeeded
				# we will retry immediately.
				if self.process_timer_queue():
					continue

				# Otherwise we will sleep for a bit
				time.sleep(self.HEARTBEAT)

				# Log warnings if the worker queue is filling up
				queue_size = self._worker_queue.qsize()
				if queue_size >= 5:
					log.warning(_("Worker queue is filling up with %s events") % queue_size)

			except KeyboardInterrupt:
				self.shutdown()
				break

		# Wait until all worker threads are finished
		for w in self.worker_threads:
			w.join()

		# Stop the bus thread
		self.bus.shutdown()

		# Write all collected data to disk before ending the main thread
		self.write_queue.shutdown()

		log.debug(_("Main thread exited"))

	def shutdown(self):
		"""
		Asks the daemon to terminate. Safe to call more than once.
		"""
		if not self.running:
			return

		log.info(_("Received shutdown signal"))
		self.running = False

		# Propagating shutdown to all threads.
		for w in self.worker_threads:
			w.shutdown()

	def register_signal_handler(self):
		for s in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
			log.debug(_("Registering signal %d") % s)

			signal.signal(s, self.signal_handler)

	def signal_handler(self, sig, *args, **kwargs):
		log.info(_("Caught signal %d") % sig)

		if sig in (signal.SIGTERM, signal.SIGINT):
			# Shutdown this application.
			self.shutdown()

		elif sig == signal.SIGUSR1:
			# Commit all data.
			self.write_queue.commit()

	def get_plugin_from_template(self, template_name):
		"""
		Returns the plugin that provides a template with the given
		name, or None when no plugin has such a template.
		"""
		for plugin in self.plugins:
			if template_name not in [t.name for t in plugin.templates]:
				continue

			return plugin

	def generate_graph(self, template_name, *args, **kwargs):
		"""
		Renders the graph for the named template.

		Raises RuntimeError when no plugin provides the template.
		"""
		plugin = self.get_plugin_from_template(template_name)
		if not plugin:
			raise RuntimeError("Could not find template %s" % template_name)

		return plugin.generate_graph(template_name, *args, **kwargs)

	def create_worker_threads(self, num=None):
		"""
		Creates a number of worker threads
		"""
		# If no number of threads is given, we will create as many as we have
		# active processor cores but never less than two.
		if num is None:
			num = max(multiprocessing.cpu_count(), 2)

		worker_threads = []

		# worker_id instead of id, which would shadow the builtin
		for worker_id in range(num):
			worker_thread = WorkerThread(self, worker_id)
			worker_threads.append(worker_thread)

		return worker_threads

	def initialise_timer_queue(self):
		# Schedule every plugin once; the timers re-arm themselves in
		# process_timer_queue() after each dispatch.
		for p in self.plugins:
			timer = PluginTimer(p)

			self._timer_queue.put(timer)

	def process_timer_queue(self):
		# Take the item from the timer queue that is to be due first
		timer = self._timer_queue.get()

		try:
			# If the timer event is to be executed, we will put the plugin
			# into the worker queue and reset the timer
			if timer.is_due():
				self._worker_queue.put(timer.plugin)
				timer.reset_deadline()

				# Returning a truthy value makes the caller retry
				# immediately instead of sleeping.
				return timer
		finally:
			# Put the timer back into the timer queue.
			self._timer_queue.put(timer)
229
230
class WorkerThread(threading.Thread):
	"""
	A worker thread that takes plugins from the shared worker queue
	and runs their collect operation.
	"""

	# Timeout (in seconds) for blocking on an empty queue, so the
	# shutdown flag is re-checked regularly.
	HEARTBEAT = 2.5

	def __init__(self, collecty, id):
		threading.Thread.__init__(self)
		self.daemon = True

		self.log = logging.getLogger("collecty.worker")
		self.log.propagate = 1

		self.collecty = collecty
		self.id = id

		self.log.debug(_("Worker thread %s has been initialised") % self.id)

	@property
	def queue(self):
		"""
		The queue this thread is getting events from
		"""
		return self.collecty._worker_queue

	def run(self):
		self.log.debug(_("Worker thread %s has been started") % self.id)
		self.running = True

		while self.running:
			try:
				plugin = self.queue.get(block=True, timeout=self.HEARTBEAT)

			# If the queue has been empty we just retry
			except queue.Empty:
				continue

			# Execute the collect operation for this plugin.
			# A crashing plugin must not take the whole worker thread
			# down with it, so log the traceback and carry on.
			try:
				plugin.collect()
			except Exception:
				self.log.error(_("Unhandled exception in %s") % plugin, exc_info=True)

		self.log.debug(_("Worker thread %s has been terminated") % self.id)

	def shutdown(self):
		# Flag checked by run(); the thread ends within HEARTBEAT seconds.
		self.running = False
272
72364063
MT
273
class WriteQueue(threading.Thread):
	"""
	A background thread that buffers collected data and periodically
	flushes it to the RRD databases.
	"""

	def __init__(self, collecty, submit_interval):
		threading.Thread.__init__(self)
		self.daemon = True

		self.collecty = collecty

		self.log = logging.getLogger("collecty.queue")
		self.log.propagate = 1

		# Fires every submit_interval seconds to trigger a commit
		self.timer = plugins.Timer(submit_interval)
		self._queue = queue.PriorityQueue()

		self.log.debug(_("Initialised write queue"))

	def run(self):
		self.log.debug(_("Write queue process started"))
		self.running = True

		while self.running:
			# Reset the timer.
			self.timer.reset()

			# Wait until the timer has successfully elapsed.
			if self.timer.wait():
				self.commit()

		# Final flush so no buffered data is lost on shutdown
		self.commit()
		self.log.debug(_("Write queue process stopped"))

	def shutdown(self):
		self.running = False
		self.timer.cancel()

		# Wait until all data has been written.
		self.join()

	def add(self, object, time, data):
		# NOTE: the parameter names shadow the builtin object() and the
		# time module, but are kept for backwards compatibility with
		# existing callers that pass them by keyword.
		result = QueueObject(object.file, time, data)
		self._queue.put(result)

	def commit(self):
		"""
		Flushes the read data to disk.
		"""
		# There is nothing to do if the queue is empty
		if self._queue.empty():
			self.log.debug(_("No data to commit"))
			return

		time_start = time.time()

		self.log.debug(_("Submitting data to the databases..."))

		# Get all objects from the queue and group them by the RRD file
		# to commit them all at once. (Named "groups" so it does not get
		# shadowed by the per-file results below.)
		groups = {}
		while not self._queue.empty():
			result = self._queue.get()

			try:
				groups[result.file].append(result)
			except KeyError:
				groups[result.file] = [result]

		# Write the collected data to disk
		for filename, results in groups.items():
			self._commit_file(filename, results)

		duration = time.time() - time_start
		self.log.debug(_("Emptied write queue in %.2fs") % duration)

	def _commit_file(self, filename, results):
		"""
		Writes all queued results for one RRD file in a single update.
		"""
		self.log.debug(_("Committing %(counter)s entries to %(filename)s") \
			% { "counter" : len(results), "filename" : filename })

		for result in results:
			self.log.debug("  %s: %s" % (result.time, result.data))

		try:
			rrdtool.update(filename, *["%s" % r for r in results])

		# Catch operational errors like unreadable/unwritable RRD databases
		# or those where the format has changed. The collected data will be lost.
		except rrdtool.OperationalError as e:
			self.log.critical(_("Could not update RRD database %s: %s") \
				% (filename, e))
72364063
MT
361
362
class QueueObject(object):
	"""
	A single measurement waiting in the write queue.

	Carries the target RRD file, the timestamp and the encoded data
	string, and orders by timestamp so a PriorityQueue yields the
	oldest entry first.
	"""

	def __init__(self, file, time, data):
		# Path of the RRD file this entry belongs to
		self.file = file
		# Timestamp of the measurement (a datetime object)
		self.time = time
		# The already-encoded data string
		self.data = data

	def __str__(self):
		# Rendered as "<unix timestamp>:<data>"
		timestamp = self.time.strftime("%s")
		return "%s:%s" % (timestamp, self.data)

	def __lt__(self, other):
		# Older entries sort first
		return self.time < other.time
49c1b8fd
MT
374
375
376class PluginTimer(object):
377 def __init__(self, plugin):
378 self.plugin = plugin
379
380 self.deadline = datetime.datetime.utcnow()
381
382 def __repr__(self):
383 return "<%s %s>" % (self.__class__.__name__, self.deadline)
384
f37913e8
MT
385 def __lt__(self, other):
386 return self.deadline < other.deadline
49c1b8fd
MT
387
388 def reset_deadline(self):
389 self.deadline = datetime.datetime.utcnow() \
390 + datetime.timedelta(seconds=self.plugin.interval)
391
392 def is_due(self):
393 return datetime.datetime.utcnow() >= self.deadline