# Commit: Add graph info functionality
# File: src/collecty/daemon.py (from collecty.git)
#!/usr/bin/python3
###############################################################################
#                                                                             #
# collecty - A system statistics collection daemon for IPFire                 #
# Copyright (C) 2012 IPFire development team                                  #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import datetime
import logging
import multiprocessing
import os
import queue
import signal
import threading
import time

import rrdtool

from . import bus
from . import locales
from . import plugins

from .constants import *
from .i18n import _

# Module-level logger shared by the Collecty class below
log = logging.getLogger("collecty")
class Collecty(object):
    """
    The main daemon object: loads all plugins, schedules their collection
    runs via a timer queue, dispatches them to worker threads, and flushes
    the collected data to disk through a write queue.
    """

    # The default interval, when all data is written to disk.
    SUBMIT_INTERVAL = 300

    # Main loop sleep time (seconds) between timer-queue polls.
    HEARTBEAT = 1

    def __init__(self, debug=False):
        self.debug = debug

        # Reset timezone to UTC
        # rrdtool is reading that from the environment
        os.environ["TZ"] = "UTC"

        # Enable debug logging when running in debug mode
        if self.debug:
            log.setLevel(logging.DEBUG)

        self.plugins = []

        # Indicates whether this process should be running or not.
        self.running = True

        # The write queue holds all collected pieces of data which
        # will be written to disk later.
        self.write_queue = WriteQueue(self, self.SUBMIT_INTERVAL)

        # Create worker threads
        self.worker_threads = self.create_worker_threads()

        self._timer_queue = queue.PriorityQueue()
        self._worker_queue = queue.Queue()

        # Create a thread that connects to dbus and processes requests we
        # get from there.
        self.bus = bus.Bus(self)

        # Add all plugins
        for plugin in plugins.get():
            self.add_plugin(plugin)

        log.debug(_("Collecty successfully initialized with %s plugins") \
            % len(self.plugins))

        log.debug(_("Supported locales: %s") % ", ".join(locales.get_supported_locales()))

    def add_plugin(self, plugin_class):
        """
        Instantiates plugin_class and registers it.

        Failures are logged and swallowed so one broken plugin does not
        take down the whole daemon.
        """
        # Try initialising a new plugin. If that fails, we will log the
        # error and try to go on.
        try:
            plugin = plugin_class(self)
        # Only catch regular errors - a bare except would also swallow
        # SystemExit/KeyboardInterrupt and block shutdown.
        except Exception:
            log.critical(_("Plugin %s could not be initialised") % plugin_class, exc_info=True)
            return

        self.plugins.append(plugin)

    @property
    def templates(self):
        """
        Yields all graph templates of all registered plugins.
        """
        for plugin in self.plugins:
            for template in plugin.templates:
                yield template

    def run(self):
        """
        Main loop: starts all helper threads, then processes the timer
        queue until shutdown() is called.
        """
        # Register signal handlers.
        self.register_signal_handler()

        # Cannot do anything if no plugins have been initialised
        if not self.plugins:
            log.critical(_("No plugins have been initialised"))
            return

        # Start the bus
        self.bus.start()

        # Initialise the timer queue
        self.initialise_timer_queue()

        # Start worker threads
        for w in self.worker_threads:
            w.start()

        # Run the write queue thread
        self.write_queue.start()

        # Regularly submit all data to disk.
        while self.running:
            try:
                # Try processing one event from the queue. If that succeeded
                # we will retry immediately.
                if self.process_timer_queue():
                    continue

                # Otherwise we will sleep for a bit
                time.sleep(self.HEARTBEAT)

                # Log warnings if the worker queue is filling up
                queue_size = self._worker_queue.qsize()
                if queue_size >= 5:
                    log.warning(_("Worker queue is filling up with %s events") % queue_size)

            except KeyboardInterrupt:
                self.shutdown()
                break

        # Wait until all worker threads are finished
        for w in self.worker_threads:
            w.join()

        # Stop the bus thread
        self.bus.shutdown()

        # Write all collected data to disk before ending the main thread
        self.write_queue.shutdown()

        log.debug(_("Main thread exited"))

    def shutdown(self):
        """
        Stops the main loop and propagates shutdown to the worker threads.
        Idempotent - subsequent calls are no-ops.
        """
        if not self.running:
            return

        log.info(_("Received shutdown signal"))
        self.running = False

        # Propagating shutdown to all threads.
        for w in self.worker_threads:
            w.shutdown()

    def register_signal_handler(self):
        """
        Installs signal_handler() for TERM, INT and USR1.
        """
        for s in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            log.debug(_("Registering signal %d") % s)

            signal.signal(s, self.signal_handler)

    def signal_handler(self, sig, *args, **kwargs):
        """
        TERM/INT shut the daemon down; USR1 forces a commit of all
        queued data to disk.
        """
        log.info(_("Caught signal %d") % sig)

        if sig in (signal.SIGTERM, signal.SIGINT):
            # Shutdown this application.
            self.shutdown()

        elif sig == signal.SIGUSR1:
            # Commit all data.
            self.write_queue.commit()

    def get_plugin_from_template(self, template_name):
        """
        Returns the plugin providing the given template name,
        or None if no plugin provides it.
        """
        for plugin in self.plugins:
            if template_name not in [t.name for t in plugin.templates]:
                continue

            return plugin

    def generate_graph(self, template_name, *args, **kwargs):
        """
        Renders a graph for template_name, delegating to the owning plugin.

        Raises RuntimeError when the template is unknown.
        """
        plugin = self.get_plugin_from_template(template_name)
        if not plugin:
            raise RuntimeError("Could not find template %s" % template_name)

        return plugin.generate_graph(template_name, *args, **kwargs)

    def graph_info(self, template_name, *args, **kwargs):
        """
        Returns metadata about the graph for template_name, delegating to
        the owning plugin.

        Raises RuntimeError when the template is unknown.
        """
        plugin = self.get_plugin_from_template(template_name)
        if not plugin:
            raise RuntimeError("Could not find template %s" % template_name)

        return plugin.graph_info(template_name, *args, **kwargs)

    def create_worker_threads(self, num=None):
        """
        Creates a number of worker threads
        """
        # If no number of threads is given, we will create as many as we have
        # active processor cores but never less than two.
        if num is None:
            num = max(multiprocessing.cpu_count(), 2)

        worker_threads = []

        # Use "i" rather than shadowing the builtin "id"
        for i in range(num):
            worker_thread = WorkerThread(self, i)
            worker_threads.append(worker_thread)

        return worker_threads

    def initialise_timer_queue(self):
        """
        Seeds the timer queue with one timer per plugin; all timers are
        due immediately so the first collection happens right away.
        """
        for p in self.plugins:
            timer = PluginTimer(p)

            self._timer_queue.put(timer)

    def process_timer_queue(self):
        """
        Pops the most urgent timer; if it is due, hands its plugin to the
        worker queue and returns the timer (truthy). Returns None when
        nothing was due. The timer is always re-queued.
        """
        # Take the item from the timer queue that is to be due first
        timer = self._timer_queue.get()

        try:
            # If the timer event is to be executed, we will put the plugin
            # into the worker queue and reset the timer
            if timer.is_due():
                self._worker_queue.put(timer.plugin)
                timer.reset_deadline()

                return timer
        finally:
            # Put the timer back into the timer queue.
            self._timer_queue.put(timer)
244
245
class WorkerThread(threading.Thread):
    """
    Daemon thread that pulls plugins off the shared worker queue and runs
    their collect() routine until asked to shut down.
    """

    # Seconds to block on the queue before re-checking self.running.
    HEARTBEAT = 2.5

    def __init__(self, collecty, id):
        super().__init__()
        self.daemon = True

        self.log = logging.getLogger("collecty.worker")
        self.log.propagate = 1

        self.collecty = collecty
        self.id = id

        self.log.debug(_("Worker thread %s has been initialised") % self.id)

    @property
    def queue(self):
        """
        The queue this thread is getting events from
        """
        return self.collecty._worker_queue

    def run(self):
        self.log.debug(_("Worker thread %s has been started") % self.id)
        self.running = True

        while self.running:
            try:
                job = self.queue.get(block=True, timeout=self.HEARTBEAT)
            except queue.Empty:
                # Nothing queued within the heartbeat - poll again.
                continue

            # Execute the collect operation for this plugin
            job.collect()

        self.log.debug(_("Worker thread %s has been terminated") % self.id)

    def shutdown(self):
        # Ask the run() loop to exit after its current iteration.
        self.running = False
287
72364063
MT
288
class WriteQueue(threading.Thread):
    """
    Background thread that buffers collected data points and periodically
    flushes them, grouped per RRD file, to disk.
    """

    def __init__(self, collecty, submit_interval):
        super().__init__()
        self.daemon = True

        self.collecty = collecty

        self.log = logging.getLogger("collecty.queue")
        self.log.propagate = 1

        # Fires every submit_interval seconds to trigger a commit.
        self.timer = plugins.Timer(submit_interval)
        self._queue = queue.PriorityQueue()

        self.log.debug(_("Initialised write queue"))

    def run(self):
        self.log.debug(_("Write queue process started"))
        self.running = True

        while self.running:
            # Reset the timer.
            self.timer.reset()

            # Wait until the timer has successfully elapsed.
            if self.timer.wait():
                self.commit()

        # Final flush so nothing queued is lost on shutdown.
        self.commit()
        self.log.debug(_("Write queue process stopped"))

    def shutdown(self):
        self.running = False
        self.timer.cancel()

        # Wait until all data has been written.
        self.join()

    def add(self, object, time, data):
        # Wrap the sample so the priority queue orders it by timestamp.
        self._queue.put(QueueObject(object.file, time, data))

    def commit(self):
        """
        Flushes the read data to disk.
        """
        # There is nothing to do if the queue is empty
        if self._queue.empty():
            self.log.debug(_("No data to commit"))
            return

        time_start = time.time()

        self.log.debug(_("Submitting data to the databases..."))

        # Get all objects from the queue and group them by the RRD file
        # to commit them all at once
        batches = {}
        while not self._queue.empty():
            entry = self._queue.get()
            batches.setdefault(entry.file, []).append(entry)

        # Write the collected data to disk
        for filename, entries in batches.items():
            self._commit_file(filename, entries)

        duration = time.time() - time_start
        self.log.debug(_("Emptied write queue in %.2fs") % duration)

    def _commit_file(self, filename, results):
        self.log.debug(_("Committing %(counter)s entries to %(filename)s") \
            % { "counter" : len(results), "filename" : filename })

        for result in results:
            self.log.debug("  %s: %s" % (result.time, result.data))

        try:
            rrdtool.update(filename, *["%s" % r for r in results])

        # Catch operational errors like unreadable/unwritable RRD databases
        # or those where the format has changed. The collected data will be lost.
        except rrdtool.OperationalError as e:
            self.log.critical(_("Could not update RRD database %s: %s") \
                % (filename, e))
72364063
MT
376
377
class QueueObject(object):
    """
    One pending data point in the write queue: target RRD file, sample
    timestamp and the already-formatted data string.
    """

    def __init__(self, file, time, data):
        self.file = file
        self.time = time
        self.data = data

    def __str__(self):
        # rrdtool consumes "<epoch>:<data>".
        # NOTE(review): strftime("%s") is a glibc extension, not portable -
        # confirm the daemon only targets Linux.
        return f'{self.time.strftime("%s")}:{self.data}'

    def __lt__(self, other):
        # Order by timestamp so the PriorityQueue yields oldest first.
        return self.time < other.time
49c1b8fd
MT
389
390
class PluginTimer(object):
    """
    Tracks when a plugin's next collection run is due.  Instances order by
    deadline, so a PriorityQueue yields the most urgent timer first.
    """

    def __init__(self, plugin):
        self.plugin = plugin

        # Due immediately, so the very first collection happens right away.
        # NOTE(review): utcnow() yields naive datetimes and is deprecated
        # since Python 3.12 - consider datetime.now(timezone.utc).
        self.deadline = datetime.datetime.utcnow()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.deadline)

    def __lt__(self, other):
        return self.deadline < other.deadline

    def reset_deadline(self):
        # Push the deadline one collection interval into the future.
        interval = datetime.timedelta(seconds=self.plugin.interval)
        self.deadline = datetime.datetime.utcnow() + interval

    def is_due(self):
        return datetime.datetime.utcnow() >= self.deadline