#!/usr/bin/python3
###############################################################################
#                                                                             #
# collecty - A system statistics collection daemon for IPFire                 #
# Copyright (C) 2012 IPFire development team                                  #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or           #
# (at your option) any later version.                                         #
#                                                                             #
# This program is distributed in the hope that it will be useful,             #
# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
# GNU General Public License for more details.                                #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import datetime
import multiprocessing
import os
import queue
import rrdtool
import signal
import threading
import time

from . import bus
from . import locales
from . import plugins

from .constants import *
from .i18n import _

import logging
log = logging.getLogger("collecty")

class Collecty(object):
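        """
                The main daemon class which ties together the plugins, the
                worker threads, the write queue and the D-Bus interface
        """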
        # The default interval, when all data is written to disk.
        SUBMIT_INTERVAL = 300

        HEARTBEAT = 1

        def __init__(self, debug=False):
                self.debug = debug

                # Reset timezone to UTC
                # rrdtool is reading that from the environment
                os.environ["TZ"] = "UTC"

                # Enable debug logging when running in debug mode
                if self.debug:
                        log.setLevel(logging.DEBUG)

                self.plugins = []

                # Indicates whether this process should be running or not.
                self.running = True

                # The write queue holds all collected pieces of data which
                # will be written to disk later.
                self.write_queue = WriteQueue(self, self.SUBMIT_INTERVAL)

                # Create worker threads
                self.worker_threads = self.create_worker_threads()

                self._timer_queue = queue.PriorityQueue()
                self._worker_queue = queue.Queue()

                # Create a thread that connects to dbus and processes requests we
                # get from there.
                self.bus = bus.Bus(self)

                # Add all plugins
                for plugin in plugins.get():
                        self.add_plugin(plugin)

                log.debug(_("Collecty successfully initialized with %s plugins") \
                        % len(self.plugins))

                log.debug(_("Supported locales: %s") % ", ".join(locales.get_supported_locales()))

        def add_plugin(self, plugin_class):
                # Try initialising a new plugin. If that fails, we will log the
                # error and try to go on.
                try:
                        plugin = plugin_class(self)
                except:
                        log.critical(_("Plugin %s could not be initialised") % plugin_class, exc_info=True)
                        return

                self.plugins.append(plugin)

        @property
        def templates(self):
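                """
                        Yields the graph templates of all registered plugins
                """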
                for plugin in self.plugins:
                        for template in plugin.templates:
                                yield template

        def run(self):
                # Register signal handlers.
                self.register_signal_handler()

                # Cannot do anything if no plugins have been initialised
                if not self.plugins:
                        log.critical(_("No plugins have been initialised"))
                        return

                # Start the bus
                self.bus.start()

                # Initialise the timer queue
                self.initialise_timer_queue()

                # Start worker threads
                for w in self.worker_threads:
                        w.start()

                # Run the write queue thread
                self.write_queue.start()

                # Regularly submit all data to disk.
                while self.running:
                        try:
                                # Try processing one event from the queue. If that succeeded
                                # we will retry immediately.
                                if self.process_timer_queue():
                                        continue

                                # Otherwise we will sleep for a bit
                                time.sleep(self.HEARTBEAT)

                                # Log warnings if the worker queue is filling up
                                queue_size = self._worker_queue.qsize()
                                if queue_size >= 5:
                                        log.warning(_("Worker queue is filling up with %s events") % queue_size)

                        except KeyboardInterrupt:
                                self.shutdown()
                                break

                # Wait until all worker threads are finished
                for w in self.worker_threads:
                        w.join()

                # Stop the bus thread
                self.bus.shutdown()

                # Write all collected data to disk before ending the main thread
                self.write_queue.shutdown()

                log.debug(_("Main thread exited"))

        def shutdown(self):
                if not self.running:
                        return

                log.info(_("Received shutdown signal"))
                self.running = False

                # Propagating shutdown to all threads.
                for w in self.worker_threads:
                        w.shutdown()

        def register_signal_handler(self):
                for s in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
                        log.debug(_("Registering signal %d") % s)

                        signal.signal(s, self.signal_handler)

        def signal_handler(self, sig, *args, **kwargs):
                log.info(_("Caught signal %d") % sig)

                if sig in (signal.SIGTERM, signal.SIGINT):
                        # Shutdown this application.
                        self.shutdown()

                elif sig == signal.SIGUSR1:
                        # Commit all data.
                        self.write_queue.commit()

        def get_plugin_from_template(self, template_name):
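                """
                        Returns the plugin that provides the template with the
                        given name (or None if no plugin does)
                """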
                for plugin in self.plugins:
                        if template_name not in [t.name for t in plugin.templates]:
                                continue

                        return plugin

        def generate_graph(self, template_name, *args, **kwargs):
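                """
                        Generates the graph for the given template by delegating
                        to the plugin that provides it
                """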
                plugin = self.get_plugin_from_template(template_name)
                if not plugin:
                        raise RuntimeError("Could not find template %s" % template_name)

                return plugin.generate_graph(template_name, *args, **kwargs)

        def graph_info(self, template_name, *args, **kwargs):
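                """
                        Returns information about the graph of the given template
                        by delegating to the plugin that provides it
                """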
                plugin = self.get_plugin_from_template(template_name)
                if not plugin:
                        raise RuntimeError("Could not find template %s" % template_name)

                return plugin.graph_info(template_name, *args, **kwargs)

        def create_worker_threads(self, num=None):
                """
                        Creates a number of worker threads
                """
                # If no number of threads is given, we will create as many as we have
                # active processor cores but never less than two.
                if num is None:
                        num = max(multiprocessing.cpu_count(), 2)

                worker_threads = []

                for id in range(num):
                        worker_thread = WorkerThread(self, id)
                        worker_threads.append(worker_thread)

                return worker_threads

        def initialise_timer_queue(self):
                for p in self.plugins:
                        timer = PluginTimer(p)

                        self._timer_queue.put(timer)

        def process_timer_queue(self):
                # Take the item from the timer queue that is due next
                timer = self._timer_queue.get()

                try:
                        # If the timer event is to be executed, we will put the plugin
                        # into the worker queue and reset the timer
                        if timer.is_due():
                                self._worker_queue.put(timer.plugin)
                                timer.reset_deadline()

                                return timer
                finally:
                        # Put the timer back into the timer queue.
                        self._timer_queue.put(timer)


class WorkerThread(threading.Thread):
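        """
                A worker thread that takes plugins from the worker queue and
                runs their collect operation
        """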
        HEARTBEAT = 2.5

        def __init__(self, collecty, id):
                threading.Thread.__init__(self)
                self.daemon = True

                self.log = logging.getLogger("collecty.worker")
                self.log.propagate = 1

                self.collecty = collecty
                self.id = id

                self.log.debug(_("Worker thread %s has been initialised") % self.id)

        @property
        def queue(self):
                """
                        The queue this thread is getting events from
                """
                return self.collecty._worker_queue

        def run(self):
                self.log.debug(_("Worker thread %s has been started") % self.id)
                self.running = True

                while self.running:
                        try:
                                plugin = self.queue.get(block=True, timeout=self.HEARTBEAT)

                        # If the queue was empty, we just try again
                        except queue.Empty:
                                continue

                        # Execute the collect operation for this plugin
                        plugin.collect()

                self.log.debug(_("Worker thread %s has been terminated") % self.id)

        def shutdown(self):
                self.running = False


class WriteQueue(threading.Thread):
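        """
                A thread that buffers collected data and regularly commits it
                to the RRD databases
        """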
        def __init__(self, collecty, submit_interval):
                threading.Thread.__init__(self)
                self.daemon = True

                self.collecty = collecty

                self.log = logging.getLogger("collecty.queue")
                self.log.propagate = 1

                self.timer = plugins.Timer(submit_interval)
                self._queue = queue.PriorityQueue()

                self.log.debug(_("Initialised write queue"))

        def run(self):
                self.log.debug(_("Write queue process started"))
                self.running = True

                while self.running:
                        # Reset the timer.
                        self.timer.reset()

                        # Wait until the timer has successfully elapsed.
                        if self.timer.wait():
                                self.commit()

                self.commit()
                self.log.debug(_("Write queue process stopped"))

        def shutdown(self):
                self.running = False
                self.timer.cancel()

                # Wait until all data has been written.
                self.join()

        def add(self, object, time, data):
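                """
                        Enqueues a collected data point for the given object so
                        that it is written to its RRD file on the next commit
                """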
                result = QueueObject(object.file, time, data)
                self._queue.put(result)

        def commit(self):
                """
                        Flushes all collected data to disk.
                """
                # There is nothing to do if the queue is empty
                if self._queue.empty():
                        self.log.debug(_("No data to commit"))
                        return

                time_start = time.time()

                self.log.debug(_("Submitting data to the databases..."))

                # Get all objects from the queue and group them by the RRD file
                # to commit them all at once
                results = {}
                while not self._queue.empty():
                        result = self._queue.get()

                        try:
                                results[result.file].append(result)
                        except KeyError:
                                results[result.file] = [result]

                # Write the collected data to disk
                for filename, results in list(results.items()):
                        self._commit_file(filename, results)

                duration = time.time() - time_start
                self.log.debug(_("Emptied write queue in %.2fs") % duration)

        def _commit_file(self, filename, results):
                self.log.debug(_("Committing %(counter)s entries to %(filename)s") \
                        % { "counter" : len(results), "filename" : filename })

                for result in results:
                        self.log.debug("  %s: %s" % (result.time, result.data))

                try:
                        rrdtool.update(filename, *["%s" % r for r in results])

                # Catch operational errors like unreadable/unwritable RRD databases
                # or those where the format has changed. The collected data will be lost.
                except rrdtool.OperationalError as e:
                        self.log.critical(_("Could not update RRD database %s: %s") \
                                % (filename, e))


class QueueObject(object):
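        """
                A single data point waiting in the write queue to be written
                to its RRD file
        """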
        def __init__(self, file, time, data):
                self.file = file
                self.time = time
                self.data = data

        def __str__(self):
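                """
                        Formats the data point as "<timestamp>:<data>" which is
                        the format that rrdtool.update() expects
                """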
                return "%s:%s" % (self.time.strftime("%s"), self.data)

        def __lt__(self, other):
                return self.time < other.time


class PluginTimer(object):
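        """
                Keeps track of when a plugin is next due to collect data;
                the timer queue is ordered by this deadline
        """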
        def __init__(self, plugin):
                self.plugin = plugin

                self.deadline = datetime.datetime.utcnow()

        def __repr__(self):
                return "<%s %s>" % (self.__class__.__name__, self.deadline)

        def __lt__(self, other):
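                """
                        Orders timers by their deadline so that the priority queue
                        yields the timer that is due next first
                """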
                return self.deadline < other.deadline

        def reset_deadline(self):
                self.deadline = datetime.datetime.utcnow() \
                        + datetime.timedelta(seconds=self.plugin.interval)

        def is_due(self):
                return datetime.datetime.utcnow() >= self.deadline
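

# Illustrative usage sketch, not part of upstream collecty: a launcher could
# simply construct the daemon and enter its main loop like this. The actual
# service wrapper shipped with collecty may differ.
if __name__ == "__main__":
        c = Collecty(debug=True)
        c.run()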