]>
Commit | Line | Data |
---|---|---|
1 | #!/usr/bin/python3 | |
2 | ############################################################################### | |
3 | # # | |
4 | # collecty - A system statistics collection daemon for IPFire # | |
5 | # Copyright (C) 2012 IPFire development team # | |
6 | # # | |
7 | # This program is free software: you can redistribute it and/or modify # | |
8 | # it under the terms of the GNU General Public License as published by # | |
9 | # the Free Software Foundation, either version 3 of the License, or # | |
10 | # (at your option) any later version. # | |
11 | # # | |
12 | # This program is distributed in the hope that it will be useful, # | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # | |
15 | # GNU General Public License for more details. # | |
16 | # # | |
17 | # You should have received a copy of the GNU General Public License # | |
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. # | |
19 | # # | |
20 | ############################################################################### | |
21 | ||
22 | import datetime | |
23 | import multiprocessing | |
24 | import os | |
25 | import queue | |
26 | import rrdtool | |
27 | import signal | |
28 | import threading | |
29 | import time | |
30 | ||
31 | from . import bus | |
32 | from . import locales | |
33 | from . import plugins | |
34 | ||
35 | from .constants import * | |
36 | from .i18n import _ | |
37 | ||
38 | import logging | |
39 | log = logging.getLogger("collecty") | |
40 | ||
class Collecty(object):
    """
    The main daemon: owns the plugins, the worker threads, the write
    queue and the D-Bus service, and runs the timer loop that schedules
    data collection.
    """
    # The default interval, when all data is written to disk.
    SUBMIT_INTERVAL = 300

    # Seconds the main loop sleeps when no timer event was due.
    HEARTBEAT = 1

    def __init__(self, debug=False):
        self.debug = debug

        # Reset timezone to UTC
        # rrdtool is reading that from the environment
        os.environ["TZ"] = "UTC"

        # Enable debug logging when running in debug mode
        if self.debug:
            log.setLevel(logging.DEBUG)

        self.plugins = []

        # Indicates whether this process should be running or not.
        self.running = True

        # The write queue holds all collected pieces of data which
        # will be written to disk later.
        self.write_queue = WriteQueue(self, self.SUBMIT_INTERVAL)

        # Create worker threads
        self.worker_threads = self.create_worker_threads()

        # One timer per plugin, ordered by next deadline, and the queue
        # the worker threads take their collect tasks from.
        self._timer_queue = queue.PriorityQueue()
        self._worker_queue = queue.Queue()

        # Create a thread that connects to dbus and processes requests we
        # get from there.
        self.bus = bus.Bus(self)

        # Add all plugins
        for plugin in plugins.get():
            self.add_plugin(plugin)

        log.debug(_("Collecty successfully initialized with %s plugins") \
            % len(self.plugins))

        log.debug(_("Supported locales: %s") % ", ".join(locales.get_supported_locales()))

    def add_plugin(self, plugin_class):
        """
        Instantiates plugin_class and registers the plugin.

        Initialisation errors are logged and the plugin is skipped so
        that one broken plugin cannot take down the whole daemon.
        """
        # Try initialising a new plugin. If that fails, we will log the
        # error and try to go on.
        try:
            plugin = plugin_class(self)

        # Catch Exception rather than using a bare except so that
        # SystemExit and KeyboardInterrupt still propagate.
        except Exception:
            log.critical(_("Plugin %s could not be initialised") % plugin_class, exc_info=True)
            return

        self.plugins.append(plugin)

    @property
    def templates(self):
        """
        Yields all graph templates of all registered plugins.
        """
        for plugin in self.plugins:
            for template in plugin.templates:
                yield template

    def run(self):
        """
        The main loop: starts all threads and dispatches due timer
        events until shutdown() is called, then tears everything down.
        """
        # Register signal handlers.
        self.register_signal_handler()

        # Cannot do anything if no plugins have been initialised
        if not self.plugins:
            log.critical(_("No plugins have been initialised"))
            return

        # Start the bus
        self.bus.start()

        # Initialise the timer queue
        self.initialise_timer_queue()

        # Start worker threads
        for w in self.worker_threads:
            w.start()

        # Run the write queue thread
        self.write_queue.start()

        # Regularly submit all data to disk.
        while self.running:
            try:
                # Try processing one event from the queue. If that succeeded
                # we will retry immediately.
                if self.process_timer_queue():
                    continue

                # Otherwise we will sleep for a bit
                time.sleep(self.HEARTBEAT)

                # Log warnings if the worker queue is filling up
                queue_size = self._worker_queue.qsize()
                if queue_size >= 5:
                    log.warning(_("Worker queue is filling up with %s events") % queue_size)

            except KeyboardInterrupt:
                self.shutdown()
                break

        # Wait until all worker threads are finished
        for w in self.worker_threads:
            w.join()

        # Stop the bus thread
        self.bus.shutdown()

        # Write all collected data to disk before ending the main thread
        self.write_queue.shutdown()

        log.debug(_("Main thread exited"))

    def shutdown(self):
        """
        Stops the main loop and asks all worker threads to terminate.
        Safe to call more than once.
        """
        if not self.running:
            return

        log.info(_("Received shutdown signal"))
        self.running = False

        # Propagating shutdown to all threads.
        for w in self.worker_threads:
            w.shutdown()

    def register_signal_handler(self):
        """
        Installs signal_handler() for SIGTERM, SIGINT and SIGUSR1.
        """
        for s in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1):
            log.debug(_("Registering signal %d") % s)

            signal.signal(s, self.signal_handler)

    def signal_handler(self, sig, *args, **kwargs):
        """
        SIGTERM/SIGINT shut the daemon down; SIGUSR1 forces an
        immediate commit of all queued data.
        """
        log.info(_("Caught signal %d") % sig)

        if sig in (signal.SIGTERM, signal.SIGINT):
            # Shutdown this application.
            self.shutdown()

        elif sig == signal.SIGUSR1:
            # Commit all data.
            self.write_queue.commit()

    def get_plugin_from_template(self, template_name):
        """
        Returns the plugin that provides the template with the given
        name, or None if no plugin does.
        """
        for plugin in self.plugins:
            if template_name not in [t.name for t in plugin.templates]:
                continue

            return plugin

    def generate_graph(self, template_name, *args, **kwargs):
        """
        Renders a graph from the named template.

        Raises RuntimeError if no plugin provides the template.
        """
        plugin = self.get_plugin_from_template(template_name)
        if not plugin:
            raise RuntimeError("Could not find template %s" % template_name)

        return plugin.generate_graph(template_name, *args, **kwargs)

    def graph_info(self, template_name, *args, **kwargs):
        """
        Returns meta information about the graph of the named template.

        Raises RuntimeError if no plugin provides the template.
        """
        plugin = self.get_plugin_from_template(template_name)
        if not plugin:
            raise RuntimeError("Could not find template %s" % template_name)

        return plugin.graph_info(template_name, *args, **kwargs)

    def create_worker_threads(self, num=None):
        """
        Creates a number of worker threads
        """
        # If no number of threads is given, we will create as many as we have
        # active processor cores but never less than two.
        if num is None:
            num = max(multiprocessing.cpu_count(), 2)

        worker_threads = []

        # "worker_id" instead of "id" so the builtin is not shadowed
        for worker_id in range(num):
            worker_thread = WorkerThread(self, worker_id)
            worker_threads.append(worker_thread)

        return worker_threads

    def initialise_timer_queue(self):
        """
        Creates one PluginTimer per plugin and enqueues them all.
        """
        for p in self.plugins:
            timer = PluginTimer(p)

            self._timer_queue.put(timer)

    def process_timer_queue(self):
        """
        Checks the timer that is due next and, if it has expired, hands
        its plugin over to the worker queue.

        Returns the timer when an event was dispatched and None
        otherwise, so the caller in run() knows when it may sleep.
        """
        # Take the item from the timer queue that is to be due first
        timer = self._timer_queue.get()

        try:
            # If the timer event is to be executed, we will put the plugin
            # into the worker queue and reset the timer
            if timer.is_due():
                self._worker_queue.put(timer.plugin)
                timer.reset_deadline()

                # Only report success when an event was actually
                # dispatched - returning unconditionally would make the
                # main loop spin without ever sleeping.
                return timer
        finally:
            # Put the timer back into the timer queue.
            self._timer_queue.put(timer)
244 | ||
245 | ||
class WorkerThread(threading.Thread):
    """
    A daemon thread that takes plugins off the shared worker queue and
    runs their collect() operation.
    """
    # Seconds to wait for new work before re-checking the running flag.
    HEARTBEAT = 2.5

    def __init__(self, collecty, id):
        super().__init__()
        self.daemon = True

        self.log = logging.getLogger("collecty.worker")
        self.log.propagate = 1

        self.collecty = collecty
        self.id = id

        self.log.debug(_("Worker thread %s has been initialised") % self.id)

    @property
    def queue(self):
        """
        The queue this thread is getting events from
        """
        return self.collecty._worker_queue

    def run(self):
        self.log.debug(_("Worker thread %s has been started") % self.id)
        self.running = True

        while self.running:
            try:
                # Block for the next plugin, but wake up regularly so
                # that a shutdown request is noticed.
                task = self.queue.get(block=True, timeout=self.HEARTBEAT)

            # Nothing arrived in time - loop around and check the flag
            except queue.Empty:
                pass

            else:
                # Execute the collect operation for this plugin
                task.collect()

        self.log.debug(_("Worker thread %s has been terminated") % self.id)

    def shutdown(self):
        # Tell run() to exit after its current iteration
        self.running = False
287 | ||
288 | ||
class WriteQueue(threading.Thread):
    """
    A background thread that buffers collected data points and
    periodically flushes them to their RRD databases.
    """
    def __init__(self, collecty, submit_interval):
        threading.Thread.__init__(self)
        self.daemon = True

        self.collecty = collecty

        self.log = logging.getLogger("collecty.queue")
        self.log.propagate = 1

        # Elapses every submit_interval seconds to trigger a commit
        self.timer = plugins.Timer(submit_interval)

        # Priority queue so entries come back ordered by timestamp
        self._queue = queue.PriorityQueue()

        self.log.debug(_("Initialised write queue"))

    def run(self):
        self.log.debug(_("Write queue process started"))
        self.running = True

        while self.running:
            # Reset the timer.
            self.timer.reset()

            # Wait until the timer has successfully elapsed.
            if self.timer.wait():
                self.commit()

        # Flush anything still queued before the thread exits
        self.commit()
        self.log.debug(_("Write queue process stopped"))

    def shutdown(self):
        self.running = False
        self.timer.cancel()

        # Wait until all data has been written.
        self.join()

    def add(self, object, time, data):
        """
        Queues one data point (for object's RRD file) for a later commit.
        """
        result = QueueObject(object.file, time, data)
        self._queue.put(result)

    def commit(self):
        """
        Flushes the read data to disk.
        """
        # There is nothing to do if the queue is empty
        if self._queue.empty():
            self.log.debug(_("No data to commit"))
            return

        time_start = time.time()

        self.log.debug(_("Submitting data to the databases..."))

        # Get all objects from the queue and group them by the RRD file
        # to commit them all at once
        results = {}
        while not self._queue.empty():
            result = self._queue.get()
            results.setdefault(result.file, []).append(result)

        # Write the collected data to disk. A distinct loop variable is
        # used here - the original code rebound "results" inside the
        # loop, shadowing the dict it was iterating over.
        for filename, file_results in results.items():
            self._commit_file(filename, file_results)

        duration = time.time() - time_start
        self.log.debug(_("Emptied write queue in %.2fs") % duration)

    def _commit_file(self, filename, results):
        """
        Writes one batch of results into a single RRD file.
        """
        self.log.debug(_("Committing %(counter)s entries to %(filename)s") \
            % { "counter" : len(results), "filename" : filename })

        for result in results:
            self.log.debug("  %s: %s" % (result.time, result.data))

        try:
            rrdtool.update(filename, *["%s" % r for r in results])

        # Catch operational errors like unreadable/unwritable RRD databases
        # or those where the format has changed. The collected data will be lost.
        except rrdtool.OperationalError as e:
            self.log.critical(_("Could not update RRD database %s: %s") \
                % (filename, e))
376 | ||
377 | ||
class QueueObject(object):
    """
    A single data point waiting in the write queue.

    Carries the target RRD file, the timestamp and the payload, and
    orders itself by timestamp so that a priority queue yields the
    oldest entry first.
    """
    def __init__(self, file, time, data):
        self.file = file
        self.time = time
        self.data = data

    def __str__(self):
        # Rendered as "<unix timestamp>:<data>", the format that
        # rrdtool update expects
        timestamp = self.time.strftime("%s")
        return "%s:%s" % (timestamp, self.data)

    def __lt__(self, other):
        return self.time < other.time
389 | ||
390 | ||
class PluginTimer(object):
    """
    Tracks when a plugin is next due to collect data.

    Orders itself by deadline so that a priority queue yields the timer
    that is due first.
    """
    def __init__(self, plugin):
        self.plugin = plugin

        # A freshly created timer is due immediately
        self.deadline = datetime.datetime.utcnow()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.deadline)

    def __lt__(self, other):
        return self.deadline < other.deadline

    def reset_deadline(self):
        # Schedule the next run one plugin interval from now
        interval = datetime.timedelta(seconds=self.plugin.interval)
        self.deadline = datetime.datetime.utcnow() + interval

    def is_due(self):
        return datetime.datetime.utcnow() >= self.deadline