]> git.ipfire.org Git - ipfire.org.git/blob - src/backend/mirrors.py
.gitignore: Add .vscode
[ipfire.org.git] / src / backend / mirrors.py
1 #!/usr/bin/python
2
3 from __future__ import division
4
5 import datetime
6 import logging
7 import math
8 import os.path
9 import random
10 import socket
11 import time
12 import tornado.httpclient
13 import tornado.netutil
14 import urlparse
15
16 from misc import Object
17
class Downloads(Object):
    """Download statistics computed from the ``log_download`` table."""

    @property
    def total(self):
        """Number of rows in ``log_download``."""
        row = self.db.get("SELECT COUNT(*) AS total FROM log_download")
        return row.total

    @property
    def today(self):
        """Number of logged downloads whose date is the database's current date."""
        row = self.db.get("SELECT COUNT(*) AS today FROM log_download WHERE date::date = NOW()::date")
        return row.today

    @property
    def yesterday(self):
        """Number of logged downloads dated one day before the database's current date."""
        row = self.db.get("SELECT COUNT(*) AS yesterday FROM log_download WHERE date::date = (NOW() - INTERVAL '1 day')::date")
        return row.yesterday

    @property
    def daily_map(self):
        """Per-day download counts for the last 30 days, ordered by date.

        Returns whatever ``self.db.query`` yields; each row exposes ``date``
        and ``count`` columns.
        """
        return self.db.query(
            "WITH downloads AS (SELECT * FROM log_download "
            "WHERE DATE(date) BETWEEN (NOW()::date - INTERVAL '30 days') AND DATE(NOW())) "
            "SELECT DATE(date) AS date, COUNT(*) AS count FROM downloads "
            "GROUP BY DATE(date) ORDER BY date"
        )

    def get_countries(self, duration="all"):
        """Return ``(country_code, share)`` tuples, most downloads first.

        ``share`` is the country's count divided by the sum of all counts.
        Only ``duration == "today"`` restricts the query; any other value
        covers the whole log. An empty list is returned when nothing was
        counted.
        """
        sql = "SELECT country_code, count(country_code) AS count FROM log_download"
        if duration == "today":
            sql += " WHERE date::date = NOW()::date"
        sql += " GROUP BY country_code ORDER BY count DESC"

        rows = self.db.query(sql)

        overall = sum([row.count for row in rows])
        if not overall:
            return []

        return [(row.country_code, row.count / overall) for row in rows]

    def get_mirror_load(self, duration="all"):
        """Return a dict mapping mirror hostname to its share of downloads.

        Only ``duration == "today"`` restricts the query; any other value
        covers the whole log. An empty dict is returned when nothing was
        counted.
        """
        sql = "SELECT mirror, COUNT(mirror) AS count FROM log_download"
        if duration == "today":
            sql += " WHERE date::date = NOW()::date"
        sql += " GROUP BY mirror ORDER BY count DESC"

        rows = self.db.query(sql)
        load = {}

        overall = sum([row.count for row in rows])
        if not overall:
            return load

        for row in rows:
            # Resolve the mirror id from the log into a Mirror to get its hostname
            hostname = self.mirrors.get(row.mirror).hostname
            load[hostname] = row.count / overall

        return load
82
83
class Mirrors(Object):
    """Lookup helpers for the ``mirrors`` and ``mirror_files`` tables."""

    def check_all(self):
        """Run ``check()`` on every mirror returned by ``get_all()``.

        A failure while checking one mirror (for example a hostname that
        cannot be resolved) is logged and does not stop the remaining
        mirrors from being checked.
        """
        for mirror in self.get_all():
            try:
                mirror.check()
            except Exception:
                logging.exception("Could not check mirror %r", mirror)

    def get(self, id):
        """Return the ``Mirror`` with the given database id."""
        return Mirror(self.backend, id)

    def get_all(self):
        """Return a sorted ``MirrorSet`` of all mirrors with ``enabled`` set."""
        rows = self.db.query("SELECT * FROM mirrors WHERE enabled = %s", True)

        mirrors = [Mirror(self.backend, row.id, row) for row in rows]

        return MirrorSet(self.backend, sorted(mirrors))

    def get_by_hostname(self, hostname):
        """Return the ``Mirror`` with this hostname, or None if the lookup is empty."""
        row = self.db.get("SELECT * FROM mirrors WHERE hostname = %s", hostname)

        if row:
            return Mirror(self.backend, row.id, row)

    def get_with_file(self, filename, country=None):
        """Return a list of mirrors in state ``UP`` that carry ``filename``.

        ``country`` is accepted for compatibility but is not used.

        The mirrors are fetched with one query (the same sub-select that
        ``get_random`` uses) instead of one extra query per mirror id.
        """
        rows = self.db.query(
            "SELECT * FROM mirrors WHERE state = %s "
            "AND mirrors.id IN (SELECT mirror FROM mirror_files "
            "WHERE filename = %s)", "UP", filename)

        return [Mirror(self.backend, row.id, row) for row in rows]

    def get_for_location(self, location, max_distance=4000, filename=None):
        """Return mirrors in state ``UP`` within ``max_distance`` of ``location``.

        ``location`` must expose ``latitude`` and ``longitude``; a false
        value yields an empty list. The distance is evaluated by the
        database function ``geodistance``. When ``filename`` is given, only
        mirrors listed for that file in ``mirror_files`` are considered.

        The result is a list sorted in reverse mirror order.
        """
        if not location:
            return []

        query = (
            "WITH client AS (SELECT point(%s, %s) AS location) "
            "SELECT * FROM mirrors WHERE mirrors.state = %s "
        )
        args = [location.latitude, location.longitude, "UP"]

        # Optional restriction to mirrors that carry the requested file
        if filename:
            query += (
                "AND mirrors.id IN ( "
                "SELECT mirror FROM mirror_files WHERE filename = %s "
                ") "
            )
            args.append(filename)

        query += (
            "AND mirrors.id IN ( "
            "SELECT id FROM mirrors_locations, client "
            "WHERE geodistance(mirrors_locations.location, client.location) <= %s "
            ")"
        )
        args.append(max_distance)

        rows = self.db.query(query, *args)

        mirrors = [Mirror(self.backend, row.id, row) for row in rows]

        return sorted(mirrors, reverse=True)

    def get_all_files(self):
        """Return every distinct filename offered by a mirror in state ``UP``.

        Filenames keep the order in which they are first encountered.
        """
        files = []
        seen = set()

        for mirror in self.get_all():
            if mirror.state != "UP":
                continue

            for filename in mirror.filelist:
                # Set lookup instead of scanning the result list for every file
                if filename in seen:
                    continue

                seen.add(filename)
                files.append(filename)

        return files

    def get_random(self, filename=None):
        """Return a randomly chosen mirror in state ``UP``.

        When ``filename`` is given, only mirrors listed for that file are
        candidates. Returns None when the query yields no row.
        """
        if filename:
            row = self.db.get(
                "SELECT * FROM mirrors WHERE state = %s "
                "AND mirrors.id IN (SELECT mirror FROM mirror_files "
                "WHERE filename = %s) ORDER BY RANDOM() LIMIT 1", "UP", filename)
        else:
            row = self.db.get(
                "SELECT * FROM mirrors WHERE state = %s "
                "ORDER BY RANDOM() LIMIT 1", "UP")

        if row:
            return Mirror(self.backend, row.id, row)

    def file_exists(self, filename):
        """Return True if any mirror lists ``filename`` in ``mirror_files``."""
        row = self.db.get(
            "SELECT 1 FROM mirror_files "
            "WHERE filename = %s LIMIT 1", filename)

        return bool(row)
191
192
class MirrorSet(Object):
    """An ordered collection of ``Mirror`` objects with filtering helpers."""

    def __init__(self, backend, mirrors):
        """Wrap the list ``mirrors``; the list is stored, not copied."""
        Object.__init__(self, backend)

        self._mirrors = mirrors

    def __add__(self, other):
        """Return the union of both sets, keeping first occurrences in order."""
        merged = []

        for mirror in self._mirrors + other._mirrors:
            # Membership relies on mirror equality, so a list is kept here
            if mirror not in merged:
                merged.append(mirror)

        return MirrorSet(self.backend, merged)

    def __sub__(self, other):
        """Return this set without the mirrors contained in ``other``."""
        remaining = self._mirrors[:]

        for mirror in other._mirrors:
            if mirror in remaining:
                remaining.remove(mirror)

        return MirrorSet(self.backend, remaining)

    def __iter__(self):
        return iter(self._mirrors)

    def __len__(self):
        return len(self._mirrors)

    def __str__(self):
        return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])

    def get_with_file(self, filename):
        """Return the subset of mirrors listed for ``filename`` in ``mirror_files``."""
        rows = self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)

        # A set makes the per-mirror membership test constant time
        with_file = set(row.mirror for row in rows)

        mirrors = [m for m in self._mirrors if m.id in with_file]

        return MirrorSet(self.backend, mirrors)

    def get_random(self):
        """Return a random mirror, weighted by each mirror's ``priority``.

        Every mirror takes part ``priority`` times in the draw. Returns None
        when there is nothing to choose from (empty set, or no mirror with a
        positive priority) instead of letting ``random.choice`` raise
        ``IndexError``.
        """
        candidates = []
        for mirror in self._mirrors:
            candidates.extend([mirror] * mirror.priority)

        if not candidates:
            return None

        return random.choice(candidates)

    def get_for_location(self, location):
        """Return the mirrors close to ``location`` as a new ``MirrorSet``.

        The search starts with a radius of 2500 and widens it by 20% per
        round until more than three mirrors were found or the radius
        exceeds 8000. The radius is compared against
        ``Mirror.distance_to(location)``; mirrors for which that returns
        None are skipped. A false ``location`` yields an empty set.
        """
        distance = 2500
        mirrors = []

        if location:
            while len(mirrors) <= 3 and distance <= 8000:
                for mirror in self._mirrors:
                    if mirror in mirrors:
                        continue

                    mirror_distance = mirror.distance_to(location)
                    if mirror_distance is None:
                        continue

                    if mirror_distance <= distance:
                        mirrors.append(mirror)

                distance *= 1.2

        return MirrorSet(self.backend, mirrors)

    def get_with_state(self, state):
        """Return the subset of mirrors whose ``state`` equals ``state``."""
        mirrors = [m for m in self._mirrors if m.state == state]

        return MirrorSet(self.backend, mirrors)
275
276
class Mirror(Object):
    """One download mirror, backed by a row of the ``mirrors`` table.

    The row is kept in ``self._info`` and is accessed both by attribute
    and by key; most properties below simply expose one of its columns.
    """

    # Marker meaning "not looked up yet" for the lazily cached values below
    _UNSET = object()

    # Class-level defaults for the per-instance caches of ``asn`` and
    # ``addresses``. Inside the class body these names are mangled to
    # ``_Mirror__asn`` / ``_Mirror__addresses``, which is also where
    # ``self.__asn = ...`` stores the value. A ``hasattr(self, "__asn")``
    # test looks for the unmangled name and therefore never finds it.
    __asn = _UNSET
    __addresses = _UNSET

    def __init__(self, backend, id, data=None):
        """Create the mirror with database id ``id``.

        ``data`` is an already fetched ``mirrors`` row; when it is missing
        the row is loaded from the database.
        """
        Object.__init__(self, backend)

        self.id = id

        if data:
            self._info = data
        else:
            self._info = self.db.get("SELECT * FROM mirrors WHERE id = %s", self.id)
        self._info["url"] = self.generate_url()

        self.__location = None
        self.__country_name = None

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.url)

    def __cmp__(self, other):
        """Order mirrors by country code first, then by hostname (Python 2 hook)."""
        ret = cmp(self.country_code, other.country_code)

        if not ret:
            ret = cmp(self.hostname, other.hostname)

        return ret

    def generate_url(self):
        """Build the base URL of the mirror from scheme, hostname and path.

        The path part always starts and ends with exactly one added slash
        where needed; an empty path yields a single ``/``.
        """
        scheme = "https" if self.supports_https else "http"

        path = "%s" % (self.path or "")
        if not path.startswith("/"):
            path = "/" + path
        if not path.endswith("/"):
            path += "/"

        return "%s://%s%s" % (scheme, self.hostname, path)

    @property
    def hostname(self):
        return self._info.hostname

    @property
    def path(self):
        return self._info.path

    @property
    def supports_https(self):
        return self._info.supports_https

    @property
    def address(self):
        """The first IPv4 address of the mirror, else the first IPv6 one, else None."""
        for addr in self.addresses4:
            return addr

        for addr in self.addresses6:
            return addr

    @property
    def owner(self):
        return self._info.owner

    @property
    def location(self):
        """GeoIP location of ``self.address``; looked up until a non-None result is cached."""
        if self.__location is None:
            self.__location = self.geoip.get_location(self.address)

        return self.__location

    @property
    def latitude(self):
        if self.location:
            return self.location.latitude

    @property
    def longitude(self):
        if self.location:
            return self.location.longitude

    @property
    def coordinates(self):
        return (self.latitude, self.longitude)

    @property
    def coordiante_str(self):
        """``"<latitude>,<longitude>"``; the misspelled name is kept for existing callers."""
        return ",".join(["%s" % c for c in self.coordinates])

    @property
    def country_code(self):
        if self.location:
            return self.location.country

    @property
    def country_name(self):
        if self.__country_name is None:
            self.__country_name = self.geoip.get_country_name(self.country_code)

        return self.__country_name

    @property
    def location_str(self):
        """Human readable location.

        The ``location`` column of the row wins when it is set; otherwise
        the GeoIP city and the country name are joined with ``", "``.
        """
        parts = []

        if self._info.location:
            parts.append(self._info.location)

        elif self.location:
            parts.append(self.location.city)
            parts.append(self.country_name)

        return ", ".join([s for s in parts if s])

    @property
    def asn(self):
        """Result of ``geoip.get_asn`` for ``self.address``, looked up once per instance."""
        if self.__asn is self._UNSET:
            self.__asn = self.geoip.get_asn(self.address)

        return self.__asn

    @property
    def filelist(self):
        """Sorted list of filenames stored for this mirror in ``mirror_files``."""
        rows = self.db.query("SELECT filename FROM mirror_files WHERE mirror=%s ORDER BY filename", self.id)
        return [row.filename for row in rows]

    @property
    def prefix(self):
        return ""

    @property
    def url(self):
        return self._info.url

    def build_url(self, filename):
        """Join ``filename`` onto the base URL of the mirror."""
        return urlparse.urljoin(self.url, filename)

    @property
    def last_update(self):
        return self._info.last_update

    @property
    def state(self):
        return self._info.state

    def set_state(self, state):
        """Store ``state`` in the database and in the cached row, if it changed."""
        logging.info("Setting state of %s to %s", self.hostname, state)

        if self.state == state:
            return

        self.db.execute("UPDATE mirrors SET state = %s WHERE id = %s", state, self.id)

        # Reload changed settings
        if hasattr(self, "_info"):
            self._info["state"] = state

    @property
    def enabled(self):
        return self._info.enabled

    @property
    def disabled(self):
        return not self.enabled

    def check(self):
        """Store the current address and start the timestamp and filelist checks."""
        logging.info("Running check for mirror %s", self.hostname)

        self.db.execute("UPDATE mirrors SET address = %s WHERE id = %s",
            self.address, self.id)

        self.check_timestamp()
        self.check_filelist()

    def check_state(self):
        """Derive the state of the mirror from ``enabled`` and ``last_update``.

        Disabled mirrors and mirrors without a ``last_update`` are ``DOWN``.
        Otherwise the age of ``last_update`` is compared with the settings
        ``mirrors_time_down`` (default three days) and
        ``mirrors_time_outofsync`` (default six hours), giving ``DOWN``,
        ``OUTOFSYNC`` or ``UP``.
        """
        logging.debug("Checking state of mirror %s", self.id)

        if not self.enabled:
            self.set_state("DOWN")
            return

        # No timestamp has been recorded yet, so there is nothing to subtract
        if self.last_update is None:
            self.set_state("DOWN")
            return

        now = datetime.datetime.utcnow()

        time_delta = now - self.last_update
        time_diff = time_delta.total_seconds()

        time_down = self.settings.get_int("mirrors_time_down", 3*24*60*60)
        if time_diff >= time_down:
            self.set_state("DOWN")
            return

        time_outofsync = self.settings.get_int("mirrors_time_outofsync", 6*60*60)
        if time_diff >= time_outofsync:
            self.set_state("OUTOFSYNC")
            return

        self.set_state("UP")

    def check_timestamp(self):
        """Fetch ``<url>.timestamp``; the response is handled in a callback."""
        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".timestamp",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_timestamp_response)

    def __check_timestamp_response(self, response):
        """Store the fetched timestamp as ``last_update`` and re-evaluate the state.

        A failed request marks the mirror ``DOWN``. A body that is not a
        usable integer timestamp counts as timestamp 0.
        """
        if response.error:
            logging.debug("Error getting timestamp from %s", self.hostname)
            self.set_state("DOWN")
            return

        try:
            timestamp = datetime.datetime.utcfromtimestamp(int(response.body.strip()))
        except (ValueError, OverflowError, OSError):
            # Not a number, or a number outside of the representable range
            timestamp = datetime.datetime.utcfromtimestamp(0)

        self.db.execute("UPDATE mirrors SET last_update = %s WHERE id = %s",
            timestamp, self.id)

        # Reload changed settings. The key has to be the column name that
        # the last_update property reads, otherwise check_state() below
        # would still see the previous value.
        if hasattr(self, "_info"):
            self._info["last_update"] = timestamp

        self.check_state()

        logging.info("Successfully updated timestamp from %s", self.hostname)

    def check_filelist(self):
        """Fetch ``<url>.filelist`` for enabled mirrors; handled in a callback."""
        # XXX need to remove data from disabled mirrors
        if not self.enabled:
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".filelist",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_filelist_response)

    def __check_filelist_response(self, response):
        """Synchronise ``mirror_files`` for this mirror with the fetched list.

        Files that are new in the response are inserted, files that are
        stored but no longer listed are deleted. Blank lines and repeated
        lines in the response are ignored.
        """
        if response.error:
            logging.debug("Error getting filelist from %s", self.hostname)
            return

        stored = set(self.filelist)
        listed = set()

        for line in response.body.splitlines():
            if not line:
                continue

            filename = os.path.join(self.prefix, line)

            # Each file is handled once, even if the mirror lists it twice
            if filename in listed:
                continue
            listed.add(filename)

            if filename in stored:
                continue

            self.db.execute("INSERT INTO mirror_files(mirror, filename) VALUES(%s, %s)",
                self.id, filename)

        for filename in sorted(stored - listed):
            self.db.execute("DELETE FROM mirror_files WHERE mirror=%s AND filename=%s",
                self.id, filename)

        logging.info("Successfully updated mirror filelist from %s", self.hostname)

    @property
    def prefer_for_countries(self):
        """Sorted list parsed from the ``", "``-separated ``prefer_for_countries`` value."""
        countries = self._info.get("prefer_for_countries", "")
        if countries:
            return sorted(countries.split(", "))

        return []

    @property
    def prefer_for_countries_names(self):
        countries = [self.geoip.get_country_name(c.upper()) for c in self.prefer_for_countries]

        return sorted(countries)

    def distance_to(self, location, ignore_preference=False):
        """Return the great-circle distance in km between the mirror and ``location``.

        Returns 0 when the lower-cased country of ``location`` is listed in
        ``prefer_for_countries`` (unless ``ignore_preference`` is set), and
        None when ``location`` is false or the mirror has no coordinates.
        """
        if not location:
            return None

        country_code = None
        if location.country:
            country_code = location.country.lower()

        if not ignore_preference and country_code in self.prefer_for_countries:
            return 0

        # http://www.movable-type.co.uk/scripts/latlong.html

        if self.latitude is None:
            return None

        if self.longitude is None:
            return None

        earth = 6371 # km
        delta_lat = math.radians(self.latitude - location.latitude)
        delta_lon = math.radians(self.longitude - location.longitude)

        lat1 = math.radians(self.latitude)
        lat2 = math.radians(location.latitude)

        a = math.sin(delta_lat / 2) ** 2
        a += math.cos(lat1) * math.cos(lat2) * (math.sin(delta_lon / 2) ** 2)

        # Rounding can push a marginally outside of [0, 1], which would make
        # one of the square roots below fail
        a = min(1.0, max(0.0, a))

        b1 = math.sqrt(a)
        b2 = math.sqrt(1 - a)

        c = 2 * math.atan2(b1, b2)

        return c * earth

    def traffic(self, since):
        """Estimate the traffic served by this mirror from ``since`` on.

        Sums download count times file size over the files logged for this
        mirror; files without an entry in the ``files`` table are skipped.
        """
        # XXX needs to be done better

        sizes = {}
        for entry in self.db.query("SELECT filename, filesize FROM files"):
            sizes[entry.filename] = entry.filesize

        query = "SELECT COUNT(filename) as count, filename FROM log_download WHERE mirror = %s"
        query += " AND date >= %s GROUP BY filename"

        traffic = 0
        for entry in self.db.query(query, self.id, since):
            if entry.filename in sizes:
                traffic += entry.count * sizes[entry.filename]

        return traffic

    @property
    def priority(self):
        return self._info.get("priority", 10)

    @property
    def development(self):
        return self._info.get("mirrorlist_devel", False)

    @property
    def mirrorlist(self):
        return self._info.get("mirrorlist", False)

    @property
    def addresses(self):
        """List of ``(family, address)`` tuples for the hostname, resolved once per instance.

        Raises ``Exception`` when the hostname cannot be resolved.
        """
        if self.__addresses is self._UNSET:
            try:
                addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM)
            except (socket.error, UnicodeError):
                raise Exception("Could not resolve %s" % self.hostname)

            ret = []
            for family, socktype, proto, canonname, sockaddr in addrinfo:
                # The host is the first element of both IPv4 and IPv6 socket addresses
                if family in (socket.AF_INET, socket.AF_INET6):
                    address = sockaddr[0]
                else:
                    address = sockaddr
                ret.append((family, address))

            self.__addresses = ret

        return self.__addresses

    @property
    def addresses6(self):
        return [address for family, address in self.addresses if family == socket.AF_INET6]

    @property
    def addresses4(self):
        return [address for family, address in self.addresses if family == socket.AF_INET]