git.ipfire.org Git - ipfire.org.git/blob - src/backend/mirrors.py
python3: More code cleanup
[ipfire.org.git] / src / backend / mirrors.py
1 #!/usr/bin/python
2
3 import datetime
4 import logging
5 import math
6 import os.path
7 import random
8 import socket
9 import time
10 import tornado.httpclient
11 import tornado.netutil
12 import urllib.parse
13
14 from .misc import Object
15
class Downloads(Object):
    """Download statistics computed from the ``log_download`` table."""

    @property
    def total(self):
        """Number of rows in ``log_download``."""
        row = self.db.get("SELECT COUNT(*) AS total FROM log_download")
        return row.total

    @property
    def today(self):
        """Number of downloads logged with today's date."""
        row = self.db.get("SELECT COUNT(*) AS today FROM log_download WHERE date::date = NOW()::date")
        return row.today

    @property
    def yesterday(self):
        """Number of downloads logged with yesterday's date."""
        row = self.db.get("SELECT COUNT(*) AS yesterday FROM log_download WHERE date::date = (NOW() - INTERVAL '1 day')::date")
        return row.yesterday

    @property
    def daily_map(self):
        """Per-day download counts for the last 30 days, ordered by date."""
        return self.db.query("WITH downloads AS (SELECT * FROM log_download \
            WHERE DATE(date) BETWEEN (NOW()::date - INTERVAL '30 days') AND DATE(NOW())) \
            SELECT DATE(date) AS date, COUNT(*) AS count FROM downloads \
            GROUP BY DATE(date) ORDER BY date")

    def get_countries(self, duration="all"):
        """Return ``(country_code, share)`` tuples, largest share first.

        ``share`` is the fraction of all counted downloads. Passing
        ``duration="today"`` restricts the statistic to today's downloads;
        any other value covers the whole log. An empty list is returned
        when nothing was counted.
        """
        parts = ["SELECT country_code, count(country_code) AS count FROM log_download"]
        if duration == "today":
            parts.append(" WHERE date::date = NOW()::date")
        parts.append(" GROUP BY country_code ORDER BY count DESC")

        rows = self.db.query("".join(parts))

        overall = sum(row.count for row in rows)
        if not overall:
            return []

        return [(row.country_code, row.count / overall) for row in rows]

    def get_mirror_load(self, duration="all"):
        """Return a dict mapping mirror hostname to its share of downloads.

        Passing ``duration="today"`` restricts the statistic to today's
        downloads; any other value covers the whole log. An empty dict is
        returned when nothing was counted.
        """
        parts = ["SELECT mirror, COUNT(mirror) AS count FROM log_download"]
        if duration == "today":
            parts.append(" WHERE date::date = NOW()::date")
        parts.append(" GROUP BY mirror ORDER BY count DESC")

        rows = self.db.query("".join(parts))

        overall = sum(row.count for row in rows)
        if not overall:
            return {}

        load = {}
        for row in rows:
            # Resolve the mirror id from the log to its hostname
            hostname = self.mirrors.get(row.mirror).hostname
            load[hostname] = row.count / overall

        return load
80
81
class Mirrors(Object):
    """Accessors for the mirrors stored in the ``mirrors`` table."""

    def check_all(self):
        """Run ``check()`` on every enabled mirror."""
        for mirror in self.get_all():
            mirror.check()

    def get(self, id):
        """Return the mirror with the given id."""
        return Mirror(self.backend, id)

    def get_all(self):
        """Return all enabled mirrors as a sorted ``MirrorSet``."""
        res = self.db.query("SELECT * FROM mirrors WHERE enabled = %s", True)

        mirrors = [Mirror(self.backend, row.id, row) for row in res]

        return MirrorSet(self.backend, sorted(mirrors))

    def get_by_hostname(self, hostname):
        """Return the mirror with the given hostname, or None if there is none."""
        ret = self.db.get("SELECT * FROM mirrors WHERE hostname = %s", hostname)

        if ret:
            return Mirror(self.backend, ret.id, ret)

    def get_with_file(self, filename, country=None):
        """Return a list of all mirrors in state "UP" that carry ``filename``.

        ``country`` is accepted for compatibility with existing callers but
        is currently not used to filter or rank the result.
        """
        # XXX quick and dirty solution - needs a performance boost
        mirror_ids = [m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)]

        mirrors = []
        for mirror_id in mirror_ids:
            mirror = self.get(mirror_id)
            if mirror.state == "UP":
                mirrors.append(mirror)

        return mirrors

    def get_for_location(self, location, max_distance=4000, filename=None):
        """Return mirrors in state "UP" within ``max_distance`` of ``location``.

        The distance is evaluated in the database by ``geodistance()``
        against ``mirrors_locations``. When ``filename`` is given, only
        mirrors that carry this file are considered. The result is a list
        sorted in reverse mirror order; it is empty when ``location`` is
        falsy.
        """
        if not location:
            return []

        if filename:
            res = self.db.query("\
                WITH client AS (SELECT point(%s, %s) AS location) \
                SELECT * FROM mirrors WHERE mirrors.state = %s \
                AND mirrors.id IN ( \
                SELECT mirror FROM mirror_files WHERE filename = %s \
                ) AND mirrors.id IN ( \
                SELECT id FROM mirrors_locations, client \
                WHERE geodistance(mirrors_locations.location, client.location) <= %s \
                )",
                location.latitude, location.longitude, "UP", filename, max_distance)
        else:
            res = self.db.query("\
                WITH client AS (SELECT point(%s, %s) AS location) \
                SELECT * FROM mirrors WHERE mirrors.state = %s AND mirrors.id IN ( \
                SELECT id FROM mirrors_locations, client \
                WHERE geodistance(mirrors_locations.location, client.location) <= %s \
                )",
                location.latitude, location.longitude, "UP", max_distance)

        mirrors = [Mirror(self.backend, row.id, row) for row in res]

        return sorted(mirrors, reverse=True)

    def get_all_files(self):
        """Return every filename found on any enabled mirror in state "UP".

        Each filename appears once, in the order in which it was first seen.
        """
        files = []

        # Track membership in a set so de-duplication does not rescan the
        # result list for every single file
        seen = set()

        for mirror in self.get_all():
            if mirror.state != "UP":
                continue

            for file in mirror.filelist:
                if file in seen:
                    continue

                seen.add(file)
                files.append(file)

        return files

    def get_random(self, filename=None):
        """Return a random mirror in state "UP", or None if there is none.

        When ``filename`` is given, only mirrors that carry this file are
        considered.
        """
        if filename:
            ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \
                AND mirrors.id IN (SELECT mirror FROM mirror_files \
                WHERE filename = %s) ORDER BY RANDOM() LIMIT 1", "UP", filename)
        else:
            ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \
                ORDER BY RANDOM() LIMIT 1", "UP")

        if ret:
            return Mirror(self.backend, ret.id, ret)

    def file_exists(self, filename):
        """Return True if at least one mirror is recorded as carrying ``filename``."""
        ret = self.db.get("SELECT 1 FROM mirror_files \
            WHERE filename = %s LIMIT 1", filename)

        return bool(ret)
189
190
class MirrorSet(Object):
    """An ordered collection of ``Mirror`` objects with filtering helpers."""

    def __init__(self, backend, mirrors):
        """Wrap the given list of mirrors."""
        Object.__init__(self, backend)

        self._mirrors = mirrors

    def __add__(self, other):
        """Return the union of both sets, keeping first-seen order."""
        mirrors = []

        for mirror in self._mirrors + other._mirrors:
            if mirror in mirrors:
                continue

            mirrors.append(mirror)

        return MirrorSet(self.backend, mirrors)

    def __sub__(self, other):
        """Return this set without the mirrors that are contained in ``other``."""
        mirrors = self._mirrors[:]

        for mirror in other._mirrors:
            if mirror in mirrors:
                mirrors.remove(mirror)

        return MirrorSet(self.backend, mirrors)

    def __iter__(self):
        return iter(self._mirrors)

    def __len__(self):
        return len(self._mirrors)

    def __str__(self):
        return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])

    def get_with_file(self, filename):
        """Return the subset of mirrors that carry ``filename``."""
        # A set makes the membership test below independent of how many
        # mirrors carry the file
        with_file = {m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)}

        mirrors = [mirror for mirror in self._mirrors if mirror.id in with_file]

        return MirrorSet(self.backend, mirrors)

    def get_random(self):
        """Pick one mirror at random, weighted by its ``priority``.

        Raises IndexError (from ``random.choice``) when the set is empty or
        no mirror has a positive priority.
        """
        mirrors = []
        for mirror in self._mirrors:
            # A mirror is entered once per priority point
            mirrors.extend([mirror] * mirror.priority)

        return random.choice(mirrors)

    def get_for_location(self, location):
        """Return the mirrors close to ``location``.

        The search starts with a radius of 2500 km and widens it by 20% per
        step until more than three mirrors have been found or the radius
        exceeds 8000 km. Mirrors whose distance cannot be determined are
        skipped. An empty set is returned when ``location`` is falsy.
        """
        distance = 2500
        mirrors = []

        if location:
            # The distance of a mirror does not depend on the search radius,
            # so compute it once instead of once per radius step
            candidates = []
            for mirror in self._mirrors:
                mirror_distance = mirror.distance_to(location)
                if mirror_distance is None:
                    continue

                candidates.append((mirror, mirror_distance))

            while len(mirrors) <= 3 and distance <= 8000:
                for mirror, mirror_distance in candidates:
                    if mirror_distance > distance:
                        continue

                    if mirror in mirrors:
                        continue

                    mirrors.append(mirror)

                distance *= 1.2

        return MirrorSet(self.backend, mirrors)

    def get_with_state(self, state):
        """Return the subset of mirrors that are in the given state."""
        mirrors = [mirror for mirror in self._mirrors if mirror.state == state]

        return MirrorSet(self.backend, mirrors)
273
274
class Mirror(Object):
    """A single download mirror, backed by one row of the ``mirrors`` table."""

    def __init__(self, backend, id, data=None):
        """Load the mirror with the given id.

        ``data`` may be an already fetched row of the ``mirrors`` table; when
        it is omitted (or falsy) the row is read from the database.
        """
        Object.__init__(self, backend)

        self.id = id

        if data:
            self._info = data
        else:
            self._info = self.db.get("SELECT * FROM mirrors WHERE id = %s", self.id)

        # The base URL is derived from hostname, path and HTTPS support
        self._info["url"] = self.generate_url()

        self.__location = None
        self.__country_name = None

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.url)

    # Python 3 ignores __cmp__ and has no cmp(), so the ordering (country
    # code first, then hostname) is implemented with rich comparisons.

    def _sort_key(self):
        """Key used for ordering; mirrors without a country code sort first."""
        return (self.country_code or "", self.hostname)

    def __eq__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        # The country code is looked up from the resolved hostname, so
        # comparing hostnames is sufficient and avoids a DNS/GeoIP lookup
        return self.hostname == other.hostname

    def __hash__(self):
        return hash(self.hostname)

    def __lt__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        return self._sort_key() >= other._sort_key()

    def generate_url(self):
        """Build the base URL of the mirror, always ending with a slash."""
        url = "%s://%s" % ("https" if self.supports_https else "http", self.hostname)
        if not self.path.startswith("/"):
            url += "/"
        url += "%s" % self.path
        if not self.path.endswith("/"):
            url += "/"
        return url

    @property
    def hostname(self):
        return self._info.hostname

    @property
    def path(self):
        return self._info.path

    @property
    def supports_https(self):
        return self._info.supports_https

    @property
    def address(self):
        """The first IPv4 address of the mirror, else the first IPv6 address.

        Returns None when the hostname resolved to neither.
        """
        for addr in self.addresses4:
            return addr

        for addr in self.addresses6:
            return addr

    @property
    def owner(self):
        return self._info.owner

    @property
    def location(self):
        """GeoIP location of the mirror's address (cached once found)."""
        if self.__location is None:
            self.__location = self.geoip.get_location(self.address)

        return self.__location

    @property
    def latitude(self):
        if self.location:
            return self.location.latitude

    @property
    def longitude(self):
        if self.location:
            return self.location.longitude

    @property
    def coordinates(self):
        return (self.latitude, self.longitude)

    # The misspelled name is kept because callers outside this file may use it
    @property
    def coordiante_str(self):
        """The coordinates as a "latitude,longitude" string."""
        return ",".join(["%s" % i for i in self.coordinates])

    @property
    def country_code(self):
        if self.location:
            return self.location.country

    @property
    def country_name(self):
        if self.__country_name is None:
            self.__country_name = self.geoip.get_country_name(self.country_code)

        return self.__country_name

    @property
    def location_str(self):
        """Human readable location of the mirror.

        The location stored in the database takes precedence over the
        city and country name obtained from GeoIP.
        """
        location = []

        if self._info.location:
            location.append(self._info.location)

        elif self.location:
            location.append(self.location.city)
            location.append(self.country_name)

        return ", ".join([s for s in location if s])

    @property
    def asn(self):
        """The ASN of the mirror's address as reported by GeoIP (cached)."""
        # hasattr(self, "__asn") never matches the name-mangled attribute,
        # so probe the attribute itself
        try:
            return self.__asn
        except AttributeError:
            self.__asn = self.geoip.get_asn(self.address)

        return self.__asn

    @property
    def filelist(self):
        """Sorted list of the filenames recorded for this mirror."""
        filelist = self.db.query("SELECT filename FROM mirror_files WHERE mirror=%s ORDER BY filename", self.id)
        return [f.filename for f in filelist]

    @property
    def prefix(self):
        return ""

    @property
    def url(self):
        return self._info.url

    def build_url(self, filename):
        """Return the URL of ``filename`` on this mirror."""
        return urllib.parse.urljoin(self.url, filename)

    @property
    def last_update(self):
        return self._info.last_update

    @property
    def state(self):
        return self._info.state

    def set_state(self, state):
        """Store a new state for this mirror; does nothing if it is unchanged."""
        logging.info("Setting state of %s to %s", self.hostname, state)

        if self.state == state:
            return

        self.db.execute("UPDATE mirrors SET state = %s WHERE id = %s", state, self.id)

        # Reload changed settings
        if hasattr(self, "_info"):
            self._info["state"] = state

    @property
    def enabled(self):
        return self._info.enabled

    @property
    def disabled(self):
        return not self.enabled

    def check(self):
        """Store the current address and start the timestamp and filelist checks."""
        logging.info("Running check for mirror %s", self.hostname)

        self.db.execute("UPDATE mirrors SET address = %s WHERE id = %s",
            self.address, self.id)

        self.check_timestamp()
        self.check_filelist()

    def check_state(self):
        """Derive the state of the mirror from the age of its last update.

        Disabled mirrors are DOWN. Otherwise the mirror is DOWN or
        OUTOFSYNC when its last update is older than the
        ``mirrors_time_down`` (default three days) or
        ``mirrors_time_outofsync`` (default six hours) setting, and UP
        in all other cases.
        """
        logging.debug("Checking state of mirror %s", self.id)

        if not self.enabled:
            self.set_state("DOWN")
            return

        now = datetime.datetime.utcnow()

        time_delta = now - self.last_update
        time_diff = time_delta.total_seconds()

        time_down = self.settings.get_int("mirrors_time_down", 3*24*60*60)
        if time_diff >= time_down:
            self.set_state("DOWN")
            return

        time_outofsync = self.settings.get_int("mirrors_time_outofsync", 6*60*60)
        if time_diff >= time_outofsync:
            self.set_state("OUTOFSYNC")
            return

        self.set_state("UP")

    def check_timestamp(self):
        """Request the ``.timestamp`` file of the mirror."""
        http = tornado.httpclient.AsyncHTTPClient()

        # NOTE(review): the callback argument of AsyncHTTPClient.fetch() was
        # removed in Tornado 6.0 - confirm the deployed Tornado version
        http.fetch(self.url + ".timestamp",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_timestamp_response)

    def __check_timestamp_response(self, response):
        """Store the fetched timestamp and re-evaluate the mirror state."""
        if response.error:
            logging.debug("Error getting timestamp from %s", self.hostname)
            self.set_state("DOWN")
            return

        try:
            timestamp = int(response.body.strip())
        except ValueError:
            # An unparsable timestamp counts as "never updated"
            timestamp = 0

        timestamp = datetime.datetime.utcfromtimestamp(timestamp)

        self.db.execute("UPDATE mirrors SET last_update = %s WHERE id = %s",
            timestamp, self.id)

        # Reload changed settings. check_state() reads last_update, so the
        # new value has to be stored under that key.
        if hasattr(self, "_info"):
            self._info["last_update"] = timestamp

        self.check_state()

        logging.info("Successfully updated timestamp from %s", self.hostname)

    def check_filelist(self):
        """Request the ``.filelist`` file of the mirror (enabled mirrors only)."""
        # XXX need to remove data from disabled mirrors
        if not self.enabled:
            return

        http = tornado.httpclient.AsyncHTTPClient()

        # NOTE(review): the callback argument of AsyncHTTPClient.fetch() was
        # removed in Tornado 6.0 - confirm the deployed Tornado version
        http.fetch(self.url + ".filelist",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_filelist_response)

    def __check_filelist_response(self, response):
        """Synchronise ``mirror_files`` with the fetched file list."""
        if response.error:
            logging.debug("Error getting filelist from %s", self.hostname)
            return

        # The response body is bytes, but prefix and the stored filenames
        # are str, so decode before joining and comparing
        body = response.body
        if isinstance(body, bytes):
            try:
                body = body.decode("utf-8")
            except UnicodeDecodeError:
                logging.debug("Could not decode filelist from %s", self.hostname)
                return

        # Files that are currently recorded for this mirror; whatever is
        # left at the end is no longer on the mirror
        remaining = set(self.filelist)

        for file in body.splitlines():
            file = os.path.join(self.prefix, file)

            if file in remaining:
                remaining.discard(file)
                continue

            self.db.execute("INSERT INTO mirror_files(mirror, filename) VALUES(%s, %s)",
                self.id, file)

        for file in sorted(remaining):
            self.db.execute("DELETE FROM mirror_files WHERE mirror=%s AND filename=%s",
                self.id, file)

        logging.info("Successfully updated mirror filelist from %s", self.hostname)

    @property
    def prefer_for_countries(self):
        """Sorted list of the country codes this mirror is preferred for."""
        countries = self._info.get("prefer_for_countries", "")
        if countries:
            return sorted(countries.split(", "))

        return []

    @property
    def prefer_for_countries_names(self):
        countries = [self.geoip.get_country_name(c.upper()) for c in self.prefer_for_countries]

        return sorted(countries)

    def distance_to(self, location, ignore_preference=False):
        """Return the great-circle distance to ``location`` in kilometres.

        Returns 0 when the mirror is preferred for the country of
        ``location`` (unless ``ignore_preference`` is set) and None when
        ``location`` is falsy or the position of the mirror is unknown.
        """
        if not location:
            return None

        country_code = None
        if location.country:
            country_code = location.country.lower()

        if not ignore_preference and country_code in self.prefer_for_countries:
            return 0

        # http://www.movable-type.co.uk/scripts/latlong.html

        if self.latitude is None:
            return None

        if self.longitude is None:
            return None

        earth = 6371 # km
        delta_lat = math.radians(self.latitude - location.latitude)
        delta_lon = math.radians(self.longitude - location.longitude)

        lat1 = math.radians(self.latitude)
        lat2 = math.radians(location.latitude)

        a = math.sin(delta_lat / 2) ** 2
        a += math.cos(lat1) * math.cos(lat2) * (math.sin(delta_lon / 2) ** 2)

        # Rounding errors must not push a out of [0, 1], or sqrt() raises
        a = min(1.0, max(0.0, a))

        b1 = math.sqrt(a)
        b2 = math.sqrt(1 - a)

        c = 2 * math.atan2(b1, b2)

        return c * earth

    def traffic(self, since):
        """Sum up the size of all files downloaded from this mirror since ``since``.

        Downloads of files that are not listed in the ``files`` table are
        not counted.
        """
        # XXX needs to be done better

        files = {}
        for entry in self.db.query("SELECT filename, filesize FROM files"):
            files[entry.filename] = entry.filesize

        query = "SELECT COUNT(filename) as count, filename FROM log_download WHERE mirror = %s"
        query += " AND date >= %s GROUP BY filename"

        traffic = 0
        for entry in self.db.query(query, self.id, since):
            if entry.filename in files:
                traffic += entry.count * files[entry.filename]

        return traffic

    @property
    def priority(self):
        return self._info.get("priority", 10)

    @property
    def development(self):
        return self._info.get("mirrorlist_devel", False)

    @property
    def mirrorlist(self):
        return self._info.get("mirrorlist", False)

    @property
    def addresses(self):
        """List of ``(family, address)`` tuples the hostname resolves to.

        The result is cached on the instance. Raises Exception when the
        hostname cannot be resolved.
        """
        # hasattr(self, "__addresses") never matches the name-mangled
        # attribute, so probe the attribute itself
        try:
            return self.__addresses
        except AttributeError:
            pass

        try:
            addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM)
        except (OSError, UnicodeError) as e:
            raise Exception("Could not resolve %s" % self.hostname) from e

        ret = []
        for family, socktype, proto, canonname, address in addrinfo:
            # For IPv4 and IPv6 the host is the first item of the socket address
            if family in (socket.AF_INET, socket.AF_INET6):
                address = address[0]

            ret.append((family, address))

        self.__addresses = ret

        return self.__addresses

    @property
    def addresses6(self):
        return [address for family, address in self.addresses if family == socket.AF_INET6]

    @property
    def addresses4(self):
        return [address for family, address in self.addresses if family == socket.AF_INET]