]> git.ipfire.org Git - people/shoehn/ipfire.org.git/blob - webapp/backend/mirrors.py
68b58023b99a855675828c6a033976a46fad8e38
[people/shoehn/ipfire.org.git] / webapp / backend / mirrors.py
1 #!/usr/bin/python
2
3 from __future__ import division
4
5 import datetime
6 import logging
7 import math
8 import os.path
9 import random
10 import socket
11 import time
12 import tornado.httpclient
13 import tornado.netutil
14 import urlparse
15
16 from misc import Object
17
class Downloads(Object):
    """Download statistics computed from the ``log_download`` table."""

    @property
    def total(self):
        """Number of rows in ``log_download``."""
        row = self.db.get("SELECT COUNT(*) AS total FROM log_download")
        return row.total

    @property
    def today(self):
        """Number of downloads whose date equals the database's current date."""
        row = self.db.get(
            "SELECT COUNT(*) AS today FROM log_download"
            " WHERE date::date = NOW()::date")
        return row.today

    @property
    def yesterday(self):
        """Number of downloads dated one day before the database's current date."""
        row = self.db.get(
            "SELECT COUNT(*) AS yesterday FROM log_download"
            " WHERE date::date = (NOW() - INTERVAL '1 day')::date")
        return row.yesterday

    @property
    def daily_map(self):
        """Rows of (date, downloads) for the last 30 days up to today."""
        return self.db.query(
            "SELECT date::date AS date, COUNT(*) AS downloads FROM log_download"
            " WHERE date::date BETWEEN (NOW() - INTERVAL '30 days')::date"
            " AND NOW()::date GROUP BY date::date")

    def get_countries(self, duration="all"):
        """Return a dict mapping country code to its share of all downloads.

        Only ``duration == "today"`` restricts the query (to today's rows);
        any other value counts every row.  The result is empty when there
        are no downloads to count.
        """
        parts = ["SELECT country_code, count(country_code) AS count FROM log_download"]
        if duration == "today":
            parts.append(" WHERE date::date = NOW()::date")
        parts.append(" GROUP BY country_code ORDER BY count DESC")

        rows = self.db.query("".join(parts))

        overall = sum(row.count for row in rows)
        if not overall:
            return {}

        # The module enables true division, so each value is a fraction.
        return dict((row.country_code, row.count / overall) for row in rows)

    def get_mirror_load(self, duration="all"):
        """Return a dict mapping mirror hostname to its share of all downloads.

        Only ``duration == "today"`` restricts the query (to today's rows);
        any other value counts every row.  The result is empty when there
        are no downloads to count.
        """
        parts = ["SELECT mirror, COUNT(mirror) AS count FROM log_download"]
        if duration == "today":
            parts.append(" WHERE date::date = NOW()::date")
        parts.append(" GROUP BY mirror ORDER BY count DESC")

        rows = self.db.query("".join(parts))

        overall = sum(row.count for row in rows)
        if not overall:
            return {}

        shares = {}
        for row in rows:
            # The log stores the mirror id; resolve it to get the hostname.
            hostname = self.mirrors.get(row.mirror).hostname
            shares[hostname] = row.count / overall

        return shares
80
81
class Mirrors(Object):
    """Lookup helpers that turn rows of the ``mirrors`` table into Mirror objects."""

    def _from_rows(self, rows):
        """Wrap every database row in a Mirror object, keeping the row order."""
        return [Mirror(self.backend, row.id, row) for row in rows]

    def check_all(self):
        """Run ``check()`` on every enabled mirror."""
        for mirror in self.get_all():
            mirror.check()

    def get(self, id):
        """Return the Mirror with the given id (its row is loaded by Mirror itself)."""
        return Mirror(self.backend, id)

    def get_all(self):
        """Return a MirrorSet of all enabled mirrors in their sort order."""
        rows = self.db.query("SELECT * FROM mirrors WHERE enabled = %s", True)

        return MirrorSet(self.backend, sorted(self._from_rows(rows)))

    def get_all_up(self):
        """Return a MirrorSet of all enabled mirrors in state UP, by hostname."""
        rows = self.db.query(
            "SELECT * FROM mirrors WHERE enabled = %s AND state = %s"
            " ORDER BY hostname", True, "UP")

        return MirrorSet(self.backend, self._from_rows(rows))

    def get_by_hostname(self, hostname):
        """Return the Mirror with the given hostname, or None if there is none."""
        row = self.db.get("SELECT * FROM mirrors WHERE hostname = %s", hostname)
        if not row:
            return None

        return Mirror(self.backend, row.id, row)

    def get_with_file(self, filename, country=None):
        """Return a list of mirrors in state UP that carry ``filename``.

        ``country`` is accepted for compatibility but is not used for
        filtering.
        """
        # One query instead of one SELECT per candidate mirror; this is the
        # same sub-select that get_random() uses.
        rows = self.db.query(
            "SELECT * FROM mirrors WHERE state = %s"
            " AND id IN (SELECT mirror FROM mirror_files WHERE filename = %s)",
            "UP", filename)

        return self._from_rows(rows)

    def get_for_location(self, location, max_distance=4000, filename=None):
        """Return a sorted list of UP mirrors within ``max_distance`` of ``location``.

        The distance is evaluated in the database by ``geodistance()`` on
        the ``mirrors_locations`` table.  When ``filename`` is given, only
        mirrors that carry that file are considered.  A false ``location``
        yields an empty list.
        """
        if not location:
            return []

        query = (
            "WITH client AS (SELECT point(%s, %s) AS location)"
            " SELECT * FROM mirrors WHERE mirrors.state = %s"
        )
        args = [location.latitude, location.longitude, "UP"]

        if filename:
            query += (
                " AND mirrors.id IN ("
                "SELECT mirror FROM mirror_files WHERE filename = %s)"
            )
            args.append(filename)

        query += (
            " AND mirrors.id IN ("
            "SELECT id FROM mirrors_locations, client"
            " WHERE geodistance(mirrors_locations.location, client.location) <= %s)"
        )
        args.append(max_distance)

        rows = self.db.query(query, *args)

        return sorted(self._from_rows(rows))

    def get_all_files(self):
        """Return every distinct filename found on enabled mirrors in state UP.

        Files are listed in first-seen order.
        """
        seen = set()
        files = []

        for mirror in self.get_all():
            if mirror.state != "UP":
                continue

            for filename in mirror.filelist:
                # The set keeps de-duplication O(1) per file; the list
                # preserves the first-seen order.
                if filename in seen:
                    continue

                seen.add(filename)
                files.append(filename)

        return files

    def get_random(self, filename=None):
        """Return a random mirror in state UP, or None if none qualifies.

        When ``filename`` is given, only mirrors carrying that file qualify.
        """
        if filename:
            row = self.db.get(
                "SELECT * FROM mirrors WHERE state = %s"
                " AND mirrors.id IN (SELECT mirror FROM mirror_files"
                " WHERE filename = %s) ORDER BY RANDOM() LIMIT 1", "UP", filename)
        else:
            row = self.db.get(
                "SELECT * FROM mirrors WHERE state = %s"
                " ORDER BY RANDOM() LIMIT 1", "UP")

        if not row:
            return None

        return Mirror(self.backend, row.id, row)

    def file_exists(self, filename):
        """Return True if any mirror lists ``filename`` in ``mirror_files``."""
        row = self.db.get(
            "SELECT 1 FROM mirror_files"
            " WHERE filename = %s LIMIT 1", filename)

        return bool(row)
200
201
class MirrorSet(Object):
    """An ordered collection of Mirror objects with filtering helpers.

    Every filter returns a new MirrorSet and leaves this one untouched.
    """

    def __init__(self, backend, mirrors):
        Object.__init__(self, backend)

        self._mirrors = mirrors

    def __add__(self, other):
        """Return the union of both sets, keeping first occurrences in order."""
        mirrors = []

        # Membership relies on Mirror's own comparison, so a list (not a
        # hash-based set) is used for the duplicate check.
        for mirror in self._mirrors + other._mirrors:
            if mirror in mirrors:
                continue

            mirrors.append(mirror)

        return MirrorSet(self.backend, mirrors)

    def __sub__(self, other):
        """Return this set without the mirrors that also appear in ``other``."""
        mirrors = self._mirrors[:]

        for mirror in other._mirrors:
            if mirror in mirrors:
                mirrors.remove(mirror)

        return MirrorSet(self.backend, mirrors)

    def __iter__(self):
        return iter(self._mirrors)

    def __len__(self):
        return len(self._mirrors)

    def __str__(self):
        return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])

    def get_with_file(self, filename):
        """Return the subset of mirrors that list ``filename`` in ``mirror_files``."""
        rows = self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)

        # A set makes the per-mirror membership test O(1).
        with_file = set(row.mirror for row in rows)

        return MirrorSet(self.backend,
            [mirror for mirror in self._mirrors if mirror.id in with_file])

    def get_random(self):
        """Return a mirror picked at random, weighted by ``priority``.

        Each mirror is entered ``priority`` times into the draw.  Returns
        None when there is nothing to draw from (empty set, or no mirror
        with a positive priority) instead of failing inside
        ``random.choice``.
        """
        weighted = []
        for mirror in self._mirrors:
            weighted.extend([mirror] * mirror.priority)

        if not weighted:
            return None

        return random.choice(weighted)

    def get_for_location(self, location):
        """Return the mirrors near ``location``, widening the radius as needed.

        The search starts with a limit of 2500 and grows it by 20% per
        round; it stops once more than three mirrors were found or the
        limit exceeds 8000.  Distances come from ``Mirror.distance_to()``;
        mirrors for which that returns None are skipped.  A false
        ``location`` yields an empty set.
        """
        distance = 2500
        mirrors = []

        if location:
            while len(mirrors) <= 3 and distance <= 8000:
                for mirror in self._mirrors:
                    if mirror in mirrors:
                        continue

                    mirror_distance = mirror.distance_to(location)
                    if mirror_distance is None:
                        continue

                    if mirror_distance <= distance:
                        mirrors.append(mirror)

                distance *= 1.2

        return MirrorSet(self.backend, mirrors)

    def get_with_state(self, state):
        """Return the subset of mirrors whose ``state`` equals ``state``."""
        return MirrorSet(self.backend,
            [mirror for mirror in self._mirrors if mirror.state == state])
284
285
class Mirror(Object):
    """A single download mirror, backed by one row of the ``mirrors`` table.

    The row is kept in ``self._info`` and is accessed both by attribute
    (``_info.hostname``) and by key (``_info["url"]``).
    """

    def __init__(self, backend, id, data=None):
        """Create the mirror with the given id.

        ``data`` may carry an already fetched row; otherwise the row is
        loaded from the database.
        """
        Object.__init__(self, backend)

        self.id = id

        if data:
            self._info = data
        else:
            self._info = self.db.get("SELECT * FROM mirrors WHERE id = %s", self.id)

        # NOTE(review): applied to both branches because the url property
        # reads _info.url for every mirror - confirm against the original
        # indentation.
        self._info["url"] = self.generate_url()

        # Lazily filled caches. They must be created here: inside the class
        # body, self.__x is name-mangled to self._Mirror__x, so a check like
        # hasattr(self, "__x") never sees the attribute and the cache would
        # never hit.
        self.__location = None
        self.__country_name = None
        self.__asn = None
        self.__addresses = None

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.url)

    def __cmp__(self, other):
        """Order mirrors by country code first and hostname second."""
        ret = cmp(self.country_code, other.country_code)

        if not ret:
            ret = cmp(self.hostname, other.hostname)

        return ret

    def generate_url(self):
        """Build ``http://<hostname>/<path>/`` with exactly one slash at each joint."""
        url = "http://%s" % self.hostname
        if not self.path.startswith("/"):
            url += "/"
        url += "%s" % self.path
        if not self.path.endswith("/"):
            url += "/"
        return url

    @property
    def hostname(self):
        return self._info.hostname

    @property
    def path(self):
        return self._info.path

    @property
    def address(self):
        """First resolved IPv6 address, else first IPv4 address, else None."""
        for addr in self.addresses6:
            return addr

        for addr in self.addresses4:
            return addr

        return None

    @property
    def owner(self):
        return self._info.owner

    @property
    def location(self):
        """GeoIP location of ``address``; looked up on first use."""
        if self.__location is None:
            self.__location = self.geoip.get_location(self.address)

        return self.__location

    @property
    def latitude(self):
        if self.location:
            return self.location.latitude

        return None

    @property
    def longitude(self):
        if self.location:
            return self.location.longitude

        return None

    @property
    def coordinates(self):
        return (self.latitude, self.longitude)

    @property
    def coordinate_str(self):
        """The coordinates formatted as ``"<latitude>,<longitude>"``."""
        return ",".join(["%s" % i for i in self.coordinates])

    # Original (misspelled) name, kept so existing callers continue to work.
    coordiante_str = coordinate_str

    @property
    def country_code(self):
        if self.location:
            return self.location.country

        return None

    @property
    def country_name(self):
        if self.__country_name is None:
            self.__country_name = self.geoip.get_country_name(self.country_code)

        return self.__country_name

    @property
    def location_str(self):
        """Human-readable location.

        The ``location`` column wins when set; otherwise city and country
        name from GeoIP are joined, leaving out empty parts.
        """
        location = []

        if self._info.location:
            location.append(self._info.location)

        elif self.location:
            location.append(self.location.city)
            location.append(self.country_name)

        return ", ".join([s for s in location if s])

    @property
    def asn(self):
        """GeoIP ASN lookup result for ``address``; looked up on first use."""
        if self.__asn is None:
            self.__asn = self.geoip.get_asn(self.address)

        return self.__asn

    @property
    def filelist(self):
        """Sorted list of filenames recorded for this mirror in ``mirror_files``."""
        filelist = self.db.query("SELECT filename FROM mirror_files WHERE mirror=%s ORDER BY filename", self.id)
        return [f.filename for f in filelist]

    @property
    def prefix(self):
        return ""

    @property
    def url(self):
        return self._info.url

    def build_url(self, filename):
        """Return the URL of ``filename`` relative to this mirror's base URL."""
        return urlparse.urljoin(self.url, filename)

    @property
    def last_update(self):
        return self._info.last_update

    @property
    def state(self):
        return self._info.state

    def set_state(self, state):
        """Persist ``state`` for this mirror unless it is already current."""
        logging.info("Setting state of %s to %s" % (self.hostname, state))

        if self.state == state:
            return

        self.db.execute("UPDATE mirrors SET state = %s WHERE id = %s", state, self.id)

        # Reload changed settings
        if hasattr(self, "_info"):
            self._info["state"] = state

    @property
    def enabled(self):
        return self._info.enabled

    @property
    def disabled(self):
        return not self.enabled

    def check(self):
        """Store the resolved address and start the timestamp and filelist checks."""
        logging.info("Running check for mirror %s" % self.hostname)

        self.db.execute("UPDATE mirrors SET address = %s WHERE id = %s",
            self.address, self.id)

        self.check_timestamp()
        self.check_filelist()

    def check_state(self):
        """Derive the state from ``enabled`` and the age of ``last_update``.

        Disabled mirrors and mirrors without a known update time are DOWN.
        Otherwise the age is compared against the ``mirrors_time_down``
        (default three days) and ``mirrors_time_outofsync`` (default six
        hours) settings.
        """
        logging.debug("Checking state of mirror %s" % self.id)

        if not self.enabled:
            self.set_state("DOWN")
            return

        last_update = self.last_update
        if last_update is None:
            # Without a timestamp the age cannot be computed.
            self.set_state("DOWN")
            return

        now = datetime.datetime.utcnow()

        time_delta = now - last_update
        time_diff = time_delta.total_seconds()

        time_down = self.settings.get_int("mirrors_time_down", 3*24*60*60)
        if time_diff >= time_down:
            self.set_state("DOWN")
            return

        time_outofsync = self.settings.get_int("mirrors_time_outofsync", 6*60*60)
        if time_diff >= time_outofsync:
            self.set_state("OUTOFSYNC")
            return

        self.set_state("UP")

    def check_timestamp(self):
        """Fetch ``<url>.timestamp``; the response is handled in a callback."""
        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".timestamp",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_timestamp_response)

    def __check_timestamp_response(self, response):
        """Store the fetched timestamp as ``last_update`` and re-evaluate the state."""
        if response.error:
            logging.debug("Error getting timestamp from %s" % self.hostname)
            self.set_state("DOWN")
            return

        try:
            timestamp = int(response.body.strip())
        except ValueError:
            # An unparsable body counts as "never updated" (the epoch).
            timestamp = 0

        timestamp = datetime.datetime.utcfromtimestamp(timestamp)

        self.db.execute("UPDATE mirrors SET last_update = %s WHERE id = %s",
            timestamp, self.id)

        # Reload changed settings
        if hasattr(self, "_info"):
            # last_update is the key the last_update property (and therefore
            # check_state() below) reads.
            self._info["last_update"] = timestamp
            # Kept for anything that still reads the previously written key.
            self._info["timestamp"] = timestamp

        self.check_state()

        logging.info("Successfully updated timestamp from %s" % self.hostname)

    def check_filelist(self):
        """Fetch ``<url>.filelist``; the response is handled in a callback."""
        # XXX need to remove data from disabled mirrors
        if not self.enabled:
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".filelist",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_filelist_response)

    def __check_filelist_response(self, response):
        """Synchronise ``mirror_files`` with the file list the mirror sent."""
        if response.error:
            logging.debug("Error getting filelist from %s" % self.hostname)
            return

        known = set(self.filelist)
        listed = set()

        for line in response.body.splitlines():
            filename = os.path.join(self.prefix, line)

            # A name listed twice must not be inserted twice.
            if filename in listed:
                continue
            listed.add(filename)

            if filename in known:
                continue

            self.db.execute("INSERT INTO mirror_files(mirror, filename) VALUES(%s, %s)",
                self.id, filename)

        # Everything the database knows but the mirror no longer lists.
        for filename in sorted(known - listed):
            self.db.execute("DELETE FROM mirror_files WHERE mirror=%s AND filename=%s",
                self.id, filename)

        logging.info("Successfully updated mirror filelist from %s" % self.hostname)

    @property
    def prefer_for_countries(self):
        """Sorted list of country codes from the ``prefer_for_countries`` field.

        The field is split on ``", "``; an empty or missing field gives an
        empty list.
        """
        countries = self._info.get("prefer_for_countries", "")
        if countries:
            return sorted(countries.split(", "))

        return []

    @property
    def prefer_for_countries_names(self):
        """Sorted country names for the codes in ``prefer_for_countries``."""
        countries = [self.geoip.get_country_name(c.upper()) for c in self.prefer_for_countries]

        return sorted(countries)

    def distance_to(self, location, ignore_preference=False):
        """Return the great-circle distance to ``location`` in kilometres.

        Returns 0 when the location's country is one this mirror is
        preferred for (unless ``ignore_preference`` is set), and None when
        ``location`` is false or the mirror's own coordinates are unknown.
        """
        if not location:
            return None

        country_code = None
        if location.country:
            country_code = location.country.lower()

        if not ignore_preference and country_code in self.prefer_for_countries:
            return 0

        # http://www.movable-type.co.uk/scripts/latlong.html

        if self.latitude is None:
            return None

        if self.longitude is None:
            return None

        earth = 6371 # km
        delta_lat = math.radians(self.latitude - location.latitude)
        delta_lon = math.radians(self.longitude - location.longitude)

        lat1 = math.radians(self.latitude)
        lat2 = math.radians(location.latitude)

        a = math.sin(delta_lat / 2) ** 2
        a += math.cos(lat1) * math.cos(lat2) * (math.sin(delta_lon / 2) ** 2)

        # Rounding can push a marginally outside [0, 1] (e.g. for antipodal
        # points), which would make sqrt(1 - a) raise a ValueError.
        a = min(1.0, max(0.0, a))

        b1 = math.sqrt(a)
        b2 = math.sqrt(1 - a)

        c = 2 * math.atan2(b1, b2)

        return c * earth

    def traffic(self, since):
        """Return the summed size of all files downloaded from this mirror since ``since``.

        Each logged download counts with the size recorded in the ``files``
        table; downloads of files missing from that table are ignored.
        """
        # XXX needs to be done better

        files = {}
        for entry in self.db.query("SELECT filename, filesize FROM files"):
            files[entry.filename] = entry.filesize

        query = "SELECT COUNT(filename) as count, filename FROM log_download WHERE mirror = %s"
        query += " AND date >= %s GROUP BY filename"

        traffic = 0
        for entry in self.db.query(query, self.id, since):
            if entry.filename in files:
                traffic += entry.count * files[entry.filename]

        return traffic

    @property
    def priority(self):
        return self._info.get("priority", 10)

    @property
    def development(self):
        return self._info.get("mirrorlist_devel", False)

    @property
    def mirrorlist(self):
        return self._info.get("mirrorlist", False)

    @property
    def addresses(self):
        """List of ``(family, address)`` tuples the hostname resolves to.

        The hostname is resolved once per object and the result is reused.

        Raises:
            Exception: if the hostname cannot be resolved.
        """
        if self.__addresses is None:
            try:
                addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM)
            except Exception:
                # Not a bare except, so KeyboardInterrupt and SystemExit
                # still propagate unchanged.
                raise Exception("Could not resolve %s" % self.hostname)

            ret = []
            for family, socktype, proto, canonname, address in addrinfo:
                # Reduce the socket address tuple to the plain IP address.
                if family == socket.AF_INET:
                    address, port = address
                elif family == socket.AF_INET6:
                    address, port, flowid, scopeid = address
                ret.append((family, address))

            self.__addresses = ret

        return self.__addresses

    @property
    def addresses6(self):
        return [address for family, address in self.addresses if family == socket.AF_INET6]

    @property
    def addresses4(self):
        return [address for family, address in self.addresses if family == socket.AF_INET]