]> git.ipfire.org Git - people/shoehn/ipfire.org.git/blob - webapp/backend/mirrors.py
Revert "geoip: Tolerate lots of spaces in database fields"
[people/shoehn/ipfire.org.git] / webapp / backend / mirrors.py
1 #!/usr/bin/python
2
3 from __future__ import division
4
5 import datetime
6 import logging
7 import math
8 import os.path
9 import random
10 import socket
11 import time
12 import tornado.httpclient
13 import tornado.netutil
14 import urlparse
15
16 from misc import Object
17
class Downloads(Object):
    """Download statistics read from the ``log_download`` table.

    All queries go through ``self.db``; mirror lookups go through
    ``self.mirrors``. Both are presumably provided by the ``Object``
    base class -- confirm against ``misc.Object``.
    """

    @property
    def total(self):
        """Return the number of rows in ``log_download``."""
        row = self.db.get("SELECT COUNT(*) AS total FROM log_download")
        return row.total

    @property
    def today(self):
        """Return the number of downloads logged on the database's current date."""
        row = self.db.get("SELECT COUNT(*) AS today FROM log_download WHERE date::date = NOW()::date")
        return row.today

    @property
    def yesterday(self):
        """Return the number of downloads logged on the day before the current date."""
        row = self.db.get("SELECT COUNT(*) AS yesterday FROM log_download WHERE date::date = (NOW() - INTERVAL '1 day')::date")
        return row.yesterday

    @property
    def daily_map(self):
        """Return per-day download counts for the last 30 days, ordered by date.

        Each result row carries a ``date`` and a ``count`` column.
        """
        return self.db.query("WITH downloads AS (SELECT * FROM log_download \
            WHERE DATE(date) BETWEEN (NOW()::date - INTERVAL '30 days') AND DATE(NOW())) \
            SELECT DATE(date) AS date, COUNT(*) AS count FROM downloads \
            GROUP BY DATE(date) ORDER BY date")

    def get_countries(self, duration="all"):
        """Return the share of downloads per country.

        ``duration`` restricts the statistic to the current day when it is
        ``"today"``; any other value covers the whole table.

        Returns a list of ``(country_code, fraction)`` tuples ordered by
        descending download count, or an empty list if nothing was counted.
        """
        clauses = ["SELECT country_code, count(country_code) AS count FROM log_download"]
        if duration == "today":
            clauses.append(" WHERE date::date = NOW()::date")
        clauses.append(" GROUP BY country_code ORDER BY count DESC")

        rows = self.db.query("".join(clauses))

        grand_total = sum(row.count for row in rows)
        if not grand_total:
            return []

        # True division: the module enables it via __future__.
        return [(row.country_code, row.count / grand_total) for row in rows]

    def get_mirror_load(self, duration="all"):
        """Return the share of downloads served by each mirror.

        ``duration`` restricts the statistic to the current day when it is
        ``"today"``; any other value covers the whole table.

        Returns a dict mapping mirror hostname to its fraction of all
        counted downloads, or an empty dict if nothing was counted.
        """
        clauses = ["SELECT mirror, COUNT(mirror) AS count FROM log_download"]
        if duration == "today":
            clauses.append(" WHERE date::date = NOW()::date")
        clauses.append(" GROUP BY mirror ORDER BY count DESC")

        rows = self.db.query("".join(clauses))

        shares = {}
        grand_total = sum(row.count for row in rows)
        if not grand_total:
            return shares

        for row in rows:
            # The log only stores the mirror id; resolve it to a hostname.
            hostname = self.mirrors.get(row.mirror).hostname
            shares[hostname] = row.count / grand_total

        return shares
82
83
class Mirrors(Object):
    """Lookup and maintenance entry points for the ``mirrors`` table."""

    def _from_rows(self, rows):
        """Wrap each database row in a Mirror, keeping the row order."""
        return [Mirror(self.backend, row.id, row) for row in rows]

    def check_all(self):
        """Run ``check()`` on every enabled mirror."""
        for mirror in self.get_all():
            mirror.check()

    def get(self, id):
        """Return the Mirror with the given id (loaded from the database)."""
        return Mirror(self.backend, id)

    def get_all(self):
        """Return a MirrorSet of all enabled mirrors in sorted order."""
        res = self.db.query("SELECT * FROM mirrors WHERE enabled = %s", True)

        return MirrorSet(self.backend, sorted(self._from_rows(res)))

    def get_all_up(self):
        """Return a MirrorSet of all enabled mirrors in state UP, ordered by hostname."""
        res = self.db.query("SELECT * FROM mirrors WHERE enabled = %s AND state = %s \
            ORDER BY hostname", True, "UP")

        return MirrorSet(self.backend, self._from_rows(res))

    def get_by_hostname(self, hostname):
        """Return the Mirror with the given hostname, or None if there is none."""
        ret = self.db.get("SELECT * FROM mirrors WHERE hostname = %s", hostname)

        if ret:
            return Mirror(self.backend, ret.id, ret)

    def get_with_file(self, filename, country=None):
        """Return a list of all mirrors in state UP that carry ``filename``.

        ``country`` is accepted for compatibility with existing callers but
        is not used to filter or order the result.
        """
        # One query with a sub-select instead of one SELECT per mirror id;
        # this is the same shape get_random() uses.
        res = self.db.query("SELECT * FROM mirrors WHERE state = %s \
            AND mirrors.id IN (SELECT mirror FROM mirror_files \
            WHERE filename = %s)", "UP", filename)

        return self._from_rows(res)

    def get_for_location(self, location, max_distance=4000, filename=None):
        """Return mirrors in state UP within ``max_distance`` of ``location``.

        The distance is evaluated in the database by ``geodistance()``
        against ``mirrors_locations`` (unit presumably kilometres -- confirm
        against the SQL function). If ``filename`` is given, only mirrors
        that carry that file are considered.

        Returns a list of Mirror objects in reverse sorted order, or an
        empty list if ``location`` is falsy.
        """
        if not location:
            return []

        if filename:
            res = self.db.query("\
                WITH client AS (SELECT point(%s, %s) AS location) \
                SELECT * FROM mirrors WHERE mirrors.state = %s \
                AND mirrors.id IN ( \
                SELECT mirror FROM mirror_files WHERE filename = %s \
                ) AND mirrors.id IN ( \
                SELECT id FROM mirrors_locations, client \
                WHERE geodistance(mirrors_locations.location, client.location) <= %s \
                )",
                location.latitude, location.longitude, "UP", filename, max_distance)
        else:
            res = self.db.query("\
                WITH client AS (SELECT point(%s, %s) AS location) \
                SELECT * FROM mirrors WHERE mirrors.state = %s AND mirrors.id IN ( \
                SELECT id FROM mirrors_locations, client \
                WHERE geodistance(mirrors_locations.location, client.location) <= %s \
                )",
                location.latitude, location.longitude, "UP", max_distance)

        return sorted(self._from_rows(res), reverse=True)

    def get_all_files(self):
        """Return every distinct filename found on enabled mirrors in state UP.

        Names appear in the order they are first encountered.
        """
        files = []
        seen = set()

        for mirror in self.get_all():
            if mirror.state != "UP":
                continue

            for filename in mirror.filelist:
                # The set keeps de-duplication O(1) per name; the list
                # preserves first-seen order for the caller.
                if filename in seen:
                    continue

                seen.add(filename)
                files.append(filename)

        return files

    def get_random(self, filename=None):
        """Return one randomly chosen mirror in state UP, or None.

        If ``filename`` is given, only mirrors that carry it are eligible.
        """
        if filename:
            ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \
                AND mirrors.id IN (SELECT mirror FROM mirror_files \
                WHERE filename = %s) ORDER BY RANDOM() LIMIT 1", "UP", filename)
        else:
            ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \
                ORDER BY RANDOM() LIMIT 1", "UP")

        if ret:
            return Mirror(self.backend, ret.id, ret)

    def file_exists(self, filename):
        """Return True if at least one mirror lists ``filename``."""
        ret = self.db.get("SELECT 1 FROM mirror_files \
            WHERE filename = %s LIMIT 1", filename)

        return bool(ret)
202
203
class MirrorSet(Object):
    """An ordered collection of Mirror objects with set-like helpers."""

    def __init__(self, backend, mirrors):
        Object.__init__(self, backend)

        self._mirrors = mirrors

    def __add__(self, other):
        """Return the union of both sets, keeping first-seen order."""
        mirrors = []

        for mirror in self._mirrors + other._mirrors:
            # Membership relies on Mirror equality, not identity.
            if mirror in mirrors:
                continue

            mirrors.append(mirror)

        return MirrorSet(self.backend, mirrors)

    def __sub__(self, other):
        """Return this set without the mirrors contained in ``other``."""
        mirrors = self._mirrors[:]

        for mirror in other._mirrors:
            if mirror in mirrors:
                mirrors.remove(mirror)

        return MirrorSet(self.backend, mirrors)

    def __iter__(self):
        return iter(self._mirrors)

    def __len__(self):
        return len(self._mirrors)

    def __str__(self):
        return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])

    def get_with_file(self, filename):
        """Return the subset of mirrors that list ``filename``."""
        # A set makes the per-mirror membership test O(1).
        with_file = set(m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename))

        mirrors = [m for m in self._mirrors if m.id in with_file]

        return MirrorSet(self.backend, mirrors)

    def get_random(self):
        """Return a random mirror, weighted by each mirror's ``priority``.

        A mirror with priority N is N times as likely to be picked as a
        mirror with priority 1. Returns None when there is nothing to pick
        from (empty set, or no mirror with a positive priority).
        """
        weighted = []
        for mirror in self._mirrors:
            weighted.extend([mirror] * mirror.priority)

        # random.choice() raises IndexError on an empty sequence.
        if not weighted:
            return None

        return random.choice(weighted)

    def get_for_location(self, location):
        """Return the mirrors closest to ``location``.

        The search radius starts at 2500 and grows by 20% per round until
        more than three mirrors are found or the radius exceeds 8000 (same
        unit as ``Mirror.distance_to()`` returns). Mirrors whose distance
        cannot be determined are ignored. A falsy ``location`` yields an
        empty set.
        """
        distance = 2500
        mirrors = []

        if location:
            # The distances do not change between rounds, so compute them once.
            candidates = []
            for mirror in self._mirrors:
                mirror_distance = mirror.distance_to(location)
                if mirror_distance is None:
                    continue

                candidates.append((mirror, mirror_distance))

            while len(mirrors) <= 3 and distance <= 8000:
                for mirror, mirror_distance in candidates:
                    if mirror in mirrors:
                        continue

                    if mirror_distance <= distance:
                        mirrors.append(mirror)

                distance *= 1.2

        return MirrorSet(self.backend, mirrors)

    def get_with_state(self, state):
        """Return the subset of mirrors whose state equals ``state``."""
        mirrors = [m for m in self._mirrors if m.state == state]

        return MirrorSet(self.backend, mirrors)
286
287
class Mirror(Object):
    """A single download mirror, backed by one row of the ``mirrors`` table.

    The row is kept in ``self._info``. DNS and GeoIP results are looked up
    lazily and cached on the instance.
    """

    def __init__(self, backend, id, data=None):
        """Create a mirror object.

        ``data`` is an already fetched database row; when it is omitted
        (or falsy) the row is loaded by ``id``.
        """
        Object.__init__(self, backend)

        self.id = id

        if data:
            self._info = data
        else:
            self._info = self.db.get("SELECT * FROM mirrors WHERE id = %s", self.id)

        # The URL is derived from hostname and path for every mirror,
        # however the row was obtained.
        self._info["url"] = self.generate_url()

        # Lazily filled caches. They must be initialised here: the names
        # are mangled inside the class, so hasattr(self, "__name") can
        # never find them.
        self.__location = None
        self.__country_name = None
        self.__asn = None
        self.__addresses = None

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.url)

    def __cmp__(self, other):
        """Order mirrors by country code, then by hostname (Python 2 protocol)."""
        return cmp(self.country_code, other.country_code) \
            or cmp(self.hostname, other.hostname)

    def generate_url(self):
        """Return the base URL ``http://<hostname>/<path>/`` of this mirror.

        The path is given a leading and a trailing slash if it lacks them;
        an empty path yields ``http://<hostname>/``.
        """
        path = self.path or ""

        if not path.startswith("/"):
            path = "/" + path

        # Checked after the leading slash was added so that an empty path
        # does not end up as "//".
        if not path.endswith("/"):
            path += "/"

        return "http://%s%s" % (self.hostname, path)

    @property
    def hostname(self):
        return self._info.hostname

    @property
    def path(self):
        return self._info.path

    @property
    def address(self):
        """Return the first IPv4 address, else the first IPv6 address, else None."""
        candidates = self.addresses4 or self.addresses6
        if candidates:
            return candidates[0]

    @property
    def owner(self):
        return self._info.owner

    @property
    def location(self):
        """Return the GeoIP location of ``address`` (cached once it is not None)."""
        if self.__location is None:
            self.__location = self.geoip.get_location(self.address)

        return self.__location

    @property
    def latitude(self):
        if self.location:
            return self.location.latitude

    @property
    def longitude(self):
        if self.location:
            return self.location.longitude

    @property
    def coordinates(self):
        return (self.latitude, self.longitude)

    @property
    def coordinate_str(self):
        """Return ``"<latitude>,<longitude>"``."""
        return ",".join(["%s" % c for c in self.coordinates])

    @property
    def coordiante_str(self):
        """Misspelled name of ``coordinate_str``, kept for existing callers."""
        return self.coordinate_str

    @property
    def country_code(self):
        if self.location:
            return self.location.country

    @property
    def country_name(self):
        if self.__country_name is None:
            self.__country_name = self.geoip.get_country_name(self.country_code)

        return self.__country_name

    @property
    def location_str(self):
        """Return a human readable location.

        The ``location`` column of the database row wins; otherwise the
        GeoIP city and country name are joined with ", ".
        """
        location = []

        if self._info.location:
            location.append(self._info.location)

        elif self.location:
            location.append(self.location.city)
            location.append(self.country_name)

        return ", ".join([s for s in location if s])

    @property
    def asn(self):
        """Return the GeoIP ASN of ``address`` (cached once it is not None)."""
        if self.__asn is None:
            self.__asn = self.geoip.get_asn(self.address)

        return self.__asn

    @property
    def filelist(self):
        """Return the filenames recorded for this mirror, sorted by name."""
        filelist = self.db.query("SELECT filename FROM mirror_files WHERE mirror=%s ORDER BY filename", self.id)
        return [f.filename for f in filelist]

    @property
    def prefix(self):
        return ""

    @property
    def url(self):
        return self._info.url

    def build_url(self, filename):
        """Return the URL of ``filename`` on this mirror."""
        return urlparse.urljoin(self.url, filename)

    @property
    def last_update(self):
        return self._info.last_update

    @property
    def state(self):
        return self._info.state

    def set_state(self, state):
        """Persist ``state`` for this mirror unless it is already set."""
        logging.info("Setting state of %s to %s", self.hostname, state)

        if self.state == state:
            return

        self.db.execute("UPDATE mirrors SET state = %s WHERE id = %s", state, self.id)

        # Keep the cached row in sync with the database.
        self._info["state"] = state

    @property
    def enabled(self):
        return self._info.enabled

    @property
    def disabled(self):
        return not self.enabled

    def check(self):
        """Store the resolved address and start the timestamp and filelist checks."""
        logging.info("Running check for mirror %s", self.hostname)

        self.db.execute("UPDATE mirrors SET address = %s WHERE id = %s",
            self.address, self.id)

        self.check_timestamp()
        self.check_filelist()

    def check_state(self):
        """Derive the mirror state from ``enabled`` and the age of ``last_update``.

        Disabled mirrors are DOWN. Otherwise the age of the last update
        decides: at least ``mirrors_time_down`` seconds (default three
        days) means DOWN, at least ``mirrors_time_outofsync`` seconds
        (default six hours) means OUTOFSYNC, anything newer means UP.
        """
        logging.debug("Checking state of mirror %s", self.id)

        if not self.enabled:
            self.set_state("DOWN")
            return

        now = datetime.datetime.utcnow()

        time_delta = now - self.last_update
        time_diff = time_delta.total_seconds()

        time_down = self.settings.get_int("mirrors_time_down", 3*24*60*60)
        if time_diff >= time_down:
            self.set_state("DOWN")
            return

        time_outofsync = self.settings.get_int("mirrors_time_outofsync", 6*60*60)
        if time_diff >= time_outofsync:
            self.set_state("OUTOFSYNC")
            return

        self.set_state("UP")

    def check_timestamp(self):
        """Request ``<url>.timestamp``; the response is handled in a callback."""
        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".timestamp",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_timestamp_response)

    def __check_timestamp_response(self, response):
        """Store the fetched timestamp as ``last_update`` and re-evaluate the state."""
        if response.error:
            logging.debug("Error getting timestamp from %s", self.hostname)
            self.set_state("DOWN")
            return

        try:
            timestamp = int(response.body.strip())
        except ValueError:
            # An unparsable timestamp is treated as the epoch.
            timestamp = 0

        timestamp = datetime.datetime.utcfromtimestamp(timestamp)

        self.db.execute("UPDATE mirrors SET last_update = %s WHERE id = %s",
            timestamp, self.id)

        # Keep the cached row in sync under the key that last_update and
        # check_state() actually read.
        self._info["last_update"] = timestamp

        self.check_state()

        logging.info("Successfully updated timestamp from %s", self.hostname)

    def check_filelist(self):
        """Request ``<url>.filelist``; the response is handled in a callback."""
        # XXX need to remove data from disabled mirrors
        if not self.enabled:
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".filelist",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_filelist_response)

    def __check_filelist_response(self, response):
        """Synchronise ``mirror_files`` with the file list the mirror returned."""
        if response.error:
            logging.debug("Error getting filelist from %s", self.hostname)
            return

        known = set(self.filelist)
        remote = set()

        for line in response.body.splitlines():
            # Blank lines are not file names.
            if not line:
                continue

            filename = os.path.join(self.prefix, line)

            # A name that is repeated in the remote list must not be
            # inserted a second time.
            if filename in remote:
                continue
            remote.add(filename)

            if filename in known:
                continue

            self.db.execute("INSERT INTO mirror_files(mirror, filename) VALUES(%s, %s)",
                self.id, filename)

        # Whatever the mirror no longer lists is dropped.
        for filename in sorted(known - remote):
            self.db.execute("DELETE FROM mirror_files WHERE mirror=%s AND filename=%s",
                self.id, filename)

        logging.info("Successfully updated mirror filelist from %s", self.hostname)

    @property
    def prefer_for_countries(self):
        """Return the sorted country codes from the ``prefer_for_countries`` field."""
        countries = self._info.get("prefer_for_countries", "")
        if countries:
            return sorted(countries.split(", "))

        return []

    @property
    def prefer_for_countries_names(self):
        countries = [self.geoip.get_country_name(c.upper()) for c in self.prefer_for_countries]

        return sorted(countries)

    def distance_to(self, location, ignore_preference=False):
        """Return the great-circle distance to ``location`` in kilometres.

        Returns 0 if this mirror is preferred for the location's country
        (unless ``ignore_preference`` is set), and None if either side has
        no usable coordinates.
        """
        if not location:
            return None

        country_code = None
        if location.country:
            country_code = location.country.lower()

        if not ignore_preference and country_code in self.prefer_for_countries:
            return 0

        # http://www.movable-type.co.uk/scripts/latlong.html

        if self.latitude is None or self.longitude is None:
            return None

        # A location may be known by country only.
        if location.latitude is None or location.longitude is None:
            return None

        earth = 6371 # km
        delta_lat = math.radians(self.latitude - location.latitude)
        delta_lon = math.radians(self.longitude - location.longitude)

        lat1 = math.radians(self.latitude)
        lat2 = math.radians(location.latitude)

        # Haversine formula
        a = math.sin(delta_lat / 2) ** 2
        a += math.cos(lat1) * math.cos(lat2) * (math.sin(delta_lon / 2) ** 2)

        # Rounding can push a marginally outside [0, 1] for (nearly)
        # antipodal points, which would make sqrt() fail.
        a = min(1.0, max(0.0, a))

        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

        return c * earth

    def traffic(self, since):
        """Return the summed size of all files downloaded from this mirror since ``since``.

        Downloads of files that are not in the ``files`` table are ignored.
        """
        # XXX needs to be done better

        files = {}
        for entry in self.db.query("SELECT filename, filesize FROM files"):
            files[entry.filename] = entry.filesize

        query = "SELECT COUNT(filename) as count, filename FROM log_download WHERE mirror = %s"
        query += " AND date >= %s GROUP BY filename"

        traffic = 0
        for entry in self.db.query(query, self.id, since):
            if entry.filename in files:
                traffic += entry.count * files[entry.filename]

        return traffic

    @property
    def priority(self):
        return self._info.get("priority", 10)

    @property
    def development(self):
        return self._info.get("mirrorlist_devel", False)

    @property
    def mirrorlist(self):
        return self._info.get("mirrorlist", False)

    @property
    def addresses(self):
        """Return the resolved ``(family, address)`` pairs of the hostname.

        The hostname is resolved once per instance. Raises Exception if it
        cannot be resolved.
        """
        if self.__addresses is None:
            try:
                addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM)
            except (socket.error, UnicodeError):
                raise Exception("Could not resolve %s" % self.hostname)

            ret = []
            for family, socktype, proto, canonname, address in addrinfo:
                # For IPv4 and IPv6 the socket address is a tuple that
                # starts with the host address.
                if family in (socket.AF_INET, socket.AF_INET6):
                    address = address[0]
                ret.append((family, address))

            self.__addresses = ret

        return self.__addresses

    @property
    def addresses6(self):
        return [address for family, address in self.addresses if family == socket.AF_INET6]

    @property
    def addresses4(self):
        return [address for family, address in self.addresses if family == socket.AF_INET]