]> git.ipfire.org Git - people/shoehn/ipfire.org.git/blob - www/webapp/backend/mirrors.py
Website update.
[people/shoehn/ipfire.org.git] / www / webapp / backend / mirrors.py
1 #!/usr/bin/python
2
3 import logging
4 import math
5 import os.path
6 import random
7 import socket
8 import time
9 import tornado.httpclient
10
11 from databases import Databases
12 from geoip import GeoIP
13 from memcached import Memcached
14 from misc import Singleton
15
class Downloads(object):
    """Download statistics computed from the ``log_download`` table.

    Every accessor runs its query against the web application database
    each time it is used; nothing is cached in this class.
    """

    # Python 2 metaclass hook; Singleton comes from the project's misc module.
    __metaclass__ = Singleton

    # Restricts a query to the last 24 hours.  INTERVAL arithmetic is used
    # because "NOW() - 1000000" subtracts from the packed YYYYMMDDHHMMSS
    # number, which yields the non-existent day "00" on the first day of a
    # month and silently shrinks the window.
    _LAST_24_HOURS = " WHERE date >= NOW() - INTERVAL 1 DAY"

    @property
    def db(self):
        """Connection to the web application database."""
        return Databases().webapp

    @property
    def mirrors(self):
        """The Mirrors collection, used to resolve mirror ids."""
        return Mirrors()

    @property
    def total(self):
        """Number of logged downloads overall."""
        ret = self.db.get("SELECT COUNT(*) AS total FROM log_download")

        return ret.total

    @property
    def today(self):
        """Number of downloads logged within the last 24 hours."""
        ret = self.db.get("SELECT COUNT(*) AS today FROM log_download"
            + self._LAST_24_HOURS)

        return ret.today

    @property
    def yesterday(self):
        """Number of downloads logged on the previous calendar day."""
        # "DATE(NOW())-1" evaluates to e.g. 20240300 on the first of a
        # month, which matches no date at all.
        ret = self.db.get("SELECT COUNT(*) AS yesterday FROM log_download"
            " WHERE DATE(date) = DATE(NOW() - INTERVAL 1 DAY)")

        return ret.yesterday

    @property
    def daily_map(self):
        """Rows of (date, downloads) for the last 31 days, one per day."""
        ret = self.db.query("SELECT DATE(date) AS date, COUNT(*) AS downloads"
            " FROM log_download"
            " WHERE DATE(date) BETWEEN DATE(NOW() - INTERVAL 31 DAY) AND DATE(NOW())"
            " GROUP BY DATE(date)")

        return ret

    @staticmethod
    def _as_fractions(pairs):
        """Turn (key, count) pairs into a dict mapping key -> share of total.

        Returns an empty dict when there is nothing to divide by, instead
        of raising ZeroDivisionError.
        """
        pairs = [(key, count) for key, count in pairs]
        total = sum(count for _, count in pairs)
        if not total:
            return {}

        return dict((key, float(count) / total) for key, count in pairs)

    def get_countries(self, duration="all"):
        """Return the share of downloads per country code.

        duration -- "today" limits the statistic to the last 24 hours;
                    any other value covers the whole log.

        The result maps country_code to a float between 0 and 1.
        """
        query = "SELECT country_code, count(country_code) AS count FROM log_download"

        if duration == "today":
            query += self._LAST_24_HOURS

        query += " GROUP BY country_code ORDER BY count DESC"

        rows = self.db.query(query)

        return self._as_fractions((row.country_code, row.count) for row in rows)

    def get_mirror_load(self, duration="all"):
        """Return the share of downloads served by each mirror.

        duration -- "today" limits the statistic to the last 24 hours;
                    any other value covers the whole log.

        The result maps mirror hostname to a float between 0 and 1.
        """
        query = "SELECT mirror, COUNT(mirror) AS count FROM log_download"

        if duration == "today":
            query += self._LAST_24_HOURS

        query += " GROUP BY mirror ORDER BY count DESC"

        rows = self.db.query(query)

        # The log stores mirror ids; resolve them to hostnames for display.
        return self._as_fractions(
            (self.mirrors.get(row.mirror).hostname, row.count) for row in rows)
86
87
class Mirrors(object):
    """Entry point for looking up Mirror objects in the database."""

    # Python 2 metaclass hook; Singleton comes from the project's misc module.
    __metaclass__ = Singleton

    @property
    def db(self):
        """Connection to the web application database."""
        return Databases().webapp

    @property
    def memcached(self):
        """Handle to the memcached wrapper."""
        return Memcached()

    def list(self):
        """Return every mirror as a list of Mirror objects, ordered by state."""
        return [Mirror(m.id) for m in self.db.query("SELECT id FROM mirrors ORDER BY state")]

    def check_all(self):
        """Start the timestamp and filelist checks on every mirror."""
        for mirror in self.list():
            mirror.check()

    def get(self, id):
        """Return the Mirror with the given database id."""
        return Mirror(id)

    def get_all(self):
        """Return all mirrors wrapped in a MirrorSet."""
        return MirrorSet(self.list())

    def get_by_hostname(self, hostname):
        """Return the Mirror with the given hostname, or None if unknown."""
        row = self.db.get("SELECT id FROM mirrors WHERE hostname=%s", hostname)
        if not row:
            # Previously this crashed with AttributeError on the missing row.
            return None

        return Mirror(row.id)

    def get_with_file(self, filename, country=None):
        """Return the list of mirrors in state "UP" that carry filename.

        country is accepted for compatibility but is currently ignored;
        restricting the result to mirrors preferred for a country has
        never been implemented here.
        """
        # XXX quick and dirty solution - needs a performance boost
        rows = self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)

        mirrors = []
        for row in rows:
            mirror = self.get(row.mirror)
            if mirror.state == "UP":
                mirrors.append(mirror)

        logging.debug("%s", mirrors)

        return mirrors

    def get_for_country(self, country):
        """Yield the mirrors that are preferred for the given country code."""
        # XXX need option for random order
        # prefer_for_countries holds a comma separated list, so the LIKE
        # pattern needs wildcards; without them only mirrors preferred for
        # exactly one country could ever match.
        rows = self.db.query("SELECT id FROM mirrors WHERE prefer_for_countries LIKE %s",
            "%" + country + "%")

        for row in rows:
            mirror = self.get(row.id)

            # The LIKE is only a coarse pre-filter; confirm an exact match.
            if country in mirror.prefer_for_countries:
                yield mirror

    def get_for_location(self, addr):
        """Return mirrors near addr, widening the search radius step by step.

        The radius starts at 10 and grows by 20% per step until more than
        two mirrors were found or the radius exceeds 270.  Distances are
        whatever Mirror.distance_to() returns.
        """
        distance = 10

        mirrors = []

        # Measure each mirror once.  The old loop removed entries from the
        # list it was iterating, which skipped the element following every
        # match, and repeated the distance lookup on every step.
        remaining = [(mirror, mirror.distance_to(addr)) for mirror in self.list()]

        while remaining and len(mirrors) <= 2 and distance <= 270:
            out_of_range = []

            for mirror, mirror_distance in remaining:
                if mirror_distance <= distance:
                    mirrors.append(mirror)
                else:
                    out_of_range.append((mirror, mirror_distance))

            remaining = out_of_range
            distance *= 1.2

        return mirrors

    def get_all_files(self):
        """Return the distinct filenames offered by mirrors in state "UP".

        Filenames keep the order in which they are first encountered.
        """
        files = []
        seen = set()

        for mirror in self.list():
            if mirror.state != "UP":
                continue

            for filename in mirror.filelist:
                # Set membership keeps de-duplication linear.
                if filename in seen:
                    continue

                seen.add(filename)
                files.append(filename)

        return files
173
174
class MirrorSet(object):
    """An ordered collection of mirrors with set-like filter operations.

    All filter methods return a new MirrorSet and leave this one untouched.
    """

    def __init__(self, mirrors):
        self._mirrors = mirrors

    def __add__(self, other):
        """Return the union of both sets, keeping first-seen order."""
        mirrors = []

        for mirror in self._mirrors + other._mirrors:
            if mirror not in mirrors:
                mirrors.append(mirror)

        return MirrorSet(mirrors)

    def __sub__(self, other):
        """Return this set without the mirrors contained in other."""
        mirrors = self._mirrors[:]

        for mirror in other._mirrors:
            if mirror in mirrors:
                mirrors.remove(mirror)

        return MirrorSet(mirrors)

    def __iter__(self):
        return iter(self._mirrors)

    def __len__(self):
        return len(self._mirrors)

    def __str__(self):
        return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])

    @property
    def db(self):
        """Database connection, borrowed from the Mirrors collection."""
        return Mirrors().db

    def get_with_file(self, filename):
        """Return the mirrors of this set that carry filename."""
        rows = self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)

        # A set makes the membership test below constant time.
        with_file = set(row.mirror for row in rows)

        return MirrorSet([m for m in self._mirrors if m.id in with_file])

    def get_random(self):
        """Pick one mirror at random, weighted by its priority.

        A mirror with priority N is N times as likely to be chosen as one
        with priority 1; a priority of 0 excludes the mirror.  Raises
        IndexError (from random.choice) when there is nothing to choose.
        """
        mirrors = []
        for mirror in self._mirrors:
            mirrors.extend([mirror] * mirror.priority)

        return random.choice(mirrors)

    def get_for_country(self, country):
        """Return the mirrors that list country among their preferences."""
        return MirrorSet([m for m in self._mirrors
            if country in m.prefer_for_countries])

    def get_for_location(self, addr):
        """Return mirrors near addr, widening the search radius step by step.

        The radius starts at 10 and grows by 20% per step until more than
        two mirrors were found or the radius exceeds 270.  Mirrors appear
        in the order in which the growing radius reached them.
        """
        distance = 10

        mirrors = []

        # Measure each mirror once up front; the distance does not change
        # while the radius grows, so there is no need to look it up again
        # on every step.
        measured = [(mirror, mirror.distance_to(addr)) for mirror in self._mirrors]

        while len(mirrors) <= 2 and distance <= 270:
            for mirror, mirror_distance in measured:
                if mirror_distance <= distance and mirror not in mirrors:
                    mirrors.append(mirror)

            distance *= 1.2

        return MirrorSet(mirrors)

    def get_with_state(self, state):
        """Return the mirrors whose state equals state (e.g. "UP")."""
        return MirrorSet([m for m in self._mirrors if m.state == state])
264
265
class Mirror(object):
    """A single download mirror, backed by one row of the mirrors table.

    The row is kept in self._info and its columns are exposed as
    attributes through __getattr__ (hostname, path, state, type, ...).
    """

    def __init__(self, id):
        self.id = id

        self.reload()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.url)

    # Mirrors are identified by their database id.  __cmp__ alone is only
    # honoured by Python 2, so the rich comparison methods are provided as
    # well to keep "mirror in mirrors" working on either interpreter.
    def __eq__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        return self.id == other.id

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result

        return not result

    def __lt__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        return self.id < other.id

    def __hash__(self):
        return hash(self.id)

    def __cmp__(self, other):
        if not isinstance(other, Mirror):
            return NotImplemented

        # Same result as cmp(self.id, other.id) without needing the builtin.
        return (self.id > other.id) - (self.id < other.id)

    @property
    def db(self):
        """Connection to the web application database."""
        return Databases().webapp

    def reload(self, force=False):
        """Load the mirror's row, preferring the memcached copy.

        force -- drop the cached copy first so the row is read from the
                 database again.
        """
        memcached = Memcached()
        mem_id = "mirror-%s" % self.id

        if force:
            memcached.delete(mem_id)

        # The hostname may have changed, so forget the cached GeoIP result.
        self.__dict__.pop("_location", None)

        self._info = memcached.get(mem_id)
        if not self._info:
            self._info = self.db.get("SELECT * FROM mirrors WHERE id=%s", self.id)
            self._info["url"] = self.generate_url()

            # NOTE(review): 60 is presumably the expiry in seconds - confirm
            # against the Memcached wrapper.
            memcached.set(mem_id, self._info, 60)

    def generate_url(self):
        """Build the base URL "http://<hostname>/<path>/" of the mirror.

        Leading and trailing slashes of the stored path are normalised, so
        an empty path gives "http://<hostname>/" rather than a double slash.
        """
        path = (self.path or "").strip("/")
        if not path:
            return "http://%s/" % self.hostname

        return "http://%s/%s/" % (self.hostname, path)

    def __getattr__(self, key):
        """Expose the columns of the database row as attributes."""
        # Only called when normal lookup failed.  If _info itself is not
        # set yet, looking it up below would recurse without end.
        if key == "_info":
            raise AttributeError(key)

        try:
            return self._info[key]
        except KeyError:
            raise AttributeError(key)

    @property
    def address(self):
        """IPv4 address the hostname currently resolves to."""
        return socket.gethostbyname(self.hostname)

    @property
    def location(self):
        """GeoIP record for the mirror's address, looked up once."""
        # The previous hasattr(self, "__location") test could never succeed:
        # the assignment was name-mangled to _Mirror__location while the
        # string passed to hasattr was not, so every access did a new lookup.
        cached = self.__dict__.get("_location")
        if cached is None:
            cached = self._location = GeoIP().get_all(self.address)

        return cached

    @property
    def latitude(self):
        return self.location.latitude

    @property
    def longitude(self):
        return self.location.longitude

    @property
    def coordinates(self):
        """(latitude, longitude) tuple."""
        return (self.latitude, self.longitude)

    @property
    def coordiante_str(self):
        """Coordinates as "latitude,longitude".

        The misspelt name is kept because callers outside this file may
        refer to it.
        """
        return ",".join(["%s" % i for i in self.coordinates])

    @property
    def country_code(self):
        """Lower-case country code of the mirror, or "unknown"."""
        # Apply the fallback before lower() so a None result from the
        # lookup does not raise AttributeError.
        return (GeoIP().get_country(self.address) or "unknown").lower()

    @property
    def country_name(self):
        return GeoIP().get_country_name(self.country_code)

    @property
    def city(self):
        """City from the database row, falling back to the GeoIP record."""
        return self._info.get("city") or self.location.city

    @property
    def location_str(self):
        """"City, Country", or just the country when no city is known."""
        s = self.country_name
        if self.city:
            s = "%s, %s" % (self.city, s)

        return s

    @property
    def filelist(self):
        """Sorted list of filenames recorded for this mirror."""
        filelist = self.db.query("SELECT filename FROM mirror_files WHERE mirror=%s ORDER BY filename", self.id)
        return [f.filename for f in filelist]

    @property
    def prefix(self):
        """Path prefix for recorded filenames; empty unless a pakfire mirror."""
        if self.type.startswith("pakfire"):
            return self.type

        return ""

    def set_state(self, state):
        """Store a new state for the mirror if it differs from the current one."""
        logging.info("Setting state of %s to %s", self.hostname, state)

        if self.state == state:
            return

        self.db.execute("UPDATE mirrors SET state=%s WHERE id=%s",
            state, self.id)

        # Reload changed settings
        self.reload(force=True)

    def check(self):
        """Start the asynchronous timestamp and filelist checks."""
        logging.info("Running check for mirror %s", self.hostname)

        self.check_timestamp()
        self.check_filelist()

    def check_state(self):
        """Derive the mirror state from the disabled flag and last_update."""
        logging.debug("Checking state of mirror %s", self.id)

        if self.disabled == "Y":
            self.set_state("DOWN")

            # Stop here: without this return the age check below would put
            # a disabled but up-to-date mirror straight back to "UP".
            return

        time_diff = time.time() - self.last_update
        if time_diff > 3*24*60*60: # XXX get this into Settings
            self.set_state("DOWN")
        elif time_diff > 6*60*60:
            self.set_state("OUTOFSYNC")
        else:
            self.set_state("UP")

    def check_timestamp(self):
        """Fetch <url>.timestamp asynchronously; skipped when releases is "N"."""
        if self.releases == "N":
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".timestamp",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_timestamp_response)

    def __check_timestamp_response(self, response):
        """Store the fetched timestamp as last_update and re-evaluate the state."""
        if response.error:
            logging.debug("Error getting timestamp from %s", self.hostname)
            return

        try:
            timestamp = int(response.body.strip())
        except ValueError:
            # An unparsable timestamp counts as "never updated".
            timestamp = 0

        self.db.execute("UPDATE mirrors SET last_update=%s WHERE id=%s",
            timestamp, self.id)

        # Reload changed settings
        self.reload(force=True)

        self.check_state()

        logging.info("Successfully updated timestamp from %s", self.hostname)

    def check_filelist(self):
        """Fetch <url>.filelist asynchronously for enabled "full" mirrors."""
        # XXX need to remove data from disabled mirrors
        if self.releases == "N" or self.disabled == "Y" or self.type != "full":
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".filelist",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_filelist_response)

    def __check_filelist_response(self, response):
        """Synchronise mirror_files with the filelist the mirror published."""
        if response.error:
            logging.debug("Error getting filelist from %s", self.hostname)
            return

        # Files currently recorded; whatever is left at the end has vanished
        # from the mirror and gets deleted.
        stale = set(self.filelist)
        seen = set()

        for line in response.body.splitlines():
            # Skip blank and repeated lines so they do not insert
            # empty or duplicate rows.
            if not line or line in seen:
                continue
            seen.add(line)

            filename = os.path.join(self.prefix, line)

            if filename in stale:
                stale.discard(filename)
                continue

            self.db.execute("INSERT INTO mirror_files(mirror, filename) VALUES(%s, %s)",
                self.id, filename)

        for filename in sorted(stale):
            self.db.execute("DELETE FROM mirror_files WHERE mirror=%s AND filename=%s",
                self.id, filename)

        logging.info("Successfully updated mirror filelist from %s", self.hostname)

    @property
    def prefer_for_countries(self):
        """Sorted list of country codes this mirror is preferred for.

        The column holds a comma separated list; whitespace around the
        entries is ignored, so "de, at" and "de,at" are read the same.
        """
        countries = self._info.get("prefer_for_countries", "")
        if not countries:
            return []

        return sorted([c.strip() for c in countries.split(",") if c.strip()])

    @property
    def prefer_for_countries_names(self):
        """Sorted country names for prefer_for_countries."""
        return sorted([GeoIP().get_country_name(c) for c in self.prefer_for_countries])

    def distance_to(self, addr):
        """Return the distance between this mirror and the address addr.

        The value is the straight-line distance between the two
        (latitude, longitude) pairs.  It is 0 when addr cannot be located
        or lies in a country this mirror is preferred for.
        """
        location = GeoIP().get_all(addr)
        if not location:
            return 0

        if location.country_code.lower() in self.prefer_for_countries:
            return 0

        delta_latitude = self.latitude - location.latitude
        delta_longitude = self.longitude - location.longitude

        return math.sqrt(delta_latitude**2 + delta_longitude**2)

    def traffic(self, since):
        """Estimate the traffic served by this mirror since the given date.

        Sums download count times file size over every logged file whose
        size is known from the files table.
        """
        # XXX needs to be done better

        sizes = {}
        for entry in self.db.query("SELECT filename, filesize FROM files"):
            sizes[entry.filename] = entry.filesize

        query = "SELECT COUNT(filename) as count, filename FROM log_download WHERE mirror = %s"
        query += " AND date >= %s GROUP BY filename"

        traffic = 0
        for entry in self.db.query(query, self.id, since):
            # dict.has_key() is Python 2 only; get() also covers a NULL size.
            size = sizes.get(entry.filename)
            if size:
                traffic += entry.count * size

        return traffic

    @property
    def priority(self):
        """Weight used for random mirror selection; 10 when the row has none."""
        return self._info.get("priority", 10)
525