]> git.ipfire.org Git - ipfire.org.git/blame - www/webapp/backend/mirrors.py
mirrors: New functions for geo load-balancing.
[ipfire.org.git] / www / webapp / backend / mirrors.py
CommitLineData
940227cb
MT
1#!/usr/bin/python
2
3import logging
0673d1b0 4import math
54af860e 5import os.path
0673d1b0 6import random
940227cb
MT
7import socket
8import time
9import tornado.httpclient
10
11from databases import Databases
12from geoip import GeoIP
0673d1b0 13from memcached import Memcached
940227cb
MT
14from misc import Singleton
15
class Mirrors(object):
    """Entry point for looking up mirrors stored in the webapp database.

    The class is created through the project's ``Singleton`` metaclass.
    Every lookup builds fresh ``Mirror`` objects from the ``mirrors`` and
    ``mirror_files`` tables.
    """

    __metaclass__ = Singleton

    @property
    def db(self):
        """Return the ``webapp`` database handle."""
        return Databases().webapp

    @property
    def memcached(self):
        """Return a ``Memcached`` client object."""
        return Memcached()

    def list(self):
        """Return a list with one ``Mirror`` per database row, ordered by state."""
        rows = self.db.query("SELECT id FROM mirrors ORDER BY state")
        return [Mirror(row.id) for row in rows]

    def check_all(self):
        """Start the timestamp and file list checks on every known mirror."""
        for mirror in self.list():
            mirror.check()

    def get(self, id):
        """Return the ``Mirror`` with the given database id."""
        return Mirror(id)

    def get_all(self):
        """Return every known mirror wrapped in a ``MirrorSet``."""
        return MirrorSet(self.list())

    def get_by_hostname(self, hostname):
        """Return the ``Mirror`` whose ``hostname`` column equals *hostname*.

        NOTE(review): when the query yields no row, the attribute access
        below fails instead of returning a clean "not found" -- confirm
        what callers expect for unknown hostnames.
        """
        row = self.db.get("SELECT id FROM mirrors WHERE hostname=%s", hostname)

        return Mirror(row.id)

    def get_with_file(self, filename, country=None):
        """Return a list of all mirrors in state ``UP`` that carry *filename*.

        *country* is accepted for API compatibility but is currently
        ignored; filtering by preferred country has never been implemented
        here.
        """
        # XXX quick and dirty solution - needs a performance boost
        rows = self.db.query(
            "SELECT mirror FROM mirror_files WHERE filename=%s", filename)

        candidates = [self.get(row.mirror) for row in rows]
        mirrors = [mirror for mirror in candidates if mirror.state == "UP"]

        logging.debug("%s" % mirrors)

        return mirrors

    def get_for_country(self, country):
        """Yield the mirrors whose ``prefer_for_countries`` matches *country*.

        NOTE(review): the pattern is passed to ``LIKE`` without wildcards,
        so it presumably only matches rows that list exactly one country.
        ``Mirror.prefer_for_countries`` splits the column on ", ", which
        suggests multi-country values exist -- confirm and widen if needed.
        """
        # XXX need option for random order
        rows = self.db.query(
            "SELECT id FROM mirrors WHERE prefer_for_countries LIKE %s", country)

        for row in rows:
            yield self.get(row.id)

    def get_for_location(self, addr):
        """Return a list of the mirrors located closest to *addr*.

        The selection is delegated to ``MirrorSet.get_for_location`` so that
        both entry points share one implementation. The previous local copy
        removed entries from the candidate list while iterating over it,
        which made the loop skip mirrors.
        """
        return [mirror for mirror in self.get_all().get_for_location(addr)]

    def get_all_files(self):
        """Return the file names offered by any ``UP`` mirror, without duplicates.

        Names keep the order in which they are first seen.
        """
        files = []
        seen = set()

        for mirror in self.list():
            if mirror.state != "UP":
                continue

            for filename in mirror.filelist:
                # A set keeps the duplicate check cheap for long file lists.
                if filename in seen:
                    continue

                seen.add(filename)
                files.append(filename)

        return files
101
940227cb 102
0673d1b0
MT
class MirrorSet(object):
    """An ordered collection of mirrors with chainable filter helpers.

    Every filter returns a new ``MirrorSet`` and leaves this one untouched.
    The members only need the attributes the individual helpers read
    (``id``, ``hostname``, ``state``, ``priority``, ``prefer_for_countries``
    and ``distance_to()``).
    """

    def __init__(self, mirrors):
        # The list is stored as passed in, not copied.
        self._mirrors = mirrors

    def __add__(self, other):
        """Return the union of both sets, keeping first occurrences in order."""
        merged = []

        for mirror in self._mirrors + other._mirrors:
            if mirror not in merged:
                merged.append(mirror)

        return MirrorSet(merged)

    def __sub__(self, other):
        """Return this set without the members that also appear in *other*."""
        remaining = self._mirrors[:]

        for mirror in other._mirrors:
            if mirror in remaining:
                remaining.remove(mirror)

        return MirrorSet(remaining)

    def __iter__(self):
        return iter(self._mirrors)

    def __len__(self):
        return len(self._mirrors)

    def __str__(self):
        return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])

    @property
    def db(self):
        """Return the database handle used by ``Mirrors``."""
        return Mirrors().db

    def get_with_file(self, filename):
        """Return the members that have *filename* in the ``mirror_files`` table."""
        rows = self.db.query(
            "SELECT mirror FROM mirror_files WHERE filename=%s", filename)

        # Set membership keeps the filter linear in the number of mirrors.
        with_file = set(row.mirror for row in rows)

        return MirrorSet([m for m in self._mirrors if m.id in with_file])

    def get_random(self):
        """Return one member chosen at random, weighted by its priority.

        Each mirror enters the draw ``priority + 1`` times, so even a mirror
        with priority 0 can be picked. ``random.choice`` raises
        ``IndexError`` when the set is empty.
        """
        weighted = []
        for mirror in self._mirrors:
            weighted.extend([mirror] * (mirror.priority + 1))

        return random.choice(weighted)

    def get_for_country(self, country):
        """Return the members that list *country* in ``prefer_for_countries``."""
        return MirrorSet(
            [m for m in self._mirrors if country in m.prefer_for_countries])

    def get_for_location(self, addr):
        """Return the members nearest to *addr*, closest ring first.

        The search radius starts at 10 and grows by 20% per round. It stops
        once more than two mirrors have been collected, the radius exceeds
        270, or no candidates are left. The radius uses the same unit as
        ``distance_to()``.
        """
        distance = 10

        # The distance of a mirror does not depend on the current radius, so
        # it is looked up exactly once per mirror instead of once per round.
        pending = [(mirror, mirror.distance_to(addr)) for mirror in self._mirrors]
        selected = []

        while pending and len(selected) <= 2 and distance <= 270:
            out_of_reach = []

            for mirror, mirror_distance in pending:
                if mirror_distance > distance:
                    out_of_reach.append((mirror, mirror_distance))
                elif mirror not in selected:
                    selected.append(mirror)

            pending = out_of_reach
            distance *= 1.2

        return MirrorSet(selected)

    def get_with_state(self, state):
        """Return the members whose ``state`` equals *state*."""
        return MirrorSet([m for m in self._mirrors if m.state == state])
192
193
940227cb
MT
class Mirror(object):
    """A single mirror server, backed by one row of the ``mirrors`` table.

    The row is kept in ``self._info`` and its columns are readable as plain
    attributes (``hostname``, ``path``, ``state``, ...) through
    ``__getattr__``. Rows are cached in memcached under ``mirror-<id>``.
    """

    # Age of the mirror timestamp (in seconds) after which the mirror is
    # considered out of sync or down.  XXX get this into Settings
    OUTOFSYNC_AFTER = 6*60*60
    DOWN_AFTER = 3*24*60*60

    def __init__(self, id):
        self.id = id

        self.reload()

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.url)

    def __cmp__(self, other):
        # Python 2 comparison hook: mirrors compare by database id.
        return cmp(self.id, other.id)

    def __eq__(self, other):
        # Explicit equality keeps "mirror in list" working on interpreters
        # that no longer consult __cmp__.
        if not isinstance(other, Mirror):
            return NotImplemented

        return self.id == other.id

    def __ne__(self, other):
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal

        return not equal

    def __hash__(self):
        # Must agree with __eq__: equal ids hash alike.
        return hash(self.id)

    @property
    def db(self):
        """Return the ``webapp`` database handle."""
        return Databases().webapp

    def reload(self, force=False):
        """(Re)load the database row of this mirror into ``self._info``.

        The memcached copy is used when present. With ``force=True`` the
        cache is skipped and overwritten; this is needed right after the
        row has been changed, because the cached copy would otherwise still
        hold the old values.
        """
        memcached = Memcached()
        mem_id = "mirror-%s" % self.id

        info = None if force else memcached.get(mem_id)
        if not info:
            info = self.db.get("SELECT * FROM mirrors WHERE id=%s", self.id)

            # generate_url() reads hostname and path through self._info, so
            # the row has to be in place before the URL is derived.
            self._info = info
            info["url"] = self.generate_url()

            # Third argument is presumably the expiry time in seconds.
            memcached.set(mem_id, info, 60)

        self._info = info

    def generate_url(self):
        """Return ``http://<hostname>/<path>/`` with exactly one slash at each join."""
        url = "http://%s" % self.hostname
        if not self.path.startswith("/"):
            url += "/"
        url += "%s" % self.path
        if not self.path.endswith("/"):
            url += "/"
        return url

    def __getattr__(self, key):
        # Only called when normal lookup fails. Guard against endless
        # recursion when _info itself has not been set yet.
        if key == "_info":
            raise AttributeError(key)

        try:
            return self._info[key]
        except KeyError:
            raise AttributeError(key)

    @property
    def address(self):
        """Return the IPv4 address the hostname currently resolves to."""
        return socket.gethostbyname(self.hostname)

    @property
    def location(self):
        """Return the GeoIP record for this mirror, looked up once per object.

        The previous ``hasattr(self, "__location")`` test never succeeded
        because the attribute was stored under its name-mangled form, so
        every access repeated the DNS and GeoIP lookups.
        """
        if "_location" not in self.__dict__:
            self._location = GeoIP().get_all(self.address)

        return self._location

    @property
    def latitude(self):
        return self.location.latitude

    @property
    def longitude(self):
        return self.location.longitude

    @property
    def coordinates(self):
        """Return a ``(latitude, longitude)`` tuple."""
        return (self.latitude, self.longitude)

    @property
    def coordinate_str(self):
        """Return the coordinates as a ``"<latitude>,<longitude>"`` string."""
        return ",".join(["%s" % value for value in self.coordinates])

    # Historic misspelling, kept so that existing callers keep working.
    coordiante_str = coordinate_str

    @property
    def country_code(self):
        """Return the lower-case GeoIP country code, or ``"unknown"``."""
        # Apply the fallback before lower() so that a missing result does
        # not raise.
        return (GeoIP().get_country(self.address) or "unknown").lower()

    @property
    def country_name(self):
        return GeoIP().get_country_name(self.country_code)

    @property
    def city(self):
        """Return the city from the database row, else the GeoIP city."""
        if self._info["city"]:
            return self._info["city"]

        return self.location.city

    @property
    def location_str(self):
        """Return ``"<city>, <country>"``, or only the country if no city is known."""
        s = self.country_name
        if self.city:
            s = "%s, %s" % (self.city, s)

        return s

    @property
    def filelist(self):
        """Return the file names recorded for this mirror, sorted by name."""
        rows = self.db.query(
            "SELECT filename FROM mirror_files WHERE mirror=%s ORDER BY filename",
            self.id)
        return [row.filename for row in rows]

    @property
    def prefix(self):
        """Return the mirror type for ``pakfire*`` mirrors, otherwise ``""``."""
        if self.type.startswith("pakfire"):
            return self.type

        return ""

    def set_state(self, state):
        """Store *state* in the database unless it is already the current state."""
        logging.info("Setting state of %s to %s" % (self.hostname, state))

        if self.state == state:
            return

        self.db.execute("UPDATE mirrors SET state=%s WHERE id=%s",
            state, self.id)

        # Reload changed settings, bypassing the now outdated cache entry.
        self.reload(force=True)

    def check(self):
        """Start the asynchronous timestamp and file list checks."""
        logging.info("Running check for mirror %s" % self.hostname)

        self.check_timestamp()
        self.check_filelist()

    def check_state(self):
        """Derive the state from the ``disabled`` flag and the timestamp age."""
        logging.debug("Checking state of mirror %s" % self.id)

        if self.disabled == "Y":
            self.set_state("DOWN")
            # A disabled mirror stays down whatever its timestamp says.
            return

        time_diff = time.time() - self.last_update
        if time_diff > self.DOWN_AFTER:
            self.set_state("DOWN")
        elif time_diff > self.OUTOFSYNC_AFTER:
            self.set_state("OUTOFSYNC")
        else:
            self.set_state("UP")

    def check_timestamp(self):
        """Request ``<url>.timestamp``; the response is handled in a callback."""
        if self.releases == "N":
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".timestamp",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_timestamp_response)

    def __check_timestamp_response(self, response):
        """Store the fetched timestamp and re-evaluate the mirror state."""
        if response.error:
            logging.debug("Error getting timestamp from %s" % self.hostname)
            return

        try:
            timestamp = int(response.body.strip())
        except ValueError:
            # An unparsable body is stored as timestamp 0.
            timestamp = 0

        self.db.execute("UPDATE mirrors SET last_update=%s WHERE id=%s",
            timestamp, self.id)

        # Reload changed settings, bypassing the now outdated cache entry,
        # so that check_state() sees the new timestamp.
        self.reload(force=True)

        self.check_state()

        logging.info("Successfully updated timestamp from %s" % self.hostname)

    def check_filelist(self):
        """Request ``<url>.filelist``; the response is handled in a callback."""
        # XXX need to remove data from disabled mirrors
        if self.releases == "N" or self.disabled == "Y" or self.type != "full":
            return

        http = tornado.httpclient.AsyncHTTPClient()

        http.fetch(self.url + ".filelist",
            headers={ "Pragma" : "no-cache" },
            callback=self.__check_filelist_response)

    def __check_filelist_response(self, response):
        """Bring ``mirror_files`` in line with the file list sent by the mirror."""
        if response.error:
            logging.debug("Error getting filelist from %s" % self.hostname)
            return

        # Names recorded so far; whatever is left after the loop is no
        # longer on the mirror.
        stale = set(self.filelist)

        for line in response.body.splitlines():
            if not line:
                # Skip blank lines instead of recording an empty file name.
                continue

            filename = os.path.join(self.prefix, line)

            if filename in stale:
                stale.discard(filename)
                continue

            self.db.execute("INSERT INTO mirror_files(mirror, filename) VALUES(%s, %s)",
                self.id, filename)

        for filename in sorted(stale):
            self.db.execute("DELETE FROM mirror_files WHERE mirror=%s AND filename=%s",
                self.id, filename)

        logging.info("Successfully updated mirror filelist from %s" % self.hostname)

    @property
    def prefer_for_countries(self):
        """Return the sorted list of preferred country codes (may be empty).

        The column is split on ``", "``.
        """
        countries = self._info.get("prefer_for_countries", "")
        if countries:
            return sorted(countries.split(", "))

        return []

    @property
    def prefer_for_countries_names(self):
        """Return the sorted country names for ``prefer_for_countries``."""
        return sorted([GeoIP().get_country_name(c) for c in self.prefer_for_countries])

    def distance_to(self, addr):
        """Return how far this mirror is from the client address *addr*.

        The result is 0 when *addr* cannot be located or when its country is
        one of the preferred countries of this mirror. Otherwise it is the
        straight-line distance between the two latitude/longitude pairs
        (no great-circle correction), i.e. presumably a value in degrees.
        """
        location = GeoIP().get_all(addr)
        if not location:
            return 0

        if location.country_code.lower() in self.prefer_for_countries:
            return 0

        d_latitude = self.latitude - location.latitude
        d_longitude = self.longitude - location.longitude

        return math.sqrt(d_latitude**2 + d_longitude**2)

    def traffic(self, since):
        """Return download count times file size, summed over downloads since *since*.

        Log entries for files missing from the ``files`` table are ignored.
        """
        # XXX needs to be done better

        sizes = {}
        for entry in self.db.query("SELECT filename, filesize FROM files"):
            sizes[entry.filename] = entry.filesize

        query = "SELECT COUNT(filename) as count, filename FROM log_download WHERE mirror = %s"
        query += " AND date >= %s GROUP BY filename"

        traffic = 0
        for entry in self.db.query(query, self.id, since):
            if entry.filename in sizes:
                traffic += entry.count * sizes[entry.filename]

        return traffic

    @property
    def priority(self):
        """Return the ``priority`` column (default 1) multiplied by 10."""
        return self._info.get("priority", 1) * 10
940227cb 450