#!/usr/bin/python
###############################################################################
#                                                                             #
# Pakfire - The IPFire package management system                              #
# Copyright (C) 2011 Pakfire development team                                 #
#                                                                             #
# This program is free software: you can redistribute it and/or modify       #
# it under the terms of the GNU General Public License as published by       #
# the Free Software Foundation, either version 3 of the License, or          #
# (at your option) any later version.                                        #
#                                                                             #
# This program is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               #
# GNU General Public License for more details.                               #
#                                                                             #
# You should have received a copy of the GNU General Public License          #
# along with this program. If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import json
import os
import pycurl
import random

import logging
log = logging.getLogger("pakfire")

from config import _Config

import urlgrabber.grabber
from urlgrabber.grabber import URLGrabber, URLGrabError
from urlgrabber.mirror import MirrorGroup
from urlgrabber.progress import TextMeter

from pakfire.constants import *
from pakfire.i18n import _

class PakfireGrabber(URLGrabber):
	"""
		Class to make some modifications to the urlgrabber configuration.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"quote" : 0,
			"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,

			"ssl_verify_host" : False,
			"ssl_verify_peer" : False,
		})

		if isinstance(pakfire, _Config):
			config = pakfire
		else:
			config = pakfire.config
		self.config = config

		# Set throttle setting.
		bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
		if bandwidth_throttle:
			try:
				bandwidth_throttle = int(bandwidth_throttle)
			except ValueError:
				log.error("Configuration value for bandwidth_throttle is invalid.")
				bandwidth_throttle = 0

			kwargs.update({ "throttle" : bandwidth_throttle })

		# Configure HTTP proxy.
		http_proxy = config.get("downloader", "http_proxy")
		if http_proxy:
			kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

		URLGrabber.__init__(self, *args, **kwargs)

	def fork(self):
		"""
			Reset the Curl object after forking a process.
		"""
		# XXX this is a very ugly hack and fiddles around with the internals
		# of urlgrabber. We should not touch these, but apparently nobody
		# else uses multiple threads or processes to talk to their servers.
		# So we simply replace Curl with a new instance without closing
		# the old one. This should be fixed in urlgrabber and/or pycurl.
		urlgrabber.grabber._curl_cache = pycurl.Curl()
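
	# A minimal sketch of the intended call pattern (an assumption based on
	# the docstring above, not something this file spells out): a process that
	# forks calls fork() on its grabber in the child, so that the child does
	# not reuse the parent's cached Curl handle.
	#
	#   pid = os.fork()
	#   if pid == 0:
	#       grabber.fork()
	#       grabber.urlgrab(url, filename=path)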

	def check_offline_mode(self):
		offline = self.config.get("downloader", "offline")
		if not offline:
			return

		raise OfflineModeError

	def urlread(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# This is for older versions of urlgrabber which are packaged in Debian
		# and Ubuntu and cannot handle filenames as a normal Python string but need
		# a unicode string.
		return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)

	def urlopen(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# However, urlopen requires the filename to be an ordinary string object.
		filename = str(filename)

		return URLGrabber.urlopen(self, filename, *args, **kwargs)
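
# A minimal sketch of the configuration PakfireGrabber reads. The section and
# option names follow the config.get() calls above; the file syntax and the
# values are assumptions, not taken from this file:
#
#   [downloader]
#   offline = false
#   bandwidth_throttle = 102400
#   http_proxy = http://proxy.example.org:3128
#
# And a sketch of basic use, assuming a configured Pakfire instance is
# available as `pakfire`:
#
#   grabber = PakfireGrabber(pakfire)
#   metadata = grabber.urlread("http://example.org/repodata/repomd.json")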


class PackageDownloader(PakfireGrabber):
	"""
		Grabber that shows a text progress meter while downloading packages.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"progress_obj" : TextMeter(),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class MetadataDownloader(PakfireGrabber):
	"""
		Grabber that sends "Pragma: no-cache" so metadata is not served
		from intermediate caches.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class DatabaseDownloader(PackageDownloader):
	"""
		Grabber for package databases: shows a progress meter and bypasses
		intermediate caches like MetadataDownloader does.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PackageDownloader.__init__(self, pakfire, *args, **kwargs)


class SourceDownloader(object):
	def __init__(self, pakfire, mirrors=None):
		self.pakfire = pakfire

		self.grabber = PakfireGrabber(
			self.pakfire,
			progress_obj = TextMeter(),
		)

		if mirrors:
			self.grabber = MirrorGroup(self.grabber,
				[{ "mirror" : m.encode("utf-8") } for m in mirrors])

	def download(self, files):
		existing_files = []
		download_files = []

		for file in files:
			filename = os.path.join(SOURCE_CACHE_DIR, file)
			log.debug("Checking existence of %s..." % filename)

			if os.path.exists(filename) and os.path.getsize(filename):
				log.debug("...exists!")
				existing_files.append(filename)
			else:
				log.debug("...does not exist!")
				download_files.append(filename)

		if download_files:
			log.info(_("Downloading source files:"))

			if self.pakfire.offline:
				raise OfflineModeError(_("Cannot download source code in offline mode."))

			# Create the source download directory.
			if not os.path.exists(SOURCE_CACHE_DIR):
				os.makedirs(SOURCE_CACHE_DIR)

			for filename in download_files:
				try:
					self.grabber.urlgrab(os.path.basename(filename), filename=filename)
				except URLGrabError as e:
					# Remove the partly downloaded file.
					try:
						os.unlink(filename)
					except OSError:
						pass

					raise DownloadError("%s %s" % (os.path.basename(filename), e))

				# Check if the downloaded file was empty.
				if os.path.getsize(filename) == 0:
					# Remove the file and raise an error.
					os.unlink(filename)

					raise DownloadError(_("Downloaded empty file: %s")
						% os.path.basename(filename))

			log.info("")

		return existing_files + download_files
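
# A minimal usage sketch, assuming a Pakfire instance `pakfire` and that the
# listed files exist on the given mirror (the URL and file name are made up):
#
#   downloader = SourceDownloader(pakfire, mirrors=["http://source.example.org/pub/"])
#   tarballs = downloader.download(["foo-1.0.tar.gz"])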


class Mirror(object):
	def __init__(self, url, location=None, preferred=False):
		# Save the URL of the mirror in full format.
		self.url = url

		# Save the location (if given).
		self.location = location

		# Save the preference.
		self.preferred = preferred


class MirrorList(object):
	def __init__(self, pakfire, repo, mirrorlist):
		self.pakfire = pakfire
		self.repo = repo

		self.__mirrors = []

		# Save the URL that points to more mirrors.
		self.mirrorlist = mirrorlist

	@property
	def base_mirror(self):
		if not self.repo.baseurl:
			return

		return Mirror(self.repo.baseurl, preferred=False)

	@property
	def distro(self):
		return self.repo.distro

	@property
	def cache(self):
		"""
			Shortcut to the cache of the repository.
		"""
		return self.repo.cache

	def update(self, force=False):
		# XXX should this be allowed?
		if not self.mirrorlist:
			return

		# If the system is not online, we cannot download anything.
		if self.pakfire.offline:
			return

		log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
		cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
			self.repo.name, self.distro.arch, "mirrors")

		# Force the update if no mirrorlist is available.
		if not self.cache.exists(cache_filename):
			force = True

		if not force and self.cache.exists(cache_filename):
			age = self.cache.age(cache_filename)

			# If the age could be determined and is higher than 24h,
			# we force an update.
			if age and age > TIME_24H:
				force = True

		if force:
			g = MetadataDownloader(self.pakfire)

			try:
				mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
			except URLGrabError as e:
				log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
				return

			# XXX check for empty files or damaged output

			# Save the new mirror data to the cache.
			f = self.cache.open(cache_filename, "w")
			f.write(mirrordata)
			f.close()

		# Read the mirrorlist from the cache and parse it.
		self.forget_mirrors()
		with self.cache.open(cache_filename) as f:
			self.parse_mirrordata(f.read())

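	# The mirrorlist is expected to be a JSON document with a "mirrors" list
	# whose entries are passed as keyword arguments to Mirror() via
	# add_mirror() below. A made-up example of such a document:
	#
	#   { "mirrors" : [
	#       { "url" : "http://mirror1.example.org/", "location" : "de", "preferred" : true },
	#       { "url" : "http://mirror2.example.org/" }
	#   ]}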
	def parse_mirrordata(self, data):
		data = json.loads(data)

		for mirror in data["mirrors"]:
			self.add_mirror(**mirror)

	def add_mirror(self, *args, **kwargs):
		mirror = Mirror(*args, **kwargs)

		self.__mirrors.append(mirror)

	def forget_mirrors(self):
		self.__mirrors = []

	@property
	def preferred(self):
		"""
			Return a generator for all mirrors that are preferred.
		"""
		for mirror in self.__mirrors:
			if mirror.preferred:
				yield mirror

	@property
	def non_preferred(self):
		"""
			Return a generator for all mirrors that are not preferred.
		"""
		for mirror in self.__mirrors:
			if not mirror.preferred:
				yield mirror

	@property
	def all(self):
		"""
			Return a generator for all mirrors.
		"""
		for mirror in self.__mirrors:
			yield mirror

	def group(self, grabber):
		"""
			Return a MirrorGroup object for the given grabber.
		"""
		# Make sure the mirrorlist is up to date.
		self.update()

		# A list of mirrors that is passed to MirrorGroup.
		mirrors = []

		# Add all preferred mirrors first and shuffle them so that we
		# start at a random one.
		for mirror in self.preferred:
			mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
		random.shuffle(mirrors)

		# All other mirrors are added as well and will only be used if the
		# preferred mirrors did not work.
		for mirror in self.all:
			mirror = { "mirror" : mirror.url.encode("utf-8") }
			if mirror in mirrors:
				continue

			mirrors.append(mirror)

		# Always add the base mirror, if there is any.
		base_mirror = self.base_mirror
		if base_mirror:
			mirror = { "mirror" : base_mirror.url.encode("utf-8") }
			if mirror not in mirrors:
				mirrors.append(mirror)

		return MirrorGroup(grabber, mirrors)
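
# A minimal sketch of how the mirror handling fits together, assuming a
# repository object `repo` that carries a mirrorlist URL (names and the
# package path are made up):
#
#   mirrors = MirrorList(pakfire, repo, repo.mirrorlist)
#   grabber = PackageDownloader(pakfire)
#   group = mirrors.group(grabber)
#   group.urlgrab("packages/foo-1.0.pkg", filename=local_path)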


class Downloader(object):
	def __init__(self, mirrors, files):
		self.grabber = PakfireGrabber()

		self.mirrorgroup = mirrors.group(self.grabber)