]> git.ipfire.org Git - pakfire.git/blob - python/pakfire/downloader.py
0b5b5b6a47f59cd8c87301c37d01a3a71330b8cc
[pakfire.git] / python / pakfire / downloader.py
1 #!/usr/bin/python
2 ###############################################################################
3 # #
4 # Pakfire - The IPFire package management system #
5 # Copyright (C) 2011 Pakfire development team #
6 # #
7 # This program is free software: you can redistribute it and/or modify #
8 # it under the terms of the GNU General Public License as published by #
9 # the Free Software Foundation, either version 3 of the License, or #
10 # (at your option) any later version. #
11 # #
12 # This program is distributed in the hope that it will be useful, #
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15 # GNU General Public License for more details. #
16 # #
17 # You should have received a copy of the GNU General Public License #
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
19 # #
20 ###############################################################################
21
22 import json
23 import os
24 import pycurl
25 import random
26
27 import logging
28 log = logging.getLogger("pakfire")
29
30 from config import _Config
31
32 import urlgrabber.grabber
33 from urlgrabber.grabber import URLGrabber, URLGrabError
34 from urlgrabber.mirror import MirrorGroup
35 from urlgrabber.progress import TextMeter
36
37 from pakfire.constants import *
38 from pakfire.i18n import _
39
class PakfireGrabber(URLGrabber):
    """
    URLGrabber with pakfire's own defaults applied.

    The constructor forces a fixed set of urlgrabber options (quoting,
    user agent, SSL verification) and adds the throttle and proxy
    settings found in the "downloader" section of the configuration.
    Every download entry point refuses to work in offline mode.
    """
    def __init__(self, pakfire, *args, **kwargs):
        """
        pakfire is either a _Config instance or an object that exposes
        one as its "config" attribute. All remaining arguments are handed
        to URLGrabber.__init__ after the options below have been set.
        """
        # These options always win over whatever the caller passed in.
        kwargs["quote"] = 0
        kwargs["user_agent"] = "pakfire/%s" % PAKFIRE_VERSION

        # NOTE(review): SSL host and peer verification are switched off
        # for every download made through this class.
        kwargs["ssl_verify_host"] = False
        kwargs["ssl_verify_peer"] = False

        # Accept the configuration itself or something that carries it.
        self.config = pakfire if isinstance(pakfire, _Config) else pakfire.config

        # Bandwidth throttle (only passed on when a value is configured).
        throttle = self.config.get("downloader", "bandwidth_throttle")
        if throttle:
            try:
                throttle = int(throttle)
            except ValueError:
                log.error("Configuration value for bandwidth_throttle is invalid.")
                throttle = 0

            kwargs["throttle"] = throttle

        # The same HTTP proxy is used for plain and encrypted connections.
        proxy = self.config.get("downloader", "http_proxy")
        if proxy:
            kwargs["proxies"] = { "http" : proxy, "https" : proxy }

        URLGrabber.__init__(self, *args, **kwargs)

    def check_offline_mode(self):
        """
        Raise OfflineModeError if the "offline" option in the
        "downloader" section of the configuration is set.
        """
        if self.config.get("downloader", "offline"):
            raise OfflineModeError

    def urlread(self, filename, *args, **kwargs):
        """
        Like URLGrabber.urlread, but refuses to run in offline mode.
        """
        self.check_offline_mode()

        # The name is passed on UTF-8 encoded. According to the original
        # author this is required by the older urlgrabber releases that
        # are packaged in Debian and Ubuntu.
        encoded = filename.encode("utf-8")
        return URLGrabber.urlread(self, encoded, *args, **kwargs)

    def urlopen(self, filename, *args, **kwargs):
        """
        Like URLGrabber.urlopen, but refuses to run in offline mode.
        """
        self.check_offline_mode()

        # UTF-8 encode the name for older urlgrabber releases (see the
        # note in urlread).
        encoded = filename.encode("utf-8")
        return URLGrabber.urlopen(self, encoded, *args, **kwargs)

    def urlgrab(self, url, *args, **kwargs):
        """
        Like URLGrabber.urlgrab, but refuses to run in offline mode.
        """
        self.check_offline_mode()

        # UTF-8 encode the URL for older urlgrabber releases (see the
        # note in urlread).
        encoded = url.encode("utf-8")
        return URLGrabber.urlgrab(self, encoded, *args, **kwargs)
107
108
class PackageDownloader(PakfireGrabber):
    """
    PakfireGrabber that reports progress on a urlgrabber TextMeter.
    """
    def __init__(self, pakfire, *args, **kwargs):
        # Always install a fresh text progress meter, replacing any
        # progress object the caller may have supplied.
        kwargs["progress_obj"] = TextMeter()

        PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
116
117
class MetadataDownloader(PakfireGrabber):
    """
    PakfireGrabber that sends a "Pragma: no-cache" header with
    every request.
    """
    def __init__(self, pakfire, *args, **kwargs):
        # Overrides any http_headers value passed in by the caller.
        kwargs["http_headers"] = (('Pragma', 'no-cache'),)

        PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
125
126
class DatabaseDownloader(PackageDownloader):
    """
    PackageDownloader (i.e. with progress meter) that additionally
    sends a "Pragma: no-cache" header with every request.
    """
    def __init__(self, pakfire, *args, **kwargs):
        # Overrides any http_headers value passed in by the caller.
        kwargs["http_headers"] = (('Pragma', 'no-cache'),)

        PackageDownloader.__init__(self, pakfire, *args, **kwargs)
134
135
class SourceDownloader(object):
    """
    Fetches source files into SOURCE_CACHE_DIR, skipping those that
    are already present there with a non-zero size.
    """
    def __init__(self, pakfire, mirrors=None):
        """
        pakfire is handed to PakfireGrabber; mirrors is an optional
        sequence of mirror URLs that the grabber is wrapped with.
        """
        self.pakfire = pakfire

        grabber = PakfireGrabber(self.pakfire, progress_obj=TextMeter())

        # With mirrors given, all downloads go through a MirrorGroup.
        if mirrors:
            mirror_dicts = [{ "mirror" : m.encode("utf-8") } for m in mirrors]
            grabber = MirrorGroup(grabber, mirror_dicts)

        self.grabber = grabber

    def download(self, files):
        """
        Make sure every file in files exists in SOURCE_CACHE_DIR.

        Returns the list of full paths: first those that were already
        cached, then those that have just been downloaded.

        Raises OfflineModeError if something needs to be downloaded
        while pakfire is offline, and DownloadError if a download
        fails or yields an empty file.
        """
        cached = []
        missing = []

        # Sort the requested files into "already there" and "to fetch".
        for name in files:
            path = os.path.join(SOURCE_CACHE_DIR, name)
            log.debug("Checking existance of %s..." % path)

            if os.path.exists(path) and os.path.getsize(path):
                log.debug("...exists!")
                cached.append(path)
            else:
                log.debug("...does not exist!")
                missing.append(path)

        # Nothing to fetch.
        if not missing:
            return cached

        log.info(_("Downloading source files:"))

        if self.pakfire.offline:
            raise OfflineModeError(_("Cannot download source code in offline mode."))

        # Create source download directory.
        if not os.path.exists(SOURCE_CACHE_DIR):
            os.makedirs(SOURCE_CACHE_DIR)

        for path in missing:
            basename = os.path.basename(path)

            try:
                self.grabber.urlgrab(basename, filename=path)
            except URLGrabError as e:
                # Do not leave a partly downloaded file behind.
                try:
                    os.unlink(path)
                except OSError:
                    pass

                raise DownloadError("%s %s" % (os.path.basename(path), e))

            # An empty download counts as a failure, too.
            if os.path.getsize(path) == 0:
                os.unlink(path)

                raise DownloadError(_("Downloaded empty file: %s") \
                    % os.path.basename(path))

        log.info("")

        return cached + missing
197
198
class Mirror(object):
    """
    Plain description of one download mirror.

    Attributes:
        url:       URL of the mirror in full format.
        location:  Location of the mirror, or None if not given.
        preferred: Whether this mirror should be tried before the
                   non-preferred ones.
    """
    def __init__(self, url, location=None, preferred=False):
        # Save URL of the mirror in full format
        self.url = url

        # Save the location (if given)
        self.location = location

        # Save preference. This used to be hard-coded to False, which
        # silently dropped the value passed in by the caller.
        self.preferred = preferred
209
210
class MirrorList(object):
    """
    The set of mirrors known for one repository.

    The list is downloaded from the mirrorlist URL, stored in the
    repository cache and parsed from there.
    """
    def __init__(self, pakfire, repo, mirrorlist):
        """
        pakfire:    object handed to the downloader; its "offline"
                    attribute is consulted before any download.
        repo:       repository this list belongs to.
        mirrorlist: URL the mirror list is downloaded from (may be empty).
        """
        self.pakfire = pakfire
        self.repo = repo

        self.__mirrors = []

        # Save URL to more mirrors.
        self.mirrorlist = mirrorlist

    @property
    def base_mirror(self):
        """
        A non-preferred Mirror for the base URL of the repository, or
        None if the repository has no base URL.
        """
        if not self.repo.baseurl:
            return

        return Mirror(self.repo.baseurl, preferred=False)

    @property
    def distro(self):
        """
        Shortcut to distro from repository.
        """
        return self.repo.distro

    @property
    def cache(self):
        """
        Shortcut to cache from repository.
        """
        return self.repo.cache

    def update(self, force=False):
        """
        Refresh the cached mirror list if necessary and (re-)load it.

        A download happens when force is set, when no cached copy
        exists or when the cached copy is older than TIME_24H. Nothing
        is done at all without a mirrorlist URL or in offline mode. If
        the download fails, a warning is logged and the currently
        loaded mirrors are left untouched.
        """
        # XXX should this be allowed?
        if not self.mirrorlist:
            return

        # If the system is not online, we cannot download anything.
        if self.pakfire.offline:
            return

        log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
        cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
            self.repo.name, self.distro.arch, "mirrors")

        if not self.cache.exists(cache_filename):
            # Force the update if no mirrorlist is available.
            force = True

        elif not force:
            age = self.cache.age(cache_filename)

            # If the age could be determined and is higher than 24h,
            # we force an update.
            if age and age > TIME_24H:
                force = True

        if force:
            g = MetadataDownloader(self.pakfire)

            try:
                mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
            except URLGrabError as e:
                log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
                return

            # XXX check for empty files or damaged output

            # Save new mirror data to cache. The context manager makes
            # sure the file is closed even if writing fails.
            with self.cache.open(cache_filename, "w") as f:
                f.write(mirrordata)

        # Read mirrorlist from cache and parse it.
        self.forget_mirrors()
        with self.cache.open(cache_filename) as f:
            self.parse_mirrordata(f.read())

    def parse_mirrordata(self, data):
        """
        Parse a JSON document and add one mirror for every entry of
        its "mirrors" list. Each entry is passed to add_mirror as
        keyword arguments.
        """
        data = json.loads(data)

        for mirror in data["mirrors"]:
            self.add_mirror(**mirror)

    def add_mirror(self, *args, **kwargs):
        """
        Create a Mirror from the given arguments and append it to
        the list.
        """
        mirror = Mirror(*args, **kwargs)

        self.__mirrors.append(mirror)

    def forget_mirrors(self):
        """
        Drop all mirrors that are currently loaded.
        """
        self.__mirrors = []

    @property
    def preferred(self):
        """
        Return a generator for all mirrors that are preferred.
        """
        for mirror in self.__mirrors:
            if mirror.preferred:
                yield mirror

    @property
    def non_preferred(self):
        """
        Return a generator for all mirrors that are not preferred.
        """
        for mirror in self.__mirrors:
            if not mirror.preferred:
                yield mirror

    @property
    def all(self):
        """
        Return a generator for all mirrors.
        """
        for mirror in self.__mirrors:
            yield mirror

    def group(self, grabber):
        """
        Return a MirrorGroup object for the given grabber.
        """
        # Make sure the mirrorlist is up to date.
        self.update()

        # Add all preferred mirrors at the first place and shuffle them
        # that we will start at a random place.
        mirrors = [{ "mirror" : m.url.encode("utf-8") } for m in self.preferred]
        random.shuffle(mirrors)

        # All other mirrors are added as well and will only be used if all
        # preferred mirrors did not work.
        for m in self.all:
            entry = { "mirror" : m.url.encode("utf-8") }
            if entry not in mirrors:
                mirrors.append(entry)

        # Always add the base mirror if any.
        base_mirror = self.base_mirror
        if base_mirror:
            entry = { "mirror" : base_mirror.url.encode("utf-8") }
            if entry not in mirrors:
                mirrors.append(entry)

        return MirrorGroup(grabber, mirrors)
358
359
360
class Downloader(object):
    """
    Binds a PakfireGrabber to the mirrors of a mirror list.
    """
    def __init__(self, mirrors, files):
        """
        mirrors: mirror list object providing a "pakfire" attribute
                 and a group() method (as MirrorList does).
        files:   currently unused.
        """
        # PakfireGrabber needs the pakfire (or configuration) object as
        # its first argument; calling it without one raised a TypeError.
        # NOTE(review): it is taken from the mirror list here -- confirm
        # that every caller passes a MirrorList-like object.
        self.grabber = PakfireGrabber(mirrors.pakfire)

        self.mirrorgroup = mirrors.group(self.grabber)
366
367