#!/usr/bin/python
###############################################################################
#                                                                             #
# Pakfire - The IPFire package management system                              #
# Copyright (C) 2011 Pakfire development team                                 #
#                                                                             #
# This program is free software: you can redistribute it and/or modify       #
# it under the terms of the GNU General Public License as published by       #
# the Free Software Foundation, either version 3 of the License, or          #
# (at your option) any later version.                                        #
#                                                                             #
# This program is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               #
# GNU General Public License for more details.                               #
#                                                                             #
# You should have received a copy of the GNU General Public License          #
# along with this program. If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import json
import os
import random

import logging
log = logging.getLogger("pakfire")

from config import _Config

from urlgrabber.grabber import URLGrabber, URLGrabError
from urlgrabber.mirror import MirrorGroup
from urlgrabber.progress import TextMeter

from pakfire.constants import *
from pakfire.i18n import _

class PakfireGrabber(URLGrabber):
	"""
		URLGrabber subclass that applies Pakfire's downloader configuration
		(user agent, SSL settings, bandwidth throttle and proxies).
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"quote" : 0,
			"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,

			"ssl_verify_host" : False,
			"ssl_verify_peer" : False,
		})

		if isinstance(pakfire, _Config):
			config = pakfire
		else:
			config = pakfire.config
		self.config = config

		# Set the bandwidth throttle, if configured.
		bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
		if bandwidth_throttle:
			try:
				bandwidth_throttle = int(bandwidth_throttle)
			except ValueError:
				log.error("Configuration value for bandwidth_throttle is invalid.")
				bandwidth_throttle = 0

			kwargs.update({ "throttle" : bandwidth_throttle })

		# Configure the HTTP proxy (used for HTTPS as well).
		http_proxy = config.get("downloader", "http_proxy")
		if http_proxy:
			kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

		URLGrabber.__init__(self, *args, **kwargs)

	def check_offline_mode(self):
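		"""
			Raise OfflineModeError if the configuration requests offline
			operation, so that no network access is attempted.
		"""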
		offline = self.config.get("downloader", "offline")
		if not offline:
			return

		raise OfflineModeError

	def urlread(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# Older versions of urlgrabber, as packaged in Debian and Ubuntu,
		# cannot handle unicode filenames, so pass a UTF-8 encoded byte
		# string instead.
		return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)

	def urlopen(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# urlopen also requires the filename to be an ordinary string object.
		filename = str(filename)

		return URLGrabber.urlopen(self, filename, *args, **kwargs)


class PackageDownloader(PakfireGrabber):
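	"""
		Downloader for package files that displays a text progress meter.
	"""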
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"progress_obj" : TextMeter(),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class MetadataDownloader(PakfireGrabber):
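	"""
		Downloader for repository metadata that sends a "Pragma: no-cache"
		header, asking intermediate caches not to serve stale copies.
	"""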
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class DatabaseDownloader(PackageDownloader):
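	"""
		Downloader for package databases: shows a progress meter like
		PackageDownloader and additionally sends a "Pragma: no-cache" header.
	"""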
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PackageDownloader.__init__(self, pakfire, *args, **kwargs)


class SourceDownloader(object):
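	"""
		Downloads source files into the local source cache, optionally
		spreading the requests over a group of mirrors.
	"""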
	def __init__(self, pakfire, mirrors=None):
		self.pakfire = pakfire

		self.grabber = PakfireGrabber(
			self.pakfire,
			progress_obj = TextMeter(),
		)

		if mirrors:
			self.grabber = MirrorGroup(self.grabber,
				[{ "mirror" : m.encode("utf-8") } for m in mirrors])

	def download(self, files):
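		"""
			Return the local paths of all requested files, downloading any
			that are not yet present (and non-empty) in the source cache.
		"""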
		existing_files = []
		download_files = []

		for file in files:
			filename = os.path.join(SOURCE_CACHE_DIR, file)
			log.debug("Checking existence of %s..." % filename)

			if os.path.exists(filename) and os.path.getsize(filename):
				log.debug("...exists!")
				existing_files.append(filename)
			else:
				log.debug("...does not exist!")
				download_files.append(filename)

		if download_files:
			log.info(_("Downloading source files:"))

			if self.pakfire.offline:
				raise OfflineModeError, _("Cannot download source code in offline mode.")

			# Create the source download directory.
			if not os.path.exists(SOURCE_CACHE_DIR):
				os.makedirs(SOURCE_CACHE_DIR)

			for filename in download_files:
				try:
					self.grabber.urlgrab(os.path.basename(filename), filename=filename)
				except URLGrabError, e:
					# Remove the partially downloaded file.
					try:
						os.unlink(filename)
					except OSError:
						pass

					raise DownloadError, "%s %s" % (os.path.basename(filename), e)

				# Check if the downloaded file was empty.
				if os.path.getsize(filename) == 0:
					# Remove the file and raise an error.
					os.unlink(filename)

					raise DownloadError, _("Downloaded empty file: %s") \
						% os.path.basename(filename)

			log.info("")

		return existing_files + download_files


class Mirror(object):
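	"""
		A single download mirror, described by its URL, an optional
		location and a preference flag.
	"""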
	def __init__(self, url, location=None, preferred=False):
		# Save URL of the mirror in full format
		self.url = url

		# Save the location (if given)
		self.location = location

		# Save preference
		self.preferred = preferred


class MirrorList(object):
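	"""
		Maintains the list of mirrors for a repository: the mirror data is
		downloaded from the mirrorlist URL, cached on disk and refreshed
		when it becomes too old.
	"""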
	def __init__(self, pakfire, repo, mirrorlist):
		self.pakfire = pakfire
		self.repo = repo

		self.__mirrors = []

		# Save URL to more mirrors.
		self.mirrorlist = mirrorlist

	@property
	def base_mirror(self):
		if not self.repo.baseurl:
			return

		return Mirror(self.repo.baseurl, preferred=False)

	@property
	def distro(self):
		return self.repo.distro

	@property
	def cache(self):
		"""
			Shortcut to cache from repository.
		"""
		return self.repo.cache

	def update(self, force=False):
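		"""
			Download and cache the mirror data if it is missing or older
			than 24 hours (or if force is given), then re-read the list of
			mirrors from the cache. Does nothing without a mirrorlist URL
			or in offline mode.
		"""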
		# XXX should this be allowed?
		if not self.mirrorlist:
			return

		# If the system is not online, we cannot download anything.
		if self.pakfire.offline:
			return

		log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
		cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
			self.repo.name, self.distro.arch, "mirrors")

		# Force the update if no mirrorlist is available.
		if not self.cache.exists(cache_filename):
			force = True

		if not force and self.cache.exists(cache_filename):
			age = self.cache.age(cache_filename)

			# If the age could be determined and is higher than 24h,
			# we force an update.
			if age and age > TIME_24H:
				force = True

		if force:
			g = MetadataDownloader(self.pakfire)

			try:
				mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
			except URLGrabError, e:
				log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
				return

			# XXX check for empty files or damaged output

			# Save new mirror data to cache.
			f = self.cache.open(cache_filename, "w")
			f.write(mirrordata)
			f.close()

		# Read mirrorlist from cache and parse it.
		self.forget_mirrors()
		with self.cache.open(cache_filename) as f:
			self.parse_mirrordata(f.read())

	def parse_mirrordata(self, data):
		data = json.loads(data)

		for mirror in data["mirrors"]:
			self.add_mirror(**mirror)

	def add_mirror(self, *args, **kwargs):
		mirror = Mirror(*args, **kwargs)

		self.__mirrors.append(mirror)

	def forget_mirrors(self):
		self.__mirrors = []

	@property
	def preferred(self):
		"""
			Return a generator for all mirrors that are preferred.
		"""
		for mirror in self.__mirrors:
			if mirror.preferred:
				yield mirror

	@property
	def non_preferred(self):
		"""
			Return a generator for all mirrors that are not preferred.
		"""
		for mirror in self.__mirrors:
			if not mirror.preferred:
				yield mirror

	@property
	def all(self):
		"""
			Return a generator for all mirrors.
		"""
		for mirror in self.__mirrors:
			yield mirror

	def group(self, grabber):
		"""
			Return a MirrorGroup object for the given grabber.
		"""
		# Make sure the mirrorlist is up to date.
		self.update()

		# A list of mirrors that is passed to MirrorGroup.
		mirrors = []

		# Add all preferred mirrors first and shuffle them, so that we
		# start at a random one.
		for mirror in self.preferred:
			mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
		random.shuffle(mirrors)

		# All other mirrors are appended afterwards and will only be used
		# if the preferred mirrors fail.
		for mirror in self.all:
			mirror = { "mirror" : mirror.url.encode("utf-8") }
			if mirror in mirrors:
				continue

			mirrors.append(mirror)

		# Always add the base mirror, if there is any.
		base_mirror = self.base_mirror
		if base_mirror:
			mirror = { "mirror" : base_mirror.url.encode("utf-8") }
			if mirror not in mirrors:
				mirrors.append(mirror)

		return MirrorGroup(grabber, mirrors)


class Downloader(object):
	def __init__(self, mirrors, files):
		self.grabber = PakfireGrabber(mirrors.pakfire)

		self.mirrorgroup = mirrors.group(self.grabber)
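
# Illustrative sketch only (not executed): assuming "pakfire" is a Pakfire or
# configuration object and "repo" is a repository object that provides a
# mirrorlist URL, a download via the mirror infrastructure could look roughly
# like this. The mirrorlist URL and file names below are made up.
#
#   mirrorlist = MirrorList(pakfire, repo, "https://mirrors.example.org/mirrorlist")
#   grabber = PackageDownloader(pakfire)
#   group = mirrorlist.group(grabber)
#   group.urlgrab("packages/example-1.0.pfm", filename="/tmp/example-1.0.pfm")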