]> git.ipfire.org Git - pakfire.git/blame - python/pakfire/downloader.py
More urlgrabber fixes for Ubuntu/Debian/Mint.
[pakfire.git] / python / pakfire / downloader.py
CommitLineData
1de8761d 1#!/usr/bin/python
b792d887
MT
2###############################################################################
3# #
4# Pakfire - The IPFire package management system #
5# Copyright (C) 2011 Pakfire development team #
6# #
7# This program is free software: you can redistribute it and/or modify #
8# it under the terms of the GNU General Public License as published by #
9# the Free Software Foundation, either version 3 of the License, or #
10# (at your option) any later version. #
11# #
12# This program is distributed in the hope that it will be useful, #
13# but WITHOUT ANY WARRANTY; without even the implied warranty of #
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15# GNU General Public License for more details. #
16# #
17# You should have received a copy of the GNU General Public License #
18# along with this program. If not, see <http://www.gnu.org/licenses/>. #
19# #
20###############################################################################
1de8761d
MT
21
22import json
062699ee 23import os
aa14071d 24import pycurl
4f91860e 25import random
1de8761d 26
8b6bc023
MT
27import logging
28log = logging.getLogger("pakfire")
29
a6bd96bc 30from config import _Config
e57c5475 31
aa14071d 32import urlgrabber.grabber
1de8761d 33from urlgrabber.grabber import URLGrabber, URLGrabError
4f91860e 34from urlgrabber.mirror import MirrorGroup
14ea3228 35from urlgrabber.progress import TextMeter
1de8761d 36
a2d1644c 37from pakfire.constants import *
062699ee 38from pakfire.i18n import _
1de8761d
MT
39
class PakfireGrabber(URLGrabber):
    """
    URLGrabber subclass that applies Pakfire's download configuration.

    The constructor forces a fixed set of urlgrabber options (no URL
    quoting, the Pakfire user agent, SSL host/peer verification disabled)
    and adds the bandwidth throttle and HTTP proxy read from the
    "downloader" configuration section.  urlread(), urlopen() and
    urlgrab() raise OfflineModeError while the "offline" setting of that
    section is enabled.
    """

    def __init__(self, pakfire, *args, **kwargs):
        """
        pakfire -- either a _Config instance or an object that carries
                   one in its "config" attribute.

        All remaining arguments are handed on to URLGrabber.__init__.
        The options set below always override values given by the caller.
        """
        kwargs.update({
            "quote" : 0,
            "user_agent" : "pakfire/%s" % PAKFIRE_VERSION,

            # NOTE(review): certificate verification is switched off for
            # every transfer made through this class -- confirm that this
            # is still intended.
            "ssl_verify_host" : False,
            "ssl_verify_peer" : False,
        })

        if isinstance(pakfire, _Config):
            config = pakfire
        else:
            config = pakfire.config
        self.config = config

        # Set throttle setting.
        bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
        if bandwidth_throttle:
            try:
                bandwidth_throttle = int(bandwidth_throttle)
            except ValueError:
                log.error("Configuration value for bandwidth_throttle is invalid.")
                bandwidth_throttle = 0

            kwargs.update({ "throttle" : bandwidth_throttle })

        # Configure HTTP proxy (the same proxy is used for http and https).
        http_proxy = config.get("downloader", "http_proxy")
        if http_proxy:
            kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

        URLGrabber.__init__(self, *args, **kwargs)

    @staticmethod
    def _encode_url(url):
        """
        Return url as a UTF-8 encoded byte string.

        This is for older versions of urlgrabber which are packaged in
        Debian and Ubuntu and cannot handle unicode strings but need a
        plain byte string.

        Byte strings are returned untouched: calling encode() on a
        Python 2 byte string implicitly decodes it as ASCII first, which
        fails for any non-ASCII byte.
        """
        if isinstance(url, bytes):
            return url

        return url.encode("utf-8")

    def fork(self):
        """
        Reset Curl object after forking a process.
        """
        # XXX this is a very ugly hack and fiddles around with the internals
        # of urlgrabber. We should not touch these, but apparently nobody
        # else uses multiple threads or processes to talk to their servers.
        # So we simply replace Curl with a new instance without closing
        # the old one. This should be fixed in urlgrabber and/or pycurl.
        urlgrabber.grabber._curl_cache = pycurl.Curl()

    def check_offline_mode(self):
        """
        Raise OfflineModeError if the "offline" setting of the
        "downloader" configuration section is enabled.
        """
        offline = self.config.get("downloader", "offline")
        if not offline:
            return

        raise OfflineModeError

    def urlread(self, filename, *args, **kwargs):
        """
        Like URLGrabber.urlread, but refuses to work in offline mode and
        passes the location on as a byte string.
        """
        self.check_offline_mode()

        return URLGrabber.urlread(self, self._encode_url(filename), *args, **kwargs)

    def urlopen(self, filename, *args, **kwargs):
        """
        Like URLGrabber.urlopen, but refuses to work in offline mode and
        passes the location on as a byte string.
        """
        self.check_offline_mode()

        return URLGrabber.urlopen(self, self._encode_url(filename), *args, **kwargs)

    def urlgrab(self, url, *args, **kwargs):
        """
        Like URLGrabber.urlgrab, but refuses to work in offline mode and
        passes the location on as a byte string.
        """
        self.check_offline_mode()

        return URLGrabber.urlgrab(self, self._encode_url(url), *args, **kwargs)
0f8d6745 118
14ea3228
MT
119
class PackageDownloader(PakfireGrabber):
    """
    PakfireGrabber that reports transfer progress through a TextMeter.
    """

    def __init__(self, pakfire, *args, **kwargs):
        # A fresh text progress meter is always installed; a progress_obj
        # supplied by the caller is replaced.
        kwargs["progress_obj"] = TextMeter()

        PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
14ea3228
MT
127
128
class MetadataDownloader(PakfireGrabber):
    """
    PakfireGrabber that sends a "Pragma: no-cache" header with its
    HTTP requests.
    """

    def __init__(self, pakfire, *args, **kwargs):
        # The header tuple replaces any http_headers given by the caller.
        kwargs["http_headers"] = (('Pragma', 'no-cache'),)

        PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
14ea3228
MT
136
137
class DatabaseDownloader(PackageDownloader):
    """
    PackageDownloader that additionally sends a "Pragma: no-cache"
    header with its HTTP requests.
    """

    def __init__(self, pakfire, *args, **kwargs):
        # The header tuple replaces any http_headers given by the caller.
        kwargs["http_headers"] = (('Pragma', 'no-cache'),)

        PackageDownloader.__init__(self, pakfire, *args, **kwargs)
1de8761d 145
4f91860e 146
062699ee
MT
class SourceDownloader(object):
    """
    Fetches source files into SOURCE_CACHE_DIR, skipping those that are
    already present there with a non-zero size.
    """

    def __init__(self, pakfire, mirrors=None):
        """
        pakfire -- passed on to PakfireGrabber; its "offline" attribute
                   is consulted by download().
        mirrors -- optional iterable of mirror URLs.  When given (and not
                   empty) the grabber is wrapped in a MirrorGroup.
        """
        self.pakfire = pakfire

        grabber = PakfireGrabber(self.pakfire, progress_obj=TextMeter())

        if mirrors:
            mirror_dicts = [{ "mirror" : url.encode("utf-8") } for url in mirrors]
            grabber = MirrorGroup(grabber, mirror_dicts)

        self.grabber = grabber

    def download(self, files):
        """
        Make sure that every name in files exists in SOURCE_CACHE_DIR.

        Returns the list of full paths: first the files that were already
        cached, then the ones that have just been downloaded.

        Raises OfflineModeError if something needs to be downloaded while
        pakfire is offline, and DownloadError if a transfer fails or
        yields an empty file (the partial/empty file is removed first).
        """
        cached = []
        missing = []

        for name in files:
            path = os.path.join(SOURCE_CACHE_DIR, name)
            log.debug("Checking existance of %s..." % path)

            # An empty file counts as missing and is fetched again.
            if not os.path.exists(path) or not os.path.getsize(path):
                log.debug("...does not exist!")
                missing.append(path)
                continue

            log.debug("...exists!")
            cached.append(path)

        # Nothing to fetch.
        if not missing:
            return cached

        log.info(_("Downloading source files:"))

        if self.pakfire.offline:
            raise OfflineModeError(_("Cannot download source code in offline mode."))

        # Create source download directory.
        if not os.path.exists(SOURCE_CACHE_DIR):
            os.makedirs(SOURCE_CACHE_DIR)

        for path in missing:
            basename = os.path.basename(path)

            try:
                self.grabber.urlgrab(basename, filename=path)
            except URLGrabError as e:
                # Remove partly downloaded file.
                try:
                    os.unlink(path)
                except OSError:
                    pass

                raise DownloadError("%s %s" % (basename, e))

            # Check if the downloaded file was empty.
            if os.path.getsize(path) == 0:
                # Remove the file and raise an error.
                os.unlink(path)

                raise DownloadError(_("Downloaded empty file: %s") % basename)

        log.info("")

        return cached + missing
208
209
class Mirror(object):
    """
    Plain record describing one download mirror.

    url       -- URL of the mirror in full format.
    location  -- location of the mirror, if known (None otherwise).
    preferred -- whether this mirror should be tried before the others.
    """

    def __init__(self, url, location=None, preferred=False):
        # Save URL of the mirror in full format
        self.url = url

        # Save the location (if given)
        self.location = location

        # Save preference. This used to be hard-coded to False, which
        # silently discarded the value passed by the caller.
        self.preferred = preferred
220
221
class MirrorList(object):
    """
    Collection of Mirror objects belonging to one repository.

    The mirrors are loaded from a JSON mirror list which is downloaded
    from the "mirrorlist" URL and kept in the cache of the repository.
    """

    def __init__(self, pakfire, repo, mirrorlist):
        """
        pakfire    -- object whose "offline" attribute is checked before
                      downloading; also handed to MetadataDownloader.
        repo       -- repository object; its baseurl, distro, cache and
                      name attributes are used.
        mirrorlist -- URL the mirror list is downloaded from (may be
                      empty, in which case update() does nothing).
        """
        self.pakfire = pakfire
        self.repo = repo

        self.__mirrors = []

        # Save URL to more mirrors.
        self.mirrorlist = mirrorlist

    @property
    def base_mirror(self):
        """
        Mirror object for the base URL of the repository, or None if the
        repository has no base URL.
        """
        if not self.repo.baseurl:
            return

        return Mirror(self.repo.baseurl, preferred=False)

    @property
    def distro(self):
        """
        Shortcut to distro from repository.
        """
        return self.repo.distro

    @property
    def cache(self):
        """
        Shortcut to cache from repository.
        """
        return self.repo.cache

    def update(self, force=False):
        """
        Refresh the list of mirrors.

        The mirror list is downloaded again if force is set, if no copy
        is cached or if the cached copy is older than TIME_24H.
        Afterwards the cached copy is parsed and replaces the mirrors
        currently held.

        Nothing happens if no mirrorlist URL is configured or if pakfire
        is offline.  A failed download is logged as a warning and leaves
        the current mirrors untouched.
        """
        # XXX should this be allowed?
        if not self.mirrorlist:
            return

        # If the system is not online, we cannot download anything.
        if self.pakfire.offline:
            return

        log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
        cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
            self.repo.name, self.distro.arch, "mirrors")

        # Force the update if no mirrorlist is available.
        if not self.cache.exists(cache_filename):
            force = True

        # Reaching this branch without force means that the cached file
        # exists, so it does not need to be checked a second time.
        if not force:
            age = self.cache.age(cache_filename)

            # If the age could be determined and is higher than 24h,
            # we force an update.
            if age and age > TIME_24H:
                force = True

        if force:
            g = MetadataDownloader(self.pakfire)

            try:
                mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
            except URLGrabError as e:
                log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
                return

            # XXX check for empty files or damaged output

            # Save new mirror data to cache. The with-statement makes
            # sure the file is closed even if writing fails.
            with self.cache.open(cache_filename, "w") as f:
                f.write(mirrordata)

        # Read mirrorlist from cache and parse it.
        self.forget_mirrors()
        with self.cache.open(cache_filename) as f:
            self.parse_mirrordata(f.read())

    def parse_mirrordata(self, data):
        """
        Parse a JSON document and add every entry of its "mirrors" list.

        Each entry is passed to add_mirror() as keyword arguments.
        """
        data = json.loads(data)

        for mirror in data["mirrors"]:
            self.add_mirror(**mirror)

    def add_mirror(self, *args, **kwargs):
        """
        Create a Mirror from the given arguments and append it to the list.
        """
        mirror = Mirror(*args, **kwargs)

        self.__mirrors.append(mirror)

    def forget_mirrors(self):
        """
        Drop all mirrors that are currently known.
        """
        self.__mirrors = []

    @property
    def preferred(self):
        """
        Return a generator for all mirrors that are preferred.
        """
        for mirror in self.__mirrors:
            if mirror.preferred:
                yield mirror

    @property
    def non_preferred(self):
        """
        Return a generator for all mirrors that are not preferred.
        """
        for mirror in self.__mirrors:
            if not mirror.preferred:
                yield mirror

    @property
    def all(self):
        """
        Return a generator for all mirrors.
        """
        for mirror in self.__mirrors:
            yield mirror

    def group(self, grabber):
        """
        Return a MirrorGroup object for the given grabber.

        The preferred mirrors come first in random order, followed by
        all other mirrors and finally the base mirror of the repository
        (if any).  A URL that is already on the list is not added again.
        """
        # Make sure the mirrorlist is up to date.
        self.update()

        # Add all preferred mirrors at the first place and shuffle them
        # that we will start at a random place.
        mirrors = [{ "mirror" : m.url.encode("utf-8") } for m in self.preferred]
        random.shuffle(mirrors)

        # URLs that are already on the list.
        seen = set(entry["mirror"] for entry in mirrors)

        # All other mirrors are added as well and will only be used if all
        # preferred mirrors did not work.
        candidates = [m.url for m in self.all]

        # Always add the base mirror if any.
        base_mirror = self.base_mirror
        if base_mirror:
            candidates.append(base_mirror.url)

        for url in candidates:
            url = url.encode("utf-8")
            if url in seen:
                continue

            seen.add(url)
            mirrors.append({ "mirror" : url })

        return MirrorGroup(grabber, mirrors)
369
370
371
class Downloader(object):
    """
    Bundles a PakfireGrabber with the mirror group built from a mirror list.
    """

    def __init__(self, mirrors, files):
        """
        mirrors -- object providing group(grabber) and a "pakfire"
                   attribute (presumably a MirrorList -- verify against
                   callers).
        files   -- accepted for interface compatibility; not used here.
        """
        # PakfireGrabber requires the pakfire (or config) object as its
        # first argument; calling it without any argument raised a
        # TypeError. The object is taken from the mirror list.
        self.grabber = PakfireGrabber(mirrors.pakfire)

        self.mirrorgroup = mirrors.group(self.grabber)
377
378