#!/usr/bin/python
###############################################################################
#                                                                             #
# Pakfire - The IPFire package management system                              #
# Copyright (C) 2011 Pakfire development team                                 #
#                                                                             #
# This program is free software: you can redistribute it and/or modify       #
# it under the terms of the GNU General Public License as published by       #
# the Free Software Foundation, either version 3 of the License, or          #
# (at your option) any later version.                                        #
#                                                                             #
# This program is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               #
# GNU General Public License for more details.                               #
#                                                                             #
# You should have received a copy of the GNU General Public License          #
# along with this program. If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import json
import os
import pycurl
import random

import logging
log = logging.getLogger("pakfire")

from config import _Config

import urlgrabber.grabber
from urlgrabber.grabber import URLGrabber, URLGrabError
from urlgrabber.mirror import MirrorGroup
from urlgrabber.progress import TextMeter

from pakfire.constants import *
from pakfire.i18n import _

class PakfireGrabber(URLGrabber):
	"""
		Class to make some modifications to the urlgrabber configuration.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"quote" : 0,
			"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,

			"ssl_verify_host" : False,
			"ssl_verify_peer" : False,
		})

		if isinstance(pakfire, _Config):
			config = pakfire
		else:
			config = pakfire.config
		self.config = config

		# Set throttle setting.
		bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
		if bandwidth_throttle:
			try:
				bandwidth_throttle = int(bandwidth_throttle)
			except ValueError:
				log.error("Configuration value for bandwidth_throttle is invalid.")
				bandwidth_throttle = 0

			kwargs.update({ "throttle" : bandwidth_throttle })

		# Configure HTTP proxy.
		http_proxy = config.get("downloader", "http_proxy")
		if http_proxy:
			kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

		URLGrabber.__init__(self, *args, **kwargs)

	def fork(self):
		"""
			Reset the Curl object after forking a process.
		"""
		# XXX this is a very ugly hack and fiddles around with the internals
		# of urlgrabber. We should not touch these, but apparently nobody
		# else uses multiple threads or processes to talk to their servers.
		# So we simply replace Curl with a new instance without closing
		# the old one. This should be fixed in urlgrabber and/or pycurl.
		urlgrabber.grabber._curl_cache = pycurl.Curl()
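
	# A minimal sketch of the intended call pattern (an assumption based on
	# the docstring above, not something this file spells out): a process that
	# forks calls fork() on its grabber in the child, so that the child does
	# not reuse the parent's cached Curl handle.
	#
	#   pid = os.fork()
	#   if pid == 0:
	#       grabber.fork()
	#       grabber.urlgrab(url, filename=path)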

	def check_offline_mode(self):
		offline = self.config.get("downloader", "offline")
		if not offline:
			return

		raise OfflineModeError

	def urlread(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# This is for older versions of urlgrabber which are packaged in Debian
		# and Ubuntu and cannot handle filenames as a normal Python string but need
		# a unicode string.
		return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)

	def urlopen(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# However, urlopen requires the filename to be an ordinary string object.
		filename = str(filename)

		return URLGrabber.urlopen(self, filename, *args, **kwargs)
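
# A minimal sketch of the configuration PakfireGrabber reads. The section and
# option names follow the config.get() calls above; the file syntax and the
# values are assumptions, not taken from this file:
#
#   [downloader]
#   offline = false
#   bandwidth_throttle = 102400
#   http_proxy = http://proxy.example.org:3128
#
# And a sketch of basic use, assuming a configured Pakfire instance is
# available as `pakfire`:
#
#   grabber = PakfireGrabber(pakfire)
#   metadata = grabber.urlread("http://example.org/repodata/repomd.json")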


class PackageDownloader(PakfireGrabber):
	"""
		Grabber that shows a text progress meter while downloading packages.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"progress_obj" : TextMeter(),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class MetadataDownloader(PakfireGrabber):
	"""
		Grabber that sends "Pragma: no-cache" so metadata is not served
		from intermediate caches.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class DatabaseDownloader(PackageDownloader):
	"""
		Grabber for package databases: shows a progress meter and bypasses
		intermediate caches like MetadataDownloader does.
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PackageDownloader.__init__(self, pakfire, *args, **kwargs)


class SourceDownloader(object):
	def __init__(self, pakfire, mirrors=None):
		self.pakfire = pakfire

		self.grabber = PakfireGrabber(
			self.pakfire,
			progress_obj = TextMeter(),
		)

		if mirrors:
			self.grabber = MirrorGroup(self.grabber,
				[{ "mirror" : m.encode("utf-8") } for m in mirrors])

	def download(self, files):
		existing_files = []
		download_files = []

		for file in files:
			filename = os.path.join(SOURCE_CACHE_DIR, file)
			log.debug("Checking existence of %s..." % filename)

			if os.path.exists(filename) and os.path.getsize(filename):
				log.debug("...exists!")
				existing_files.append(filename)
			else:
				log.debug("...does not exist!")
				download_files.append(filename)

		if download_files:
			log.info(_("Downloading source files:"))

			if self.pakfire.offline:
				raise OfflineModeError(_("Cannot download source code in offline mode."))

			# Create the source download directory.
			if not os.path.exists(SOURCE_CACHE_DIR):
				os.makedirs(SOURCE_CACHE_DIR)

			for filename in download_files:
				try:
					self.grabber.urlgrab(os.path.basename(filename), filename=filename)
				except URLGrabError as e:
					# Remove the partly downloaded file.
					try:
						os.unlink(filename)
					except OSError:
						pass

					raise DownloadError("%s %s" % (os.path.basename(filename), e))

				# Check if the downloaded file was empty.
				if os.path.getsize(filename) == 0:
					# Remove the file and raise an error.
					os.unlink(filename)

					raise DownloadError(_("Downloaded empty file: %s")
						% os.path.basename(filename))

			log.info("")

		return existing_files + download_files
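
# A minimal usage sketch, assuming a Pakfire instance `pakfire` and that the
# listed files exist on the given mirror (the URL and file name are made up):
#
#   downloader = SourceDownloader(pakfire, mirrors=["http://source.example.org/pub/"])
#   tarballs = downloader.download(["foo-1.0.tar.gz"])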


class Mirror(object):
	def __init__(self, url, location=None, preferred=False):
		# Save the URL of the mirror in full format.
		self.url = url

		# Save the location (if given).
		self.location = location

		# Save the preference.
		self.preferred = preferred


class MirrorList(object):
	def __init__(self, pakfire, repo, mirrorlist):
		self.pakfire = pakfire
		self.repo = repo

		self.__mirrors = []

		# Save the URL that points to more mirrors.
		self.mirrorlist = mirrorlist

	@property
	def base_mirror(self):
		if not self.repo.baseurl:
			return

		return Mirror(self.repo.baseurl, preferred=False)

	@property
	def distro(self):
		return self.repo.distro

	@property
	def cache(self):
		"""
			Shortcut to the cache of the repository.
		"""
		return self.repo.cache

	def update(self, force=False):
		# XXX should this be allowed?
		if not self.mirrorlist:
			return

		# If the system is not online, we cannot download anything.
		if self.pakfire.offline:
			return

		log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
		cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
			self.repo.name, self.distro.arch, "mirrors")

		# Force the update if no mirrorlist is available.
		if not self.cache.exists(cache_filename):
			force = True

		if not force and self.cache.exists(cache_filename):
			age = self.cache.age(cache_filename)

			# If the age could be determined and is higher than 24h,
			# we force an update.
			if age and age > TIME_24H:
				force = True

		if force:
			g = MetadataDownloader(self.pakfire)

			try:
				mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
			except URLGrabError as e:
				log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
				return

			# XXX check for empty files or damaged output

			# Save the new mirror data to the cache.
			f = self.cache.open(cache_filename, "w")
			f.write(mirrordata)
			f.close()

		# Read the mirrorlist from the cache and parse it.
		self.forget_mirrors()
		with self.cache.open(cache_filename) as f:
			self.parse_mirrordata(f.read())

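	# The mirrorlist is expected to be a JSON document with a "mirrors" list
	# whose entries are passed as keyword arguments to Mirror() via
	# add_mirror() below. A made-up example of such a document:
	#
	#   { "mirrors" : [
	#       { "url" : "http://mirror1.example.org/", "location" : "de", "preferred" : true },
	#       { "url" : "http://mirror2.example.org/" }
	#   ]}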
	def parse_mirrordata(self, data):
		data = json.loads(data)

		for mirror in data["mirrors"]:
			self.add_mirror(**mirror)

	def add_mirror(self, *args, **kwargs):
		mirror = Mirror(*args, **kwargs)

		self.__mirrors.append(mirror)

	def forget_mirrors(self):
		self.__mirrors = []

	@property
	def preferred(self):
		"""
			Return a generator for all mirrors that are preferred.
		"""
		for mirror in self.__mirrors:
			if mirror.preferred:
				yield mirror

	@property
	def non_preferred(self):
		"""
			Return a generator for all mirrors that are not preferred.
		"""
		for mirror in self.__mirrors:
			if not mirror.preferred:
				yield mirror

	@property
	def all(self):
		"""
			Return a generator for all mirrors.
		"""
		for mirror in self.__mirrors:
			yield mirror

	def group(self, grabber):
		"""
			Return a MirrorGroup object for the given grabber.
		"""
		# Make sure the mirrorlist is up to date.
		self.update()

		# A list of mirrors that is passed to MirrorGroup.
		mirrors = []

		# Add all preferred mirrors first and shuffle them so that we
		# start at a random one.
		for mirror in self.preferred:
			mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
		random.shuffle(mirrors)

		# All other mirrors are added as well and will only be used if the
		# preferred mirrors did not work.
		for mirror in self.all:
			mirror = { "mirror" : mirror.url.encode("utf-8") }
			if mirror in mirrors:
				continue

			mirrors.append(mirror)

		# Always add the base mirror, if there is any.
		base_mirror = self.base_mirror
		if base_mirror:
			mirror = { "mirror" : base_mirror.url.encode("utf-8") }
			if mirror not in mirrors:
				mirrors.append(mirror)

		return MirrorGroup(grabber, mirrors)
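
# A minimal sketch of how the mirror handling fits together, assuming a
# repository object `repo` that carries a mirrorlist URL (names and the
# package path are made up):
#
#   mirrors = MirrorList(pakfire, repo, repo.mirrorlist)
#   grabber = PackageDownloader(pakfire)
#   group = mirrors.group(grabber)
#   group.urlgrab("packages/foo-1.0.pkg", filename=local_path)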


class Downloader(object):
	def __init__(self, mirrors, files):
		self.grabber = PakfireGrabber()

		self.mirrorgroup = mirrors.group(self.grabber)