python/pakfire/downloader.py

   1 #!/usr/bin/python
   2 ###############################################################################
   3 #                                                                             #
   4 # Pakfire - The IPFire package management system                              #
   5 # Copyright (C) 2011 Pakfire development team                                 #
   6 #                                                                             #
   7 # This program is free software: you can redistribute it and/or modify        #
   8 # it under the terms of the GNU General Public License as published by        #
   9 # the Free Software Foundation, either version 3 of the License, or           #
  10 # (at your option) any later version.                                         #
  11 #                                                                             #
  12 # This program is distributed in the hope that it will be useful,             #
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  15 # GNU General Public License for more details.                                #
  16 #                                                                             #
  17 # You should have received a copy of the GNU General Public License           #
  18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  19 #                                                                             #
  20 ###############################################################################
  21
  22 import json
  23 import os
  24 import pycurl
  25 import random
  26
  27 import logging
  28 log = logging.getLogger("pakfire")
  29
  30 from config import _Config
  31
  32 import urlgrabber.grabber
  33 from urlgrabber.grabber import URLGrabber, URLGrabError
  34 from urlgrabber.mirror import MirrorGroup
  35 from urlgrabber.progress import TextMeter
  36
  37 from pakfire.constants import *
  38 from pakfire.i18n import _
  39
  40 class PakfireGrabber(URLGrabber):
  41         """
  42                 Class to make some modifications on the urlgrabber configuration.
  43         """
  44         def __init__(self, pakfire, *args, **kwargs):
  45                 kwargs.update({
  46                         "quote" : 0,
  47                         "user_agent" : "pakfire/%s" % PAKFIRE_VERSION,
  48
  49                         "ssl_verify_host" : False,
  50                         "ssl_verify_peer" : False,
  51                 })
  52
  53                 if isinstance(pakfire, _Config):
  54                         config = pakfire
  55                 else:
  56                         config = pakfire.config
  57                 self.config = config
  58
  59                 # Set throttle setting.
  60                 bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
  61                 if bandwidth_throttle:
  62                         try:
  63                                 bandwidth_throttle = int(bandwidth_throttle)
  64                         except ValueError:
  65                                 log.error("Configuration value for bandwidth_throttle is invalid.")
  66                                 bandwidth_throttle = 0
  67
  68                         kwargs.update({ "throttle" : bandwidth_throttle })
  69
  70                 # Configure HTTP proxy.
  71                 http_proxy = config.get("downloader", "http_proxy")
  72                 if http_proxy:
  73                         kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})
  74
  75                 URLGrabber.__init__(self, *args, **kwargs)
  76
  77         def check_offline_mode(self):
  78                 offline = self.config.get("downloader", "offline")
  79                 if not offline:
  80                         return
  81
  82                 raise OfflineModeError
  83
  84         def urlread(self, filename, *args, **kwargs):
  85                 self.check_offline_mode()
  86
  87                 # This is for older versions of urlgrabber which are packaged in Debian
  88                 # and Ubuntu and cannot handle filenames as a normal Python string but need
  89                 # a unicode string.
  90                 return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)
  91
  92         def urlopen(self, filename, *args, **kwargs):
  93                 self.check_offline_mode()
  94
  95                 # This is for older versions of urlgrabber which are packaged in Debian
  96                 # and Ubuntu and cannot handle filenames as a normal Python string but need
  97                 # a unicode string.
  98                 return URLGrabber.urlopen(self, filename.encode("utf-8"), *args, **kwargs)
  99
 100         def urlgrab(self, url, *args, **kwargs):
 101                 self.check_offline_mode()
 102
 103                 # This is for older versions of urlgrabber which are packaged in Debian
 104                 # and Ubuntu and cannot handle filenames as a normal Python string but need
 105                 # a unicode string.
 106                 return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
 107
 108
 109 class PackageDownloader(PakfireGrabber):
 110         def __init__(self, pakfire, *args, **kwargs):
 111                 kwargs.update({
 112                         "progress_obj" : TextMeter(),
 113                 })
 114
 115                 PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
 116
 117
 118 class MetadataDownloader(PakfireGrabber):
 119         def __init__(self, pakfire, *args, **kwargs):
 120                 kwargs.update({
 121                         "http_headers" : (('Pragma', 'no-cache'),),
 122                 })
 123
 124                 PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
 125
 126
 127 class DatabaseDownloader(PackageDownloader):
 128         def __init__(self, pakfire, *args, **kwargs):
 129                 kwargs.update({
 130                         "http_headers" : (('Pragma', 'no-cache'),),
 131                 })
 132
 133                 PackageDownloader.__init__(self, pakfire, *args, **kwargs)
 134
 135
 136 class SourceDownloader(object):
 137         def __init__(self, pakfire, mirrors=None):
 138                 self.pakfire = pakfire
 139
 140                 self.grabber = PakfireGrabber(
 141                         self.pakfire,
 142                         progress_obj = TextMeter(),
 143                 )
 144
 145                 if mirrors:
 146                         self.grabber = MirrorGroup(self.grabber,
 147                                 [{ "mirror" : m.encode("utf-8") } for m in mirrors])
 148
 149         def download(self, files):
 150                 existant_files = []
 151                 download_files = []
 152
 153                 for file in files:
 154                         filename = os.path.join(SOURCE_CACHE_DIR, file)
 155                         log.debug("Checking existance of %s..." % filename)
 156
 157                         if os.path.exists(filename) and os.path.getsize(filename):
 158                                 log.debug("...exists!")
 159                                 existant_files.append(filename)
 160                         else:
 161                                 log.debug("...does not exist!")
 162                                 download_files.append(filename)
 163
 164                 if download_files:
 165                         log.info(_("Downloading source files:"))
 166
 167                         if self.pakfire.offline:
 168                                 raise OfflineModeError, _("Cannot download source code in offline mode.")
 169
 170                         # Create source download directory.
 171                         if not os.path.exists(SOURCE_CACHE_DIR):
 172                                 os.makedirs(SOURCE_CACHE_DIR)
 173
 174                         for filename in download_files:
 175                                 try:
 176                                         self.grabber.urlgrab(os.path.basename(filename), filename=filename)
 177                                 except URLGrabError, e:
 178                                         # Remove partly downloaded file.
 179                                         try:
 180                                                 os.unlink(filename)
 181                                         except OSError:
 182                                                 pass
 183
 184                                         raise DownloadError, "%s %s" % (os.path.basename(filename), e)
 185
 186                                 # Check if the downloaded file was empty.
 187                                 if os.path.getsize(filename) == 0:
 188                                         # Remove the file and raise an error.
 189                                         os.unlink(filename)
 190
 191                                         raise DownloadError, _("Downloaded empty file: %s") \
 192                                                 % os.path.basename(filename)
 193
 194                         log.info("")
 195
 196                 return existant_files + download_files
 197
 198
 199 class Mirror(object):
 200         def __init__(self, url, location=None, preferred=False):
 201                 # Save URL of the mirror in full format
 202                 self.url = url
 203
 204                 # Save the location (if given)
 205                 self.location = location
 206
 207                 # Save preference
 208                 self.preferred = False
 209
 210
 211 class MirrorList(object):
 212         def __init__(self, pakfire, repo, mirrorlist):
 213                 self.pakfire = pakfire
 214                 self.repo = repo
 215
 216                 self.__mirrors = []
 217
 218                 # Save URL to more mirrors.
 219                 self.mirrorlist = mirrorlist
 220
 221         @property
 222         def base_mirror(self):
 223                 if not self.repo.baseurl:
 224                         return
 225
 226                 return Mirror(self.repo.baseurl, preferred=False)
 227
 228         @property
 229         def distro(self):
 230                 return self.repo.distro
 231
 232         @property
 233         def cache(self):
 234                 """
 235                         Shortcut to cache from repository.
 236                 """
 237                 return self.repo.cache
 238
 239         def update(self, force=False):
 240                 # XXX should this be allowed?
 241                 if not self.mirrorlist:
 242                         return
 243
 244                 # If the system is not online, we cannot download anything.
 245                 if self.pakfire.offline:
 246                         return
 247
 248                 log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
 249                 cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
 250                         self.repo.name, self.distro.arch, "mirrors")
 251
 252                 # Force the update if no mirrorlist is available.
 253                 if not self.cache.exists(cache_filename):
 254                         force = True
 255
 256                 if not force and self.cache.exists(cache_filename):
 257                         age = self.cache.age(cache_filename)
 258
 259                         # If the age could be determined and is higher than 24h,
 260                         # we force an update.
 261                         if age and age > TIME_24H:
 262                                 force = True
 263
 264                 if force:
 265                         g = MetadataDownloader(self.pakfire)
 266
 267                         try:
 268                                 mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
 269                         except URLGrabError, e:
 270                                 log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
 271                                 return
 272
 273                         # XXX check for empty files or damaged output
 274
 275                         # Save new mirror data to cache.
 276                         f = self.cache.open(cache_filename, "w")
 277                         f.write(mirrordata)
 278                         f.close()
 279
 280                 # Read mirrorlist from cache and parse it.
 281                 self.forget_mirrors()
 282                 with self.cache.open(cache_filename) as f:
 283                         self.parse_mirrordata(f.read())
 284
 285         def parse_mirrordata(self, data):
 286                 data = json.loads(data)
 287
 288                 for mirror in data["mirrors"]:
 289                         self.add_mirror(**mirror)
 290
 291         def add_mirror(self, *args, **kwargs):
 292                 mirror = Mirror(*args, **kwargs)
 293
 294                 self.__mirrors.append(mirror)
 295
 296         def forget_mirrors(self):
 297                 self.__mirrors = []
 298
 299         @property
 300         def preferred(self):
 301                 """
 302                         Return a generator for all mirrors that are preferred.
 303                 """
 304                 for mirror in self.__mirrors:
 305                         if mirror.preferred:
 306                                 yield mirror
 307
 308         @property
 309         def non_preferred(self):
 310                 """
 311                         Return a generator for all mirrors that are not preferred.
 312                 """
 313                 for mirror in self.__mirrors:
 314                         if not mirror.preferred:
 315                                 yield mirror
 316
 317         @property
 318         def all(self):
 319                 """
 320                         Return a generator for all mirrors.
 321                 """
 322                 for mirror in self.__mirrors:
 323                         yield mirror
 324
 325         def group(self, grabber):
 326                 """
 327                         Return a MirrorGroup object for the given grabber.
 328                 """
 329                 # Make sure the mirrorlist is up to date.
 330                 self.update()
 331
 332                 # A list of mirrors that is passed to MirrorGroup.
 333                 mirrors = []
 334
 335                 # Add all preferred mirrors at the first place and shuffle them
 336                 # that we will start at a random place.
 337                 for mirror in self.preferred:
 338                         mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
 339                 random.shuffle(mirrors)
 340
 341                 # All other mirrors are added as well and will only be used if all
 342                 # preferred mirrors did not work.
 343                 for mirror in self.all:
 344                         mirror = { "mirror" : mirror.url.encode("utf-8") }
 345                         if mirror in mirrors:
 346                                 continue
 347
 348                         mirrors.append(mirror)
 349
 350                 # Always add the base mirror if any.
 351                 base_mirror = self.base_mirror
 352                 if base_mirror:
 353                         mirror = { "mirror" : base_mirror.url.encode("utf-8") }
 354                         if not mirror in mirrors:
 355                                 mirrors.append(mirror)
 356
 357                 return MirrorGroup(grabber, mirrors)
 358
 359
 360
 361 class Downloader(object):
 362         def __init__(self, mirrors, files):
 363                 self.grabber = PakfireGrabber()
 364
 365                 self.mirrorgroup = mirrors.group(self.grabber)
 366
 367