#!/usr/bin/python
###############################################################################
#                                                                             #
# Pakfire - The IPFire package management system                              #
# Copyright (C) 2011 Pakfire development team                                 #
#                                                                             #
# This program is free software: you can redistribute it and/or modify       #
# it under the terms of the GNU General Public License as published by       #
# the Free Software Foundation, either version 3 of the License, or          #
# (at your option) any later version.                                        #
#                                                                             #
# This program is distributed in the hope that it will be useful,            #
# but WITHOUT ANY WARRANTY; without even the implied warranty of             #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               #
# GNU General Public License for more details.                               #
#                                                                             #
# You should have received a copy of the GNU General Public License          #
# along with this program. If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

import json
import os
import random

import logging
log = logging.getLogger("pakfire")

from config import _Config

from urlgrabber.grabber import URLGrabber, URLGrabError
from urlgrabber.mirror import MirrorGroup
from urlgrabber.progress import TextMeter

from pakfire.constants import *
from pakfire.i18n import _

class PakfireGrabber(URLGrabber):
	"""
		URLGrabber subclass that applies Pakfire's downloader configuration
		(user agent, SSL settings, bandwidth throttle and proxies).
	"""
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"quote" : 0,
			"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,

			"ssl_verify_host" : False,
			"ssl_verify_peer" : False,
		})

		if isinstance(pakfire, _Config):
			config = pakfire
		else:
			config = pakfire.config
		self.config = config

		# Set the bandwidth throttle, if configured.
		bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
		if bandwidth_throttle:
			try:
				bandwidth_throttle = int(bandwidth_throttle)
			except ValueError:
				log.error("Configuration value for bandwidth_throttle is invalid.")
				bandwidth_throttle = 0

			kwargs.update({ "throttle" : bandwidth_throttle })

		# Configure the HTTP proxy (used for HTTPS as well).
		http_proxy = config.get("downloader", "http_proxy")
		if http_proxy:
			kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

		URLGrabber.__init__(self, *args, **kwargs)

	def check_offline_mode(self):
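		"""
			Raise OfflineModeError if the configuration requests offline
			operation, so that no network access is attempted.
		"""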
		offline = self.config.get("downloader", "offline")
		if not offline:
			return

		raise OfflineModeError

	def urlread(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# Older versions of urlgrabber, as packaged in Debian and Ubuntu,
		# cannot handle unicode filenames, so pass a UTF-8 encoded byte
		# string instead.
		return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)

	def urlopen(self, filename, *args, **kwargs):
		self.check_offline_mode()

		# urlopen also requires the filename to be an ordinary string object.
		filename = str(filename)

		return URLGrabber.urlopen(self, filename, *args, **kwargs)


class PackageDownloader(PakfireGrabber):
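	"""
		Downloader for package files that displays a text progress meter.
	"""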
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"progress_obj" : TextMeter(),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class MetadataDownloader(PakfireGrabber):
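	"""
		Downloader for repository metadata that sends a "Pragma: no-cache"
		header, asking intermediate caches not to serve stale copies.
	"""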
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PakfireGrabber.__init__(self, pakfire, *args, **kwargs)


class DatabaseDownloader(PackageDownloader):
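	"""
		Downloader for package databases: shows a progress meter like
		PackageDownloader and additionally sends a "Pragma: no-cache" header.
	"""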
	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"http_headers" : (('Pragma', 'no-cache'),),
		})

		PackageDownloader.__init__(self, pakfire, *args, **kwargs)


class SourceDownloader(object):
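	"""
		Downloads source files into the local source cache, optionally
		spreading the requests over a group of mirrors.
	"""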
	def __init__(self, pakfire, mirrors=None):
		self.pakfire = pakfire

		self.grabber = PakfireGrabber(
			self.pakfire,
			progress_obj = TextMeter(),
		)

		if mirrors:
			self.grabber = MirrorGroup(self.grabber,
				[{ "mirror" : m.encode("utf-8") } for m in mirrors])

	def download(self, files):
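		"""
			Return the local paths of all requested files, downloading any
			that are not yet present (and non-empty) in the source cache.
		"""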
		existing_files = []
		download_files = []

		for file in files:
			filename = os.path.join(SOURCE_CACHE_DIR, file)
			log.debug("Checking existence of %s..." % filename)

			if os.path.exists(filename) and os.path.getsize(filename):
				log.debug("...exists!")
				existing_files.append(filename)
			else:
				log.debug("...does not exist!")
				download_files.append(filename)

		if download_files:
			log.info(_("Downloading source files:"))

			if self.pakfire.offline:
				raise OfflineModeError, _("Cannot download source code in offline mode.")

			# Create the source download directory.
			if not os.path.exists(SOURCE_CACHE_DIR):
				os.makedirs(SOURCE_CACHE_DIR)

			for filename in download_files:
				try:
					self.grabber.urlgrab(os.path.basename(filename), filename=filename)
				except URLGrabError, e:
					# Remove the partially downloaded file.
					try:
						os.unlink(filename)
					except OSError:
						pass

					raise DownloadError, "%s %s" % (os.path.basename(filename), e)

				# Check if the downloaded file was empty.
				if os.path.getsize(filename) == 0:
					# Remove the file and raise an error.
					os.unlink(filename)

					raise DownloadError, _("Downloaded empty file: %s") \
						% os.path.basename(filename)

			log.info("")

		return existing_files + download_files


class Mirror(object):
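	"""
		A single download mirror, described by its URL, an optional
		location and a preference flag.
	"""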
	def __init__(self, url, location=None, preferred=False):
		# Save URL of the mirror in full format
		self.url = url

		# Save the location (if given)
		self.location = location

		# Save preference
		self.preferred = preferred


class MirrorList(object):
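	"""
		Maintains the list of mirrors for a repository: the mirror data is
		downloaded from the mirrorlist URL, cached on disk and refreshed
		when it becomes too old.
	"""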
	def __init__(self, pakfire, repo, mirrorlist):
		self.pakfire = pakfire
		self.repo = repo

		self.__mirrors = []

		# Save URL to more mirrors.
		self.mirrorlist = mirrorlist

	@property
	def base_mirror(self):
		if not self.repo.baseurl:
			return

		return Mirror(self.repo.baseurl, preferred=False)

	@property
	def distro(self):
		return self.repo.distro

	@property
	def cache(self):
		"""
			Shortcut to cache from repository.
		"""
		return self.repo.cache

	def update(self, force=False):
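		"""
			Download and cache the mirror data if it is missing or older
			than 24 hours (or if force is given), then re-read the list of
			mirrors from the cache. Does nothing without a mirrorlist URL
			or in offline mode.
		"""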
		# XXX should this be allowed?
		if not self.mirrorlist:
			return

		# If the system is not online, we cannot download anything.
		if self.pakfire.offline:
			return

		log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
		cache_filename = os.path.join("repodata", self.distro.sname, self.distro.release,
			self.repo.name, self.distro.arch, "mirrors")

		# Force the update if no mirrorlist is available.
		if not self.cache.exists(cache_filename):
			force = True

		if not force and self.cache.exists(cache_filename):
			age = self.cache.age(cache_filename)

			# If the age could be determined and is higher than 24h,
			# we force an update.
			if age and age > TIME_24H:
				force = True

		if force:
			g = MetadataDownloader(self.pakfire)

			try:
				mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
			except URLGrabError, e:
				log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
				return

			# XXX check for empty files or damaged output

			# Save new mirror data to cache.
			f = self.cache.open(cache_filename, "w")
			f.write(mirrordata)
			f.close()

		# Read mirrorlist from cache and parse it.
		self.forget_mirrors()
		with self.cache.open(cache_filename) as f:
			self.parse_mirrordata(f.read())

	def parse_mirrordata(self, data):
		data = json.loads(data)

		for mirror in data["mirrors"]:
			self.add_mirror(**mirror)

	def add_mirror(self, *args, **kwargs):
		mirror = Mirror(*args, **kwargs)

		self.__mirrors.append(mirror)

	def forget_mirrors(self):
		self.__mirrors = []

	@property
	def preferred(self):
		"""
			Return a generator for all mirrors that are preferred.
		"""
		for mirror in self.__mirrors:
			if mirror.preferred:
				yield mirror

	@property
	def non_preferred(self):
		"""
			Return a generator for all mirrors that are not preferred.
		"""
		for mirror in self.__mirrors:
			if not mirror.preferred:
				yield mirror

	@property
	def all(self):
		"""
			Return a generator for all mirrors.
		"""
		for mirror in self.__mirrors:
			yield mirror

	def group(self, grabber):
		"""
			Return a MirrorGroup object for the given grabber.
		"""
		# Make sure the mirrorlist is up to date.
		self.update()

		# A list of mirrors that is passed to MirrorGroup.
		mirrors = []

		# Add all preferred mirrors first and shuffle them, so that we
		# start at a random one.
		for mirror in self.preferred:
			mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
		random.shuffle(mirrors)

		# All other mirrors are appended afterwards and will only be used
		# if the preferred mirrors fail.
		for mirror in self.all:
			mirror = { "mirror" : mirror.url.encode("utf-8") }
			if mirror in mirrors:
				continue

			mirrors.append(mirror)

		# Always add the base mirror, if there is any.
		base_mirror = self.base_mirror
		if base_mirror:
			mirror = { "mirror" : base_mirror.url.encode("utf-8") }
			if mirror not in mirrors:
				mirrors.append(mirror)

		return MirrorGroup(grabber, mirrors)


class Downloader(object):
	def __init__(self, mirrors, files):
		self.grabber = PakfireGrabber(mirrors.pakfire)

		self.mirrorgroup = mirrors.group(self.grabber)
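
# Illustrative sketch only (not executed): assuming "pakfire" is a Pakfire or
# configuration object and "repo" is a repository object that provides a
# mirrorlist URL, a download via the mirror infrastructure could look roughly
# like this. The mirrorlist URL and file names below are made up.
#
#   mirrorlist = MirrorList(pakfire, repo, "https://mirrors.example.org/mirrorlist")
#   grabber = PackageDownloader(pakfire)
#   group = mirrorlist.group(grabber)
#   group.urlgrab("packages/example-1.0.pfm", filename="/tmp/example-1.0.pfm")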