python/pakfire/downloader.py

   1 #!/usr/bin/python
   2 ###############################################################################
   3 #                                                                             #
   4 # Pakfire - The IPFire package management system                              #
   5 # Copyright (C) 2011 Pakfire development team                                 #
   6 #                                                                             #
   7 # This program is free software: you can redistribute it and/or modify        #
   8 # it under the terms of the GNU General Public License as published by        #
   9 # the Free Software Foundation, either version 3 of the License, or           #
  10 # (at your option) any later version.                                         #
  11 #                                                                             #
  12 # This program is distributed in the hope that it will be useful,             #
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of              #
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
  15 # GNU General Public License for more details.                                #
  16 #                                                                             #
  17 # You should have received a copy of the GNU General Public License           #
  18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
  19 #                                                                             #
  20 ###############################################################################
  21
  22 import json
  23 import os
  24 import random
  25
  26 import logging
  27 log = logging.getLogger("pakfire")
  28
  29 from config import Config
  30
  31 from urlgrabber.grabber import URLGrabber, URLGrabError
  32 from urlgrabber.mirror import MirrorGroup
  33 from urlgrabber.progress import TextMeter
  34
  35 from pakfire.constants import *
  36 from pakfire.i18n import _
  37
  38 class PakfireGrabber(URLGrabber):
  39         """
  40                 Class to make some modifications on the urlgrabber configuration.
  41         """
  42         def __init__(self, pakfire, *args, **kwargs):
  43                 kwargs.update({
  44                         "quote" : 0,
  45                         "user_agent" : "pakfire/%s" % PAKFIRE_VERSION,
  46                 })
  47
  48                 if isinstance(pakfire, Config):
  49                         config = pakfire
  50                 else:
  51                         config = pakfire.config
  52
  53                 if config.get("offline"):
  54                         raise OfflineModeError, "Cannot use %s in offline mode." % self.__class__.__name__
  55
  56                 # Set throttle setting.
  57                 bandwidth_throttle = config.get("bandwidth_throttle")
  58                 if bandwidth_throttle:
  59                         try:
  60                                 bandwidth_throttle = int(bandwidth_throttle)
  61                         except ValueError:
  62                                 log.error("Configuration value for bandwidth_throttle is invalid.")
  63                                 bandwidth_throttle = 0
  64
  65                         kwargs.update({ "throttle" : bandwidth_throttle })
  66
  67                 # Configure HTTP proxy.
  68                 http_proxy = config.get("http_proxy")
  69                 if http_proxy:
  70                         kwargs.update({ "proxies" : { "http" : http_proxy }})
  71
  72                 URLGrabber.__init__(self, *args, **kwargs)
  73
  74         def urlread(self, filename, *args, **kwargs):
  75                 # This is for older versions of urlgrabber which are packaged in Debian
  76                 # and Ubuntu and cannot handle filenames as a normal Python string but need
  77                 # a unicode string.
  78                 return URLGrabber.urlread(self, filename.encode("utf-8"), *args, **kwargs)
  79
  80
  81 class PackageDownloader(PakfireGrabber):
  82         def __init__(self, pakfire, *args, **kwargs):
  83                 kwargs.update({
  84                                 "progress_obj" : TextMeter(),
  85                 })
  86
  87                 PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
  88
  89
  90 class MetadataDownloader(PakfireGrabber):
  91         def __init__(self, pakfire, *args, **kwargs):
  92                 kwargs.update({
  93                         "http_headers" : (('Pragma', 'no-cache'),),
  94                 })
  95
  96                 PakfireGrabber.__init__(self, pakfire, *args, **kwargs)
  97
  98
  99 class DatabaseDownloader(PackageDownloader):
 100         def __init__(self, pakfire, *args, **kwargs):
 101                 kwargs.update({
 102                         "http_headers" : (('Pragma', 'no-cache'),),
 103                 })
 104
 105                 PackageDownloader.__init__(self, pakfire, *args, **kwargs)
 106
 107
 108 class SourceDownloader(object):
 109         def __init__(self, pakfire, mirrors=None):
 110                 self.pakfire = pakfire
 111
 112                 self.grabber = PakfireGrabber(
 113                         self.pakfire,
 114                         progress_obj = TextMeter(),
 115                 )
 116
 117                 if mirrors:
 118                         self.grabber = MirrorGroup(self.grabber,
 119                                 [{ "mirror" : m.encode("utf-8") } for m in mirrors])
 120
 121         def download(self, files):
 122                 existant_files = []
 123                 download_files = []
 124
 125                 for file in files:
 126                         filename = os.path.join(SOURCE_CACHE_DIR, file)
 127
 128                         if os.path.exists(filename):
 129                                 existant_files.append(filename)
 130                         else:
 131                                 download_files.append(filename)
 132
 133                 if download_files:
 134                         log.info(_("Downloading source files:"))
 135
 136                         # Create source download directory.
 137                         if not os.path.exists(SOURCE_CACHE_DIR):
 138                                 os.makedirs(SOURCE_CACHE_DIR)
 139
 140                         for filename in download_files:
 141                                 try:
 142                                         self.grabber.urlgrab(os.path.basename(filename), filename=filename)
 143                                 except URLGrabError, e:
 144                                         # Remove partly downloaded file.
 145                                         try:
 146                                                 os.unlink(filename)
 147                                         except OSError:
 148                                                 pass
 149
 150                                         raise DownloadError, "%s %s" % (os.path.basename(filename), e)
 151
 152                         log.info("")
 153
 154                 return existant_files + download_files
 155
 156
 157 class Mirror(object):
 158         def __init__(self, url, location=None, preferred=False):
 159                 # Save URL of the mirror in full format
 160                 self.url = url
 161
 162                 # Save the location (if given)
 163                 self.location = location
 164
 165                 # Save preference
 166                 self.preferred = False
 167
 168
 169 class MirrorList(object):
 170         def __init__(self, pakfire, repo):
 171                 self.pakfire = pakfire
 172                 self.repo = repo
 173
 174                 self.__mirrors = []
 175
 176                 # Save URL to more mirrors.
 177                 self.mirrorlist = repo.mirrorlist
 178
 179                 self.update(force=False)
 180
 181         @property
 182         def cache(self):
 183                 """
 184                         Shortcut to cache from repository.
 185                 """
 186                 return self.repo.cache
 187
 188         def update(self, force=False):
 189                 # XXX should this be allowed?
 190                 if not self.mirrorlist:
 191                         return
 192
 193                 # If the system is not online, we cannot download anything.
 194                 if self.pakfire.offline:
 195                         return
 196
 197                 log.debug("Updating mirrorlist for repository '%s' (force=%s)" % (self.repo.name, force))
 198
 199                 cache_filename = "mirrors/mirrorlist"
 200
 201                 # Force the update if no mirrorlist is available.
 202                 if not self.cache.exists(cache_filename):
 203                         force = True
 204
 205                 if not force and self.cache.exists(cache_filename):
 206                         age = self.cache.age(cache_filename)
 207
 208                         # If the age could be determined and is higher than 24h,
 209                         # we force an update.
 210                         if age and age > TIME_24H:
 211                                 force = True
 212
 213                 if force:
 214                         g = MetadataDownloader(self.pakfire)
 215
 216                         try:
 217                                 mirrordata = g.urlread(self.mirrorlist, limit=MIRRORLIST_MAXSIZE)
 218                         except URLGrabError, e:
 219                                 log.warning("Could not update the mirrorlist for repo '%s': %s" % (self.repo.name, e))
 220                                 return
 221
 222                         # XXX check for empty files or damaged output
 223
 224                         # Save new mirror data to cache.
 225                         f = self.cache.open(cache_filename, "w")
 226                         f.write(mirrordata)
 227                         f.close()
 228
 229                 # Read mirrorlist from cache and parse it.
 230                 with self.cache.open(cache_filename) as f:
 231                         self.parse_mirrordata(f.read())
 232
 233         def parse_mirrordata(self, data):
 234                 data = json.loads(data)
 235
 236                 for mirror in data["mirrors"]:
 237                         self.add_mirror(**mirror)
 238
 239         def add_mirror(self, *args, **kwargs):
 240                 mirror = Mirror(*args, **kwargs)
 241
 242                 self.__mirrors.append(mirror)
 243
 244         @property
 245         def preferred(self):
 246                 """
 247                         Return a generator for all mirrors that are preferred.
 248                 """
 249                 for mirror in self.__mirrors:
 250                         if mirror.preferred:
 251                                 yield mirror
 252
 253         @property
 254         def non_preferred(self):
 255                 """
 256                         Return a generator for all mirrors that are not preferred.
 257                 """
 258                 for mirror in self.__mirrors:
 259                         if not mirror.preferred:
 260                                 yield mirror
 261
 262         @property
 263         def all(self):
 264                 """
 265                         Return a generator for all mirrors.
 266                 """
 267                 for mirror in self.__mirrors:
 268                         yield mirror
 269
 270         def group(self, grabber):
 271                 """
 272                         Return a MirrorGroup object for the given grabber.
 273                 """
 274                 # A list of mirrors that is passed to MirrorGroup.
 275                 mirrors = []
 276
 277                 # Add all preferred mirrors at the first place and shuffle them
 278                 # that we will start at a random place.
 279                 for mirror in self.preferred:
 280                         mirrors.append(mirror.url.encode("utf-8"))
 281                 random.shuffle(mirrors)
 282
 283                 # All other mirrors are added as well and will only be used if all
 284                 # preferred mirrors did not work.
 285                 for mirror in self.all:
 286                         if mirror.url in mirrors:
 287                                 continue
 288
 289                         mirrors.append({ "mirror" : mirror.url.encode("utf-8") })
 290
 291                 return MirrorGroup(grabber, mirrors)
 292
 293
 294
 295 class Downloader(object):
 296         def __init__(self, mirrors, files):
 297                 self.grabber = PakfireGrabber()
 298
 299                 self.mirrorgroup = mirrors.group(self.grabber)
 300
 301