git.ipfire.org Git - pakfire.git/commitdiff
Refactor repository downloading code
author Michael Tremer <michael.tremer@ipfire.org>
Fri, 5 May 2017 17:10:20 +0000 (19:10 +0200)
committer Michael Tremer <michael.tremer@ipfire.org>
Fri, 5 May 2017 17:10:20 +0000 (19:10 +0200)
This was always a bit messy and hard to use in other parts
of pakfire. Now, repositories refresh themselves when
refresh() is called.

Downloading code has been tidied up and everything should
be atomic.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
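
For context, a rough sketch of how the new flow looks from the caller's side (Pakfire is taken from src/pakfire/base.py below; constructor arguments are omitted because they are not part of this diff):

    from pakfire.base import Pakfire

    p = Pakfire()   # arguments omitted; not shown in this commit

    with p as ctx:
        # Entering the context dumps the configuration and calls
        # repos.refresh(), so every enabled repository is refreshed here.
        pass        # __exit__ is now a no-op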
src/pakfire/base.py
src/pakfire/repository/__init__.py
src/pakfire/repository/base.py
src/pakfire/repository/metadata.py
src/pakfire/repository/remote.py
src/pakfire/util.py

index 04cc678aa4ca1f78d101f6b7a7ba003c39edbb7a..bde775cf4db746a24bbd0fa50f48a188057692c7 100644 (file)
@@ -99,14 +99,13 @@ class Pakfire(object):
                # Dump the configuration when we enter the context
                self.config.dump()
 
-               # Initialize repositories
-               self.repos.initialize()
+               # Refresh repositories
+               self.repos.refresh()
 
                return PakfireContext(self)
 
        def __exit__(self, type, value, traceback):
-               # Close repositories
-               self.repos.shutdown()
+               pass
 
        @property
        def offline(self):
index 9ac38152ece2c9a80576ae5b7a971fde1b2cf8ac..eb59d44ee8088e421ed3a080a4a5cfc99144c5d1 100644 (file)
@@ -74,35 +74,13 @@ class Repositories(object):
                """
                return len([r for r in self if r.enabled])
 
-       @property
-       def initialized(self):
-               """
-                       Indicates if all repositories are initialized.
-               """
-               for repo in self:
-                       if not repo.opened:
-                               return False
-
-               return True
-
-       def initialize(self):
-               # Nothing to do, if everything is already up to date.
-               if self.initialized:
-                       return
-
-               log.info(_("Initializing repositories..."))
-               for repo in self:
-                       repo.open()
-
-               # Empty line.
-               log.info("")
-
-       def shutdown(self):
+       def refresh(self):
                """
-                       Shuts down all repositores.
+                       Refreshes all repositories
                """
                for repo in self:
-                       repo.close()
+                       if repo.enabled:
+                               repo.refresh()
 
        @property
        def distro(self):
index 9dae65db30fd0252d3602006010c6d0b8b52f080..52e697e140e5d638e6f50023790f331281bf1c2b 100644 (file)
@@ -27,23 +27,37 @@ from .. import _pakfire
 from . import packages
 
 class RepositoryFactory(_pakfire.Repo):
-       def __init__(self, pakfire, name, description):
-               _pakfire.Repo.__init__(self, pakfire.pool, name)
+       def __init__(self, pakfire, name, description, **kwargs):
                self.pakfire = pakfire
 
-               self.description = description
+               # Inherit
+               _pakfire.Repo.__init__(self, self.pakfire.pool, name)
 
-               # Some repositories may have a cache.
-               self.cache = None
+               # Save description
+               self.description = description
 
                log.debug("Initialized new repository: %s" % self)
 
                # Marks if this repository has been opened.
                self.opened = False
 
+               self.init(**kwargs)
+
        def __repr__(self):
                return "<%s %s>" % (self.__class__.__name__, self.name)
 
+       def init(self, **kwargs):
+       pass # To be overridden by inheriting classes
+
+       def refresh(self):
+               """
+                       Called to refresh the repository metadata.
+
+                       This is probably only handy for remote repositories
+                       that need to re-download data.
+               """
+               pass
+
        @property
        def local(self):
                """
index 9fabce0770b1317e536550cf9435210e81909742..d1b7a3d9065459ae64fd82c0633f3c06516ce0d8 100644 (file)
@@ -70,20 +70,12 @@ class Metadata(object):
                with open(filename) as f:
                        self.parse(f.read())
 
-       def save(self, filename=None):
+       def save(self, fp):
                """
                        Save all data to a file that could be exported to a
                        remote repository.
                """
-               if not filename:
-                       filename = self.filename
-
-               f = open(filename, "w")
-
-               # Write all data to the fileobj.
-               json.dump(self._data, f, indent=2)
-
-               f.close()
+               json.dump(self._data, fp, indent=2)
 
        @property
        def version(self):
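
save() now writes to a file object that the caller opens, instead of opening a path itself, which keeps the decision about where and how the data lands (for example, atomically into the repository cache) with the caller. A short sketch of the new calling convention (the Metadata construction mirrors remote.py below; the variables pakfire and data stand in for the caller's context):

    import io

    md = metadata.Metadata(pakfire, metadata=data)

    # Any writable text file object works:
    with open("repomd.json", "w") as f:
        md.save(f)          # json.dump(self._data, f, indent=2)

    # ...including in-memory buffers:
    buf = io.StringIO()
    md.save(buf)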
index 5a101ad2d72cd363dc1d6e5fc33ca48d807ea689..b7e29f802a87327cf9946d86577b2eb7a0eeb315 100644 (file)
 #                                                                             #
 ###############################################################################
 
+import json
+import lzma
 import os
 
 import logging
 log = logging.getLogger("pakfire")
 
+from .. import http
+from .. import util
+
 from . import base
 from . import cache
 from . import metadata
 
-import pakfire.compress as compress
-
-from pakfire.constants import *
-from pakfire.i18n import _
+from ..constants import *
+from ..i18n import _
 
 class RepositoryRemote(base.RepositoryFactory):
        # XXX TODO Make metadata age configureable.
 
-       def __init__(self, pakfire, name, description=None, **settings):
-               # Save the settings that come from the configuration file.
+       def init(self, **settings):
+               # Save the settings that come from the configuration file
                self.settings = settings
 
-               base.RepositoryFactory.__init__(self, pakfire, name, description)
-
                # Enabled/disable the repository, based on the configuration setting.
                enabled = self.settings.get("enabled", True)
-               if enabled in ("1", "yes", "on", True, 1):
-                       self.enabled = True
-               else:
-                       self.enabled = False
-
-               # Create an cache object
-               self.cache = cache.RepositoryCache(self.pakfire, self)
-
-               # Initialize mirror servers.
-               mirrorlist = self.settings.get("mirrors", None)
-               self.mirrors = downloader.MirrorList(self.pakfire, self, mirrorlist)
-
-               # Open metadata if any.
-               self.metadata = self.open_metadata()
+               self.enabled = util.is_enabled(enabled)
 
        @property
        def baseurl(self):
@@ -66,6 +54,7 @@ class RepositoryRemote(base.RepositoryFactory):
        @property
        def keyfile(self):
                keyfile = self.settings.get("keyfile", None)
+
                if keyfile is None:
                        keyfile = self.settings.get("gpgkey", None)
 
@@ -99,153 +88,134 @@ class RepositoryRemote(base.RepositoryFactory):
 
                return priority
 
-       def open(self):
-               # First update the repository metadata.
-               self.update_metadata()
-               self.update_database()
+       def make_downloader(self):
+               """
+                       Creates a downloader that can be used to download
+                       metadata, databases or packages from this repository.
+               """
+               downloader = http.Client(baseurl=self.baseurl)
+
+               # Add any mirrors that we know of
+               for mirror in self.mirrorlist:
+                       downloader.add_mirror(mirror.get("url"))
+
+               return downloader
+
+       def refresh(self, force=False):
+               # Don't do anything if running in offline mode
+               if self.pakfire.offline:
+                       log.debug(_("Skipping refreshing %s since we are running in offline mode") % self)
+                       return
 
-               # Read the database.
-               self.open_database()
+               # Refresh the mirror list
+               self._refresh_mirror_list(force=force)
 
-               # Mark the repository as open.
-               self.opened = True
+               # Refresh metadata
+               self._refresh_metadata(force=force)
 
-       def close(self):
-               # Mark the repository as not open.
-               self.opened = False
+               # Refresh database
+               self._refresh_database()
 
-       def open_metadata(self, path=None):
-               if not path:
-                       path = self.cache_path(os.path.basename(METADATA_DOWNLOAD_FILE))
-                       path = self.cache.abspath(path)
+               # Read database
+               if self.database:
+                       self.read_solv(self.database)
 
-               if self.cache.exists(path):
-                       return metadata.Metadata(self.pakfire, path)
+       @property
+       def mirrorlist(self):
+               """
+                       Opens a cached mirror list
+               """
+               with self.cache_open("mirrors", "r") as f:
+                       mirrors = json.load(f)
 
-       def update_metadata(self, force=False, offline=False):
-               filename = os.path.join(METADATA_DOWNLOAD_PATH, METADATA_DOWNLOAD_FILE)
-               cache_filename = self.cache_path(os.path.basename(filename))
+                       return mirrors.get("mirrors")
 
-               # Check if the metadata is already recent enough...
-               exists = self.cache.exists(cache_filename)
+               return []
 
-               if not exists and offline:
-                       raise OfflineModeError(_("No metadata available for repository %s. Cannot download any.") \
-                               % self.name)
+       def _refresh_mirror_list(self, force=False):
+               # Check age of the mirror list first
+               age = self.cache_age("mirrors")
 
-               elif exists and offline:
-                       # Repository metadata exists. We cannot update anything because of the offline mode.
+               # Don't refresh anything if the mirror list
+               # has been refreshed in the last 24 hours
+               if not force and age and age <= 24 * 3600:
                        return
 
-               if not force and exists:
-                       age = self.cache.age(cache_filename)
-                       if age and age < TIME_10M:
-                               log.debug("Metadata is recent enough. I don't download it again.")
-                               return
+               # (Re-)download the mirror list
+               url = self.settings.get("mirrors", None)
+               if not url:
+                       return
 
-               # Going to download metada.
-               log.debug("Going to download repository metadata for %s..." % self.name)
-               assert not offline
+               # Use a generic downloader
+               downloader = http.Client()
 
-               grabber = downloader.MetadataDownloader(self.pakfire)
-               grabber = self.mirrors.group(grabber)
+               # Download a new mirror list
+               mirrorlist = downloader.get(url, decode="json")
 
-               while True:
-                       try:
-                               data = grabber.urlread(filename, limit=METADATA_DOWNLOAD_LIMIT)
-                       except urlgrabber.grabber.URLGrabError as e:
-                               if e.errno == 256:
-                                       raise DownloadError(_("Could not update metadata for %s from any mirror server") % self.name)
+               # Write new list to disk
+               with self.cache_open("mirrors", "w") as f:
+                       s = json.dumps(mirrorlist)
+                       f.write(s)
 
-                               grabber.increment_mirror(grabber)
-                               continue
+       @property
+       def metadata(self):
+               if not self.cache_exists("repomd.json"):
+                       return
+
+               with self.cache_open("repomd.json", "r") as f:
+                       return metadata.Metadata(self.pakfire, metadata=f.read())
 
-                       # Parse new metadata for comparison.
+       def _refresh_metadata(self, force=False):
+               # Check age of the metadata first
+               age = self.cache_age("repomd.json")
+
+               # Don't refresh anything if the metadata
+               # has been refreshed within the last 10 minutes
+               if not force and age and age <= 600:
+                       return
+
+               # Get a downloader
+               downloader = self.make_downloader()
+
+               while True:
+                       data = downloader.get("%s/repodata/repomd.json" % self.pakfire.arch.name, decode="ascii")
+
+                       # Parse new metadata for comparison
                        md = metadata.Metadata(self.pakfire, metadata=data)
 
                        if self.metadata and md < self.metadata:
                                log.warning(_("The downloaded metadata was less recent than the current one."))
-                               grabber.increment_mirror(grabber)
+                               downloader.skip_current_mirror()
                                continue
 
                        # If the download went well, we write the downloaded data to disk
                        # and break the loop.
-                       f = self.cache.open(cache_filename, "w")
-                       f.write(data)
-                       f.close()
+                       with self.cache_open("repomd.json", "w") as f:
+                               md.save(f)
 
                        break
 
-               # Re-open metadata.
-               self.metadata = self.open_metadata()
-               assert self.metadata
-
-       def open_database(self):
-               assert self.metadata, "Metadata needs to be openend first."
-
-               filename = self.cache_path("database", self.metadata.database)
-               filename = self.cache.abspath(filename)
-
-               assert os.path.exists(filename)
-
-               self.index.clear()
-               self.index.read(filename)
-
-       def update_database(self, force=False, offline=False):
-               assert self.metadata, "Metadata needs to be openend first."
-
-               # Construct cache and download filename.
-               filename = os.path.join(METADATA_DOWNLOAD_PATH, self.metadata.database)
-               cache_filename = self.cache_path("database", self.metadata.database)
-
-               if not force:
-                       force = not self.cache.exists(cache_filename)
+       @property
+       def database(self):
+               if self.metadata and self.metadata.database and self.cache_exists(self.metadata.database):
+                       return self.cache_path(self.metadata.database)
 
-               # Raise an exception when we are running in offline mode but an update is required.
-               if force and offline:
-                       raise OfflineModeError(_("Cannot download package database for %s in offline mode.") % self.name)
+       def _refresh_database(self):
+               assert self.metadata, "Metadata does not exist"
 
-               elif not force:
+               # Exit if the file already exists in the cache
+               if self.cache_exists(self.metadata.database):
                        return
 
-               # Just make sure we don't try to download anything in offline mode.
-               assert not offline
-
-               # Initialize a grabber for download.
-               grabber = downloader.DatabaseDownloader(
-                       self.pakfire,
-                       text = _("%s: package database") % self.name,
-               )
-               grabber = self.mirrors.group(grabber)
-
-               while True:
-                       # Open file on server.
-                       urlobj = fileobj = grabber.urlopen(filename)
+               # Make the downloader
+               downloader = self.make_downloader()
 
-                       if self.metadata.database_compression:
-                               fileobj = compress.decompressobj(fileobj=fileobj,
-                                       algo=self.metadata.database_compression)
+               # This is where the file will be saved after download
+               path = self.cache_path(self.metadata.database)
 
-                       # Make a new file in the cache.
-                       cacheobj = self.cache.open(cache_filename, "wb")
-
-                       try:
-                               while True:
-                                       buf = fileobj.read(BUFFER_SIZE)
-                                       if not buf:
-                                               break
-                                       cacheobj.write(buf)
-
-                       finally:
-                               # XXX we should catch decompression errors
-
-                               # Close all file descriptors.
-                               cacheobj.close()
-                               fileobj.close()
-                               if not urlobj == fileobj:
-                                       urlobj.close()
-
-                       break
+               # XXX compare checksum here
+               downloader.retrieve("repodata/%s" % self.metadata.database, filename=path,
+                       message=_("%s: package database") % self.name)
 
        def download(self, pkg, text="", logger=None):
                """
@@ -344,8 +314,9 @@ class RepositoryRemote(base.RepositoryFactory):
                        "baseurl = %s" % self.baseurl,
                ]
 
-               if self.mirrors.mirrorlist:
-                       lines.append("mirrors = %s" % self.mirrors.mirrorlist)
+               mirrors = self.settings.get("mirrors", None)
+               if mirrors:
+                       lines.append("mirrors = %s" % mirrors)
 
                lines += [
                        #"gpgkey = %s" % self.keyfile,
index 3bdab23e72b01545ab9397e66ec0c35032ea601e..1b7fe27e97cfed037e545fdbe02e89ede1ff3aa7 100644 (file)
@@ -46,6 +46,9 @@ def cli_is_interactive():
 
        return False
 
+def is_enabled(s):
+       return s in ("true", "yes", "1")
+
 def ask_user(question):
        """
                Ask the user the question, he or she can answer with yes or no.
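
The new is_enabled() helper reduces the enabled/disabled check to a small whitelist of strings. A quick illustration of what it accepts:

    from pakfire.util import is_enabled

    is_enabled("yes")    # True
    is_enabled("1")      # True
    is_enabled("true")   # True
    is_enabled("on")     # False - not in the whitelist
    is_enabled(True)     # False - booleans are not strings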