From: Michael Tremer Date: Fri, 5 May 2017 17:10:20 +0000 (+0200) Subject: Refactor repository downloading code X-Git-Tag: 0.9.28~1285^2~1351 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1b61ba46117eafd6141429dbaf4263d524d729dd;p=pakfire.git Refactor repository downloading code This was always a bit messy and hard to use in other parts of pakfire. Repositories are now refreshing themselves when refresh() is being called. Downloading code has been tidied up and everything should be atomic. Signed-off-by: Michael Tremer --- diff --git a/src/pakfire/base.py b/src/pakfire/base.py index 04cc678aa..bde775cf4 100644 --- a/src/pakfire/base.py +++ b/src/pakfire/base.py @@ -99,14 +99,13 @@ class Pakfire(object): # Dump the configuration when we enter the context self.config.dump() - # Initialize repositories - self.repos.initialize() + # Refresh repositories + self.repos.refresh() return PakfireContext(self) def __exit__(self, type, value, traceback): - # Close repositories - self.repos.shutdown() + pass @property def offline(self): diff --git a/src/pakfire/repository/__init__.py b/src/pakfire/repository/__init__.py index 9ac38152e..eb59d44ee 100644 --- a/src/pakfire/repository/__init__.py +++ b/src/pakfire/repository/__init__.py @@ -74,35 +74,13 @@ class Repositories(object): """ return len([r for r in self if r.enabled]) - @property - def initialized(self): - """ - Indicates if all repositories are initialized. - """ - for repo in self: - if not repo.opened: - return False - - return True - - def initialize(self): - # Nothing to do, if everything is already up to date. - if self.initialized: - return - - log.info(_("Initializing repositories...")) - for repo in self: - repo.open() - - # Empty line. - log.info("") - - def shutdown(self): + def refresh(self): """ - Shuts down all repositores. 
+ Refreshes all repositories """ for repo in self: - repo.close() + if repo.enabled: + repo.refresh() @property def distro(self): diff --git a/src/pakfire/repository/base.py b/src/pakfire/repository/base.py index 9dae65db3..52e697e14 100644 --- a/src/pakfire/repository/base.py +++ b/src/pakfire/repository/base.py @@ -27,23 +27,37 @@ from .. import _pakfire from . import packages class RepositoryFactory(_pakfire.Repo): - def __init__(self, pakfire, name, description): - _pakfire.Repo.__init__(self, pakfire.pool, name) + def __init__(self, pakfire, name, description, **kwargs): self.pakfire = pakfire - self.description = description + # Inherit + _pakfire.Repo.__init__(self, self.pakfire.pool, name) - # Some repositories may have a cache. - self.cache = None + # Save description + self.description = description log.debug("Initialized new repository: %s" % self) # Marks if this repository has been opened. self.opened = False + self.init(**kwargs) + def __repr__(self): return "<%s %s>" % (self.__class__.__name__, self.name) + def init(self, **kwargs): + pass # To be overwritten by inheriting classes + + def refresh(self): + """ + Called to refresh the repository metadata. + + This is probably only handy for remote repositories + that need to re-download data. + """ + pass + @property def local(self): """ diff --git a/src/pakfire/repository/metadata.py b/src/pakfire/repository/metadata.py index 9fabce077..d1b7a3d90 100644 --- a/src/pakfire/repository/metadata.py +++ b/src/pakfire/repository/metadata.py @@ -70,20 +70,12 @@ class Metadata(object): with open(filename) as f: self.parse(f.read()) - def save(self, filename=None): + def save(self, fp): """ Save all data to a file that could be exported to a remote repository. """ - if not filename: - filename = self.filename - - f = open(filename, "w") - - # Write all data to the fileobj. 
- json.dump(self._data, f, indent=2) - - f.close() + json.dump(self._data, fp, indent=2) @property def version(self): diff --git a/src/pakfire/repository/remote.py b/src/pakfire/repository/remote.py index 5a101ad2d..b7e29f802 100644 --- a/src/pakfire/repository/remote.py +++ b/src/pakfire/repository/remote.py @@ -19,45 +19,33 @@ # # ############################################################################### +import json +import lzma import os import logging log = logging.getLogger("pakfire") +from .. import http +from .. import util + from . import base from . import cache from . import metadata -import pakfire.compress as compress - -from pakfire.constants import * -from pakfire.i18n import _ +from ..constants import * +from ..i18n import _ class RepositoryRemote(base.RepositoryFactory): # XXX TODO Make metadata age configurable. - def __init__(self, pakfire, name, description=None, **settings): - # Save the settings that come from the configuration file. + def init(self, **settings): + # Save the settings that come from the configuration file self.settings = settings - base.RepositoryFactory.__init__(self, pakfire, name, description) - # Enabled/disable the repository, based on the configuration setting. enabled = self.settings.get("enabled", True) - if enabled in ("1", "yes", "on", True, 1): - self.enabled = True - else: - self.enabled = False - - # Create an cache object - self.cache = cache.RepositoryCache(self.pakfire, self) - - # Initialize mirror servers. - mirrorlist = self.settings.get("mirrors", None) - self.mirrors = downloader.MirrorList(self.pakfire, self, mirrorlist) - - # Open metadata if any. 
- self.metadata = self.open_metadata() + self.enabled = util.is_enabled(enabled) @property def baseurl(self): @@ -66,6 +54,7 @@ class RepositoryRemote(base.RepositoryFactory): @property def keyfile(self): keyfile = self.settings.get("keyfile", None) + if keyfile is None: keyfile = self.settings.get("gpgkey", None) @@ -99,153 +88,134 @@ class RepositoryRemote(base.RepositoryFactory): return priority - def open(self): - # First update the repository metadata. - self.update_metadata() - self.update_database() + def make_downloader(self): + """ + Creates a downloader that can be used to download + metadata, databases or packages from this repository. + """ + downloader = http.Client(baseurl=self.baseurl) + + # Add any mirrors that we know of + for mirror in self.mirrorlist: + downloader.add_mirror(mirror.get("url")) + + return downloader + + def refresh(self, force=False): + # Don't do anything if running in offline mode + if self.pakfire.offline: + log.debug(_("Skipping refreshing %s since we are running in offline mode") % self) + return - # Read the database. - self.open_database() + # Refresh the mirror list + self._refresh_mirror_list(force=force) - # Mark the repository as open. - self.opened = True + # Refresh metadata + self._refresh_metadata(force=force) - def close(self): - # Mark the repository as not open. 
- self.opened = False + # Refresh database + self._refresh_database() - def open_metadata(self, path=None): - if not path: - path = self.cache_path(os.path.basename(METADATA_DOWNLOAD_FILE)) - path = self.cache.abspath(path) + # Read database + if self.database: + self.read_solv(self.database) - if self.cache.exists(path): - return metadata.Metadata(self.pakfire, path) + @property + def mirrorlist(self): + """ + Opens a cached mirror list + """ + with self.cache_open("mirrors", "r") as f: + mirrors = json.load(f) - def update_metadata(self, force=False, offline=False): - filename = os.path.join(METADATA_DOWNLOAD_PATH, METADATA_DOWNLOAD_FILE) - cache_filename = self.cache_path(os.path.basename(filename)) + return mirrors.get("mirrors") - # Check if the metadata is already recent enough... - exists = self.cache.exists(cache_filename) + return [] - if not exists and offline: - raise OfflineModeError(_("No metadata available for repository %s. Cannot download any.") \ - % self.name) + def _refresh_mirror_list(self, force=False): + # Check age of the mirror list first + age = self.cache_age("mirrors") - elif exists and offline: - # Repository metadata exists. We cannot update anything because of the offline mode. + # Don't refresh anything if the mirror list + # has been refreshed in the last 24 hours + if not force and age and age <= 24 * 3600: return - if not force and exists: - age = self.cache.age(cache_filename) - if age and age < TIME_10M: - log.debug("Metadata is recent enough. I don't download it again.") - return + # (Re-)download the mirror list + url = self.settings.get("mirrors", None) + if not url: + return - # Going to download metada. - log.debug("Going to download repository metadata for %s..." 
% self.name) - assert not offline + # Use a generic downloader - grabber = downloader.MetadataDownloader(self.pakfire) - grabber = self.mirrors.group(grabber) + downloader = http.Client() + # Download a new mirror list + mirrorlist = downloader.get(url, decode="json") - while True: - try: - data = grabber.urlread(filename, limit=METADATA_DOWNLOAD_LIMIT) - except urlgrabber.grabber.URLGrabError as e: - if e.errno == 256: - raise DownloadError(_("Could not update metadata for %s from any mirror server") % self.name) + # Write new list to disk + with self.cache_open("mirrors", "w") as f: + s = json.dumps(mirrorlist) + f.write(s) - grabber.increment_mirror(grabber) - continue + @property + def metadata(self): + if not self.cache_exists("repomd.json"): + return + + with self.cache_open("repomd.json", "r") as f: + return metadata.Metadata(self.pakfire, metadata=f.read()) - # Parse new metadata for comparison. + def _refresh_metadata(self, force=False): + # Check age of the metadata first + age = self.cache_age("repomd.json") + + # Don't refresh anything if the metadata + # has been refreshed within the last 10 minutes + if not force and age and age <= 600: + return + + # Get a downloader + downloader = self.make_downloader() + + while True: + data = downloader.get("%s/repodata/repomd.json" % self.pakfire.arch.name, decode="ascii") + + # Parse new metadata for comparison md = metadata.Metadata(self.pakfire, metadata=data) if self.metadata and md < self.metadata: log.warning(_("The downloaded metadata was less recent than the current one.")) - grabber.increment_mirror(grabber) + downloader.skip_current_mirror() continue # If the download went well, we write the downloaded data to disk # and break the loop. - f = self.cache.open(cache_filename, "w") - f.write(data) - f.close() + with self.cache_open("repomd.json", "w") as f: + md.save(f) break - # Re-open metadata. 
- self.metadata = self.open_metadata() - assert self.metadata - - def open_database(self): - assert self.metadata, "Metadata needs to be openend first." - - filename = self.cache_path("database", self.metadata.database) - filename = self.cache.abspath(filename) - - assert os.path.exists(filename) - - self.index.clear() - self.index.read(filename) - - def update_database(self, force=False, offline=False): - assert self.metadata, "Metadata needs to be openend first." - - # Construct cache and download filename. - filename = os.path.join(METADATA_DOWNLOAD_PATH, self.metadata.database) - cache_filename = self.cache_path("database", self.metadata.database) - - if not force: - force = not self.cache.exists(cache_filename) + @property + def database(self): + if self.metadata and self.metadata.database and self.cache_exists(self.metadata.database): + return self.cache_path(self.metadata.database) - # Raise an exception when we are running in offline mode but an update is required. - if force and offline: - raise OfflineModeError(_("Cannot download package database for %s in offline mode.") % self.name) + def _refresh_database(self): + assert self.metadata, "Metadata does not exist" - elif not force: + # Exit if the file already exists in the cache + if self.cache_exists(self.metadata.database): return - # Just make sure we don't try to download anything in offline mode. - assert not offline - - # Initialize a grabber for download. - grabber = downloader.DatabaseDownloader( - self.pakfire, - text = _("%s: package database") % self.name, - ) - grabber = self.mirrors.group(grabber) - - while True: - # Open file on server. 
- urlobj = fileobj = grabber.urlopen(filename) + # Make the downloader + downloader = self.make_downloader() - if self.metadata.database_compression: - fileobj = compress.decompressobj(fileobj=fileobj, - algo=self.metadata.database_compression) + # This is where the file will be saved after download + path = self.cache_path(self.metadata.database) - # Make a new file in the cache. - cacheobj = self.cache.open(cache_filename, "wb") - - try: - while True: - buf = fileobj.read(BUFFER_SIZE) - if not buf: - break - cacheobj.write(buf) - - finally: - # XXX we should catch decompression errors - - # Close all file descriptors. - cacheobj.close() - fileobj.close() - if not urlobj == fileobj: - urlobj.close() - - break + # XXX compare checksum here + downloader.retrieve("repodata/%s" % self.metadata.database, filename=path, + message=_("%s: package database") % self.name) def download(self, pkg, text="", logger=None): """ @@ -344,8 +314,9 @@ class RepositoryRemote(base.RepositoryFactory): "baseurl = %s" % self.baseurl, ] - if self.mirrors.mirrorlist: - lines.append("mirrors = %s" % self.mirrors.mirrorlist) + mirrors = self.settings.get("mirrors", None) + if mirrors: + lines.append("mirrors = %s" % mirrors) lines += [ #"gpgkey = %s" % self.keyfile, diff --git a/src/pakfire/util.py b/src/pakfire/util.py index 3bdab23e7..1b7fe27e9 100644 --- a/src/pakfire/util.py +++ b/src/pakfire/util.py @@ -46,6 +46,9 @@ def cli_is_interactive(): return False +def is_enabled(s): + return s in ("true", "yes", "1") + def ask_user(question): """ Ask the user the question, he or she can answer with yes or no.