From 3cf7127fad7170afb74bf71e1d6959aac2ae9506 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Fri, 25 Feb 2011 19:24:43 +0100 Subject: [PATCH] Improve metadata handling and downloading. --- pakfire/constants.py | 5 ++ pakfire/index.py | 188 +++++++++++++++++++++++------------------- pakfire/metadata.py | 88 ++++++++++++++++++++ pakfire/repository.py | 2 +- po/POTFILES.in | 1 + po/pakfire.pot | 4 +- 6 files changed, 198 insertions(+), 90 deletions(-) create mode 100644 pakfire/metadata.py diff --git a/pakfire/constants.py b/pakfire/constants.py index 2e3fbdc76..70b12c5a4 100644 --- a/pakfire/constants.py +++ b/pakfire/constants.py @@ -23,6 +23,11 @@ REPOSITORY_DB = "index.db" BUFFER_SIZE = 1024**2 +METADATA_FORMAT = 0 +METADATA_DOWNLOAD_LIMIT = 1024**2 +METADATA_DOWNLOAD_PATH = "repodata" +METADATA_DOWNLOAD_FILE = os.path.join(METADATA_DOWNLOAD_PATH, "repomd.json") + PACKAGE_FORMAT = 0 PACKAGE_EXTENSION = "pfm" MAKEFILE_EXTENSION = "nm" diff --git a/pakfire/index.py b/pakfire/index.py index e01e39e15..670431f07 100644 --- a/pakfire/index.py +++ b/pakfire/index.py @@ -8,6 +8,7 @@ import shutil import database import downloader +import metadata import packages import repository import util @@ -122,93 +123,12 @@ class DirectoryIndex(Index): db.close() -class DatabaseIndex(Index): +class InstalledIndex(Index): def __init__(self, pakfire, repo): Index.__init__(self, pakfire, repo) - self.db = None - - if isinstance(repo, repository.InstalledRepository): - self.db = database.LocalPackageDatabase(self.pakfire) - - else: - # Generate path to database file. - filename = os.path.join(repo.path, ".index.db.%s" % random.randint(0, 1024)) - self.db = database.RemotePackageDatabase(self.pakfire, filename) - - @property - def local(self): - pass - - def update(self, force=False): - """ - Download the repository metadata and the package database. - """ - - # XXX this code needs lots of work: - # XXX * fix the hardcoded paths - # XXX * make checks for downloads (filesize, hashsums) - # XXX * don't download the package database in place - # XXX * check the metadata content - # XXX * use compression - - # Shortcut to repository cache. - cache = self.repo.cache - - cache_filename = "metadata/repomd.json" - - # Marker if we need to do the download. - download = True - - # Check if file does exists and is not too old. - if cache.exists(cache_filename): - age = cache.age(cache_filename) - if age and age < TIME_10M: - download = False - - if download: - # Initialize a grabber for download. - grabber = downloader.MetadataDownloader() - grabber = self.repo.mirrors.group(grabber) - - # XXX do we need limit here for security reasons? - metadata = grabber.urlread("repodata/repomd.json") - - with cache.open(cache_filename, "w") as o: - o.write(metadata) - - # Parse the metadata that we just downloaded or opened from cache. - f = cache.open(cache_filename) - metadata = json.loads(f.read()) - f.close() - - # Get the filename of the package database from the metadata. - download_filename = "repodata/%s" % metadata.get("package_database") - - cache_filename = "metadata/packages.db" - - if not cache.exists(cache_filename): - # Initialize a grabber for download. - grabber = downloader.DatabaseDownloader( - text = _("%s: package database") % self.repo.name, - ) - grabber = self.repo.mirrors.group(grabber) - - i = grabber.urlopen(download_filename) - o = cache.open(cache_filename, "w") - - buf = i.read(BUFFER_SIZE) - while buf: - o.write(buf) - buf = i.read(BUFFER_SIZE) - - i.close() - o.close() - - # XXX possibly, the database needs to be decompressed - - # Reopen the database - self.db = database.RemotePackageDatabase(self.pakfire, cache.abspath(cache_filename)) + # Open the database. + self.db = database.LocalPackageDatabase(self.pakfire) def __get_from_cache(self, pkg): """ @@ -264,6 +184,7 @@ class DatabaseIndex(Index): return self.db.add_package(pkg, reason) def tag_db(self): + # XXX DEPRECATED self.db.close() # Calculate a filename that is based on the hash of the file @@ -277,7 +198,100 @@ class DatabaseIndex(Index): self.db = database.RemotePackageDatabase(self.pakfire, self.db.filename) -# XXX maybe this can be removed later? -class InstalledIndex(DatabaseIndex): - pass +class DatabaseIndex(InstalledIndex): + def __init__(self, pakfire, repo): + Index.__init__(self, pakfire, repo) + + # Initialize with no content. + self.db, self.metadata = None, None + + def _update_metadata(self, force): + # Shortcut to repository cache. + cache = self.repo.cache + + filename = METADATA_DOWNLOAD_FILE + + # Marker if we need to do the download. + download = True + + # Marker for the current metadata. + old_metadata = None + + if not force: + # Check if file does exists and is not too old. + if cache.exists(filename): + age = cache.age(filename) + if age and age < TIME_10M: + download = False + logging.debug("Metadata is recent enough. I don't download it again.") + + # Open old metadata for comparison. + old_metadata = metadata.Metadata(self.pakfire, self, + cache.abspath(filename)) + + if download: + logging.debug("Going to (re-)download the repository metadata.") + + # Initialize a grabber for download. + grabber = downloader.MetadataDownloader() + grabber = self.repo.mirrors.group(grabber) + + data = grabber.urlread(filename, limit=METADATA_DOWNLOAD_LIMIT) + + # Parse new metadata for comparison. + new_metadata = metadata.Metadata(self.pakfire, self, metadata=data) + + if old_metadata and new_metadata < old_metadata: + logging.warning("The downloaded metadata was less recent than the current one. Trashing that.") + + else: + # We explicitely rewrite the metadata if it is equal to have + # a new timestamp and do not download it over and over again. + with cache.open(filename, "w") as o: + o.write(data) + + # Parse the metadata that we just downloaded or load it from cache. + self.metadata = metadata.Metadata(self.pakfire, self, + cache.abspath(filename)) + + def _update_database(self, force): + # Shortcut to repository cache. + cache = self.repo.cache + + # Construct cache and download filename. + filename = os.path.join(METADATA_DOWNLOAD_PATH, self.metadata.database) + + if not cache.exists(filename): + # Initialize a grabber for download. + grabber = downloader.DatabaseDownloader( + text = _("%s: package database") % self.repo.name, + ) + grabber = self.repo.mirrors.group(grabber) + + data = grabber.urlread(filename) + + with cache.open(filename, "w") as o: + o.write(data) + + # XXX possibly, the database needs to be decompressed + + # (Re-)open the database. + self.db = database.RemotePackageDatabase(self.pakfire, + cache.abspath(filename)) + + def update(self, force=False): + """ + Download the repository metadata and the package database. + """ + + # At first, update the metadata. + self._update_metadata(force) + + # Then, we download the database eventually. + self._update_database(force) + + # XXX this code needs lots of work: + # XXX * make checks for downloads (hashsums) + # XXX * check the metadata content + # XXX * use compression diff --git a/pakfire/metadata.py b/pakfire/metadata.py new file mode 100644 index 000000000..db20d0082 --- /dev/null +++ b/pakfire/metadata.py @@ -0,0 +1,88 @@ +#!/usr/bin/python + +import json +import time + +from constants import * + +class Metadata(object): + def __init__(self, pakfire, index, metafile=None, metadata=None): + self.pakfire = pakfire + self.index = index + + self.filename = metafile + + # Place where we save the data. + self._data = { + "database" : None, + "revision" : int(time.time()), + "version" : METADATA_FORMAT, + } + + # If a file was passed, we open it. + if self.filename: + self.open() + + # ... or parse the one that was passed. + elif metadata: + self.parse(metadata) + + def __cmp__(self, other): + """ + Compare two sets of metadata by their revision. + """ + return cmp(self.revision, other.revision) + + def parse(self, metadata): + try: + self._data = json.loads(metadata) + except: + raise # XXX catch json exceptions here + + def open(self, filename=None): + """ + Open a given file or use the default one and parse the + data in it. + """ + if not filename: + filename = self.filename + + with open(filename) as f: + self.parse(f.read()) + + def save(self, filename=None): + """ + Save all data to a file that could be exported to a + remote repository. + """ + if not filename: + filename = self.filename + + f = open(filename, "w") + + # Write all data to the fileobj. + json.dump(self._data, f, indent=2) + + f.close() + + @property + def version(self): + """ + Returns the version of the metadata. + """ + return self._data.get("version") + + @property + def revision(self): + """ + Returns the revision of the metadata. + """ + return self._data.get("revision") + + def get_database(self): + return self._data.get("database") + + def set_database(self, val): + self._data.set("database", val) + + database = property(get_database, set_database) diff --git a/pakfire/repository.py b/pakfire/repository.py index ff9a27904..ffa267781 100644 --- a/pakfire/repository.py +++ b/pakfire/repository.py @@ -375,7 +375,7 @@ class RepositoryCache(object): """ Create all necessary directories. """ - for path in ("mirrors", "packages", "metadata"): + for path in ("mirrors", "packages", "repodata"): path = self.abspath(path) if not os.path.exists(path): diff --git a/po/POTFILES.in b/po/POTFILES.in index a74b5f526..dd36d67da 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -12,6 +12,7 @@ pakfire/i18n.py pakfire/index.py pakfire/__init__.py pakfire/logger.py +pakfire/metadata.py pakfire/packages/base.py pakfire/packages/binary.py pakfire/packages/__init__.py diff --git a/po/pakfire.pot b/po/pakfire.pot index ed6c65c1c..7a946b2e5 100644 --- a/po/pakfire.pot +++ b/po/pakfire.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2011-02-22 21:49+0100\n" +"POT-Creation-Date: 2011-02-25 19:21+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -220,7 +220,7 @@ msgstr "" msgid "Total download size: %s" msgstr "" -#: ../pakfire/index.py:180 +#: ../pakfire/index.py:267 #, python-format msgid "%s: package database" msgstr "" -- 2.39.5