git.ipfire.org Git - pakfire.git/commitdiff
Improve metadata handling and downloading.
authorMichael Tremer <michael.tremer@ipfire.org>
Fri, 25 Feb 2011 18:24:43 +0000 (19:24 +0100)
committerMichael Tremer <michael.tremer@ipfire.org>
Fri, 25 Feb 2011 18:24:43 +0000 (19:24 +0100)
pakfire/constants.py
pakfire/index.py
pakfire/metadata.py [new file with mode: 0644]
pakfire/repository.py
po/POTFILES.in
po/pakfire.pot

index 2e3fbdc76b43c5a44cc57b8a0ff77b3ac8b4d29f..70b12c5a4e98bbc22c4365f83d8d2ceff2995fa4 100644 (file)
@@ -23,6 +23,11 @@ REPOSITORY_DB = "index.db"
 
 BUFFER_SIZE = 1024**2
 
+METADATA_FORMAT = 0
+METADATA_DOWNLOAD_LIMIT = 1024**2
+METADATA_DOWNLOAD_PATH  = "repodata"
+METADATA_DOWNLOAD_FILE  = os.path.join(METADATA_DOWNLOAD_PATH, "repomd.json")
+
 PACKAGE_FORMAT = 0
 PACKAGE_EXTENSION = "pfm"
 MAKEFILE_EXTENSION = "nm"
index e01e39e15ebb0635f03c91d7ae4d241325f8dd1b..670431f07d0155d748da524a2ef9f21e8a18ce1a 100644 (file)
@@ -8,6 +8,7 @@ import shutil
 
 import database
 import downloader
+import metadata
 import packages
 import repository
 import util
@@ -122,93 +123,12 @@ class DirectoryIndex(Index):
                db.close()
 
 
-class DatabaseIndex(Index):
+class InstalledIndex(Index):
        def __init__(self, pakfire, repo):
                Index.__init__(self, pakfire, repo)
 
-               self.db = None
-
-               if isinstance(repo, repository.InstalledRepository):
-                       self.db = database.LocalPackageDatabase(self.pakfire)
-
-               else:
-                       # Generate path to database file.
-                       filename = os.path.join(repo.path, ".index.db.%s" % random.randint(0, 1024))
-                       self.db = database.RemotePackageDatabase(self.pakfire, filename)
-
-       @property
-       def local(self):
-               pass
-
-       def update(self, force=False):
-               """
-                       Download the repository metadata and the package database.
-               """
-
-               # XXX this code needs lots of work:
-               # XXX   * fix the hardcoded paths
-               # XXX   * make checks for downloads (filesize, hashsums)
-               # XXX   * don't download the package database in place
-               # XXX   * check the metadata content
-               # XXX   * use compression
-
-               # Shortcut to repository cache.
-               cache = self.repo.cache
-
-               cache_filename = "metadata/repomd.json"
-
-               # Marker if we need to do the download.
-               download = True
-
-               # Check if file does exists and is not too old.
-               if cache.exists(cache_filename):
-                       age = cache.age(cache_filename)
-                       if age and age < TIME_10M:
-                               download = False
-
-               if download:
-                       # Initialize a grabber for download.
-                       grabber = downloader.MetadataDownloader()
-                       grabber = self.repo.mirrors.group(grabber)
-
-                       # XXX do we need limit here for security reasons?
-                       metadata = grabber.urlread("repodata/repomd.json")
-
-                       with cache.open(cache_filename, "w") as o:
-                               o.write(metadata)
-
-               # Parse the metadata that we just downloaded or opened from cache.
-               f = cache.open(cache_filename)
-               metadata = json.loads(f.read())
-               f.close()
-
-               # Get the filename of the package database from the metadata.
-               download_filename = "repodata/%s" % metadata.get("package_database")
-
-               cache_filename = "metadata/packages.db"
-
-               if not cache.exists(cache_filename):
-                       # Initialize a grabber for download.
-                       grabber = downloader.DatabaseDownloader(
-                               text = _("%s: package database") % self.repo.name,
-                       )
-                       grabber = self.repo.mirrors.group(grabber)
-
-                       i = grabber.urlopen(download_filename)
-                       o = cache.open(cache_filename, "w")
-
-                       buf = i.read(BUFFER_SIZE)
-                       while buf:
-                               o.write(buf)
-                               buf = i.read(BUFFER_SIZE)
-
-                       i.close()
-                       o.close()
-
-                       # XXX possibly, the database needs to be decompressed
-
-               # Reopen the database
-               self.db = database.RemotePackageDatabase(self.pakfire, cache.abspath(cache_filename))
+               # Open the database.
+               self.db = database.LocalPackageDatabase(self.pakfire)
 
        def __get_from_cache(self, pkg):
                """
@@ -264,6 +184,7 @@ class DatabaseIndex(Index):
                return self.db.add_package(pkg, reason)
 
        def tag_db(self):
+               # XXX DEPRECATED
                self.db.close()
 
                # Calculate a filename that is based on the hash of the file
@@ -277,7 +198,100 @@ class DatabaseIndex(Index):
                self.db = database.RemotePackageDatabase(self.pakfire, self.db.filename)
 
 
-# XXX maybe this can be removed later?
-class InstalledIndex(DatabaseIndex):
-       pass
+class DatabaseIndex(InstalledIndex):
+       def __init__(self, pakfire, repo):
+               Index.__init__(self, pakfire, repo)
+
+               # Initialize with no content.
+               self.db, self.metadata = None, None
+
+       def _update_metadata(self, force):
+               # Shortcut to repository cache.
+               cache = self.repo.cache
+
+               filename = METADATA_DOWNLOAD_FILE
+
+               # Marker if we need to do the download.
+               download = True
+
+               # Marker for the current metadata.
+               old_metadata = None
+
+               if not force:
+                       # Check if file exists and is not too old.
+                       if cache.exists(filename):
+                               age = cache.age(filename)
+                               if age and age < TIME_10M:
+                                       download = False
+                                       logging.debug("Metadata is recent enough. I don't download it again.")
+
+                               # Open old metadata for comparison.
+                               old_metadata = metadata.Metadata(self.pakfire, self,
+                                       cache.abspath(filename))
+
+               if download:
+                       logging.debug("Going to (re-)download the repository metadata.")
+
+                       # Initialize a grabber for download.
+                       grabber = downloader.MetadataDownloader()
+                       grabber = self.repo.mirrors.group(grabber)
+
+                       data = grabber.urlread(filename, limit=METADATA_DOWNLOAD_LIMIT)
+
+                       # Parse new metadata for comparison.
+                       new_metadata = metadata.Metadata(self.pakfire, self, metadata=data)
+
+                       if old_metadata and new_metadata < old_metadata:
+                               logging.warning("The downloaded metadata was less recent than the current one. Trashing that.")
+
+                       else:
+                               # We explicitly rewrite the metadata if it is equal to have
+                               # a new timestamp and do not download it over and over again.
+                               with cache.open(filename, "w") as o:
+                                       o.write(data)
+
+               # Parse the metadata that we just downloaded or load it from cache.
+               self.metadata = metadata.Metadata(self.pakfire, self,
+                       cache.abspath(filename))
+
+       def _update_database(self, force):
+               # Shortcut to repository cache.
+               cache = self.repo.cache
+
+               # Construct cache and download filename.
+               filename = os.path.join(METADATA_DOWNLOAD_PATH, self.metadata.database)
+
+               if not cache.exists(filename):
+                       # Initialize a grabber for download.
+                       grabber = downloader.DatabaseDownloader(
+                               text = _("%s: package database") % self.repo.name,
+                       )
+                       grabber = self.repo.mirrors.group(grabber)
+
+                       data = grabber.urlread(filename)
+
+                       with cache.open(filename, "w") as o:
+                               o.write(data)
+
+                       # XXX possibly, the database needs to be decompressed
+
+               # (Re-)open the database.
+               self.db = database.RemotePackageDatabase(self.pakfire,
+                       cache.abspath(filename))
+
+       def update(self, force=False):
+               """
+                       Download the repository metadata and the package database.
+               """
+
+               # At first, update the metadata.
+               self._update_metadata(force)
+
+               # Then, download the database if necessary.
+               self._update_database(force)
+
+               # XXX this code needs lots of work:
+               # XXX   * make checks for downloads (hashsums)
+               # XXX   * check the metadata content
+               # XXX   * use compression
 
diff --git a/pakfire/metadata.py b/pakfire/metadata.py
new file mode 100644 (file)
index 0000000..db20d00
--- /dev/null
@@ -0,0 +1,88 @@
+#!/usr/bin/python
+
+import json
+import time
+
+from constants import *
+
+class Metadata(object):
+       def __init__(self, pakfire, index, metafile=None, metadata=None):
+               self.pakfire = pakfire
+               self.index = index
+
+               self.filename = metafile
+
+               # Place where we save the data.
+               self._data = {
+                       "database" : None,
+                       "revision" : int(time.time()),
+                       "version"  : METADATA_FORMAT,
+               }
+
+               # If a file was passed, we open it.
+               if self.filename:
+                       self.open()
+
+               # ... or parse the one that was passed.
+               elif metadata:
+                       self.parse(metadata)
+
+       def __cmp__(self, other):
+               """
+                       Compare two sets of metadata by their revision.
+               """
+               return cmp(self.revision, other.revision)
+
+       def parse(self, metadata):
+               try:
+                       self._data = json.loads(metadata)
+               except:
+                       raise # XXX catch json exceptions here
+
+       def open(self, filename=None):
+               """
+                       Open a given file or use the default one and parse the
+                       data in it.
+               """
+               if not filename:
+                       filename = self.filename
+
+               with open(filename) as f:
+                       self.parse(f.read())
+
+       def save(self, filename=None):
+               """
+                       Save all data to a file that could be exported to a
+                       remote repository.
+               """
+               if not filename:
+                       filename = self.filename
+
+               f = open(filename, "w")
+
+               # Write all data to the fileobj.
+               json.dump(self._data, f, indent=2)
+
+               f.close()
+
+       @property
+       def version(self):
+               """
+                       Returns the version of the metadata.
+               """
+               return self._data.get("version")
+
+       @property
+       def revision(self):
+               """
+                       Returns the revision of the metadata.
+               """
+               return self._data.get("revision")
+
+       def get_database(self):
+               return self._data.get("database")
+
+       def set_database(self, val):
+               self._data["database"] = val
+
+       database = property(get_database, set_database)
index ff9a27904f37c3e7d5a81a8c24ed4d3a0e346450..ffa2677813e596912253abb136f327be579629e3 100644 (file)
@@ -375,7 +375,7 @@ class RepositoryCache(object):
                """
                        Create all necessary directories.
                """
-               for path in ("mirrors", "packages", "metadata"):
+               for path in ("mirrors", "packages", "repodata"):
                        path = self.abspath(path)
 
                        if not os.path.exists(path):
index a74b5f5263007ea97fa288ca2e2cf021e22189f4..dd36d67da620b9a72ba4f898ecaf150894f756ec 100644 (file)
@@ -12,6 +12,7 @@ pakfire/i18n.py
 pakfire/index.py
 pakfire/__init__.py
 pakfire/logger.py
+pakfire/metadata.py
 pakfire/packages/base.py
 pakfire/packages/binary.py
 pakfire/packages/__init__.py
index ed6c65c1c86b5caed04d28b3069daaeeb19957fb..7a946b2e5db3a14f848642de38a6713475c9a4b1 100644 (file)
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2011-02-22 21:49+0100\n"
+"POT-Creation-Date: 2011-02-25 19:21+0100\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -220,7 +220,7 @@ msgstr ""
 msgid "Total download size: %s"
 msgstr ""
 
-#: ../pakfire/index.py:180
+#: ../pakfire/index.py:267
 #, python-format
 msgid "%s: package database"
 msgstr ""