git.ipfire.org Git - location/location-database.git/commitdiff
Add downloader which decodes files downloaded from the RIRs on-the-fly
author Michael Tremer <michael.tremer@ipfire.org>
Mon, 29 Jan 2018 19:26:48 +0000 (19:26 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Mon, 29 Jan 2018 19:26:48 +0000 (19:26 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
tools/downloader.py [new file with mode: 0644]

diff --git a/tools/downloader.py b/tools/downloader.py
new file mode 100644 (file)
index 0000000..0250669
--- /dev/null
@@ -0,0 +1,126 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+import gzip
+import logging
+import urllib.request
+
+# Module-wide logger; records are handed on to the handlers of the root logger
+log = logging.getLogger("downloader")
+log.propagate = True
+
+class Downloader(object):
+       """
+               Builds HTTP requests that share a User-Agent header and an
+               optional HTTP proxy.
+
+               request() performs no network I/O itself. It returns a
+               DownloaderContext which sends the request once it is entered
+               in a "with" statement.
+       """
+       USER_AGENT = "location-database/1.0"
+
+       def __init__(self):
+               # Proxy as passed to set_proxy(), or None for direct connections
+               self.proxy = None
+
+       def set_proxy(self, url):
+               """
+                       Sets a HTTP proxy that is used to perform all requests
+
+                       url is either a full URL ("http://proxy.example.org:3128")
+                       or a plain "host:port" string.
+               """
+               log.info("Using proxy %s", url)
+               self.proxy = url
+
+       def _proxy_host(self):
+               """
+                       Returns the configured proxy as "host:port"
+
+                       urllib.request.Request.set_proxy() expects a host and not
+                       a URL, so the scheme and any path are stripped here.
+               """
+               # Imported locally so that this helper does not rely on
+               # urllib.request having pulled in urllib.parse as a side effect
+               from urllib.parse import urlsplit
+
+               if "://" in self.proxy:
+                       # Fall back to the raw value if no host could be extracted
+                       return urlsplit(self.proxy).netloc or self.proxy
+
+               return self.proxy
+
+       def request(self, url, data=None):
+               """
+                       Prepares a request for url and returns a DownloaderContext
+
+                       data is passed on to urllib.request.Request unchanged.
+               """
+               req = urllib.request.Request(url, data=data)
+
+               # Configure proxy
+               if self.proxy:
+                       req.set_proxy(self._proxy_host(), "http")
+
+               # Set User-Agent
+               if self.USER_AGENT:
+                       req.add_header("User-Agent", self.USER_AGENT)
+
+               return DownloaderContext(self, req)
+
+
+class DownloaderContext(object):
+       """
+               Context manager wrapping a single prepared request.
+
+               The request is sent when the context is entered and the
+               response is closed again when it is left. While the context is
+               open, iterating over the object yields the lines of the decoded
+               response body.
+       """
+       # Media types that are transparently decompressed by the body property
+       GZIP_CONTENT_TYPES = ("application/x-gzip", "application/gzip")
+
+       def __init__(self, downloader, request):
+               self.downloader = downloader
+               self.request = request
+
+               # Save the response object
+               self.response = None
+
+       def __enter__(self):
+               log.info("Retrieving %s...", self.request.full_url)
+
+               # Send request
+               self.response = urllib.request.urlopen(self.request)
+
+               # Log the response headers (items() also covers repeated headers)
+               log.debug("Response Headers:")
+               for header, value in self.headers.items():
+                       log.debug("     %s: %s", header, value)
+
+               return self
+
+       def __exit__(self, exc_type, value, traceback):
+               # Release the connection. The response object itself is kept so
+               # that the headers remain readable after the "with" block.
+               if self.response is not None:
+                       self.response.close()
+
+       def __iter__(self):
+               """
+                       Makes the object iterable by going through each line
+               """
+               return iter(self.body)
+
+       @property
+       def headers(self):
+               """
+                       Returns the response headers or None if no request
+                       has been sent, yet.
+               """
+               if self.response is not None:
+                       return self.response.headers
+
+               return None
+
+       def get_header(self, name):
+               """
+                       Returns the value of the response header called name or
+                       None if it is not set or no response is available.
+               """
+               headers = self.headers
+               if headers is not None:
+                       return headers.get(name)
+
+               return None
+
+       @property
+       def body(self):
+               """
+                       Returns a file-like object with the decoded content
+                       of the response.
+               """
+               content_type = self.get_header("Content-Type") or ""
+
+               # Only compare the media type itself: drop parameters like
+               # "; charset=binary" and ignore the letter case
+               media_type = content_type.split(";", 1)[0].strip().lower()
+
+               # Decompress any gzipped response on the fly
+               if media_type in self.GZIP_CONTENT_TYPES:
+                       return gzip.GzipFile(fileobj=self.response, mode="rb")
+
+               # Return the response by default
+               return self.response
+
+
+if __name__ == "__main__":
+       # Small command line driver: fetch every URL passed as an argument
+       # and dump the decoded body line by line
+       import sys
+
+       # Show everything the downloader logs, including response headers
+       logging.basicConfig(level=logging.DEBUG)
+
+       downloader = Downloader()
+       urls = sys.argv[1:]
+
+       for url in urls:
+               print("Downloading %s..." % url)
+
+               with downloader.request(url) as response:
+                       for line in response:
+                               print(line)