From: Michael Tremer Date: Mon, 29 Jan 2018 19:26:48 +0000 (+0000) Subject: Add downloader which decodes files downloaded from the RIRs on-the-fly X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0a4c3e0deb735cb1e4377d9094c0aee1be234e55;p=location%2Flocation-database.git Add downloader which decodes files downloaded from the RIRs on-the-fly Signed-off-by: Michael Tremer --- diff --git a/tools/downloader.py b/tools/downloader.py new file mode 100644 index 0000000..0250669 --- /dev/null +++ b/tools/downloader.py @@ -0,0 +1,126 @@ +#!/usr/bin/python3 +############################################################################### +# # +# location-database - A database to determine someone's # +# location on the Internet # +# Copyright (C) 2018 Michael Tremer # +# # +# This program is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program. If not, see <http://www.gnu.org/licenses/>.
#                                                                             #
###############################################################################
"""
Small HTTP download helper.

A Downloader builds requests (optionally through a proxy) and hands out
DownloaderContext objects. Used as a context manager, such an object
opens the URL, lets the caller iterate over the response line by line,
transparently decompresses gzipped payloads and closes the response
again when the ``with`` block is left.
"""

import gzip
import logging
import urllib.request

# Setup logger
log = logging.getLogger("downloader")
log.propagate = True


class Downloader(object):
    """
    Factory for requests that share a User-Agent and an optional proxy.
    """

    USER_AGENT = "location-database/1.0"

    def __init__(self):
        # Proxy passed to Request.set_proxy(); None disables proxying
        self.proxy = None

    def set_proxy(self, url):
        """
        Sets a HTTP proxy that is used to perform all requests
        """
        log.info("Using proxy %s", url)
        self.proxy = url

    def request(self, url, data=None):
        """
        Prepares a request for the given URL.

        Nothing is sent yet. The returned DownloaderContext performs
        the request when it is entered with a ``with`` statement.
        If data is given, it is passed on to urllib.request.Request
        as the request body.
        """
        req = urllib.request.Request(url, data=data)

        # Configure proxy
        # NOTE(review): Request.set_proxy() takes the proxy host as its
        # first argument - confirm callers pass a value it accepts
        if self.proxy:
            req.set_proxy(self.proxy, "http")

        # Set User-Agent
        if self.USER_AGENT:
            req.add_header("User-Agent", self.USER_AGENT)

        return DownloaderContext(self, req)


class DownloaderContext(object):
    """
    Context manager around a single request and its response.
    """

    # Media types which mark the payload as gzip-compressed
    GZIP_CONTENT_TYPES = ("application/x-gzip", "application/gzip")

    def __init__(self, downloader, request):
        self.downloader = downloader
        self.request = request

        # Save the response object
        self.response = None

    def __enter__(self):
        """
        Sends the request and returns this object.

        Any exception raised by urllib.request.urlopen() is passed
        on to the caller.
        """
        log.info("Retrieving %s...", self.request.full_url)

        # Send request
        self.response = urllib.request.urlopen(self.request)

        # Log the response headers
        log.debug("Response Headers:")
        for header in self.headers:
            log.debug("	%s: %s", header, self.get_header(header))

        return self

    def __exit__(self, type, value, traceback):
        """
        Closes the response so that the connection is released.

        Returns None, so an exception raised inside the ``with`` block
        is never suppressed.
        """
        # The response stays referenced so that the headers can still
        # be inspected after the block has been left
        if self.response is not None:
            self.response.close()

    def __iter__(self):
        """
        Makes the object iterable by going through each line
        """
        return iter(self.body)

    @property
    def headers(self):
        """
        The response headers, or None if no request has been sent yet.
        """
        if self.response is not None:
            return self.response.headers

    def get_header(self, name):
        """
        Returns the value of the named response header, or None if
        the header is not set or there is no response yet.
        """
        headers = self.headers

        if headers is not None:
            return headers.get(name)

    @property
    def body(self):
        """
        Returns a file-like object with the decoded content
        of the response.

        Every access to a gzipped response creates a new GzipFile on
        top of the same response, so the property should be read only
        once per response.
        """
        content_type = self.get_header("Content-Type")

        if content_type:
            # Only compare the media type. The header may carry
            # parameters ("application/gzip; charset=binary") and
            # media types are case-insensitive.
            media_type = content_type.split(";", 1)[0].strip().lower()

            # Decompress any gzipped response on the fly
            if media_type in self.GZIP_CONTENT_TYPES:
                return gzip.GzipFile(fileobj=self.response, mode="rb")

        # Return the response by default
        return self.response


if __name__ == "__main__":
    import sys

    # Enable debug logging
    logging.basicConfig(level=logging.DEBUG)

    d = Downloader()

    for url in sys.argv[1:]:
        print("Downloading %s..." % url)

        with d.request(url) as r:
            for line in r:
                print(line)