]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location/importer.py
58ec3686f74032e6947b6eb0ceb544e78f920554
[people/ms/libloc.git] / src / python / location / importer.py
1 ###############################################################################
2 # #
3 # libloc - A library to determine the location of someone on the Internet #
4 # #
5 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
6 # #
7 # This library is free software; you can redistribute it and/or #
8 # modify it under the terms of the GNU Lesser General Public #
9 # License as published by the Free Software Foundation; either #
10 # version 2.1 of the License, or (at your option) any later version. #
11 # #
12 # This library is distributed in the hope that it will be useful, #
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
15 # Lesser General Public License for more details. #
16 # #
17 ###############################################################################
18
19 import gzip
20 import logging
21 import tempfile
22 import urllib.request
23
# Initialise logging
# Module-level logger used by the Downloader class below for its
# info/debug output.
log = logging.getLogger("location.importer")
# A truthy value lets records also be passed on to the handlers of
# ancestor loggers.
log.propagate = 1
27
class Downloader(object):
    """
    Fetches the content behind a URL into a temporary file, optionally
    through a proxy, and transparently unwraps gzip-compressed payloads.
    """

    # MIME types that announce a gzip-compressed payload
    GZIP_CONTENT_TYPES = ("application/x-gzip", "application/gzip")

    # The first two bytes of every gzip stream
    GZIP_MAGIC = b"\x1f\x8b"

    def __init__(self):
        # Proxy handed to every request; None means connect directly
        self.proxy = None

    def set_proxy(self, url):
        """
        Sets a HTTP proxy that is used to perform all requests
        """
        log.info("Using proxy %s", url)
        self.proxy = url

    def retrieve(self, url, **kwargs):
        """
        This method will fetch the content at the given URL
        and will return a file-object to a temporary file.

        If the content was compressed, it will be decompressed on the fly.

        Any extra keyword arguments are passed on unchanged to
        urllib.request.Request (e.g. data=, headers=, method=).

        The returned object is opened for reading and positioned at the
        start of the (decompressed) payload. Errors raised by urllib
        while opening or reading the URL are propagated to the caller.
        """
        # Create a new request
        req = urllib.request.Request(url, **kwargs)

        # Configure proxy
        if self.proxy:
            req.set_proxy(self.proxy, "http")

        log.info("Retrieving %s...", req.full_url)

        # Open a temporary file to buffer the downloaded content
        t = tempfile.SpooledTemporaryFile(max_size=100 * 1024 * 1024)

        try:
            # Send request
            with urllib.request.urlopen(req) as res:
                # Log the response headers. items() is used so that a
                # header which occurs more than once is shown with each
                # of its values instead of the first one repeated.
                log.debug("Response Headers:")
                for header, value in res.headers.items():
                    log.debug(" %s: %s", header, value)

                # Fetch the content type, lower-cased and stripped of
                # any parameters such as "; charset=..."
                content_type = res.headers.get_content_type()

                # Write the payload to the temporary file
                for buf in iter(lambda: res.read(65536), b""):
                    t.write(buf)

            # Rewind the temporary file
            t.seek(0)

            # Decompress any gzipped response on the fly
            gzip_compressed = content_type in self.GZIP_CONTENT_TYPES

            # Check for the gzip magic in case web servers send a
            # different MIME type
            if not gzip_compressed:
                gzip_compressed = t.read(2) == self.GZIP_MAGIC

                # Reset again
                t.seek(0)

            if not gzip_compressed:
                # Return the temporary file handle
                return t

            log.debug("Gzip compression detected")

            # Wrap the temporary file so that reads yield decompressed data
            return gzip.GzipFile(fileobj=t, mode="rb")

        except BaseException:
            # Do not leak the temporary file if anything went wrong;
            # the original error is re-raised untouched.
            t.close()
            raise