git.ipfire.org Git - location/libloc.git/blob - src/python/downloader.py
Move location-downloader functionality into location-query
[location/libloc.git] / src / python / downloader.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import logging
21 import lzma
22 import os
23 import random
24 import stat
25 import tempfile
26 import time
27 import urllib.error
28 import urllib.parse
29 import urllib.request
30
31 from _location import Database, DATABASE_VERSION_LATEST
32
# Name of the database file as published on the mirrors
# (distributed as an XZ-compressed blob)
DATABASE_FILENAME = "location.db.xz"

# Default mirrors to download the database from; the Downloader
# shuffles this list so load is spread across mirrors
MIRRORS = (
	"https://location.ipfire.org/databases/",
)

# Initialise logging
log = logging.getLogger("location.downloader")
log.propagate = True
class Downloader(object):
	"""
	Downloads the XZ-compressed location database from one of the
	configured mirrors, decompresses it into a temporary file and
	checks that it can be opened, is recent enough and verifies
	against the given public key.
	"""

	def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None):
		"""
		Creates a new downloader.

		version: database format version to request (path component of the URL)
		mirrors: iterable of mirror base URLs; defaults to MIRRORS
		"""
		self.version = version

		# Set mirrors or use defaults
		self.mirrors = list(mirrors or MIRRORS)

		# Randomize mirrors so load is spread across them
		random.shuffle(self.mirrors)

		# Get proxies from environment
		self.proxies = self._get_proxies()

	def _get_proxies(self):
		"""
		Reads proxy configuration from the environment
		(https_proxy / http_proxy) and returns it as a dictionary
		mapping protocol -> proxy URL. Empty if nothing is set.
		"""
		proxies = {}

		for protocol in ("https", "http"):
			proxy = os.environ.get("%s_proxy" % protocol, None)

			if proxy:
				proxies[protocol] = proxy

		return proxies

	def _make_request(self, url, baseurl=None, headers=None):
		"""
		Builds a GET request for the given URL (joined onto baseurl
		if one is given), carrying the given extra headers, our
		User-Agent, and any configured proxies.

		NOTE: the original implementation used a mutable default
		argument (headers={}) and mutated the dictionary that the
		caller passed in; we copy it instead.
		"""
		if baseurl:
			url = urllib.parse.urljoin(baseurl, url)

		req = urllib.request.Request(url, method="GET")

		# Copy the caller's headers so we never mutate their dictionary,
		# then add our own
		headers = dict(headers or {})
		headers.update({
			"User-Agent" : "location-downloader/@VERSION@",
		})

		# Set headers
		for header, value in headers.items():
			req.add_header(header, value)

		# Set proxies
		for protocol, proxy in self.proxies.items():
			req.set_proxy(proxy, protocol)

		return req

	def _send_request(self, req, **kwargs):
		"""
		Sends the given request and returns the open response object.

		Request and response headers are logged at debug level;
		HTTP errors are logged and re-raised.
		"""
		# Log request headers
		log.debug("HTTP %s Request to %s", req.method, req.host)
		log.debug("	URL: %s", req.full_url)
		log.debug("	Headers:")
		for k, v in req.header_items():
			log.debug("		%s: %s", k, v)

		try:
			res = urllib.request.urlopen(req, **kwargs)

		except urllib.error.HTTPError as e:
			# Log response headers
			log.debug("HTTP Response: %s", e.code)
			log.debug("	Headers:")
			for header in e.headers:
				log.debug("		%s: %s", header, e.headers[header])

			# Re-raise with the original traceback
			raise

		# Log response headers
		log.debug("HTTP Response: %s", res.code)
		log.debug("	Headers:")
		for k, v in res.getheaders():
			log.debug("		%s: %s", k, v)

		return res

	@staticmethod
	def _reset_file(f):
		"""
		Discards everything written to f so far.

		truncate() cuts the file at the *current* position, which is
		the end of the file after writing — so we must rewind first.
		(The original code called truncate() without seeking, which
		kept partially downloaded data from a failed mirror and let
		the next mirror's data be appended to it.)
		"""
		f.seek(0)
		f.truncate()

	def download(self, public_key, timestamp=None, tmpdir=None, **kwargs):
		"""
		Downloads and decompresses the database, trying each mirror
		in turn, and returns the (closed) named temporary file that
		holds it.

		public_key: path to the public key used to verify the database
		timestamp:  if given, only accept databases newer than this
		            (sent as If-Modified-Since and re-checked locally)
		tmpdir:     directory for the temporary file

		Raises FileNotFoundError if no mirror served a usable database.
		"""
		url = "%s/%s" % (self.version, DATABASE_FILENAME)

		headers = {}
		if timestamp:
			# Ask the server to only respond if it has something newer
			headers["If-Modified-Since"] = timestamp.strftime(
				"%a, %d %b %Y %H:%M:%S GMT",
			)

		t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False)
		with t:
			# Try all mirrors
			for mirror in self.mirrors:
				# Prepare HTTP request
				req = self._make_request(url, baseurl=mirror, headers=headers)

				try:
					with self._send_request(req) as res:
						decompressor = lzma.LZMADecompressor()

						# Read the compressed stream in chunks and
						# decompress it on the fly
						while True:
							buf = res.read(1024)
							if not buf:
								break

							# Decompress data
							buf = decompressor.decompress(buf)
							if buf:
								t.write(buf)

					# Write all data to disk
					t.flush()

				# Catch decompression errors
				except lzma.LZMAError as e:
					log.warning("Could not decompress downloaded file: %s" % e)

					# Throw away any partially written data and try again
					self._reset_file(t)
					continue

				except urllib.error.HTTPError as e:
					# The file on the server was too old
					if e.code == 304:
						log.warning("%s is serving an outdated database. Trying next mirror..." % mirror)

					# Log any other HTTP errors
					else:
						log.warning("%s reported: %s" % (mirror, e))

					# Throw away any downloaded content and try again
					self._reset_file(t)

				else:
					# Check if the downloaded database is recent
					if not self._check_database(t, public_key, timestamp):
						log.warning("Downloaded database is outdated. Trying next mirror...")

						# Throw away the data and try again
						self._reset_file(t)
						continue

					# Make the file readable for everyone
					os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH)

					# Return temporary file
					return t

		raise FileNotFoundError(url)

	def _check_database(self, f, public_key, timestamp=None):
		"""
		Checks the downloaded database if it can be opened,
		verified and if it is recent enough
		"""
		log.debug("Opening downloaded database at %s" % f.name)

		db = Database(f.name)

		# Database is not recent
		if timestamp and db.created_at < timestamp.timestamp():
			return False

		log.info("Downloaded new database from %s" % (time.strftime(
			"%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at),
		)))

		# Verify the database — use a distinct name for the key file
		# handle so it does not shadow the database file object f
		with open(public_key, "r") as pkey:
			if not db.verify(pkey):
				log.error("Could not verify database")
				return False

		return True
207 return True