]> git.ipfire.org Git - location/libloc.git/blame - src/python/downloader.py
Move location-downloader functionality into location-query
[location/libloc.git] / src / python / downloader.py
CommitLineData
244a3b61
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
a6f1e346 6# Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
244a3b61
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
5a9b4c77 20import logging
244a3b61
MT
21import lzma
22import os
23import random
679e5ae2 24import stat
244a3b61
MT
25import tempfile
26import time
27import urllib.error
28import urllib.parse
29import urllib.request
30
a6f1e346 31from _location import Database, DATABASE_VERSION_LATEST
244a3b61 32
# File name of the xz-compressed database as requested from a mirror
DATABASE_FILENAME = "location.db.xz"

# Default mirror list, used when the Downloader is not given its own mirrors
MIRRORS = ("https://location.ipfire.org/databases/",)

# Initialise logging
log = logging.getLogger("location.downloader")
log.propagate = True
5a9b4c77 41
class Downloader(object):
    """
    Downloads the location database from a list of mirrors.

    The mirrors are tried in random order. The first one that delivers a
    database which decompresses, is recent enough and verifies against the
    given public key wins.
    """

    def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None):
        """
        version: database version, used as the first path component of
            the download URL
        mirrors: iterable of base URLs; falls back to MIRRORS when empty
            or None
        """
        self.version = version

        # Set mirrors or use defaults (always copy, because we shuffle in place)
        self.mirrors = list(mirrors or MIRRORS)

        # Randomize mirrors
        random.shuffle(self.mirrors)

        # Get proxies from environment
        self.proxies = self._get_proxies()

    def _get_proxies(self):
        """
        Returns a dict mapping "https"/"http" to the value of the
        https_proxy/http_proxy environment variables (lower-case names
        only). Unset or empty variables are left out.
        """
        proxies = {}

        for protocol in ("https", "http"):
            proxy = os.environ.get("%s_proxy" % protocol, None)

            if proxy:
                proxies[protocol] = proxy

        return proxies

    def _make_request(self, url, baseurl=None, headers=None):
        """
        Builds a GET request for url (joined onto baseurl if given).

        headers: optional dict of extra request headers. It is copied,
            never modified. The User-Agent header is always set by us
            and overrides any value passed in.

        Returns an urllib.request.Request object.
        """
        if baseurl:
            url = urllib.parse.urljoin(baseurl, url)

        req = urllib.request.Request(url, method="GET")

        # Work on a private copy so that neither the caller's dict nor a
        # shared default is changed when we add our own headers
        headers = dict(headers or {})
        headers["User-Agent"] = "location-downloader/@VERSION@"

        # Set headers
        for header, value in headers.items():
            req.add_header(header, value)

        # Set proxies
        # NOTE(review): every configured proxy is applied to the request,
        # regardless of the scheme of url - confirm that this is intended
        for protocol, proxy in self.proxies.items():
            req.set_proxy(proxy, protocol)

        return req

    def _send_request(self, req, **kwargs):
        """
        Sends req using urllib.request.urlopen() and returns the response.

        Request and response headers are logged at debug level. Any
        keyword arguments are passed on to urlopen().

        Raises urllib.error.HTTPError (after logging the response headers)
        if urlopen() raises it.
        """
        # Log request headers
        log.debug("HTTP %s Request to %s", req.method, req.host)
        log.debug(" URL: %s", req.full_url)
        log.debug(" Headers:")
        for k, v in req.header_items():
            log.debug(" %s: %s", k, v)

        try:
            res = urllib.request.urlopen(req, **kwargs)

        except urllib.error.HTTPError as e:
            # Log response headers
            log.debug("HTTP Response: %s", e.code)
            log.debug(" Headers:")
            for header in e.headers:
                log.debug(" %s: %s", header, e.headers[header])

            # Hand the error on to the caller with its original traceback
            raise

        # Log response headers
        log.debug("HTTP Response: %s", res.code)
        log.debug(" Headers:")
        for k, v in res.getheaders():
            log.debug(" %s: %s", k, v)

        return res

    def download(self, public_key, timestamp=None, tmpdir=None, **kwargs):
        """
        Downloads the database into a temporary file and returns it.

        public_key: path of the public key file used to verify the database
        timestamp: optional datetime; sent as If-Modified-Since and used
            to reject databases that are older
            (NOTE(review): formatted as GMT without conversion - presumably
            callers pass UTC, confirm)
        tmpdir: directory in which the temporary file is created
        kwargs: accepted but currently not used

        Returns the (already closed) temporary file object; the database
        is at its .name and has been made read-only for everyone.

        Raises FileNotFoundError if no mirror delivered a usable database.
        The temporary file is removed again whenever this method fails.
        """
        url = "%s/%s" % (self.version, DATABASE_FILENAME)

        headers = {}
        if timestamp:
            headers["If-Modified-Since"] = timestamp.strftime(
                "%a, %d %b %Y %H:%M:%S GMT",
            )

        t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False)
        try:
            with t:
                # Try all mirrors
                for mirror in self.mirrors:
                    # Throw away whatever a previous mirror has left behind.
                    # truncate() alone cuts at the current position, which is
                    # the end of the written data, so we have to rewind first.
                    t.seek(0)
                    t.truncate()

                    # Prepare HTTP request
                    req = self._make_request(url, baseurl=mirror, headers=headers)

                    try:
                        with self._send_request(req) as res:
                            decompressor = lzma.LZMADecompressor()

                            # Read all data
                            while True:
                                buf = res.read(1024)
                                if not buf:
                                    break

                                # Decompress data
                                buf = decompressor.decompress(buf)
                                if buf:
                                    t.write(buf)

                        # Write all data to disk
                        t.flush()

                    # Catch decompression errors
                    except lzma.LZMAError as e:
                        log.warning("Could not decompress downloaded file: %s", e)
                        continue

                    except urllib.error.HTTPError as e:
                        # The file on the server was too old
                        if e.code == 304:
                            log.warning("%s is serving an outdated database. Trying next mirror...", mirror)

                        # Log any other HTTP errors
                        else:
                            log.warning("%s reported: %s", mirror, e)

                        continue

                    # Check if the downloaded database is recent
                    if not self._check_database(t, public_key, timestamp):
                        log.warning("Downloaded database is outdated. Trying next mirror...")
                        continue

                    # Make the file readable for everyone
                    os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH)

                    # Return temporary file
                    return t

            # None of the mirrors had anything usable
            raise FileNotFoundError(url)

        except BaseException:
            # The file was created with delete=False, so nobody else is
            # going to clean it up if we do not return it
            try:
                os.unlink(t.name)
            except OSError:
                # Best effort only - do not hide the original error
                pass

            raise

    def _check_database(self, f, public_key, timestamp=None):
        """
        Checks the downloaded database if it can be opened,
        verified and if it is recent enough

        f: file object of the downloaded database (only f.name is used)
        public_key: path of the public key file passed to Database.verify()
        timestamp: optional datetime; databases created before it are
            rejected

        Returns True if the database passed all checks, otherwise False.
        """
        log.debug("Opening downloaded database at %s", f.name)

        db = Database(f.name)

        # Database is not recent
        if timestamp and db.created_at < timestamp.timestamp():
            return False

        log.info("Downloaded new database from %s", time.strftime(
            "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at),
        ))

        # Verify the database
        with open(public_key, "r") as key:
            if not db.verify(key):
                log.error("Could not verify database")
                return False

        return True