]> git.ipfire.org Git - location/libloc.git/blame - src/python/downloader.py
location(8): Remove any temporary files after unsuccessful downloads
[location/libloc.git] / src / python / downloader.py
CommitLineData
244a3b61
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
a6f1e346 6# Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
244a3b61
MT
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
5a9b4c77 20import logging
244a3b61
MT
21import lzma
22import os
23import random
679e5ae2 24import stat
244a3b61
MT
25import tempfile
26import time
27import urllib.error
28import urllib.parse
29import urllib.request
30
7865c7d4 31from . import __version__
a6f1e346 32from _location import Database, DATABASE_VERSION_LATEST
244a3b61 33
# Name of the (xz-compressed) database file that is requested from a mirror
DATABASE_FILENAME = "location.db.xz"

# Mirrors that are used when the caller does not pass its own list
MIRRORS = ("https://location.ipfire.org/databases/",)

# Initialise logging: messages of this module are handed on to the
# handlers of the parent loggers
log = logging.getLogger("location.downloader")
log.propagate = 1
class Downloader(object):
    """
    Fetches the compressed location database from a list of mirrors,
    decompresses it into a temporary file and checks the result.
    """

    def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None):
        """
        version -- database version; used as the first path component
                   of the requested URL
        mirrors -- iterable of base URLs; MIRRORS is used when this is
                   empty or None
        """
        self.version = version

        # Set mirrors or use defaults
        self.mirrors = list(mirrors or MIRRORS)

        # Randomize mirrors
        random.shuffle(self.mirrors)

        # Get proxies from environment
        self.proxies = self._get_proxies()

    def _get_proxies(self):
        """
        Returns a dict mapping "https"/"http" to the value of the
        https_proxy/http_proxy environment variable. Unset or empty
        variables are left out.
        """
        proxies = {}

        for protocol in ("https", "http"):
            proxy = os.environ.get("%s_proxy" % protocol, None)

            if proxy:
                proxies[protocol] = proxy

        return proxies

    def _make_request(self, url, baseurl=None, headers=None):
        """
        Builds a GET request for url (joined onto baseurl if given).

        headers is an optional mapping of extra request headers. It is
        copied and never modified. The User-Agent header is always set
        by this method and overrides any value passed in.
        """
        if baseurl:
            url = urllib.parse.urljoin(baseurl, url)

        req = urllib.request.Request(url, method="GET")

        # Work on a private copy so that neither a shared default nor
        # the caller's dictionary is changed by adding our own headers
        headers = dict(headers or {})
        headers["User-Agent"] = "location/%s" % __version__

        # Set headers
        for header, value in headers.items():
            req.add_header(header, value)

        # Set proxies
        for protocol, proxy in self.proxies.items():
            req.set_proxy(proxy, protocol)

        return req

    def _send_request(self, req, **kwargs):
        """
        Sends req with urllib.request.urlopen() and returns the
        response object. Any keyword arguments are passed on to
        urlopen().

        Request and response headers are logged at debug level.
        urllib.error.HTTPError is logged and re-raised unchanged.
        """
        # Log request headers
        log.debug("HTTP %s Request to %s", req.method, req.host)
        log.debug("	URL: %s", req.full_url)
        log.debug("	Headers:")
        for k, v in req.header_items():
            log.debug("		%s: %s", k, v)

        try:
            res = urllib.request.urlopen(req, **kwargs)

        except urllib.error.HTTPError as e:
            # Log response headers
            log.debug("HTTP Response: %s", e.code)
            log.debug("	Headers:")
            for k, v in e.headers.items():
                log.debug("		%s: %s", k, v)

            # Pass the error on to the caller with its original traceback
            raise

        # Log response headers
        # (res.headers replaces the deprecated res.getheaders())
        log.debug("HTTP Response: %s", res.code)
        log.debug("	Headers:")
        for k, v in res.headers.items():
            log.debug("		%s: %s", k, v)

        return res

    @staticmethod
    def _discard(f):
        """
        Throws away everything that has been written to f so far.

        truncate() without an argument cuts the file at the *current*
        position, so we have to rewind first or nothing is removed.
        """
        f.seek(0)
        f.truncate()

    def download(self, public_key, timestamp=None, tmpdir=None, **kwargs):
        """
        Tries all mirrors in turn until one delivers a database that
        passes _check_database().

        public_key -- path of the key file the database is verified with
        timestamp  -- optional datetime; sent as If-Modified-Since and
                      used to reject databases that are older
        tmpdir     -- directory in which the temporary file is created
        kwargs     -- accepted for compatibility, currently unused

        Returns the (already closed) temporary file object; its name
        attribute is the path of the decompressed database. The file
        is not deleted automatically.

        Raises FileNotFoundError if no mirror delivered a usable
        database. Any other exception is passed on to the caller. In
        both cases the temporary file is removed before raising.
        """
        url = "%s/%s" % (self.version, DATABASE_FILENAME)

        headers = {}
        if timestamp:
            headers["If-Modified-Since"] = timestamp.strftime(
                "%a, %d %b %Y %H:%M:%S GMT",
            )

        t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False)

        try:
            with t:
                # Try all mirrors
                for mirror in self.mirrors:
                    # Prepare HTTP request
                    req = self._make_request(url, baseurl=mirror, headers=headers)

                    try:
                        with self._send_request(req) as res:
                            decompressor = lzma.LZMADecompressor()

                            # Read all data
                            while True:
                                buf = res.read(1024)
                                if not buf:
                                    break

                                # Decompress data
                                buf = decompressor.decompress(buf)
                                if buf:
                                    t.write(buf)

                        # Write all data to disk
                        t.flush()

                    # Catch decompression errors
                    except lzma.LZMAError as e:
                        log.warning("Could not decompress downloaded file: %s", e)

                        # Do not leave a partial database in front of
                        # whatever the next mirror sends
                        self._discard(t)
                        continue

                    except urllib.error.HTTPError as e:
                        # The file on the server was too old
                        if e.code == 304:
                            log.warning("%s is serving an outdated database. Trying next mirror...", mirror)

                        # Log any other HTTP errors
                        else:
                            log.warning("%s reported: %s", mirror, e)

                        # Throw away any downloaded content and try again
                        self._discard(t)

                    else:
                        # Check if the downloaded database is recent
                        if not self._check_database(t, public_key, timestamp):
                            log.warning("Downloaded database is outdated. Trying next mirror...")

                            # Throw away the data and try again
                            self._discard(t)
                            continue

                        # Make the file readable for everyone
                        os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH)

                        # Return temporary file
                        return t

        except BaseException:
            # The file was created with delete=False, so nobody else
            # removes it when we bail out with an error
            try:
                os.unlink(t.name)
            except OSError:
                pass

            raise

        # Delete the temporary file after unsuccessful downloads
        os.unlink(t.name)

        raise FileNotFoundError(url)

    def _check_database(self, f, public_key, timestamp=None):
        """
        Checks the downloaded database if it can be opened,
        verified and if it is recent enough

        f is the temporary file holding the database (only its name
        is used), public_key the path of the key file and timestamp
        an optional datetime the database must not be older than.

        Returns True if the database is usable and False otherwise.
        """
        log.debug("Opening downloaded database at %s", f.name)

        db = Database(f.name)

        # Database is not recent
        if timestamp and db.created_at < timestamp.timestamp():
            return False

        log.info("Downloaded new database from %s", time.strftime(
            "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at),
        ))

        # Verify the database
        with open(public_key, "r") as key:
            if not db.verify(key):
                log.error("Could not verify database")
                return False

        return True