]>
Commit | Line | Data |
---|---|---|
244a3b61 MT |
1 | #!/usr/bin/python3 |
2 | ############################################################################### | |
3 | # # | |
4 | # libloc - A library to determine the location of someone on the Internet # | |
5 | # # | |
a6f1e346 | 6 | # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> # |
244a3b61 MT |
7 | # # |
8 | # This library is free software; you can redistribute it and/or # | |
9 | # modify it under the terms of the GNU Lesser General Public # | |
10 | # License as published by the Free Software Foundation; either # | |
11 | # version 2.1 of the License, or (at your option) any later version. # | |
12 | # # | |
13 | # This library is distributed in the hope that it will be useful, # | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # | |
16 | # Lesser General Public License for more details. # | |
17 | # # | |
18 | ############################################################################### | |
19 | ||
5a9b4c77 | 20 | import logging |
244a3b61 MT |
21 | import lzma |
22 | import os | |
23 | import random | |
679e5ae2 | 24 | import stat |
244a3b61 MT |
25 | import tempfile |
26 | import time | |
27 | import urllib.error | |
28 | import urllib.parse | |
29 | import urllib.request | |
30 | ||
a6f1e346 | 31 | from _location import Database, DATABASE_VERSION_LATEST |
244a3b61 | 32 | |
# Name of the compressed database file on every mirror
DATABASE_FILENAME = "location.db.xz"

# Default download mirrors, tried in random order
MIRRORS = ("https://location.ipfire.org/databases/",)

# Module-level logger; messages propagate up to the "location" logger
log = logging.getLogger("location.downloader")
log.propagate = True
5a9b4c77 | 41 | |
class Downloader(object):
	"""
	Downloads the location database from one of the configured mirrors,
	transparently decompressing the XZ stream and verifying the result
	against a public key before handing it to the caller.
	"""
	def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None):
		# Database format version to fetch
		self.version = version

		# Set mirrors or use defaults
		self.mirrors = list(mirrors or MIRRORS)

		# Randomize mirrors to spread load across them
		random.shuffle(self.mirrors)

		# Get proxies from environment
		self.proxies = self._get_proxies()

	def _get_proxies(self):
		"""
		Reads proxy configuration from the environment
		(https_proxy / http_proxy) and returns it as a dictionary
		mapping protocol to proxy URL.
		"""
		proxies = {}

		for protocol in ("https", "http"):
			proxy = os.environ.get("%s_proxy" % protocol, None)

			if proxy:
				proxies[protocol] = proxy

		return proxies

	def _make_request(self, url, baseurl=None, headers=None):
		"""
		Builds a GET request for url (resolved against baseurl if
		given), applying the given extra headers and any configured
		proxies.
		"""
		if baseurl:
			url = urllib.parse.urljoin(baseurl, url)

		req = urllib.request.Request(url, method="GET")

		# Copy the headers so we never mutate the caller's dictionary.
		# (The previous mutable default argument `headers={}` was
		# updated in place, leaking state between calls.)
		headers = dict(headers or {})
		headers.update({
			"User-Agent" : "location-downloader/@VERSION@",
		})

		# Set headers
		for header in headers:
			req.add_header(header, headers[header])

		# Set proxies
		for protocol in self.proxies:
			req.set_proxy(self.proxies[protocol], protocol)

		return req

	def _send_request(self, req, **kwargs):
		"""
		Sends the prepared request and returns the open response.

		Request and response headers are logged at debug level.
		Raises urllib.error.HTTPError for any HTTP error status
		(after logging the response headers).
		"""
		# Log request headers
		log.debug("HTTP %s Request to %s" % (req.method, req.host))
		log.debug(" URL: %s" % req.full_url)
		log.debug(" Headers:")
		for k, v in req.header_items():
			log.debug(" %s: %s" % (k, v))

		try:
			res = urllib.request.urlopen(req, **kwargs)

		except urllib.error.HTTPError as e:
			# Log response headers
			log.debug("HTTP Response: %s" % e.code)
			log.debug(" Headers:")
			for header in e.headers:
				log.debug(" %s: %s" % (header, e.headers[header]))

			# Raise all other errors
			raise e

		# Log response headers
		log.debug("HTTP Response: %s" % res.code)
		log.debug(" Headers:")
		for k, v in res.getheaders():
			log.debug(" %s: %s" % (k, v))

		return res

	def _discard(self, t):
		"""
		Throws away everything written to the temporary file so far.

		truncate() alone only cuts the file at the *current* position
		(the end, after writing), so we must rewind first — otherwise
		stale data from a failed mirror would remain and the next
		mirror's data would be appended to it.
		"""
		t.seek(0)
		t.truncate()

	def download(self, public_key, timestamp=None, tmpdir=None, **kwargs):
		"""
		Downloads and decompresses the database, trying each mirror
		in turn, and returns an open NamedTemporaryFile containing
		the raw database.

		public_key: path to the key used to verify the database.
		timestamp:  optional datetime; mirrors are asked via
		            If-Modified-Since and older databases rejected.
		tmpdir:     directory for the temporary file.

		Raises FileNotFoundError if no mirror served a usable database.
		"""
		url = "%s/%s" % (self.version, DATABASE_FILENAME)

		headers = {}
		if timestamp:
			# Ask the mirror to respond 304 if it has nothing newer
			headers["If-Modified-Since"] = timestamp.strftime(
				"%a, %d %b %Y %H:%M:%S GMT",
			)

		t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False)
		with t:
			# Try all mirrors
			for mirror in self.mirrors:
				# Prepare HTTP request
				req = self._make_request(url, baseurl=mirror, headers=headers)

				try:
					with self._send_request(req) as res:
						decompressor = lzma.LZMADecompressor()

						# Read all data
						while True:
							buf = res.read(1024)
							if not buf:
								break

							# Decompress data
							buf = decompressor.decompress(buf)
							if buf:
								t.write(buf)

					# Write all data to disk
					t.flush()

				# Catch decompression errors
				except lzma.LZMAError as e:
					log.warning("Could not decompress downloaded file: %s" % e)

					# Drop any partially written data before retrying
					# (previously it was left in the file)
					self._discard(t)
					continue

				except urllib.error.HTTPError as e:
					# The mirror has nothing newer than our timestamp
					if e.code == 304:
						log.warning("%s is serving an outdated database. Trying next mirror..." % mirror)

					# Log any other HTTP errors
					else:
						log.warning("%s reported: %s" % (mirror, e))

					# Throw away any downloaded content and try again
					self._discard(t)

				else:
					# Check if the downloaded database is recent
					if not self._check_database(t, public_key, timestamp):
						log.warning("Downloaded database is outdated. Trying next mirror...")

						# Throw away the data and try again
						self._discard(t)
						continue

					# Make the file readable for everyone
					os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH)

					# Return temporary file
					return t

		# No mirror worked - do not leak the temporary file
		# (it was created with delete=False)
		os.unlink(t.name)

		raise FileNotFoundError(url)

	def _check_database(self, f, public_key, timestamp=None):
		"""
		Checks the downloaded database if it can be opened,
		verified and if it is recent enough
		"""
		log.debug("Opening downloaded database at %s" % f.name)

		db = Database(f.name)

		# Database is not recent
		if timestamp and db.created_at < timestamp.timestamp():
			return False

		log.info("Downloaded new database from %s" % (time.strftime(
			"%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at),
		)))

		# Verify the database (use a distinct name so the database
		# file handle `f` is not shadowed by the key file)
		with open(public_key, "r") as keyfile:
			if not db.verify(keyfile):
				log.error("Could not verify database")
				return False

		return True