]>
Commit | Line | Data |
---|---|---|
244a3b61 MT |
1 | #!/usr/bin/python3 |
2 | ############################################################################### | |
3 | # # | |
4 | # libloc - A library to determine the location of someone on the Internet # | |
5 | # # | |
a6f1e346 | 6 | # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> # |
244a3b61 MT |
7 | # # |
8 | # This library is free software; you can redistribute it and/or # | |
9 | # modify it under the terms of the GNU Lesser General Public # | |
10 | # License as published by the Free Software Foundation; either # | |
11 | # version 2.1 of the License, or (at your option) any later version. # | |
12 | # # | |
13 | # This library is distributed in the hope that it will be useful, # | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # | |
16 | # Lesser General Public License for more details. # | |
17 | # # | |
18 | ############################################################################### | |
19 | ||
5a9b4c77 | 20 | import logging |
244a3b61 MT |
21 | import lzma |
22 | import os | |
23 | import random | |
679e5ae2 | 24 | import stat |
244a3b61 MT |
25 | import tempfile |
26 | import time | |
27 | import urllib.error | |
28 | import urllib.parse | |
29 | import urllib.request | |
30 | ||
7865c7d4 | 31 | from . import __version__ |
a6f1e346 | 32 | from _location import Database, DATABASE_VERSION_LATEST |
244a3b61 | 33 | |
# Name of the compressed database file that is requested from a mirror
DATABASE_FILENAME = "location.db.xz"

# Base URLs used by the Downloader when the caller does not pass any mirrors
MIRRORS = ("https://location.ipfire.org/databases/",)

# Logger for this module; records are handed on to the parent loggers
log = logging.getLogger("location.downloader")
log.propagate = 1
5a9b4c77 | 42 | |
class Downloader(object):
    """
    Fetches the compressed location database from a list of mirrors.

    The mirrors are tried in random order. The payload is decompressed
    on the fly into a temporary file which is then opened and checked
    before it is handed back to the caller.
    """

    def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None):
        """
        version: database version; used as the first path component
                 of the download URL
        mirrors: iterable of base URLs; falls back to MIRRORS when
                 empty or None
        """
        self.version = version

        # Set mirrors or use defaults
        self.mirrors = list(mirrors or MIRRORS)

        # Randomize mirrors
        random.shuffle(self.mirrors)

        # Get proxies from environment
        self.proxies = self._get_proxies()

    def _get_proxies(self):
        """
        Returns a dict that maps "https"/"http" to the value of the
        https_proxy/http_proxy environment variable (only if set and
        not empty)
        """
        proxies = {}

        for protocol in ("https", "http"):
            proxy = os.environ.get("%s_proxy" % protocol, None)

            if proxy:
                proxies[protocol] = proxy

        return proxies

    def _make_request(self, url, baseurl=None, headers=None):
        """
        Builds a GET request for url (joined onto baseurl if given).

        headers is an optional dict of extra request headers. It is
        never modified; the User-Agent header is always set by this
        method and overrides any value passed in.
        """
        if baseurl:
            url = urllib.parse.urljoin(baseurl, url)

        req = urllib.request.Request(url, method="GET")

        # Work on a private copy so that neither the caller's dict nor
        # a default shared between calls is ever mutated
        headers = dict(headers or {})
        headers["User-Agent"] = "location/%s" % __version__

        # Set headers
        for name, value in headers.items():
            req.add_header(name, value)

        # Set proxies
        # NOTE(review): set_proxy() is called once per configured protocol
        # on the same request; when both http_proxy and https_proxy are
        # set the second call may override the first - confirm intent
        for protocol in self.proxies:
            req.set_proxy(self.proxies[protocol], protocol)

        return req

    def _send_request(self, req, **kwargs):
        """
        Sends req with urllib.request.urlopen() (kwargs are passed
        through) and returns the response object.

        Request and response headers are logged at debug level.
        urllib.error.HTTPError is logged and re-raised.
        """
        # Log request headers
        log.debug("HTTP %s Request to %s", req.method, req.host)
        log.debug("	URL: %s", req.full_url)
        log.debug("	Headers:")
        for k, v in req.header_items():
            log.debug("		%s: %s", k, v)

        try:
            res = urllib.request.urlopen(req, **kwargs)

        except urllib.error.HTTPError as e:
            # Log response headers
            log.debug("HTTP Response: %s", e.code)
            log.debug("	Headers:")
            for header in e.headers:
                log.debug("		%s: %s", header, e.headers[header])

            # Hand the error on to the caller with its original traceback
            raise

        # Log response headers
        log.debug("HTTP Response: %s", res.code)
        log.debug("	Headers:")
        for k, v in res.getheaders():
            log.debug("		%s: %s", k, v)

        return res

    def download(self, public_key, timestamp=None, tmpdir=None, **kwargs):
        """
        Downloads the database from the first mirror that delivers a
        file which passes _check_database().

        public_key: path of the key file used to verify the database
        timestamp:  optional datetime; sent as If-Modified-Since and
                    used to reject databases that are older
        tmpdir:     directory in which the temporary file is created
        kwargs:     accepted for compatibility; currently unused

        Returns the (already closed) temporary file object; its name
        attribute holds the path of the decompressed database. The
        file is made read-only for user, group and others.

        Raises FileNotFoundError if no mirror delivered a usable
        database. The temporary file is removed whenever this method
        does not return successfully.
        """
        url = "%s/%s" % (self.version, DATABASE_FILENAME)

        headers = {}
        if timestamp:
            headers["If-Modified-Since"] = timestamp.strftime(
                "%a, %d %b %Y %H:%M:%S GMT",
            )

        t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False)
        succeeded = False

        try:
            with t:
                # Try all mirrors
                for mirror in self.mirrors:
                    # Start every attempt with an empty file. truncate()
                    # cuts at the current position, so we have to rewind
                    # first to really drop what a previous mirror sent.
                    t.seek(0)
                    t.truncate()

                    # Prepare HTTP request
                    req = self._make_request(url, baseurl=mirror, headers=headers)

                    try:
                        with self._send_request(req) as res:
                            decompressor = lzma.LZMADecompressor()

                            # Read all data
                            while True:
                                buf = res.read(1024)
                                if not buf:
                                    break

                                # Decompress data
                                buf = decompressor.decompress(buf)
                                if buf:
                                    t.write(buf)

                        # Write all data to disk
                        t.flush()

                    # Catch decompression errors
                    except lzma.LZMAError as e:
                        log.warning("Could not decompress downloaded file: %s" % e)
                        continue

                    except urllib.error.HTTPError as e:
                        # The file on the server was too old
                        if e.code == 304:
                            log.warning("%s is serving an outdated database. Trying next mirror..." % mirror)

                        # Log any other HTTP errors
                        else:
                            log.warning("%s reported: %s" % (mirror, e))

                        continue

                    # Check if the downloaded database is recent
                    if not self._check_database(t, public_key, timestamp):
                        log.warning("Downloaded database is outdated. Trying next mirror...")
                        continue

                    # Make the file readable for everyone
                    os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH)

                    # Return temporary file
                    succeeded = True
                    return t

            raise FileNotFoundError(url)

        finally:
            # The file was created with delete=False, so nobody else will
            # clean it up if we are leaving without a usable database
            if not succeeded:
                try:
                    os.unlink(t.name)
                except FileNotFoundError:
                    # Already gone; nothing left to clean up
                    pass

    def _check_database(self, f, public_key, timestamp=None):
        """
            Checks the downloaded database if it can be opened,
            verified and if it is recent enough

            f:          file object of the downloaded database (only
                        its name attribute is used)
            public_key: path of the key file passed to db.verify()
            timestamp:  optional datetime; databases created before
                        it are rejected

            Returns True if the database is acceptable, else False.
        """
        log.debug("Opening downloaded database at %s" % f.name)

        db = Database(f.name)

        # Database is not recent
        if timestamp and db.created_at < timestamp.timestamp():
            return False

        log.info("Downloaded new database from %s" % (time.strftime(
            "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at),
        )))

        # Verify the database
        # (use a separate name so that the parameter f is not rebound)
        with open(public_key, "r") as key_file:
            if not db.verify(key_file):
                log.error("Could not verify database")
                return False

        return True