]> git.ipfire.org Git - location/location-database.git/blob - tools/base.py
tools: Add a caching layer to WHOIS
[location/location-database.git] / tools / base.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
7 # #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
12 # #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
17 # #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
20 # #
21 ###############################################################################
22
23 import datetime
24 import ipaddress
25 import logging
26 import math
27 import os.path
28 import re
29 import sqlite3
30 import struct
31 import subprocess
32
33 from . import downloader
34 from . import util
35
36 FILENAME_ASNUMS = "asnums.txt"
37 FILENMAE_NETWORKS = "networks.txt"
38
39 FMT = "%-16s%s\n"
40
41 RE_AS = re.compile(r"^(AS|as)(\d+)")
42
43 INVALID_ADDRESSES = (
44 "0.0.0.0",
45 "::/0",
46 "0::/0",
47 )
48
49 class RIR(object):
50 name = None
51 database_urls = []
52
53 whois_server = None
54
55 def __init__(self):
56 self.db = self._open_database(".cache.db")
57
58 def __str__(self):
59 if self.name:
60 return "%s - %s" % (self.__class__.__name__, self.name)
61
62 return self.__class__.__name__
63
64 def _open_database(self, path=None):
65 db = sqlite3.connect(path or ":memory:")
66 db.set_trace_callback(logging.debug)
67
68 # Create tables
69 db.executescript("""
70 CREATE TABLE IF NOT EXISTS whois_query_cache(query TEXT, response TEXT,
71 fetched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);
72 CREATE UNIQUE INDEX IF NOT EXISTS whois_query_cache_query
73 ON whois_query_cache(query);
74 """)
75
76 return db
77
78 @property
79 def parser(self):
80 return RIRParser
81
82 def make_path(self, path):
83 return os.path.join(self.__class__.__name__, path)
84
85 @property
86 def filename_asnums(self):
87 return self.make_path(FILENAME_ASNUMS)
88
89 @property
90 def filename_networks(self):
91 return self.make_path(FILENMAE_NETWORKS)
92
93 def update(self, directory):
94 p = self.parser(self)
95
96 # Download all data and store it in memory
97 p.fetch_data()
98
99 # Write the database to disk
100 p.export_database(directory)
101
102 def _whois(self, query):
103 command = [
104 "whois", query,
105 ]
106
107 # Query a specific WHOIS server
108 if self.whois_server:
109 command += ["-h", self.whois_server]
110
111 logging.info("Running command: %s" % " ".join(command))
112
113 try:
114 output = subprocess.check_output(command, stderr=subprocess.STDOUT)
115 except subprocess.CalledProcessError as e:
116 logging.error("Could not run WHOIS query %s: %s" % (query, e.output))
117 raise
118
119 return output.decode(errors="ignore")
120
121 def whois(self, query):
122 # Try fetching a response from the cache
123 with self.db as c:
124 res = c.execute("SELECT response, fetched_at FROM whois_query_cache \
125 WHERE query = ?", (query,))
126
127 # Return any results
128 for row in res:
129 response, fetched_at = row
130
131 logging.debug("Fetched response for %s from cache (%s)"
132 % (query, fetched_at))
133
134 return response
135
136 # If we could not find anything, we will have to contact the whois server
137 response = self._whois(query)
138
139 # Store the response in the database
140 with self.db as c:
141 c.execute("INSERT INTO whois_query_cache(query, response) \
142 VALUES(?, ?)", (query, response))
143
144 # Commit changes to disk
145 self.db.commit()
146
147 return response
148
149 def get_name_for_asn(self, asn):
150 result = self.whois("AS%s" % asn)
151
152 for line in result.splitlines():
153 key, delim, value = line.partition(":")
154 if not value:
155 continue
156
157 if key in ("org-name", "OrgName"):
158 return value.strip()
159
160
161 class RIRParser(object):
162 def __init__(self, rir):
163 self.rir = rir
164
165 # Create a downloader to fetch data
166 self.downloader = downloader.Downloader()
167
168 # Create a database to hold temporary data
169 self.db = self._make_database(":memory:")
170
171 # Start time
172 self.start_time = datetime.datetime.utcnow()
173
174 def _make_database(self, filename):
175 db = sqlite3.connect(filename)
176 db.set_trace_callback(logging.debug)
177
178 # Create database layout
179 with db as cursor:
180 cursor.executescript("""
181 CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER, date DATE);
182 CREATE INDEX autnums_org_id ON autnums(org_id);
183
184 CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER,
185 family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER, date DATE);
186 CREATE INDEX inetnums_sort ON inetnums(address_start);
187 """)
188
189 return db
190
191 def export_database(self, directory):
192 # Write all ASes
193 with open(self.rir.filename_asnums, "w") as f:
194 self._export_asnums(f)
195
196 # Write all networks
197 with open(self.rir.filename_networks, "w") as f:
198 self._export_networks(f)
199
200 def _write_header(self, f):
201 f.write("#\n")
202 f.write("# %s\n" % self.rir)
203 f.write("# Generated at %s\n" % self.start_time)
204 f.write("#\n\n")
205
206 def fetch_data(self):
207 if not self.rir.database_urls:
208 raise NotImplementedError("Database URLs not set")
209
210 # Parse entire database in one go
211 for url in self.rir.database_urls:
212 self.parse_url(url)
213
214 self.db.commit()
215
216 def parse_url(self, url):
217 with self.downloader.request(url) as r:
218 for line in r:
219 self.parse_line(line)
220
221 def parse_line(self, line):
222 # Skip version line
223 if line.startswith("2"):
224 return
225
226 # Skip comments
227 if line.startswith("#"):
228 return
229
230 try:
231 registry, country_code, type, line = line.split("|", 3)
232 except:
233 logging.warning("Could not parse line: %s" % line)
234 return
235
236 # Skip any lines that are for stats only
237 if country_code == "*":
238 return
239
240 if type in ("ipv6", "ipv4"):
241 return self._parse_ip_line(country_code, type, line)
242
243 elif type == "asn":
244 return self._parse_asn_line(country_code, line)
245
246 else:
247 logging.warning("Unknown line type: %s" % type)
248 return
249
250 def _parse_ip_line(self, country_code, type, line):
251 try:
252 address, prefix, date, status, org_id = line.split("|")
253 except ValueError:
254 org_id = None
255
256 # Try parsing the line without org_id
257 try:
258 address, prefix, date, status = line.split("|")
259 except ValueError:
260 logging.warning("Unhandled line format: %s" % line)
261 return
262
263 # Skip anything that isn't properly assigned
264 if not status in ("assigned", "allocated"):
265 return
266
267 # Cast prefix into an integer
268 try:
269 prefix = int(prefix)
270 except:
271 logging.warning("Invalid prefix: %s" % prefix)
272
273 # Fix prefix length for IPv4
274 if type == "ipv4":
275 prefix = 32 - int(math.log(prefix, 2))
276
277 # Parse date
278 try:
279 date = datetime.datetime.strptime(date, "%Y%m%d")
280 except ValueError:
281 logging.warning("Could not parse date: %s" % date)
282 return
283
284 # Try to parse the address
285 try:
286 network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
287 except ValueError:
288 logging.warning("Invalid IP address: %s" % address)
289 return
290
291 with self.db as c:
292 # Get the first and last address of this network
293 address_start, address_end = int(network.network_address), int(network.broadcast_address)
294
295 args = (
296 "%s" % network,
297 country_code,
298 org_id,
299 network.version,
300 struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
301 struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
302 network.prefixlen,
303 date,
304 )
305
306 c.execute("INSERT INTO inetnums(network, country, org_id, \
307 family, address_start, address_end, prefix, date) \
308 VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args)
309
310 def _parse_asn_line(self, country_code, line):
311 try:
312 asn, dunno, date, status, org_id = line.split("|")
313 except ValueError:
314 org_id = None
315
316 # Try parsing the line without org_id
317 try:
318 asn, dunno, date, status = line.split("|")
319 except ValueError:
320 logging.warning("Could not parse line: %s" % line)
321 return
322
323 # Skip anything that isn't properly assigned
324 if not status in ("assigned", "allocated"):
325 return
326
327 # Parse date
328 try:
329 date = datetime.datetime.strptime(date, "%Y%m%d")
330 except ValueError:
331 logging.warning("Could not parse date: %s" % date)
332 return
333
334 with self.db as c:
335 args = (
336 asn,
337 country_code,
338 org_id,
339 date,
340 )
341
342 c.execute("INSERT INTO autnums(asn, country, org_id, date) \
343 VALUES(?, ?, ?, ?)", args)
344
345 def _export_networks(self, f):
346 # Write header
347 self._write_header(f)
348
349 with self.db as c:
350 # Write all networks
351 res = c.execute("""
352 SELECT inetnums.network,
353 autnums.asn,
354 inetnums.address_start,
355 inetnums.country,
356 STRFTIME('%Y-%m-%d', inetnums.date)
357 FROM inetnums
358 LEFT JOIN autnums
359 WHERE inetnums.org_id = autnums.org_id
360 ORDER BY inetnums.address_start
361 """)
362
363 for row in res:
364 net, asn, address_start, country, date = row
365
366 f.write(FMT % ("net:", net))
367
368 if asn:
369 f.write(FMT % ("asnum:", "AS%s" % asn))
370
371 if country:
372 f.write(FMT % ("country:", country))
373
374 if date:
375 f.write(FMT % ("assigned:", date))
376
377 # End the block
378 f.write("\n")
379
380 def _export_asnums(self, f):
381 # Write header
382 self._write_header(f)
383
384 with self.db as c:
385 res = c.execute("SELECT DISTINCT autnums.asn, autnums.country, \
386 STRFTIME('%Y-%m-%d', autnums.date) FROM autnums ORDER BY autnums.asn")
387
388 for row in res:
389 asn, country, date = row
390
391 f.write(FMT % ("asnum:", "AS%s" % asn))
392
393 name = self.rir.get_name_for_asn(asn)
394 if name:
395 f.write(FMT % ("name:", name))
396
397 if country:
398 f.write(FMT % ("country:", country))
399
400 if date:
401 f.write(FMT % ("assigned:", date))
402
403 # End block
404 f.write("\n")