git.ipfire.org Git - location/location-database.git/blob - tools/base.py
tools: Fetch organization names for ASes from WHOIS
[location/location-database.git] / tools / base.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
7 # #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
12 # #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
17 # #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
20 # #
21 ###############################################################################
22
23 import datetime
24 import ipaddress
25 import logging
26 import math
27 import os.path
28 import re
29 import sqlite3
30 import struct
31 import subprocess
32
33 from . import downloader
34 from . import util
35
# Names of the per-RIR output files; RIR.make_path() prefixes them with the
# RIR's own directory.
FILENAME_ASNUMS = "asnums.txt"
FILENAME_NETWORKS = "networks.txt"

# Historical, misspelled name of FILENAME_NETWORKS. Kept as an alias so that
# code which still refers to the old spelling keeps working.
FILENMAE_NETWORKS = FILENAME_NETWORKS

# Format of one output line: the key is left-aligned and padded to 16
# columns, followed by the value and a newline.
FMT = "%-16s%s\n"

# Matches an "AS"/"as" prefix followed by digits at the start of a string;
# group 2 holds the digits.
RE_AS = re.compile(r"^(AS|as)(\d+)")

# Addresses/networks considered invalid (not referenced in this part of
# the file).
INVALID_ADDRESSES = (
    "0.0.0.0",
    "::/0",
    "0::/0",
)
48
class RIR(object):
    """Base class describing one registry whose data is imported.

    Subclasses are expected to set the class attributes below. The class
    name doubles as the name of the directory the exported files live in
    (see make_path()).
    """

    # Optional human-readable name, shown by __str__() when set
    name = None

    # URLs that RIRParser.fetch_data() downloads and parses
    database_urls = []

    # WHOIS server to pass to "whois -h"; when unset, no -h option is given
    whois_server = None

    def __init__(self):
        pass

    def __str__(self):
        """Return "<ClassName> - <name>", or just the class name if no name is set."""
        if self.name:
            return "%s - %s" % (self.__class__.__name__, self.name)

        return self.__class__.__name__

    @property
    def parser(self):
        """The parser class that update() instantiates with this object."""
        return RIRParser

    def make_path(self, path):
        """Return *path* prefixed with the directory named after this class."""
        return os.path.join(self.__class__.__name__, path)

    @property
    def filename_asnums(self):
        """Relative path of the AS number export file."""
        return self.make_path(FILENAME_ASNUMS)

    @property
    def filename_networks(self):
        """Relative path of the network export file."""
        return self.make_path(FILENMAE_NETWORKS)

    def update(self, directory):
        """Download all data of this registry and export it.

        *directory* is handed to the parser's export_database().
        """
        p = self.parser(self)

        # Download all data and store it in memory
        p.fetch_data()

        # Write the database to disk
        p.export_database(directory)

    def whois(self, query):
        """Run the external "whois" command for *query* and return its output.

        The combined stdout/stderr is returned as text; bytes that cannot
        be decoded are dropped.

        Raises subprocess.CalledProcessError if the command exits with a
        non-zero status (the failure is logged before re-raising).
        """
        command = ["whois"]

        # Query a specific WHOIS server. Options are placed before the
        # query so this also works with clients that stop option parsing
        # at the first operand.
        if self.whois_server:
            command += ["-h", self.whois_server]

        command.append(query)

        logging.info("Running command: %s", " ".join(command))

        try:
            output = subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # e.output is bytes; decode it so the log shows readable text
            logging.error("Could not run WHOIS query %s: %s",
                query, e.output.decode(errors="ignore"))
            raise

        return output.decode(errors="ignore")

    def get_name_for_asn(self, asn):
        """Look up the organization name of AS *asn* via WHOIS.

        Returns the stripped value of the first "org-name" or "OrgName"
        line in the WHOIS response, or None if there is no such line.
        Exceptions raised by whois() propagate to the caller.
        """
        result = self.whois("AS%s" % asn)

        for line in result.splitlines():
            key, delim, value = line.partition(":")

            # Skip lines without a "key: value" shape or with an empty value
            if not value:
                continue

            if key in ("org-name", "OrgName"):
                return value.strip()

        return None
117
118
class RIRParser(object):
    """Downloads, parses and exports the data of one RIR object.

    Parsed records are collected in an in-memory SQLite database with the
    tables "autnums" and "inetnums" and are written out as text files by
    export_database().
    """

    def __init__(self, rir):
        self.rir = rir

        # Create a downloader to fetch data
        self.downloader = downloader.Downloader()

        # Create a database to hold temporary data
        self.db = self._make_database(":memory:")

        # Start time as a naive UTC timestamp (the same value utcnow()
        # returns, without using that deprecated call)
        self.start_time = datetime.datetime.now(
            datetime.timezone.utc).replace(tzinfo=None)

    def _make_database(self, filename):
        """Open the SQLite database *filename* and create the schema.

        All statements use IF NOT EXISTS, so opening an already
        initialised database file works as well.
        """
        db = sqlite3.connect(filename)

        # Log every executed SQL statement at debug level
        db.set_trace_callback(logging.debug)

        # Create database layout
        with db as cursor:
            cursor.executescript("""
                CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER, date DATE);
                CREATE INDEX IF NOT EXISTS autnums_org_id ON autnums(org_id);

                CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER,
                    family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER, date DATE);
                CREATE INDEX IF NOT EXISTS inetnums_sort ON inetnums(address_start);
            """)

        return db

    def export_database(self, directory):
        """Write the AS number and network files of this RIR.

        NOTE(review): *directory* is currently not used; the files are
        opened at the relative paths returned by the RIR object. Confirm
        with the callers whether the paths should be joined with
        *directory*.
        """
        # Write all ASes
        with open(self.rir.filename_asnums, "w") as f:
            self._export_asnums(f)

        # Write all networks
        with open(self.rir.filename_networks, "w") as f:
            self._export_networks(f)

    def _write_header(self, f):
        """Write the comment header (RIR and generation time) to *f*."""
        f.write("#\n")
        f.write("# %s\n" % self.rir)
        f.write("# Generated at %s\n" % self.start_time)
        f.write("#\n\n")

    def fetch_data(self):
        """Download and parse every URL in the RIR's database_urls.

        Raises NotImplementedError if the RIR has no database URLs.
        """
        if not self.rir.database_urls:
            raise NotImplementedError("Database URLs not set")

        # Parse entire database in one go
        for url in self.rir.database_urls:
            self.parse_url(url)

        self.db.commit()

    def parse_url(self, url):
        """Request *url* and feed every line of the response to parse_line()."""
        with self.downloader.request(url) as r:
            for line in r:
                self.parse_line(line)

    def parse_line(self, line):
        """Parse one "|"-separated line and store the record it describes.

        Lines starting with "2" (version line) or "#" (comments), lines
        whose country code is "*" and lines that cannot be split into at
        least four fields are ignored.
        """
        # Skip version line
        if line.startswith("2"):
            return

        # Skip comments
        if line.startswith("#"):
            return

        # The first three fields are common to all record types; the
        # remainder is handed to the type-specific parser unsplit.
        try:
            registry, country_code, record_type, rest = line.split("|", 3)
        except ValueError:
            logging.warning("Could not parse line: %s", line)
            return

        # Skip any lines that are for stats only
        if country_code == "*":
            return

        if record_type in ("ipv6", "ipv4"):
            return self._parse_ip_line(country_code, record_type, rest)

        if record_type == "asn":
            return self._parse_asn_line(country_code, rest)

        logging.warning("Unknown line type: %s", record_type)

    def _parse_ip_line(self, country_code, type, line):
        """Parse the remainder of an "ipv4"/"ipv6" line and insert the network.

        *line* must consist of "address|prefix|date|status", optionally
        followed by "|org_id". Records whose status is neither "assigned"
        nor "allocated", and records with unparsable fields, are skipped.
        """
        fields = line.split("|")

        if len(fields) == 5:
            address, prefix, date, status, org_id = fields
        elif len(fields) == 4:
            # Line without org_id
            address, prefix, date, status = fields
            org_id = None
        else:
            logging.warning("Unhandled line format: %s", line)
            return

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Cast prefix into an integer
        try:
            prefix = int(prefix)
        except ValueError:
            logging.warning("Invalid prefix: %s", prefix)
            return

        # Fix prefix length for IPv4: the field is treated as a number of
        # addresses and converted into a prefix length.
        if type == "ipv4":
            if prefix <= 0:
                logging.warning("Invalid prefix: %s", prefix)
                return

            # bit_length() - 1 is the exact floor(log2(n)), free of floating
            # point rounding. A count that is not a power of two is rounded
            # down, i.e. the stored network covers only the largest
            # power-of-two part at the start of the range.
            prefix = 32 - (prefix.bit_length() - 1)

        # Parse date
        try:
            date = datetime.datetime.strptime(date, "%Y%m%d")
        except ValueError:
            logging.warning("Could not parse date: %s", date)
            return

        # Try to parse the address
        try:
            network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
        except ValueError:
            logging.warning("Invalid IP address: %s", address)
            return

        # Get the first and last address of this network
        address_start = int(network.network_address)
        address_end = int(network.broadcast_address)

        args = (
            "%s" % network,
            country_code,
            org_id,
            network.version,
            # Addresses are stored as 16 big-endian bytes so that comparing
            # the BLOBs orders them numerically.
            struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
            struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
            network.prefixlen,
            # Bind the timestamp as text explicitly instead of relying on
            # sqlite3's deprecated default datetime adapter (same format).
            date.isoformat(" "),
        )

        with self.db as c:
            c.execute("""
                INSERT INTO inetnums(network, country, org_id,
                    family, address_start, address_end, prefix, date)
                VALUES(?, ?, ?, ?, ?, ?, ?, ?)
            """, args)

    def _parse_asn_line(self, country_code, line):
        """Parse the remainder of an "asn" line and insert the AS number.

        *line* must consist of "asn|count|date|status", optionally
        followed by "|org_id". Records whose status is neither "assigned"
        nor "allocated", and records with unparsable fields, are skipped.
        """
        fields = line.split("|")

        # NOTE(review): the second field is ignored. It is presumably the
        # number of consecutive AS numbers in this record - TODO confirm
        # against the format specification; only the first ASN is stored.
        if len(fields) == 5:
            asn, count, date, status, org_id = fields
        elif len(fields) == 4:
            # Line without org_id
            asn, count, date, status = fields
            org_id = None
        else:
            logging.warning("Could not parse line: %s", line)
            return

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Parse date
        try:
            date = datetime.datetime.strptime(date, "%Y%m%d")
        except ValueError:
            logging.warning("Could not parse date: %s", date)
            return

        args = (
            asn,
            country_code,
            org_id,
            # Bound as text explicitly, see _parse_ip_line()
            date.isoformat(" "),
        )

        with self.db as c:
            c.execute("""
                INSERT INTO autnums(asn, country, org_id, date)
                VALUES(?, ?, ?, ?)
            """, args)

    def _export_networks(self, f):
        """Write one block per network to *f*, ordered by start address.

        A network is listed once for every AS number that shares its
        org_id; networks without a matching AS number are written without
        an "asnum:" line.
        """
        # Write header
        self._write_header(f)

        with self.db as c:
            # Write all networks. The join condition has to live in ON:
            # as a WHERE clause it would discard every network that has no
            # matching AS number.
            res = c.execute("""
                SELECT inetnums.network,
                    autnums.asn,
                    inetnums.address_start,
                    inetnums.country,
                    STRFTIME('%Y-%m-%d', inetnums.date)
                FROM inetnums
                    LEFT JOIN autnums
                        ON inetnums.org_id = autnums.org_id
                ORDER BY inetnums.address_start
            """)

            for net, asn, address_start, country, date in res:
                f.write(FMT % ("net:", net))

                if asn:
                    f.write(FMT % ("asnum:", "AS%s" % asn))

                if country:
                    f.write(FMT % ("country:", country))

                if date:
                    f.write(FMT % ("assigned:", date))

                # End the block
                f.write("\n")

    def _export_asnums(self, f):
        """Write one block per distinct AS number to *f*, ordered by ASN.

        The organization name is looked up with one WHOIS query per AS
        number; exceptions raised by that query propagate to the caller.
        """
        # Write header
        self._write_header(f)

        with self.db as c:
            res = c.execute("""
                SELECT DISTINCT autnums.asn, autnums.country,
                    STRFTIME('%Y-%m-%d', autnums.date)
                FROM autnums
                ORDER BY autnums.asn
            """)

            for asn, country, date in res:
                f.write(FMT % ("asnum:", "AS%s" % asn))

                name = self.rir.get_name_for_asn(asn)
                if name:
                    f.write(FMT % ("name:", name))

                if country:
                    f.write(FMT % ("country:", country))

                if date:
                    f.write(FMT % ("assigned:", date))

                # End block
                f.write("\n")