# tools/base.py — location/location-database.git (recovered from a gitweb blob export)
2 ###############################################################################
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
21 ###############################################################################
import datetime
import ipaddress
import logging
import math
import os
import re
import sqlite3
import struct
import subprocess

from . import downloader
# Output filenames, relative to the per-RIR directory (see make_path()).
FILENAME_ASNUMS = "asnums.txt"
# NOTE(review): "FILENMAE" is a typo for "FILENAME"; kept as-is because
# code outside this excerpt may reference the misspelled name.
FILENMAE_NETWORKS = "networks.txt"

# Matches an AS number such as "AS123" or "as123"; group 2 holds the digits.
RE_AS = re.compile(r"^(AS|as)(\d+)")
56 self
.db
= self
._open
_database
(".cache.db")
60 return "%s - %s" % (self
.__class
__.__name
__, self
.name
)
62 return self
.__class
__.__name
__
64 def _open_database(self
, path
=None):
65 db
= sqlite3
.connect(path
or ":memory:")
66 db
.set_trace_callback(logging
.debug
)
70 CREATE TABLE IF NOT EXISTS whois_query_cache(query TEXT, response TEXT,
71 fetched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);
72 CREATE UNIQUE INDEX IF NOT EXISTS whois_query_cache_query
73 ON whois_query_cache(query);
def make_path(self, path):
    """Return *path* placed inside a directory named after this class."""
    directory = type(self).__name__
    return os.path.join(directory, path)
def filename_asnums(self):
    """Path of the exported AS-number list for this registry.

    NOTE(review): callers access this without parentheses
    (self.rir.filename_asnums), suggesting a @property decorator was
    elided from this excerpt — confirm against the full file.
    """
    target = FILENAME_ASNUMS
    return self.make_path(target)
def filename_networks(self):
    """Path of the exported network list for this registry.

    NOTE(review): references the misspelled constant FILENMAE_NETWORKS
    on purpose — that is its actual name at module level.
    """
    target = FILENMAE_NETWORKS
    return self.make_path(target)
def update(self, directory):
    # Download all data and store it in memory
    # NOTE(review): the lines that create the parser "p" are missing from
    # this excerpt — presumably a parser is instantiated here and its
    # fetch method called; confirm against the full file before relying
    # on this method.

    # Write the database to disk
    p.export_database(directory)
def _whois(self, query):
    """Run the whois(1) command-line client for *query*.

    Returns the decoded output, or None when the query failed.
    """
    # NOTE(review): the command initialisation was lost in this excerpt;
    # reconstructed — confirm the exact argument order against upstream.
    command = ["whois", query]

    # Query a specific WHOIS server
    if self.whois_server:
        command += ["-h", self.whois_server]

    logging.info("Running command: %s" % " ".join(command))

    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.error("Could not run WHOIS query %s: %s" % (query, e.output))
        return None

    # Decode leniently: WHOIS output has no guaranteed encoding
    return output.decode(errors="ignore")
def whois(self, query):
    """Return the WHOIS response for *query*, using the on-disk cache.

    On a cache miss the WHOIS server is contacted via _whois() and the
    response is stored for later runs.
    """
    c = self.db.cursor()

    # Try fetching a response from the cache.
    # NOTE(review): cursor creation, row fetch and the returns were elided
    # from this excerpt and have been reconstructed.
    res = c.execute("SELECT response, fetched_at FROM whois_query_cache \
        WHERE query = ?", (query,))

    row = res.fetchone()
    if row:
        response, fetched_at = row
        logging.debug("Fetched response for %s from cache (%s)"
            % (query, fetched_at))
        return response

    # If we could not find anything, we will have to contact the whois server
    response = self._whois(query)

    # Store the response in the database
    c.execute("INSERT INTO whois_query_cache(query, response) \
        VALUES(?, ?)", (query, response))

    # Commit changes to disk
    self.db.commit()

    return response
def get_name_for_asn(self, asn):
    """Look up the organisation name for AS<asn> via WHOIS.

    Returns the value of the first "org-name"/"OrgName" attribute found,
    or None when the response contains neither.
    """
    result = self.whois("AS%s" % asn)

    for line in result.splitlines():
        key, delim, value = line.partition(":")

        # NOTE(review): the stripping and the return of the matched value
        # were elided from this excerpt; reconstructed.
        key, value = key.strip(), value.strip()

        if key in ("org-name", "OrgName"):
            return value
161 class RIRParser(object):
def __init__(self, rir):
    # NOTE(review): an assignment of *rir* (e.g. self.rir = rir) appears
    # to be elided from this excerpt; other methods read self.rir.

    # Create a downloader to fetch data
    self.downloader = downloader.Downloader()

    # Create a database to hold temporary data
    self.db = self._make_database(":memory:")

    # Remember when this parse run started (written into file headers)
    self.start_time = datetime.datetime.utcnow()
174 def _make_database(self
, filename
):
175 db
= sqlite3
.connect(filename
)
176 db
.set_trace_callback(logging
.debug
)
178 # Create database layout
180 cursor
.executescript("""
181 CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER, date DATE);
182 CREATE INDEX autnums_org_id ON autnums(org_id);
184 CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER,
185 family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER, date DATE);
186 CREATE INDEX inetnums_sort ON inetnums(address_start);
def export_database(self, directory):
    """Write the parsed data as text files.

    NOTE(review): *directory* is unused in the visible lines; the code
    that creates or changes into it appears to be elided from this
    excerpt. The filenames come from the RIR object (apparently
    properties, used here without parentheses).
    """
    with open(self.rir.filename_asnums, "w") as f:
        self._export_asnums(f)

    with open(self.rir.filename_networks, "w") as f:
        self._export_networks(f)
200 def _write_header(self
, f
):
202 f
.write("# %s\n" % self
.rir
)
203 f
.write("# Generated at %s\n" % self
.start_time
)
def fetch_data(self):
    """Download and parse all database files published by this RIR.

    Raises NotImplementedError when the RIR defines no database URLs.
    """
    if not self.rir.database_urls:
        raise NotImplementedError("Database URLs not set")

    # Parse entire database in one go.
    # NOTE(review): the loop body was elided from this excerpt;
    # reconstructed as a call to parse_url() defined below.
    for url in self.rir.database_urls:
        self.parse_url(url)
def parse_url(self, url):
    """Download *url* and feed its content line by line into parse_line()."""
    with self.downloader.request(url) as r:
        # NOTE(review): the iteration statement was elided from this
        # excerpt; reconstructed as line-wise iteration over the response.
        for line in r:
            self.parse_line(line)
def parse_line(self, line):
    """Dispatch one line of an RIR statistics file to the right parser.

    Skips the version line, comments and stats-only ("*") records; hands
    ipv4/ipv6 records to _parse_ip_line() and asn records to
    _parse_asn_line().
    """
    # Skip the file format version line
    if line.startswith("2"):
        return

    # Skip comments
    if line.startswith("#"):
        return

    # NOTE(review): the try/except and the early returns were elided from
    # this excerpt and have been reconstructed. The local previously
    # named "type" (shadowing the builtin) is renamed record_type.
    try:
        registry, country_code, record_type, line = line.split("|", 3)
    except ValueError:
        logging.warning("Could not parse line: %s" % line)
        return

    # Skip any lines that are for stats only
    if country_code == "*":
        return

    if record_type in ("ipv6", "ipv4"):
        return self._parse_ip_line(country_code, record_type, line)

    elif record_type == "asn":
        return self._parse_asn_line(country_code, line)

    logging.warning("Unknown line type: %s" % record_type)
250 def _parse_ip_line(self
, country_code
, type, line
):
252 address
, prefix
, date
, status
, org_id
= line
.split("|")
256 # Try parsing the line without org_id
258 address
, prefix
, date
, status
= line
.split("|")
260 logging
.warning("Unhandled line format: %s" % line
)
263 # Skip anything that isn't properly assigned
264 if not status
in ("assigned", "allocated"):
267 # Cast prefix into an integer
271 logging
.warning("Invalid prefix: %s" % prefix
)
273 # Fix prefix length for IPv4
275 prefix
= 32 - int(math
.log(prefix
, 2))
279 date
= datetime
.datetime
.strptime(date
, "%Y%m%d")
281 logging
.warning("Could not parse date: %s" % date
)
284 # Try to parse the address
286 network
= ipaddress
.ip_network("%s/%s" % (address
, prefix
), strict
=False)
288 logging
.warning("Invalid IP address: %s" % address
)
292 # Get the first and last address of this network
293 address_start
, address_end
= int(network
.network_address
), int(network
.broadcast_address
)
300 struct
.pack(">QQ", address_start
>> 64, address_start
% (2 ** 64)),
301 struct
.pack(">QQ", address_end
>> 64, address_end
% (2 ** 64)),
306 c
.execute("INSERT INTO inetnums(network, country, org_id, \
307 family, address_start, address_end, prefix, date) \
308 VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args
)
310 def _parse_asn_line(self
, country_code
, line
):
312 asn
, dunno
, date
, status
, org_id
= line
.split("|")
316 # Try parsing the line without org_id
318 asn
, dunno
, date
, status
= line
.split("|")
320 logging
.warning("Could not parse line: %s" % line
)
323 # Skip anything that isn't properly assigned
324 if not status
in ("assigned", "allocated"):
329 date
= datetime
.datetime
.strptime(date
, "%Y%m%d")
331 logging
.warning("Could not parse date: %s" % date
)
342 c
.execute("INSERT INTO autnums(asn, country, org_id, date) \
343 VALUES(?, ?, ?, ?)", args
)
def _export_networks(self, f):
    """Write all parsed networks to file object *f* in key/value format.

    NOTE(review): the SELECT column list, the FROM/JOIN clause, the loop
    and the per-field guards were partially elided from this excerpt and
    have been reconstructed; FMT is defined outside the visible lines.
    Confirm against the full file.
    """
    self._write_header(f)

    c = self.db.cursor()
    res = c.execute("""
        SELECT inetnums.network,
            autnums.asn,
            inetnums.address_start,
            inetnums.country,
            STRFTIME('%Y-%m-%d', inetnums.date)
        FROM inetnums, autnums
        WHERE inetnums.org_id = autnums.org_id
        ORDER BY inetnums.address_start""")

    for row in res:
        net, asn, address_start, country, date = row

        f.write(FMT % ("net:", net))

        if asn:
            f.write(FMT % ("asnum:", "AS%s" % asn))

        if country:
            f.write(FMT % ("country:", country))

        if date:
            f.write(FMT % ("assigned:", date))

        f.write("\n")
def _export_asnums(self, f):
    """Write all parsed AS numbers to file object *f* in key/value format.

    NOTE(review): the cursor, the loop and the per-field guards were
    partially elided from this excerpt and have been reconstructed; FMT
    is defined outside the visible lines. The block may continue past
    the end of this excerpt — confirm against the full file.
    """
    self._write_header(f)

    c = self.db.cursor()
    res = c.execute("SELECT DISTINCT autnums.asn, autnums.country, \
        STRFTIME('%Y-%m-%d', autnums.date) FROM autnums ORDER BY autnums.asn")

    for row in res:
        asn, country, date = row

        f.write(FMT % ("asnum:", "AS%s" % asn))

        # Resolve the organisation name via (cached) WHOIS
        name = self.rir.get_name_for_asn(asn)
        if name:
            f.write(FMT % ("name:", name))

        if country:
            f.write(FMT % ("country:", country))

        if date:
            f.write(FMT % ("assigned:", date))

        f.write("\n")