]> git.ipfire.org Git - people/ms/libloc.git/blob - src/python/location-importer.in
python: Drop unused list of invalid IP addresses
[people/ms/libloc.git] / src / python / location-importer.in
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import argparse
21 import ipaddress
22 import logging
23 import math
24 import re
25 import sys
26
27 # Load our location module
28 import location
29 import location.database
30 import location.importer
31 from location.i18n import _
32
# Module-wide logger of the importer; records are passed on to the
# handlers of its ancestor loggers
log = logging.getLogger("location.importer")
log.propagate = True
36
class CLI(object):
    """
    Command line interface of the location importer.

    Parses the command line, opens the database connection and dispatches
    to the handler of the selected sub-command.
    """

    def parse_cli(self):
        """
        Build the argument parser and parse the command line.

        Enables debug logging when --debug was passed. Prints the usage and
        exits with status 2 when no sub-command was selected.

        Returns the parsed argparse namespace; its "func" attribute holds
        the handler of the selected sub-command.
        """
        parser = argparse.ArgumentParser(
            description=_("Location Importer Command Line Interface"),
        )
        subparsers = parser.add_subparsers()

        # Global configuration flags
        parser.add_argument("--debug", action="store_true",
            help=_("Enable debug output"))

        # version
        # NOTE(review): @VERSION@ is presumably substituted at build time
        parser.add_argument("--version", action="version",
            version="%(prog)s @VERSION@")

        # Database
        parser.add_argument("--database-host", required=True,
            help=_("Database Hostname"), metavar=_("HOST"))
        parser.add_argument("--database-name", required=True,
            help=_("Database Name"), metavar=_("NAME"))
        parser.add_argument("--database-username", required=True,
            help=_("Database Username"), metavar=_("USERNAME"))
        parser.add_argument("--database-password", required=True,
            help=_("Database Password"), metavar=_("PASSWORD"))

        # Update WHOIS
        update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
        update_whois.set_defaults(func=self.handle_update_whois)

        args = parser.parse_args()

        # Enable debug logging
        if args.debug:
            log.setLevel(logging.DEBUG)

        # Print usage if no action was given
        if "func" not in args:
            parser.print_usage()
            sys.exit(2)

        return args

    def run(self):
        """
        Parse the command line, set up the database and run the selected
        sub-command.

        Always terminates the process: the exit status is the handler's
        return value if it is truthy and 0 otherwise.
        """
        # Parse command line arguments
        args = self.parse_cli()

        # Initialise database
        self.db = self._setup_database(args)

        # Call function
        ret = args.func(args)

        # Return with the handler's exit code, or 0 if it did not give one
        sys.exit(ret or 0)

    def _setup_database(self, ns):
        """
        Initialise the database

        Connects using the --database-* settings in ns and creates the
        "autnums" and "networks" tables and their unique indexes if they
        do not exist yet.

        Returns the database connection.
        """
        # Connect to database
        db = location.database.Connection(
            host=ns.database_host, database=ns.database_name,
            user=ns.database_username, password=ns.database_password,
        )

        with db.transaction():
            db.execute("""
                -- autnums
                CREATE TABLE IF NOT EXISTS autnums(number integer, name text);
                CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);

                -- networks
                CREATE TABLE IF NOT EXISTS networks(network inet, autnum integer, country text);
                CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network);
            """)

        return db

    def handle_update_whois(self, ns):
        """
        Handler of the "update-whois" sub-command.

        First downloads every WHOIS source and stages the parsed aut-num
        and organisation objects in temporary tables, which are then merged
        into "autnums" within the same transaction. Afterwards every
        extended source is imported line by line, each source in its own
        transaction.

        ns is the parsed command line; it is not used here.
        """
        downloader = location.importer.Downloader()

        # Download all sources
        with self.db.transaction():
            # Create some temporary tables to store parsed data
            self.db.execute("""
                CREATE TEMPORARY TABLE _autnums(number integer, organization text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _autnums_number ON _autnums(number);

                CREATE TEMPORARY TABLE _organizations(handle text, name text)
                    ON COMMIT DROP;
                CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
            """)

            for source in location.importer.WHOIS_SOURCES:
                with downloader.request(source, return_blocks=True) as f:
                    for block in f:
                        self._parse_block(block)

            # Resolve the organisation handles into names and store the result
            self.db.execute("""
                INSERT INTO autnums(number, name)
                    SELECT _autnums.number, _organizations.name FROM _autnums
                        LEFT JOIN _organizations ON _autnums.organization = _organizations.handle
                ON CONFLICT (number) DO UPDATE SET name = excluded.name;
            """)

        # Download all extended sources
        for source in location.importer.EXTENDED_SOURCES:
            with self.db.transaction():
                # Download data
                with downloader.request(source) as f:
                    for line in f:
                        self._parse_line(line)

    def _parse_block(self, block):
        """
        Dispatch a WHOIS object to the parser for its type.

        block is the sequence of lines of one object. Only "aut-num" and
        "organisation" objects are handled; empty blocks and all other
        object types are ignored.
        """
        # Nothing to do for empty blocks
        if not block:
            return

        # Get first line to find out what type of block this is
        line = block[0]

        # aut-num
        if line.startswith("aut-num:"):
            return self._parse_autnum_block(block)

        # organisation
        elif line.startswith("organisation:"):
            return self._parse_org_block(block)

    def _parse_autnum_block(self, block):
        """
        Extract the AS number and the organisation handle from an aut-num
        object and store them in the temporary "_autnums" table.

        Objects whose AS number cannot be parsed are skipped, because they
        would otherwise be stored with a NULL number.
        """
        autnum = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key == "aut-num":
                m = re.match(r"^(AS|as)(\d+)", val)
                if m:
                    autnum["asn"] = m.group(2)

            elif key == "org":
                autnum[key] = val

        # Skip objects without a usable AS number
        if "asn" not in autnum:
            return

        # Insert into database
        self.db.execute("""
            INSERT INTO _autnums(number, organization)
                VALUES(%s, %s) ON CONFLICT (number) DO UPDATE SET
                organization = excluded.organization
            """, autnum["asn"], autnum.get("org"),
        )

    def _parse_org_block(self, block):
        """
        Extract the handle and the name from an organisation object and
        store them in the temporary "_organizations" table.

        Objects that carry neither attribute are skipped.
        """
        org = {}
        for line in block:
            # Split line
            key, val = split_line(line)

            if key in ("organisation", "org-name"):
                org[key] = val

        # Skip empty objects
        if not org:
            return

        self.db.execute("""
            INSERT INTO _organizations(handle, name)
                VALUES(%s, %s) ON CONFLICT (handle) DO
                UPDATE SET name = excluded.name
            """, org.get("organisation"), org.get("org-name"),
        )

    def _parse_line(self, line):
        """
        Parse one line of an extended source.

        Version lines (starting with "2"), comments and statistics lines
        (country code "*") are ignored. IPv4 and IPv6 records are passed
        on to _parse_ip_line(); all other record types are ignored.
        """
        # Skip version line
        if line.startswith("2"):
            return

        # Skip comments
        if line.startswith("#"):
            return

        # The first three fields are common to all records; whatever
        # follows is passed on unparsed
        try:
            registry, country_code, record_type, rest = line.split("|", 3)
        except ValueError:
            log.warning("Could not parse line: %s", line)
            return

        # Skip any lines that are for stats only
        if country_code == "*":
            return

        if record_type in ("ipv6", "ipv4"):
            return self._parse_ip_line(country_code, record_type, rest)

    def _parse_ip_line(self, country, type, line):
        """
        Parse the remainder of an IPv4/IPv6 record and store the network
        with its country in the "networks" table.

        country is the country code of the record, type is either "ipv4"
        or "ipv6" and line holds the fields "address|value|date|status",
        optionally followed by one more field.

        Records that are not "assigned" or "allocated" are skipped. Lines
        that cannot be parsed are logged and skipped.
        """
        fields = line.split("|")

        # The trailing organization field is optional
        if len(fields) not in (4, 5):
            log.warning("Unhandled line format: %s", line)
            return

        address, prefix, status = fields[0], fields[1], fields[3]

        # Skip anything that isn't properly assigned
        if status not in ("assigned", "allocated"):
            return

        # Cast prefix into an integer
        try:
            prefix = int(prefix)
        except ValueError:
            log.warning("Invalid prefix: %s", prefix)
            return

        # Fix prefix length for IPv4: the field is treated as the number
        # of addresses in the block, which gets rounded down to a power
        # of two
        if type == "ipv4":
            if prefix <= 0:
                log.warning("Invalid prefix: %s", prefix)
                return

            # bit_length() - 1 is floor(log2(n)) in exact integer arithmetic
            prefix = 32 - (prefix.bit_length() - 1)

        # Try to parse the address
        try:
            network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
        except ValueError:
            log.warning("Invalid IP address: %s", address)
            return

        self.db.execute("""
            INSERT INTO networks(network, country)
                VALUES(%s, %s) ON CONFLICT (network) DO
                UPDATE SET country = excluded.country
            """, str(network), country,
        )
272
273
def split_line(line):
    """
    Split a "key: value" line at its first colon.

    Returns a (key, value) tuple with surrounding whitespace removed from
    both parts. The value is an empty string if the line has no colon.
    """
    head, _sep, tail = line.partition(":")

    return head.strip(), tail.strip()
282
def main():
    """Create the command line interface and run it."""
    CLI().run()
287
# Only run the command line interface when executed as a script, so that
# importing this module has no side effects
if __name__ == "__main__":
    main()