# Source: git.ipfire.org — people/ms/libloc.git — src/python/importer.py
# Commit subject: "importer.py: add source information for RIR data feeds"
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import gzip
21 import logging
22 import urllib.request
23
# Initialise logging
# Module-level logger; propagation is enabled so records reach the
# handlers configured on the parent "location" logger.
log = logging.getLogger("location.importer")
log.propagate = True
27
# WHOIS database dumps to import, keyed by the Regional Internet Registry
# that publishes them. Each value is a list of URLs which are downloaded
# and parsed as RPSL key/value blocks (see read_blocks below).
WHOIS_SOURCES = {
	# African Network Information Centre
	"AFRINIC": [
		"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz"
	],

	# Asia Pacific Network Information Centre
	"APNIC": [
		"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
		"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
		#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
		#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
		"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
		"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz"
	],

	# American Registry for Internet Numbers
	# XXX there is nothing useful for us in here
	# ARIN: [
	#	"https://ftp.arin.net/pub/rr/arin.db"
	# ],

	# Latin America and Caribbean Network Information Centre
	# XXX ???

	# Réseaux IP Européens
	"RIPE": [
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
		#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
		#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
		"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz"
	],
}
63
# "Delegated extended" statistics files to import, keyed by the RIR that
# publishes them. Used for registries whose WHOIS dumps (above) are not
# usable for us.
EXTENDED_SOURCES = {
	# African Network Information Centre
	# NOTE(review): the key in this disabled entry read "ARIN", but the
	# URL belongs to AFRINIC — corrected here while it stays commented out.
	# "AFRINIC": [
	#	"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest"
	# ],

	# Asia Pacific Network Information Centre
	# "APNIC": [
	#	"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest"
	# ],

	# American Registry for Internet Numbers
	"ARIN": [
		"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest"
	],

	# Latin America and Caribbean Network Information Centre
	"LACNIC": [
		"https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest"
	],

	# Réseaux IP Européens
	# "RIPE": [
	#	"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest"
	# ],
}
90
class Downloader(object):
	"""
	Creates HTTP requests for the data sources and hands them to a
	DownloaderContext which performs the actual transfer.
	"""
	def __init__(self):
		# No proxy is used unless set_proxy() is called
		self.proxy = None

	def set_proxy(self, url):
		"""
		Sets a HTTP proxy that is used to perform all requests
		"""
		log.info("Using proxy %s" % url)
		self.proxy = url

	def request(self, url, data=None, return_blocks=False):
		"""
		Builds a request for url (POSTing data if given) and returns a
		DownloaderContext for it. return_blocks selects whether iterating
		the context yields whole RPSL blocks or single lines.
		"""
		req = urllib.request.Request(url, data=data)

		# Configure proxy
		if self.proxy:
			# Register the proxy for both schemes: set_proxy() only
			# applies to requests whose URL scheme matches the given
			# type, and all of our sources are https:// URLs, so
			# registering "http" alone would silently bypass the proxy.
			for scheme in ("http", "https"):
				req.set_proxy(self.proxy, scheme)

		return DownloaderContext(self, req, return_blocks=return_blocks)
110
111
class DownloaderContext(object):
	"""
	Context manager around a single HTTP request: the download starts
	on __enter__, the response is released on __exit__, and the object
	can be iterated to walk the decoded payload line by line or block
	by block.
	"""
	def __init__(self, downloader, request, return_blocks=False):
		self.downloader = downloader
		self.request = request

		# Should we return one block or a single line?
		self.return_blocks = return_blocks

		# Save the response object
		self.response = None

	def __enter__(self):
		log.info("Retrieving %s..." % self.request.full_url)

		# Send request
		self.response = urllib.request.urlopen(self.request)

		# Log the response headers
		log.debug("Response Headers:")
		for header in self.headers:
			log.debug(" %s: %s" % (header, self.get_header(header)))

		return self

	def __exit__(self, exc_type, exc_value, traceback):
		# Close the response so the underlying connection is released
		# (previously this was a no-op and leaked the connection)
		if self.response:
			self.response.close()

	def __iter__(self):
		"""
		Makes the object iterable by going through each block
		"""
		if self.return_blocks:
			return iterate_over_blocks(self.body)

		return iterate_over_lines(self.body)

	@property
	def headers(self):
		# Headers of the HTTP response; None before __enter__ has run
		if self.response:
			return self.response.headers

	def get_header(self, name):
		"""
		Returns the value of the named response header, or None if the
		header is absent or no response has been received yet.
		"""
		if self.headers:
			return self.headers.get(name)

	@property
	def body(self):
		"""
		Returns a file-like object with the decoded content
		of the response.
		"""
		content_type = self.get_header("Content-Type")

		# Decompress any gzipped response on the fly
		if content_type in ("application/x-gzip", "application/gzip"):
			return gzip.GzipFile(fileobj=self.response, mode="rb")

		# Return the response by default
		return self.response
171
172
def read_blocks(f):
	"""
	Parses f into RPSL blocks and yields a (type, attributes) tuple for
	each one, where the type is the key of the block's first line and
	the attributes map every key to its whitespace-stripped value.
	"""
	for block in iterate_over_blocks(f):
		block_type = None
		attrs = {}

		for index, line in enumerate(block):
			key, value = line.split(":", 1)

			# The key of the first line defines the type
			if not index:
				block_type = key

			# Store value
			attrs[key] = value.strip()

		yield block_type, attrs
189
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	"""
	Reads byte lines from the file-like object f and yields lists of
	decoded lines ("blocks") that were separated by empty lines.
	Comment lines (starting with # or %) and inline comments after a
	"#" are removed.

	charsets are tried in order until one decodes the line (latin1
	accepts any byte sequence, so it acts as a catch-all).
	"""
	block = []

	for line in f:
		# Convert to string
		for charset in charsets:
			try:
				line = line.decode(charset)
			except UnicodeDecodeError:
				continue
			else:
				break

		# Skip commented lines
		if line.startswith("#") or line.startswith("%"):
			continue

		# Strip line-endings
		line = line.rstrip()

		# Remove any comments at the end of line
		line, sep, comment = line.partition("#")

		# Checking for a found "#" (not for non-empty comment text as
		# before) also strips lines that end in a bare "#", which used
		# to keep trailing whitespace or survive as a whitespace line
		if sep:
			# Strip any whitespace before the comment
			line = line.rstrip()

			# If the line is now empty, we move on
			if not line:
				continue

		if line:
			block.append(line)
			continue

		# End the block on an empty line
		if block:
			yield block

			# Reset the block
			block = []

	# Return the last block
	if block:
		yield block
235
236
def iterate_over_lines(f):
	"""
	Yields every line of f decoded (UTF-8) with trailing whitespace
	and line-endings removed.
	"""
	for raw in f:
		# Decode and strip the line-ending in one go
		yield raw.decode().rstrip()