# Source: people/ms/libloc.git — src/python/importer.py
# Commit subject: "importer: Write NULL into database when bool is not set"
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # libloc - A library to determine the location of someone on the Internet #
5 # #
6 # Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7 # #
8 # This library is free software; you can redistribute it and/or #
9 # modify it under the terms of the GNU Lesser General Public #
10 # License as published by the Free Software Foundation; either #
11 # version 2.1 of the License, or (at your option) any later version. #
12 # #
13 # This library is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16 # Lesser General Public License for more details. #
17 # #
18 ###############################################################################
19
20 import gzip
21 import logging
22 import urllib.request
23
# Initialise logging
# NOTE: propagate is set so records bubble up to the root logger's handlers
log = logging.getLogger("location.importer")
log.propagate = 1

# WHOIS database dumps that are downloaded and parsed block-wise.
# Commented entries are known sources that currently provide no data
# that is useful for this importer.
WHOIS_SOURCES = (
	# African Network Information Centre
	"https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",

	# Asia Pacific Network Information Centre
	#"https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
	#"https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
	#"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
	#"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
	"https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
	"https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",

	# American Registry for Internet Numbers
	# XXX there is nothing useful for us in here
	#"https://ftp.arin.net/pub/rr/arin.db",

	# Latin America and Caribbean Network Information Centre
	# XXX ???

	# Réseaux IP Européens
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
	#"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
	"https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
)

# Extended delegation statistics files from the RIRs; these are plain
# text files that are read line by line.
EXTENDED_SOURCES = (
	# African Network Information Centre
	"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest",

	# Asia Pacific Network Information Centre
	"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest",

	# American Registry for Internet Numbers
	"https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest",

	# Latin America and Caribbean Network Information Centre
	"http://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest",

	# Réseaux IP Européens
	"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest",
)
72
class Downloader(object):
	"""
	Small helper that creates download requests, optionally routing
	them through a configured HTTP proxy.
	"""
	def __init__(self):
		# No proxy is configured by default
		self.proxy = None

	def set_proxy(self, url):
		"""
		Sets a HTTP proxy that is used to perform all requests
		"""
		log.info("Using proxy %s" % url)
		self.proxy = url

	def request(self, url, data=None, return_blocks=False):
		"""
		Builds a request for url (optionally with a POST payload in
		data) and wraps it into a DownloaderContext. If return_blocks
		is set, iterating the context yields whole blocks instead of
		single lines.
		"""
		request = urllib.request.Request(url, data=data)

		# Route the request through the proxy if one is set
		if self.proxy:
			request.set_proxy(self.proxy, "http")

		return DownloaderContext(self, request, return_blocks=return_blocks)
92
93
class DownloaderContext(object):
	"""
	Context manager around a single download: opens the response on
	__enter__, closes it on __exit__, and makes the decoded body
	iterable either line by line or block by block.
	"""
	def __init__(self, downloader, request, return_blocks=False):
		self.downloader = downloader
		self.request = request

		# Should we return one block or a single line?
		self.return_blocks = return_blocks

		# Save the response object (set in __enter__)
		self.response = None

	def __enter__(self):
		log.info("Retrieving %s..." % self.request.full_url)

		# Send request
		self.response = urllib.request.urlopen(self.request)

		# Log the response headers
		log.debug("Response Headers:")
		for header in self.headers:
			log.debug(" %s: %s" % (header, self.get_header(header)))

		return self

	def __exit__(self, type, value, traceback):
		# Close the response so the underlying connection is released
		# right away instead of leaking until garbage collection
		if self.response:
			self.response.close()

	def __iter__(self):
		"""
		Makes the object iterable by going through each block
		"""
		if self.return_blocks:
			return iterate_over_blocks(self.body)

		return iterate_over_lines(self.body)

	@property
	def headers(self):
		# Returns the response headers, or None before __enter__ ran
		if self.response:
			return self.response.headers

	def get_header(self, name):
		# Returns the value of a single response header (or None)
		if self.headers:
			return self.headers.get(name)

	@property
	def body(self):
		"""
		Returns a file-like object with the decoded content
		of the response.
		"""
		content_type = self.get_header("Content-Type")

		# Decompress any gzipped response on the fly
		if content_type in ("application/x-gzip", "application/gzip"):
			return gzip.GzipFile(fileobj=self.response, mode="rb")

		# Return the response by default
		return self.response
153
154
def read_blocks(f):
	"""
	Reads raw lines from f, groups them into blocks and yields one
	(type, data) tuple per block. The type is the key of the block's
	first line; data maps every key to its whitespace-stripped value.
	"""
	for block in iterate_over_blocks(f):
		# Split each line once at the first colon
		pairs = [line.split(":", 1) for line in block]

		# Collect all key/value pairs (values stripped); later
		# duplicate keys overwrite earlier ones
		data = {key: value.strip() for key, value in pairs}

		# The key of the very first line determines the block type
		type = pairs[0][0] if pairs else None

		yield type, data
171
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
	"""
	Reads raw (bytes) lines from f and yields them grouped into
	blocks: consecutive non-empty lines form one block, an empty line
	ends it. Full-line comments (starting with # or %) are skipped
	and trailing inline comments are cut off.
	"""
	def _decode(raw):
		# Try each charset in turn; the first that works wins
		for charset in charsets:
			try:
				return raw.decode(charset)
			except UnicodeDecodeError:
				continue
		return raw

	block = []

	for raw in f:
		line = _decode(raw)

		# Skip commented lines
		if line.startswith(("#", "%")):
			continue

		# Strip line-endings
		line = line.rstrip()

		# Cut off any comment at the end of the line
		head, _, trailer = line.partition("#")
		if trailer:
			# Drop whitespace that preceded the comment
			head = head.rstrip()

			# Nothing left of the line? Move on
			if not head:
				continue

		if head:
			block.append(head)
		elif block:
			# An empty line terminates the current block
			yield block
			block = []

	# Return the last block
	if block:
		yield block
217
218
def iterate_over_lines(f):
	"""
	Yields each raw line of f decoded to a string with its line
	ending (and any other trailing whitespace) removed.
	"""
	for raw in f:
		yield raw.decode().rstrip()