]> git.ipfire.org Git - people/ms/libloc.git/blame - src/python/importer.py
debian: Add watch configuration for uscan
[people/ms/libloc.git] / src / python / importer.py
CommitLineData
3192b66c
MT
1#!/usr/bin/python3
2###############################################################################
3# #
4# libloc - A library to determine the location of someone on the Internet #
5# #
6# Copyright (C) 2020 IPFire Development Team <info@ipfire.org> #
7# #
8# This library is free software; you can redistribute it and/or #
9# modify it under the terms of the GNU Lesser General Public #
10# License as published by the Free Software Foundation; either #
11# version 2.1 of the License, or (at your option) any later version. #
12# #
13# This library is distributed in the hope that it will be useful, #
14# but WITHOUT ANY WARRANTY; without even the implied warranty of #
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
16# Lesser General Public License for more details. #
17# #
18###############################################################################
19
20import gzip
21import logging
22import urllib.request
23
# Logger shared by everything in this module; its records are also passed
# on to the handlers of the ancestor loggers
log = logging.getLogger("location.importer")
log.propagate = 1
27
6ffd06b5
MT
# URLs of the WHOIS database dumps, grouped by the registry publishing them.
# Entries that are commented out are known but currently not used.
WHOIS_SOURCES = (
    # AFRINIC - African Network Information Centre
    "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",

    # APNIC - Asia Pacific Network Information Centre
    "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
    "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
    #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
    #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
    "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
    "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",

    # ARIN - American Registry for Internet Numbers
    # XXX there is nothing useful for us in here
    #"https://ftp.arin.net/pub/rr/arin.db",

    # LACNIC - Latin America and Caribbean Network Information Centre
    # XXX no source has been configured for this registry

    # RIPE - Réseaux IP Européens
    "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
    "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
    #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
    #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
    "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
    "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
)
55
429a43d1
MT
# URLs of the "delegated-extended" statistics files, grouped by the registry
# publishing them. Entries that are commented out are known but currently
# not used.
EXTENDED_SOURCES = (
    # AFRINIC - African Network Information Centre
    #"https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest",

    # APNIC - Asia Pacific Network Information Centre
    #"https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest",

    # ARIN - American Registry for Internet Numbers
    "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest",

    # LACNIC - Latin America and Caribbean Network Information Centre
    "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest",

    # RIPE - Réseaux IP Européens
    #"https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest",
)
72
3192b66c
MT
class Downloader(object):
    """
    Prepares requests which can optionally be sent through a proxy.
    """

    def __init__(self):
        # No proxy is used until set_proxy() has been called
        self.proxy = None

    def set_proxy(self, url):
        """
        Remembers the HTTP proxy that every following request will use
        """
        message = "Using proxy %s" % url
        log.info(message)

        self.proxy = url

    def request(self, url, data=None, return_blocks=False):
        """
        Prepares a request for url and wraps it into a DownloaderContext.

        data is passed on to urllib.request.Request unchanged and
        return_blocks is passed on to the DownloaderContext.
        """
        prepared = urllib.request.Request(url, data=data)

        # Route the request through the proxy if one has been set
        proxy = self.proxy
        if proxy:
            prepared.set_proxy(proxy, "http")

        return DownloaderContext(self, prepared, return_blocks=return_blocks)
3192b66c
MT
92
93
class DownloaderContext(object):
    """
    Context manager which sends a prepared request when it is entered and
    closes the response again when it is left.

    While the context is open, the response headers are available through
    headers and get_header(), the (decompressed) payload through body, and
    iterating over the context yields the payload line by line or block by
    block, depending on return_blocks.
    """

    def __init__(self, downloader, request, return_blocks=False):
        self.downloader = downloader
        self.request = request

        # Should we return one block or a single line?
        self.return_blocks = return_blocks

        # Save the response object
        self.response = None

    def __enter__(self):
        log.info("Retrieving %s...", self.request.full_url)

        # Send request
        self.response = urllib.request.urlopen(self.request)

        # Log the response headers
        log.debug("Response Headers:")
        for header in self.headers:
            log.debug(" %s: %s", header, self.get_header(header))

        return self

    def __exit__(self, type, value, traceback):
        # Close the response so that the connection is released. Nothing is
        # returned, so that any exception raised inside of the with block
        # is propagated to the caller.
        if self.response is not None:
            self.response.close()

    def __iter__(self):
        """
        Makes the object iterable by going through each block
        or, if return_blocks is not set, through each line
        """
        if self.return_blocks:
            return iterate_over_blocks(self.body)

        return iterate_over_lines(self.body)

    @property
    def headers(self):
        """
        The headers of the response or None if no response is available
        """
        if self.response:
            return self.response.headers

        return None

    def get_header(self, name):
        """
        Returns the value of the response header called name or None
        if there are no headers or no such header
        """
        headers = self.headers
        if headers:
            return headers.get(name)

        return None

    @property
    def body(self):
        """
        Returns a file-like object with the decoded content
        of the response.
        """
        content_type = self.get_header("Content-Type") or ""

        # Only compare the media type: drop any parameters that might
        # follow after a semicolon and ignore the case
        media_type = content_type.split(";", 1)[0].strip().lower()

        # Decompress any gzipped response on the fly
        if media_type in ("application/x-gzip", "application/gzip"):
            return gzip.GzipFile(fileobj=self.response, mode="rb")

        # Return the response by default
        return self.response
6ffd06b5
MT
153
154
d7fc3057
MT
def read_blocks(f):
    """
    Parses f block by block and yields a (type, data) tuple for each block.

    Every line of a block is split at its first colon into a key and a
    value. The type is the key of the first line of the block and data is
    a dictionary which maps every key to its value without surrounding
    whitespace. If a key occurs more than once, the last value is kept.

    A line without a colon raises a ValueError.
    """
    for lines in iterate_over_blocks(f):
        fields = {}
        block_type = None

        for line in lines:
            key, value = line.split(":", 1)

            # The first key that we come across names the block
            if block_type is None:
                block_type = key

            fields[key] = value.strip()

        yield block_type, fields
171
6ffd06b5
MT
def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
    """
    Splits the lines of f into blocks which are separated by empty lines.

    f is any iterable of lines. Lines may be bytes or strings: bytes are
    decoded with the first of charsets that fits, and if none of them
    fits, as UTF-8 with undecodable bytes being replaced.

    Lines that start with "#" or "%" are skipped and anything that follows
    a "#" within a line is removed. A line that is left empty by removing
    its comment is skipped, too, and does not end the current block.

    Yields every non-empty block as a list of strings which carry no
    trailing whitespace.
    """
    block = []

    for raw in f:
        # Convert to string
        if isinstance(raw, bytes):
            for charset in charsets:
                try:
                    line = raw.decode(charset)
                except UnicodeDecodeError:
                    continue
                break
            else:
                # None of the charsets fitted. Keep going with a lossy
                # conversion instead of failing on bytes further down.
                line = raw.decode("utf-8", errors="replace")
        else:
            line = raw

        # Skip commented lines
        if line.startswith(("#", "%")):
            continue

        # Strip line-endings
        line = line.rstrip()

        # Remove any comments at the end of line
        line, separator, _comment = line.partition("#")

        # Check for the separator instead of the comment text, so that
        # a "#" with nothing behind it is handled like any other comment
        if separator:
            # Strip any whitespace before the comment
            line = line.rstrip()

            # If the line is now empty, we move on
            if not line:
                continue

        if line:
            block.append(line)
            continue

        # End the block on an empty line
        if block:
            yield block

        # Reset the block
        block = []

    # Return the last block
    if block:
        yield block
217
429a43d1
MT
218
def iterate_over_lines(f):
    """
    Yields every line of f as a string without any trailing whitespace.

    The lines of f are expected to be bytes and are decoded with the
    default encoding of bytes.decode().
    """
    for raw in f:
        yield raw.decode().rstrip()