git.ipfire.org Git - location/location-database.git/blob - tools/downloader.py
Fix downloading databases in the default format
[location/location-database.git] / tools / downloader.py
1 #!/usr/bin/python3
2 ###############################################################################
3 # #
4 # location-database - A database to determine someone's #
5 # location on the Internet #
6 # Copyright (C) 2018 Michael Tremer #
7 # #
8 # This program is free software: you can redistribute it and/or modify #
9 # it under the terms of the GNU General Public License as published by #
10 # the Free Software Foundation, either version 3 of the License, or #
11 # (at your option) any later version. #
12 # #
13 # This program is distributed in the hope that it will be useful, #
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16 # GNU General Public License for more details. #
17 # #
18 # You should have received a copy of the GNU General Public License #
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. #
20 # #
21 ###############################################################################
22
23 import gzip
24 import logging
25 import urllib.request
26
27 from . import util
28
# Setup logger
log = logging.getLogger("downloader")

# Hand all records on to the handlers of the parent loggers.
# "propagate" is a boolean attribute, so use a real boolean here.
log.propagate = True
32
class Downloader(object):
    """
    Prepares HTTP requests which share one User-Agent and, optionally,
    one proxy.

    Nothing is sent by this class itself. request() only builds the
    request and wraps it into a DownloaderContext, which performs the
    transfer once it is entered.
    """

    # Sent with every request unless it is set to something empty
    USER_AGENT = "location-database/1.0"

    def __init__(self):
        # No proxy is used until set_proxy() has been called
        self.proxy = None

    def set_proxy(self, url):
        """
        Remembers the given proxy so that all requests
        created afterwards are sent through it
        """
        log.info("Using proxy %s" % url)
        self.proxy = url

    def request(self, url, data=None, return_blocks=True):
        """
        Creates a request for url and returns it wrapped
        into a DownloaderContext.

        data is passed on to urllib.request.Request() and
        return_blocks is passed on to the context.
        """
        prepared = urllib.request.Request(url, data=data)

        # Route the request through the proxy if one has been set
        if self.proxy:
            prepared.set_proxy(self.proxy, "http")

        # Identify ourselves to the server
        if self.USER_AGENT:
            prepared.add_header("User-Agent", self.USER_AGENT)

        context = DownloaderContext(self, prepared, return_blocks=return_blocks)

        return context
58
59
class DownloaderContext(object):
    """
    Context manager which sends a prepared request when it is entered
    and closes the response again when it is left.

    Iterating over the context yields the content of the response,
    either as whatever util.iterate_over_blocks() produces (if
    return_blocks is set) or as decoded lines without trailing
    whitespace.
    """

    def __init__(self, downloader, request, return_blocks=True):
        self.downloader = downloader
        self.request = request

        # Should we return one block or a single line?
        self.return_blocks = return_blocks

        # Save the response object (set when the context is entered)
        self.response = None

    def __enter__(self):
        """
        Sends the request, logs the response headers
        and returns this context
        """
        log.info("Retrieving %s...", self.request.full_url)

        # Send request
        self.response = urllib.request.urlopen(self.request)

        # Log the response headers
        log.debug("Response Headers:")
        for header in self.headers:
            log.debug(" %s: %s", header, self.get_header(header))

        return self

    def __exit__(self, type, value, traceback):
        """
        Closes the response. Exceptions raised inside of the
        with block are never suppressed.
        """
        # Release the connection right away instead of leaving
        # that to the garbage collector
        if self.response is not None:
            self.response.close()

    def __iter__(self):
        """
        Makes the object iterable by going through each block
        or, if return_blocks is not set, through each line
        """
        if self.return_blocks:
            yield from util.iterate_over_blocks(self.body)
            return

        # The body delivers bytes, one line at a time
        for line in self.body:
            # Decode the line and strip the ending
            yield line.decode().rstrip()

    @property
    def headers(self):
        """
        The headers of the response or None if
        no request has been sent, yet
        """
        if self.response is not None:
            return self.response.headers

        return None

    def get_header(self, name):
        """
        Returns the value of the response header called name
        or None if it is not available
        """
        headers = self.headers
        if headers:
            return headers.get(name)

        return None

    @property
    def body(self):
        """
        Returns a file-like object with the decoded content
        of the response.
        """
        content_type = self.get_header("Content-Type") or ""

        # Only look at the media type itself: drop any parameters
        # (e.g. "; charset=...") and do not care about the case
        media_type = content_type.split(";", 1)[0].strip().lower()

        # Decompress any gzipped response on the fly
        if media_type in ("application/x-gzip", "application/gzip"):
            return gzip.GzipFile(fileobj=self.response, mode="rb")

        # Return the response by default
        return self.response
134
135
if __name__ == "__main__":
    import sys

    # Show everything, including the response headers
    logging.basicConfig(level=logging.DEBUG)

    downloader = Downloader()

    # Every command line argument is treated as a URL to fetch
    for target in sys.argv[1:]:
        print("Downloading %s..." % target)

        with downloader.request(target) as response:
            for block in response:
                for line in block:
                    print(line)

                # Separate the blocks by an empty line
                print()