import gzip
import httpx
import idna
+import ipaddress
import io
import itertools
import logging
# Setup logging
log = logging.getLogger(__name__)
-HOST_PREFIXES = set((
- "0.0.0.0 ",
- "127.0.0.1 ",
- "::1 ",
-))
-
IGNORED_DOMAINS = set((
"localhost",
))
return Format.ADBLOCKPLUS
# Is this a hosts file?
- elif any(line.startswith(prefix) for prefix in HOST_PREFIXES):
+ elif self._detect_format_hosts(line):
return Format.HOSTS
# Check for a plain FQDN
# The format is (still?) unknown
return None
+ def _detect_format_hosts(self, line):
+     """
+     Tells whether the given line looks like an entry of a hosts file
+     """
+     # The line counts as hosts format exactly when the hosts parser
+     # returns something truthy for it; anything else means no match
+     return bool(self._process_hosts(line))
+
def _process_adblockplus(self, line):
"""
Parse the domain from the AdBlockPlus format
"""
Parses a line of a hosts file.
"""
- for prefix in HOST_PREFIXES:
- if line.startswith(prefix):
- return line.removeprefix(prefix)
+ try:
+ address, domain = line.split()
- # If none of the prefixes matched, we return the entire line
- return line
+ # If the line does not split into exactly two tokens, it is not in hosts format
+ except ValueError as e:
+ return
+
+ # Check if we can parse the IP address
+ try:
+ ipaddress.ip_address(address)
+
+ # If the line is not prefixed with an IP address, we cannot continue
+ except ValueError as e:
+ return
+
+ # We have a valid IP address and therefore can return the domain
+ return domain
def _process_plain(self, line):
"""