IDNA support for zones, messages, names in rdata, rrsets, and rdatasets.

author Bob Halley <halley@dnspython.org>
Sun, 3 May 2020 21:49:55 +0000 (14:49 -0700)

committer Bob Halley <halley@dnspython.org>
Sun, 3 May 2020 21:49:55 +0000 (14:49 -0700)
diff --git a/dns/message.py b/dns/message.py

index b9b0253d0d7dc1e644f740aa9c98b52520d6c5a2..da1338d2f3665d896ecb37a670667900ffb3b95f 100644 (file)
--- a/dns/message.py
+++ b/dns/message.py
@@ -843,9 +843,9 @@ class _TextReader(object):
      last_name: The most recently read name when building a message object.
      """
  
-    def __init__(self, text, message):
+    def __init__(self, text, message, idna_codec):
          self.message = message
-        self.tok = dns.tokenizer.Tokenizer(text)
+        self.tok = dns.tokenizer.Tokenizer(text, idna_codec=idna_codec)
          self.last_name = None
          self.zone_rdclass = dns.rdataclass.IN
          self.updating = False
@@ -901,7 +901,7 @@ class _TextReader(object):
  
          token = self.tok.get(want_leading=True)
          if not token.is_whitespace():
-            self.last_name = dns.name.from_text(token.value, None)
+            self.last_name = self.tok.as_name(token, None)
          name = self.last_name
          token = self.tok.get()
          if not token.is_identifier():
@@ -934,7 +934,7 @@ class _TextReader(object):
          # Name
          token = self.tok.get(want_leading=True)
          if not token.is_whitespace():
-            self.last_name = dns.name.from_text(token.value, None)
+            self.last_name = self.tok.as_name(token, None)
          name = self.last_name
          token = self.tok.get()
          if not token.is_identifier():
@@ -1010,11 +1010,15 @@ class _TextReader(object):
              line_method(section)
  
  
-def from_text(text):
+def from_text(text, idna_codec=None):
      """Convert the text format message into a message object.
  
      *text*, a ``text``, the text format message.
  
+    *idna_codec*, a ``dns.name.IDNACodec``, specifies the IDNA
+    encoder/decoder.  If ``None``, the default IDNA 2003 encoder/decoder
+    is used.
+
      Raises ``dns.message.UnknownHeaderField`` if a header is unknown.
  
      Raises ``dns.exception.SyntaxError`` if the text is badly formed.
@@ -1028,7 +1032,7 @@ def from_text(text):
  
      m = Message()
  
-    reader = _TextReader(text, m)
+    reader = _TextReader(text, m, idna_codec)
      reader.read()
  
      return m
@@ -1055,7 +1059,7 @@ def from_file(f):
  
  def make_query(qname, rdtype, rdclass=dns.rdataclass.IN, use_edns=None,
                 want_dnssec=False, ednsflags=None, payload=None,
-               request_payload=None, options=None):
+               request_payload=None, options=None, idna_codec=None):
      """Make a query message.
  
      The query name, type, and class may all be specified either
@@ -1091,11 +1095,15 @@ def make_query(qname, rdtype, rdclass=dns.rdataclass.IN, use_edns=None,
      *options*, a list of ``dns.edns.Option`` objects or ``None``, the EDNS
      options.
  
+    *idna_codec*, a ``dns.name.IDNACodec``, specifies the IDNA
+    encoder/decoder.  If ``None``, the default IDNA 2003 encoder/decoder
+    is used.
+
      Returns a ``dns.message.Message``
      """
  
      if isinstance(qname, str):
-        qname = dns.name.from_text(qname)
+        qname = dns.name.from_text(qname, idna_codec=idna_codec)
      if isinstance(rdtype, str):
          rdtype = dns.rdatatype.from_text(rdtype)
      if isinstance(rdclass, str):
diff --git a/dns/message.pyi b/dns/message.pyi

index fb55a4c5a09bd33a968b32fb1832ba21d5077c5c..76af040ae348643a0000d9bf69b9db4484ef4ca5 100644 (file)
--- a/dns/message.pyi
+++ b/dns/message.pyi
@@ -41,7 +41,7 @@ class Message:
      def is_response(self, other : Message) -> bool:
          ...
  
-def from_text(a : str) -> Message:
+def from_text(a : str, idna_codec : Optional[name.IDNACodec] = None) -> Message:
      ...
  
  def from_wire(wire, keyring : Optional[Dict[name.Name,bytes]] = None, request_mac = b'', xfr=False, origin=None,
diff --git a/dns/rdata.pyi b/dns/rdata.pyi

index f5d4abbf9ff597bf28c0bf1dbf41c4a78fb640c5..84eeb8f8f61ade60eb3ff808b958d5bcc7e291a8 100644 (file)
--- a/dns/rdata.pyi
+++ b/dns/rdata.pyi
@@ -12,7 +12,7 @@ _rdata_modules : Dict[Tuple[Any,Rdata],Any]
  
  def from_text(rdclass : int, rdtype : int, tok : Optional[str], origin : Optional[Name] = None,
                relativize : bool = True, relativize_to : Optional[Name] = None,
-              idna_codec : Optional[IDNACodec]):
+              idna_codec : Optional[IDNACodec] = None):
      ...
  
  def from_wire(rdclass : int, rdtype : int, wire : bytes, current : int, rdlen : int, origin : Optional[Name] = None):
diff --git a/dns/rdataset.py b/dns/rdataset.py

index 98049092c0a6af90e2d9863e2edc328b47165c53..9d6a6a8e617cc9cf6daf0669225dcad2bc3bd3af 100644 (file)
--- a/dns/rdataset.py
+++ b/dns/rdataset.py
@@ -298,10 +298,14 @@ class Rdataset(dns.set.Set):
          return False
  
  
-def from_text_list(rdclass, rdtype, ttl, text_rdatas):
+def from_text_list(rdclass, rdtype, ttl, text_rdatas, idna_codec=None):
      """Create an rdataset with the specified class, type, and TTL, and with
      the specified list of rdatas in text format.
  
+    *idna_codec*, a ``dns.name.IDNACodec``, specifies the IDNA
+    encoder/decoder to use; if ``None``, the default IDNA 2003
+    encoder/decoder is used.
+
      Returns a ``dns.rdataset.Rdataset`` object.
      """
  
@@ -312,7 +316,7 @@ def from_text_list(rdclass, rdtype, ttl, text_rdatas):
      r = Rdataset(rdclass, rdtype)
      r.update_ttl(ttl)
      for t in text_rdatas:
-        rd = dns.rdata.from_text(r.rdclass, r.rdtype, t)
+        rd = dns.rdata.from_text(r.rdclass, r.rdtype, t, idna_codec=idna_codec)
          r.add(rd)
      return r
  
diff --git a/dns/rdataset.pyi b/dns/rdataset.pyi

index 3efff88a4cc8081333b328ae43b7052476021beb..a7bbf2d4cd47f832b9148ca7f6fa8918328d36ed 100644 (file)
--- a/dns/rdataset.pyi
+++ b/dns/rdataset.pyi
@@ -45,7 +45,7 @@ class Rdataset(set.Set):
          ...
  
  
-def from_text_list(rdclass : Union[int,str], rdtype : Union[int,str], ttl : int, text_rdatas : str) -> rdataset.Rdataset:
+def from_text_list(rdclass : Union[int,str], rdtype : Union[int,str], ttl : int, text_rdatas : str, idna_codec : Optional[name.IDNACodec] = None) -> rdataset.Rdataset:
      ...
  
  def from_text(rdclass : Union[int,str], rdtype : Union[int,str], ttl : int, *text_rdatas : str) -> rdataset.Rdataset:
diff --git a/dns/rrset.py b/dns/rrset.py

index c5ed5ba4cfcbd6d12db86993a04afde85d6251c5..1113ad36ef1b4b4f6291cbd2ccfe0c8aa867617f 100644 (file)
--- a/dns/rrset.py
+++ b/dns/rrset.py
@@ -143,7 +143,7 @@ def from_text_list(name, ttl, rdclass, rdtype, text_rdatas,
      r = RRset(name, rdclass, rdtype)
      r.update_ttl(ttl)
      for t in text_rdatas:
-        rd = dns.rdata.from_text(r.rdclass, r.rdtype, t)
+        rd = dns.rdata.from_text(r.rdclass, r.rdtype, t, idna_codec=idna_codec)
          r.add(rd)
      return r
  
@@ -162,7 +162,12 @@ def from_rdata_list(name, ttl, rdatas, idna_codec=None):
      """Create an RRset with the specified name and TTL, and with
      the specified list of rdata objects.
  
+    *idna_codec*, a ``dns.name.IDNACodec``, specifies the IDNA
+    encoder/decoder to use; if ``None``, the default IDNA 2003
+    encoder/decoder is used.
+
      Returns a ``dns.rrset.RRset`` object.
+
      """
  
      if isinstance(name, str):
diff --git a/dns/zone.py b/dns/zone.py

index 73c9bf439e738e993c2c1dec2ded9078d62e8314..555b78a4ccf2cbfd6795c4293470332cd8247fa0 100644 (file)
--- a/dns/zone.py
+++ b/dns/zone.py
@@ -635,8 +635,7 @@ class _MasterReader(object):
              raise UnknownOrigin
          token = self.tok.get(want_leading=True)
          if not token.is_whitespace():
-            self.last_name = dns.name.from_text(
-                token.value, self.current_origin)
+            self.last_name = self.tok.as_name(token, self.current_origin)
          else:
              token = self.tok.get()
              if token.is_eol_or_eof():
@@ -859,7 +858,8 @@ class _MasterReader(object):
              name = lhs.replace('$%s' % (lmod), lzfindex)
              rdata = rhs.replace('$%s' % (rmod), rzfindex)
  
-            self.last_name = dns.name.from_text(name, self.current_origin)
+            self.last_name = dns.name.from_text(name, self.current_origin,
+                                                self.tok.idna_codec)
              name = self.last_name
              if not name.is_subdomain(self.zone.origin):
                  self._eat_line()
@@ -943,7 +943,8 @@ class _MasterReader(object):
                          if token.is_identifier():
                              new_origin =\
                                  dns.name.from_text(token.value,
-                                                   self.current_origin)
+                                                   self.current_origin,
+                                                   self.tok.idna_codec)
                              self.tok.get_eol()
                          elif not token.is_eol_or_eof():
                              raise dns.exception.SyntaxError(
@@ -984,7 +985,7 @@ class _MasterReader(object):
  
  def from_text(text, origin=None, rdclass=dns.rdataclass.IN,
                relativize=True, zone_factory=Zone, filename=None,
-              allow_include=False, check_origin=True):
+              allow_include=False, check_origin=True, idna_codec=None):
      """Build a zone object from a master file format string.
  
      @param text: the master file format input
@@ -1007,6 +1008,9 @@ def from_text(text, origin=None, rdclass=dns.rdataclass.IN,
      @param check_origin: should sanity checks of the origin node be done?
      The default is True.
      @type check_origin: bool
+    @param idna_codec: specifies the IDNA encoder/decoder.  If ``None``, the
+    default IDNA 2003 encoder/decoder is used.
+    @type idna_codec: dns.name.IDNACodec or None
      @raises dns.zone.NoSOA: No SOA RR was found at the zone origin
      @raises dns.zone.NoNS: No NS RRset was found at the zone origin
      @rtype: dns.zone.Zone object
@@ -1018,7 +1022,7 @@ def from_text(text, origin=None, rdclass=dns.rdataclass.IN,
  
      if filename is None:
          filename = '<string>'
-    tok = dns.tokenizer.Tokenizer(text, filename)
+    tok = dns.tokenizer.Tokenizer(text, filename, idna_codec=idna_codec)
      reader = _MasterReader(tok, origin, rdclass, relativize, zone_factory,
                             allow_include=allow_include,
                             check_origin=check_origin)
diff --git a/tests/test_message.py b/tests/test_message.py

index b7995862aa71b8894f681afda6e74f3d8d410343..166d7e8643c189c90e2ba0d1156863cff36716d5 100644 (file)
--- a/tests/test_message.py
+++ b/tests/test_message.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8
  # Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
  
  # Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
@@ -24,6 +25,7 @@ import dns.message
  import dns.name
  import dns.rdataclass
  import dns.rdatatype
+import dns.rrset
  
  query_text = """id 1234
  opcode QUERY
@@ -91,6 +93,16 @@ goodhex3 = b'04d2010f0001000000000001047777777709646e73707974686f6e' \
  
  goodwire3 = binascii.unhexlify(goodhex3)
  
+idna_text = """id 1234
+opcode QUERY
+rcode NOERROR
+flags QR AA RD
+;QUESTION
+Königsgäßchen. IN NS
+;ANSWER
+Königsgäßchen. 3600 IN NS Königsgäßchen.
+"""
+
  class MessageTestCase(unittest.TestCase):
  
      def test_comparison_eq1(self):
@@ -224,5 +236,21 @@ class MessageTestCase(unittest.TestCase):
              dns.message.from_wire(wire[:-3])
          self.assertRaises(dns.message.Truncated, bad)
  
+    def test_IDNA_2003(self):
+        a = dns.message.from_text(idna_text, idna_codec=dns.name.IDNA_2003)
+        rrs = dns.rrset.from_text_list('xn--knigsgsschen-lcb0w.', 30,
+                                       'in', 'ns',
+                                       ['xn--knigsgsschen-lcb0w.'],
+                                       idna_codec=dns.name.IDNA_2003)
+        self.assertEqual(a.answer[0], rrs)
+
+    def test_IDNA_2008(self):
+        a = dns.message.from_text(idna_text, idna_codec=dns.name.IDNA_2008)
+        rrs = dns.rrset.from_text_list('xn--knigsgchen-b4a3dun.', 30,
+                                       'in', 'ns',
+                                       ['xn--knigsgchen-b4a3dun.'],
+                                       idna_codec=dns.name.IDNA_2008)
+        self.assertEqual(a.answer[0], rrs)
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/tests/test_rdataset.py b/tests/test_rdataset.py

new file mode 100644 (file)

index 0000000..abc0841
--- /dev/null
+++ b/tests/test_rdataset.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8
+# Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
+
+import unittest
+
+import dns.name
+import dns.rdataset
+
+class RdatasetTestCase(unittest.TestCase):
+
+    def testCodec2003(self):
+        r1 = dns.rdataset.from_text_list('in', 'ns', 30,
+                                         ['Königsgäßchen'])
+        r2 = dns.rdataset.from_text_list('in', 'ns', 30,
+                                         ['xn--knigsgsschen-lcb0w'])
+        self.assertEqual(r1, r2)
+
+    def testCodec2008(self):
+        r1 = dns.rdataset.from_text_list('in', 'ns', 30,
+                                         ['Königsgäßchen'],
+                                         idna_codec=dns.name.IDNA_2008)
+        r2 = dns.rdataset.from_text_list('in', 'ns', 30,
+                                         ['xn--knigsgchen-b4a3dun'],
+                                         idna_codec=dns.name.IDNA_2008)
+        self.assertEqual(r1, r2)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_rrset.py b/tests/test_rrset.py

index 638dff37fbd6da9919cfa5ce2bdbe84d02911173..12eac3ff0bfcb06bc0b8a38a58bfe1c3c4b2f47b 100644 (file)
--- a/tests/test_rrset.py
+++ b/tests/test_rrset.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8
  # Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
  
  # Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
@@ -17,6 +18,7 @@
  
  import unittest
  
+import dns.name
  import dns.rrset
  
  class RRsetTestCase(unittest.TestCase):
@@ -52,5 +54,21 @@ class RRsetTestCase(unittest.TestCase):
          r2 = dns.rrset.from_text('FOO', 30, 'in', 'a', '10.0.0.2', '10.0.0.1')
          self.assertNotEqual(r1, r2)
  
+    def testCodec2003(self):
+        r1 = dns.rrset.from_text_list('Königsgäßchen', 30, 'in', 'ns',
+                                      ['Königsgäßchen'])
+        r2 = dns.rrset.from_text_list('xn--knigsgsschen-lcb0w', 30, 'in', 'ns',
+                                      ['xn--knigsgsschen-lcb0w'])
+        self.assertEqual(r1, r2)
+
+    def testCodec2008(self):
+        r1 = dns.rrset.from_text_list('Königsgäßchen', 30, 'in', 'ns',
+                                      ['Königsgäßchen'],
+                                      idna_codec=dns.name.IDNA_2008)
+        r2 = dns.rrset.from_text_list('xn--knigsgchen-b4a3dun', 30, 'in', 'ns',
+                                      ['xn--knigsgchen-b4a3dun'],
+                                      idna_codec=dns.name.IDNA_2008)
+        self.assertEqual(r1, r2)
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/tests/test_zone.py b/tests/test_zone.py

index d6de67247594bae83bce68e9e9b1136c2b5c263d..3f77f4afa47a4865fe770b248f98402cd9e06423 100644 (file)
--- a/tests/test_zone.py
+++ b/tests/test_zone.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8
  # Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
  
  # Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
@@ -131,6 +132,13 @@ ns1 1d1s a 10.0.0.1
  ns2 1w1D1h1m1S a 10.0.0.2
  """
  
+codec_text = """
+@ soa foo bar 1 2 3 4 5
+@ ns ns1
+@ ns ns2
+Königsgäßchen 300 NS Königsgäßchen
+"""
+
  _keep_output = True
  
  def _rdata_sort(a):
@@ -577,5 +585,24 @@ class ZoneTestCase(unittest.TestCase):
          z2_rel = dns.zone.from_xfr(make_xfr(z1_rel), relativize=True)
          self.assertEqual(z1_rel, z2_rel)
  
+    def testCodec2003(self):
+        z = dns.zone.from_text(codec_text, 'example.', relativize=True)
+        n2003 = dns.name.from_text('xn--knigsgsschen-lcb0w', None)
+        n2008 = dns.name.from_text('xn--knigsgchen-b4a3dun', None)
+        self.assertTrue(n2003 in z)
+        self.assertFalse(n2008 in z)
+        rrs = z.find_rrset(n2003, 'NS')
+        self.assertEqual(rrs[0].target, n2003)
+
+    def testCodec2008(self):
+        z = dns.zone.from_text(codec_text, 'example.', relativize=True,
+                               idna_codec=dns.name.IDNA_2008)
+        n2003 = dns.name.from_text('xn--knigsgsschen-lcb0w', None)
+        n2008 = dns.name.from_text('xn--knigsgchen-b4a3dun', None)
+        self.assertFalse(n2003 in z)
+        self.assertTrue(n2008 in z)
+        rrs = z.find_rrset(n2008, 'NS')
+        self.assertEqual(rrs[0].target, n2008)
+
  if __name__ == '__main__':
      unittest.main()
author	Bob Halley <halley@dnspython.org>
	Sun, 3 May 2020 21:49:55 +0000 (14:49 -0700)
committer	Bob Halley <halley@dnspython.org>
	Sun, 3 May 2020 21:49:55 +0000 (14:49 -0700)
dns/message.py		patch \| blob \| blame \| history
dns/message.pyi		patch \| blob \| blame \| history
dns/rdata.pyi		patch \| blob \| blame \| history
dns/rdataset.py		patch \| blob \| blame \| history
dns/rdataset.pyi		patch \| blob \| blame \| history
dns/rrset.py		patch \| blob \| blame \| history
dns/zone.py		patch \| blob \| blame \| history
tests/test_message.py		patch \| blob \| blame \| history
tests/test_rdataset.py	[new file with mode: 0644]	patch \| blob
tests/test_rrset.py		patch \| blob \| blame \| history
tests/test_zone.py		patch \| blob \| blame \| history