From: Bob Halley Date: Mon, 26 Sep 2016 19:38:20 +0000 (-0700) Subject: Add IDNA 2008 Practical mode, since IDNA 2008 is absurdly strict. X-Git-Tag: v1.15.0~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb0c9f21f4a6f56f2fe8d7c1fc991080ef89d223;p=thirdparty%2Fdnspython.git Add IDNA 2008 Practical mode, since IDNA 2008 is absurdly strict. --- diff --git a/dns/name.py b/dns/name.py index e1259b95..758a394c 100644 --- a/dns/name.py +++ b/dns/name.py @@ -140,25 +140,41 @@ class IDNA2008Codec(IDNACodec): """IDNA 2008 encoder/decoder.""" - def __init__(self, uts_46=True, transitional=False): + def __init__(self, uts_46=False, transitional=False, + allow_pure_ascii=False): """Initialize the IDNA 2008 encoder/decoder. @param uts_46: If True, apply Unicode IDNA compatibility processing as described in Unicode Technical Standard #46 (U{http://unicode.org/reports/tr46/}). This parameter is only meaningful if IDNA 2008 is in use. If False, do not apply - the mapping. The default is True. + the mapping. The default is False. @type uts_46: bool @param transitional: If True, use the "transitional" mode described in Unicode Technical Standard #46. This parameter is only meaningful if IDNA 2008 is in use. The default is False. @type transitional: bool + @param allow_pure_ascii: If True, then a label which + consists of only ASCII characters is allowed. This is less strict + than regular IDNA 2008, but is also necessary for mixed names, + e.g. a name starting with "_sip._tcp." and ending in an IDN + suffix, which would otherwise be disallowed. 
The default is False. + @type allow_pure_ascii: bool """ self.uts_46 = uts_46 self.transitional = transitional + self.allow_pure_ascii = allow_pure_ascii + + def is_all_ascii(self, label): + for c in label: + if ord(c) > 0x7f: + return False + return True def encode(self, label): if label == '': return b'' + if self.allow_pure_ascii and self.is_all_ascii(label): + return label.encode('ascii') if not have_idna_2008: raise NoIDNA2008 try: @@ -184,9 +200,11 @@ class IDNA2008Codec(IDNACodec): _escaped = bytearray(b'"().;\\@$') IDNA_2003 = IDNA2003Codec() -IDNA_2008 = IDNA2008Codec() -IDNA_2008_Strict = IDNA2008Codec(False) -IDNA_2008_Transitional = IDNA2008Codec(True, True) +IDNA_2008_Practical = IDNA2008Codec(True, False, True) +IDNA_2008_UTS_46 = IDNA2008Codec(True, False, False) +IDNA_2008_Strict = IDNA2008Codec(False, False, False) +IDNA_2008_Transitional = IDNA2008Codec(True, True, False) +IDNA_2008 = IDNA_2008_Practical def _escapify(label, unicode_mode=False): """Escape the characters in label which need it. diff --git a/tests/test_name.py b/tests/test_name.py index 11504c71..b9ddd816 100644 --- a/tests/test_name.py +++ b/tests/test_name.py @@ -675,6 +675,30 @@ class NameTestCase(unittest.TestCase): e2 = dns.name.from_unicode(t, idna_codec=c2) self.assertEqual(str(e2), 'xn--knigsgsschen-lcb0w.') + def testFromUnicodeIDNA2008Mixed(self): + # the IDN rules for names are very restrictive, disallowing + # practical names like u'_sip._tcp.Königsgäßchen'. Dnspython + # has a "practical" mode which permits labels which are purely + # ASCII to go straight through, and thus not invalidate useful + # things in the real world. 
+ if dns.name.have_idna_2008: + t = u'_sip._tcp.Königsgäßchen' + def bad1(): + codec = dns.name.IDNA_2008_Strict + return dns.name.from_unicode(t, idna_codec=codec) + def bad2(): + codec = dns.name.IDNA_2008_UTS_46 + return dns.name.from_unicode(t, idna_codec=codec) + def bad3(): + codec = dns.name.IDNA_2008_Transitional + return dns.name.from_unicode(t, idna_codec=codec) + self.failUnlessRaises(dns.name.IDNAException, bad1) + self.failUnlessRaises(dns.name.IDNAException, bad2) + self.failUnlessRaises(dns.name.IDNAException, bad3) + e = dns.name.from_unicode(t, + idna_codec=dns.name.IDNA_2008_Practical) + self.assertEqual(str(e), '_sip._tcp.xn--knigsgchen-b4a3dun.') + def testToUnicode1(self): n = dns.name.from_text(u'foo.bar') s = n.to_unicode()