From 08523cd4bae0d8eec5ea87266360b8a91760a7e3 Mon Sep 17 00:00:00 2001 From: Ross McFarland Date: Sun, 26 Jun 2022 16:35:47 -0400 Subject: [PATCH] Functionally complete (I think) IDNA encode/decode support --- octodns/idna.py | 34 ++++++++++++++++++++-------------- tests/test_octodns_idna.py | 15 ++++++++++++++- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/octodns/idna.py b/octodns/idna.py index 65e3f53..3d89063 100644 --- a/octodns/idna.py +++ b/octodns/idna.py @@ -6,21 +6,27 @@ from idna import decode as _decode, encode as _encode def idna_encode(name): - if not name: - # idna.encode doesn't handle '' + # Based on https://github.com/psf/requests/pull/3695/files + # #diff-0debbb2447ce5debf2872cb0e17b18babe3566e9d9900739e8581b355bd513f7R39 + try: + name.encode('ascii') + # No utf8 chars, just use as-is return name - elif name.startswith('*'): - # idna.encode doesn't like the * - name = _encode(name[2:]).decode('utf-8') - return f'*.{name}' - return _encode(name).decode('utf-8') + except UnicodeEncodeError: + if name.startswith('*'): + # idna.encode doesn't like the * + name = _encode(name[2:]).decode('utf-8') + return f'*.{name}' + return _encode(name).decode('utf-8') def idna_decode(name): - if not name: - # idna.decode doesn't handle '' - return name - elif name.startswith('*'): - # idna.decode doesn't like the * - return f'*.{_decode(name[2:])}' - return _decode(name) + pieces = name.lower().split('.') + if any([p.startswith('xn--') for p in pieces]): + # it's idna + if name.startswith('*'): + # idna.decode doesn't like the * + return f'*.{_decode(name[2:])}' + return _decode(name) + # not idna, just return as-is + return name diff --git a/tests/test_octodns_idna.py b/tests/test_octodns_idna.py index b41ee84..0a28d0f 100644 --- a/tests/test_octodns_idna.py +++ b/tests/test_octodns_idna.py @@ -16,7 +16,7 @@ class TestIdna(TestCase): got = idna_encode(value) self.assertEqual(expected, got) # round tripped - self.assertEqual(value, idna_decode(value)) + self.assertEqual(value, idna_decode(got)) def test_noops(self): # empty @@ -41,3 +41,16 @@ class TestIdna(TestCase): # encoded with encoded name self.assertIdna('zajęzyk.zajęzyk.pl.', 'xn--zajzyk-y4a.xn--zajzyk-y4a.pl.') + + self.assertIdna('déjàvu.com.', 'xn--djvu-1na6c.com.') + self.assertIdna('déjà-vu.com.', 'xn--dj-vu-sqa5d.com.') + + def test_underscores(self): + # underscores aren't valid in idna names, so these are all ascii + + self.assertIdna('foo_bar.pl.', 'foo_bar.pl.') + self.assertIdna('bleep_bloop.foo_bar.pl.', 'bleep_bloop.foo_bar.pl.') + + def test_case_insensitivity(self): + # Shouldn't be hit by octoDNS use cases, but checked anyway + self.assertEqual('zajęzyk.pl.', idna_decode('XN--ZAJZYK-Y4A.PL.'))