From: Xiang Zhang Date: Mon, 22 May 2017 17:04:27 +0000 (+0800) Subject: bpo-30003: Fix handling escape characters in HZ codec (#1720) (#1556) X-Git-Tag: v2.7.14rc1~135 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6e1b832a6c0c8f32962a196ab631ccc17471d32b;p=thirdparty%2FPython%2Fcpython.git bpo-30003: Fix handling escape characters in HZ codec (#1720) (#1556) --- diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index fdae538973d3..a1049373a450 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -82,6 +82,10 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase): (b'ab~cd', 'replace', u'ab\uFFFDd'), (b'ab\xffcd', 'replace', u'ab\uFFFDcd'), (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'), + # issue 30003 + (u'ab~cd', 'strict', b'ab~~cd'), # escape ~ + (b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode + (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode ) def test_main(): diff --git a/Misc/NEWS b/Misc/NEWS index 254bb52f9778..938a02955a54 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -49,6 +49,9 @@ Extension Modules Library ------- +- bpo-30003: Fix handling escape characters in HZ codec. Based on patch + by Ma Lin. + - bpo-30375: Warnings emitted when compile a regular expression now always point to the line in the user code. Previously they could point into inners of the re module if emitted from inside of groups or conditionals. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 3bc652fefffb..92cf06d5ffd3 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -335,15 +335,17 @@ ENCODER(hz) DBCHAR code; if (c < 0x80) { - if (state->i == 0) { - WRITE1((unsigned char)c) - NEXT(1, 1) - } - else { - WRITE3('~', '}', (unsigned char)c) - NEXT(1, 3) + if (state->i) { + WRITE2('~', '}') + NEXT_OUT(2) state->i = 0; } + WRITE1((unsigned char)c) + NEXT(1, 1) + if (c == '~') { + WRITE1('~') + NEXT_OUT(1) + } continue; } @@ -390,20 +392,19 @@ DECODER(hz) unsigned char c2 = IN2; REQUIRE_INBUF(2) - if (c2 == '~') { + if (c2 == '~' && state->i == 0) { WRITE1('~') - NEXT(2, 1) - continue; + NEXT_OUT(1) } else if (c2 == '{' && state->i == 0) state->i = 1; /* set GB */ + else if (c2 == '\n' && state->i == 0) + ; /* line-continuation */ else if (c2 == '}' && state->i == 1) state->i = 0; /* set ASCII */ - else if (c2 == '\n') - ; /* line-continuation */ else return 2; - NEXT(2, 0); + NEXT_IN(2) continue; }