From: Jonathan Wakely Date: Fri, 10 Oct 2025 22:56:43 +0000 (+0100) Subject: libstdc++: Fix reverse iteration in _Utf16_view X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8adda950933de309048d6a99d805bb674ffdbadb;p=thirdparty%2Fgcc.git libstdc++: Fix reverse iteration in _Utf16_view When iterating over a range of char16_t in reverse the _Utf_view was incorrectly treating U+DC00 as a valid high surrogate that can precede the low surrogate. But U+DC00 is a low surrogate, and so should not be allowed before another low surrogate. The check should be u2 >= 0xDC00 rather than u2 > 0xDC00. libstdc++-v3/ChangeLog: * include/bits/unicode.h (_Utf_view::_M_read_reverse_utf16): Fix check for high surrogate preceding low surrogate. * testsuite/ext/unicode/view.cc: Check unpaired low surrogates. Reviewed-by: Tomasz Kamiński --- diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h index 00efbe89ca8..44872db4ed6 100644 --- a/libstdc++-v3/include/bits/unicode.h +++ b/libstdc++-v3/include/bits/unicode.h @@ -527,7 +527,7 @@ namespace __unicode { // read a low surrogate, expect a high surrogate before it. uint16_t __u2 = *--_M_curr(); - if (__u2 < 0xD800 || __u2 > 0xDC00) [[unlikely]] + if (__u2 < 0xD800 || __u2 >= 0xDC00) [[unlikely]] __c = _S_error(); // unpaired low surrogate else { diff --git a/libstdc++-v3/testsuite/ext/unicode/view.cc b/libstdc++-v3/testsuite/ext/unicode/view.cc index 4ccf646e094..40c8fcf34fb 100644 --- a/libstdc++-v3/testsuite/ext/unicode/view.cc +++ b/libstdc++-v3/testsuite/ext/unicode/view.cc @@ -94,6 +94,11 @@ test_illformed_utf16() compare(uc::_Utf16_view(s4), u"\uFFFD\uFFFD"sv); std::array s5{ s[1], s[0], s[1] }; compare(uc::_Utf16_view(s5), u"\uFFFD\N{CLOWN FACE}"sv); + + std::array s6{ 0xDC00, 0xDC01 }; + compare(uc::_Utf16_view(s6), u"\uFFFD\uFFFD"sv); + std::array s7{ 0xD7FF, 0xDC00 }; + compare(uc::_Utf16_view(s7), u"\uD7FF\uFFFD"sv); } constexpr void