]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/rust/util/rust-punycode.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / rust / util / rust-punycode.cc
CommitLineData
767698ff 1// Copyright (C) 2020-2024 Free Software Foundation, Inc.
619f1874
RT
2
3// This file is part of GCC.
4
5// GCC is free software; you can redistribute it and/or modify it under
6// the terms of the GNU General Public License as published by the Free
7// Software Foundation; either version 3, or (at your option) any later
8// version.
9
10// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11// WARRANTY; without even the implied warranty of MERCHANTABILITY or
12// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13// for more details.
14
15// You should have received a copy of the GNU General Public License
16// along with GCC; see the file COPYING3. If not see
17// <http://www.gnu.org/licenses/>.
18
19// This file provides functions for punycode conversion
20// See https://datatracker.ietf.org/doc/html/rfc3492
21
22#include "rust-system.h"
23#include "rust-unicode.h"
24#include "optional.h"
25#include "selftest.h"
26
27namespace Rust {
28
// Bootstring parameter values used by Punycode.
// See https://tools.ietf.org/html/rfc3492#section-4.

// Radix of the generalized variable-length integers.
constexpr uint32_t BASE = 36;
// Lower and upper clamp applied to the per-digit threshold.
constexpr uint32_t TMIN = 1;
constexpr uint32_t TMAX = 26;
// Constants used by the bias adaptation function.
constexpr uint32_t SKEW = 38;
constexpr uint32_t DAMP = 700;
// Starting values of the bias and of the code point counter `n`.
constexpr uint32_t INITIAL_BIAS = 72;
constexpr uint32_t INITIAL_N = 128;
// Appended after the basic code points when there is at least one of them.
constexpr char DELIMITER = '-';
38
619f1874
RT
39std::string
40extract_basic_string (const std::vector<Codepoint> &src)
41{
42 std::string basic_string;
43 for (auto c : src)
44 {
52ad16ef 45 if (c.is_ascii ())
619f1874
RT
46 basic_string += c.as_string ();
47 }
48 return basic_string;
49}
50
51uint32_t
52adapt_bias (uint32_t delta, const uint32_t n_points, const bool is_first)
53{
54 delta /= is_first ? DAMP : 2;
55 delta += delta / n_points;
56 uint32_t k = 0;
57
58 while (delta > (BASE - TMIN) * TMAX / 2)
59 {
60 delta /= BASE - TMIN;
61 k += BASE;
62 }
63 return k + (BASE - TMIN + 1) * delta / (delta + SKEW);
64}
65
// Compute LHS - RHS clamped to the range [MIN, MAX].
//
// Returns MIN when the difference would be less than or equal to MIN
// (including when RHS >= LHS, i.e. the mathematical difference is zero or
// negative), MAX when it would be greater than or equal to MAX, and
// LHS - RHS otherwise.
//
// The comparison is done on the difference rather than on `MIN + RHS` /
// `MAX + RHS`, so it cannot wrap around for large values of RHS.
uint32_t
clamped_sub (const uint32_t min, const uint32_t lhs, const uint32_t rhs,
             const uint32_t max)
{
  // The subtraction below is only meaningful when LHS > RHS.
  if (lhs <= rhs)
    return min;

  const uint32_t diff = lhs - rhs;
  if (diff <= min)
    return min;
  if (diff >= max)
    return max;
  return diff;
}
77
78uint32_t
79min_gt_or_eq (const std::vector<Codepoint> &l, const uint32_t threshold)
80{
81 uint32_t min = UINT32_MAX;
82 for (auto c : l)
83 if (c.value >= threshold && c.value < min)
84 min = c.value;
85 return min;
86}
87
// Map the digit value D to its basic code point: 0..25 become 'a'..'z'
// and 26..35 become '0'..'9'.
char
encode_digit (const uint32_t d)
{
  if (d < 26)
    return static_cast<char> ('a' + d);

  // '0' is 48, so this equals d + 22.
  return static_cast<char> ('0' + (d - 26));
}
93
94tl::optional<std::string>
95encode_punycode (const Utf8String &input)
96{
97 std::vector<Codepoint> input_chars = input.get_chars ();
98
99 uint32_t n = INITIAL_N;
100 uint32_t delta = 0;
101 uint32_t bias = INITIAL_BIAS;
102
103 std::string output = extract_basic_string (input_chars);
104 uint32_t h = output.size ();
105 const uint32_t b = h;
106 if (b > 0)
107 output += DELIMITER;
108
109 while (h < input_chars.size ())
110 {
111 const uint32_t m = min_gt_or_eq (input_chars, n);
112
113 if (m - n > ((UINT32_MAX - delta) / (h + 1)))
114 return tl::nullopt;
115
116 delta += (m - n) * (h + 1);
117 n = m;
118
119 for (const auto c : input_chars)
120 {
121 if (c.value < n)
122 delta++;
123 else if (c.value == n)
124 {
125 uint32_t q = delta;
126 // encode as a variable length integer
127 for (uint32_t k = 1;; k++)
128 {
129 const uint32_t kb = k * BASE;
130 const uint32_t t = clamped_sub (TMIN, kb, bias, TMAX);
131 if (q < t)
132 break;
133
134 output += encode_digit (t + (q - t) % (BASE - t));
135 q = (q - t) / (BASE - t);
136 }
137 output += encode_digit (q);
138
139 bias = adapt_bias (delta, h + 1, h == b);
140 delta = 0;
141 h++;
142 }
143 }
144 delta++;
145 n++;
146 }
147
148 return {output};
149}
150
151} // namespace Rust
152
0ebb0a75
OA
153#if CHECKING_P
154
619f1874
RT
155namespace selftest {
156
157void
158encode_assert (const std::string &input, const std::string &expected)
159{
160 Rust::Utf8String input_utf8
161 = Rust::Utf8String::make_utf8_string (input).value ();
162 std::string actual = Rust::encode_punycode (input_utf8).value ();
163 ASSERT_EQ (actual, expected);
164}
165
166void
167rust_punycode_encode_test ()
168{
169 encode_assert ("abc", "abc-");
170 encode_assert ("12345", "12345-");
171 encode_assert ("香港", "j6w193g");
172
173 // Examples from https://datatracker.ietf.org/doc/html/rfc3492#section-7.1
174 encode_assert ("ليهمابتكلموشعربي؟", "egbpdaj6bu4bxfgehfvwxn");
175 encode_assert ("他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye");
176 encode_assert ("他們爲什麽不說中文", "ihqwctvzc91f659drss3x8bo0yb");
177 encode_assert ("Pročprostěnemluvíčesky", "Proprostnemluvesky-uyb24dma41a");
178}
179
180} // namespace selftest
0ebb0a75
OA
181
182#endif // CHECKING_P