gcc/d/dmd/utf.d

   1 /**
   2  * Functions related to UTF encoding.
   3  *
   4  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
   5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
   6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
   7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/utf.d, _utf.d)
   8  * Documentation:  https://dlang.org/phobos/dmd_utf.html
   9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/utf.d
  10  */
  11
  12 module dmd.utf;
  13
  14 nothrow pure @nogc:
  15
  16 /// The Unicode code space is the range of code points [0x000000,0x10FFFF]
  17 /// except the UTF-16 surrogate pairs in the range [0xD800,0xDFFF]
  18 bool utf_isValidDchar(dchar c)
  19 {
  20     // TODO: Whether non-char code points should be rejected is pending review.
  21     // 0xFFFE and 0xFFFF are valid for internal use, like Phobos std.utf.isValidDChar
  22     // See also https://issues.dlang.org/show_bug.cgi?id=1357
  23     if (c < 0xD800) // Almost all characters in a typical document.
  24         return true;
  25     if (c > 0xDFFF && c <= 0x10FFFF)
  26         return true;
  27     return false;
  28 }
  29
  30 /*******************************
  31  * Return !=0 if unicode alpha.
  32  * Use table from C99 Appendix D.
  33  */
  34 bool isUniAlpha(dchar c)
  35 {
  36     static immutable wchar[2][] ALPHA_TABLE =
  37     [
  38         [0x00AA, 0x00AA],
  39         [0x00B5, 0x00B5],
  40         [0x00B7, 0x00B7],
  41         [0x00BA, 0x00BA],
  42         [0x00C0, 0x00D6],
  43         [0x00D8, 0x00F6],
  44         [0x00F8, 0x01F5],
  45         [0x01FA, 0x0217],
  46         [0x0250, 0x02A8],
  47         [0x02B0, 0x02B8],
  48         [0x02BB, 0x02BB],
  49         [0x02BD, 0x02C1],
  50         [0x02D0, 0x02D1],
  51         [0x02E0, 0x02E4],
  52         [0x037A, 0x037A],
  53         [0x0386, 0x0386],
  54         [0x0388, 0x038A],
  55         [0x038C, 0x038C],
  56         [0x038E, 0x03A1],
  57         [0x03A3, 0x03CE],
  58         [0x03D0, 0x03D6],
  59         [0x03DA, 0x03DA],
  60         [0x03DC, 0x03DC],
  61         [0x03DE, 0x03DE],
  62         [0x03E0, 0x03E0],
  63         [0x03E2, 0x03F3],
  64         [0x0401, 0x040C],
  65         [0x040E, 0x044F],
  66         [0x0451, 0x045C],
  67         [0x045E, 0x0481],
  68         [0x0490, 0x04C4],
  69         [0x04C7, 0x04C8],
  70         [0x04CB, 0x04CC],
  71         [0x04D0, 0x04EB],
  72         [0x04EE, 0x04F5],
  73         [0x04F8, 0x04F9],
  74         [0x0531, 0x0556],
  75         [0x0559, 0x0559],
  76         [0x0561, 0x0587],
  77         [0x05B0, 0x05B9],
  78         [0x05BB, 0x05BD],
  79         [0x05BF, 0x05BF],
  80         [0x05C1, 0x05C2],
  81         [0x05D0, 0x05EA],
  82         [0x05F0, 0x05F2],
  83         [0x0621, 0x063A],
  84         [0x0640, 0x0652],
  85         [0x0660, 0x0669],
  86         [0x0670, 0x06B7],
  87         [0x06BA, 0x06BE],
  88         [0x06C0, 0x06CE],
  89         [0x06D0, 0x06DC],
  90         [0x06E5, 0x06E8],
  91         [0x06EA, 0x06ED],
  92         [0x06F0, 0x06F9],
  93         [0x0901, 0x0903],
  94         [0x0905, 0x0939],
  95         [0x093D, 0x094D],
  96         [0x0950, 0x0952],
  97         [0x0958, 0x0963],
  98         [0x0966, 0x096F],
  99         [0x0981, 0x0983],
 100         [0x0985, 0x098C],
 101         [0x098F, 0x0990],
 102         [0x0993, 0x09A8],
 103         [0x09AA, 0x09B0],
 104         [0x09B2, 0x09B2],
 105         [0x09B6, 0x09B9],
 106         [0x09BE, 0x09C4],
 107         [0x09C7, 0x09C8],
 108         [0x09CB, 0x09CD],
 109         [0x09DC, 0x09DD],
 110         [0x09DF, 0x09E3],
 111         [0x09E6, 0x09F1],
 112         [0x0A02, 0x0A02],
 113         [0x0A05, 0x0A0A],
 114         [0x0A0F, 0x0A10],
 115         [0x0A13, 0x0A28],
 116         [0x0A2A, 0x0A30],
 117         [0x0A32, 0x0A33],
 118         [0x0A35, 0x0A36],
 119         [0x0A38, 0x0A39],
 120         [0x0A3E, 0x0A42],
 121         [0x0A47, 0x0A48],
 122         [0x0A4B, 0x0A4D],
 123         [0x0A59, 0x0A5C],
 124         [0x0A5E, 0x0A5E],
 125         [0x0A66, 0x0A6F],
 126         [0x0A74, 0x0A74],
 127         [0x0A81, 0x0A83],
 128         [0x0A85, 0x0A8B],
 129         [0x0A8D, 0x0A8D],
 130         [0x0A8F, 0x0A91],
 131         [0x0A93, 0x0AA8],
 132         [0x0AAA, 0x0AB0],
 133         [0x0AB2, 0x0AB3],
 134         [0x0AB5, 0x0AB9],
 135         [0x0ABD, 0x0AC5],
 136         [0x0AC7, 0x0AC9],
 137         [0x0ACB, 0x0ACD],
 138         [0x0AD0, 0x0AD0],
 139         [0x0AE0, 0x0AE0],
 140         [0x0AE6, 0x0AEF],
 141         [0x0B01, 0x0B03],
 142         [0x0B05, 0x0B0C],
 143         [0x0B0F, 0x0B10],
 144         [0x0B13, 0x0B28],
 145         [0x0B2A, 0x0B30],
 146         [0x0B32, 0x0B33],
 147         [0x0B36, 0x0B39],
 148         [0x0B3D, 0x0B43],
 149         [0x0B47, 0x0B48],
 150         [0x0B4B, 0x0B4D],
 151         [0x0B5C, 0x0B5D],
 152         [0x0B5F, 0x0B61],
 153         [0x0B66, 0x0B6F],
 154         [0x0B82, 0x0B83],
 155         [0x0B85, 0x0B8A],
 156         [0x0B8E, 0x0B90],
 157         [0x0B92, 0x0B95],
 158         [0x0B99, 0x0B9A],
 159         [0x0B9C, 0x0B9C],
 160         [0x0B9E, 0x0B9F],
 161         [0x0BA3, 0x0BA4],
 162         [0x0BA8, 0x0BAA],
 163         [0x0BAE, 0x0BB5],
 164         [0x0BB7, 0x0BB9],
 165         [0x0BBE, 0x0BC2],
 166         [0x0BC6, 0x0BC8],
 167         [0x0BCA, 0x0BCD],
 168         [0x0BE7, 0x0BEF],
 169         [0x0C01, 0x0C03],
 170         [0x0C05, 0x0C0C],
 171         [0x0C0E, 0x0C10],
 172         [0x0C12, 0x0C28],
 173         [0x0C2A, 0x0C33],
 174         [0x0C35, 0x0C39],
 175         [0x0C3E, 0x0C44],
 176         [0x0C46, 0x0C48],
 177         [0x0C4A, 0x0C4D],
 178         [0x0C60, 0x0C61],
 179         [0x0C66, 0x0C6F],
 180         [0x0C82, 0x0C83],
 181         [0x0C85, 0x0C8C],
 182         [0x0C8E, 0x0C90],
 183         [0x0C92, 0x0CA8],
 184         [0x0CAA, 0x0CB3],
 185         [0x0CB5, 0x0CB9],
 186         [0x0CBE, 0x0CC4],
 187         [0x0CC6, 0x0CC8],
 188         [0x0CCA, 0x0CCD],
 189         [0x0CDE, 0x0CDE],
 190         [0x0CE0, 0x0CE1],
 191         [0x0CE6, 0x0CEF],
 192         [0x0D02, 0x0D03],
 193         [0x0D05, 0x0D0C],
 194         [0x0D0E, 0x0D10],
 195         [0x0D12, 0x0D28],
 196         [0x0D2A, 0x0D39],
 197         [0x0D3E, 0x0D43],
 198         [0x0D46, 0x0D48],
 199         [0x0D4A, 0x0D4D],
 200         [0x0D60, 0x0D61],
 201         [0x0D66, 0x0D6F],
 202         [0x0E01, 0x0E3A],
 203         [0x0E40, 0x0E5B],
 204         [0x0E81, 0x0E82],
 205         [0x0E84, 0x0E84],
 206         [0x0E87, 0x0E88],
 207         [0x0E8A, 0x0E8A],
 208         [0x0E8D, 0x0E8D],
 209         [0x0E94, 0x0E97],
 210         [0x0E99, 0x0E9F],
 211         [0x0EA1, 0x0EA3],
 212         [0x0EA5, 0x0EA5],
 213         [0x0EA7, 0x0EA7],
 214         [0x0EAA, 0x0EAB],
 215         [0x0EAD, 0x0EAE],
 216         [0x0EB0, 0x0EB9],
 217         [0x0EBB, 0x0EBD],
 218         [0x0EC0, 0x0EC4],
 219         [0x0EC6, 0x0EC6],
 220         [0x0EC8, 0x0ECD],
 221         [0x0ED0, 0x0ED9],
 222         [0x0EDC, 0x0EDD],
 223         [0x0F00, 0x0F00],
 224         [0x0F18, 0x0F19],
 225         [0x0F20, 0x0F33],
 226         [0x0F35, 0x0F35],
 227         [0x0F37, 0x0F37],
 228         [0x0F39, 0x0F39],
 229         [0x0F3E, 0x0F47],
 230         [0x0F49, 0x0F69],
 231         [0x0F71, 0x0F84],
 232         [0x0F86, 0x0F8B],
 233         [0x0F90, 0x0F95],
 234         [0x0F97, 0x0F97],
 235         [0x0F99, 0x0FAD],
 236         [0x0FB1, 0x0FB7],
 237         [0x0FB9, 0x0FB9],
 238         [0x10A0, 0x10C5],
 239         [0x10D0, 0x10F6],
 240         [0x1E00, 0x1E9B],
 241         [0x1EA0, 0x1EF9],
 242         [0x1F00, 0x1F15],
 243         [0x1F18, 0x1F1D],
 244         [0x1F20, 0x1F45],
 245         [0x1F48, 0x1F4D],
 246         [0x1F50, 0x1F57],
 247         [0x1F59, 0x1F59],
 248         [0x1F5B, 0x1F5B],
 249         [0x1F5D, 0x1F5D],
 250         [0x1F5F, 0x1F7D],
 251         [0x1F80, 0x1FB4],
 252         [0x1FB6, 0x1FBC],
 253         [0x1FBE, 0x1FBE],
 254         [0x1FC2, 0x1FC4],
 255         [0x1FC6, 0x1FCC],
 256         [0x1FD0, 0x1FD3],
 257         [0x1FD6, 0x1FDB],
 258         [0x1FE0, 0x1FEC],
 259         [0x1FF2, 0x1FF4],
 260         [0x1FF6, 0x1FFC],
 261         [0x203F, 0x2040],
 262         [0x207F, 0x207F],
 263         [0x2102, 0x2102],
 264         [0x2107, 0x2107],
 265         [0x210A, 0x2113],
 266         [0x2115, 0x2115],
 267         [0x2118, 0x211D],
 268         [0x2124, 0x2124],
 269         [0x2126, 0x2126],
 270         [0x2128, 0x2128],
 271         [0x212A, 0x2131],
 272         [0x2133, 0x2138],
 273         [0x2160, 0x2182],
 274         [0x3005, 0x3007],
 275         [0x3021, 0x3029],
 276         [0x3041, 0x3093],
 277         [0x309B, 0x309C],
 278         [0x30A1, 0x30F6],
 279         [0x30FB, 0x30FC],
 280         [0x3105, 0x312C],
 281         [0x4E00, 0x9FA5],
 282         [0xAC00, 0xD7A3]
 283     ];
 284
 285     size_t high = ALPHA_TABLE.length - 1;
 286     // Shortcut search if c is out of range
 287     size_t low = (c < ALPHA_TABLE[0][0] || ALPHA_TABLE[high][1] < c) ? high + 1 : 0;
 288     // Binary search
 289     while (low <= high)
 290     {
 291         size_t mid = (low + high) >> 1;
 292         if (c < ALPHA_TABLE[mid][0])
 293             high = mid - 1;
 294         else if (ALPHA_TABLE[mid][1] < c)
 295             low = mid + 1;
 296         else
 297         {
 298             assert(ALPHA_TABLE[mid][0] <= c && c <= ALPHA_TABLE[mid][1]);
 299             return true;
 300         }
 301     }
 302     return false;
 303 }
 304
 305 /**
 306  * Returns the code length of c in code units.
 307  */
 308 int utf_codeLengthChar(dchar c)
 309 {
 310     if (c <= 0x7F)
 311         return 1;
 312     if (c <= 0x7FF)
 313         return 2;
 314     if (c <= 0xFFFF)
 315         return 3;
 316     if (c <= 0x10FFFF)
 317         return 4;
 318     assert(false);
 319 }
 320
 321 int utf_codeLengthWchar(dchar c)
 322 {
 323     return c <= 0xFFFF ? 1 : 2;
 324 }
 325
 326 /**
 327  * Returns the code length of c in code units for the encoding.
 328  * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
 329  */
 330 int utf_codeLength(int sz, dchar c)
 331 {
 332     if (sz == 1)
 333         return utf_codeLengthChar(c);
 334     if (sz == 2)
 335         return utf_codeLengthWchar(c);
 336     assert(sz == 4);
 337     return 1;
 338 }
 339
 340 void utf_encodeChar(char* s, dchar c)
 341 {
 342     assert(s !is null);
 343     assert(utf_isValidDchar(c));
 344     if (c <= 0x7F)
 345     {
 346         s[0] = cast(char)c;
 347     }
 348     else if (c <= 0x07FF)
 349     {
 350         s[0] = cast(char)(0xC0 | (c >> 6));
 351         s[1] = cast(char)(0x80 | (c & 0x3F));
 352     }
 353     else if (c <= 0xFFFF)
 354     {
 355         s[0] = cast(char)(0xE0 | (c >> 12));
 356         s[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
 357         s[2] = cast(char)(0x80 | (c & 0x3F));
 358     }
 359     else if (c <= 0x10FFFF)
 360     {
 361         s[0] = cast(char)(0xF0 | (c >> 18));
 362         s[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
 363         s[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
 364         s[3] = cast(char)(0x80 | (c & 0x3F));
 365     }
 366     else
 367         assert(0);
 368 }
 369
 370 void utf_encodeWchar(wchar* s, dchar c)
 371 {
 372     assert(s !is null);
 373     assert(utf_isValidDchar(c));
 374     if (c <= 0xFFFF)
 375     {
 376         s[0] = cast(wchar)c;
 377     }
 378     else
 379     {
 380         s[0] = cast(wchar)((((c - 0x010000) >> 10) & 0x03FF) + 0xD800);
 381         s[1] = cast(wchar)(((c - 0x010000) & 0x03FF) + 0xDC00);
 382     }
 383 }
 384
 385 void utf_encode(int sz, void* s, dchar c)
 386 {
 387     if (sz == 1)
 388         utf_encodeChar(cast(char*)s, c);
 389     else if (sz == 2)
 390         utf_encodeWchar(cast(wchar*)s, c);
 391     else
 392     {
 393         assert(sz == 4);
 394         *(cast(dchar*)s) = c;
 395     }
 396 }
 397
 398 /********************************************
 399  * Decode a UTF-8 sequence as a single UTF-32 code point.
 400  * Params:
 401  *      s = UTF-8 sequence
 402  *      ridx = starting index in s[], updated to reflect number of code units decoded
 403  *      rresult = set to character decoded
 404  * Returns:
 405  *      null on success, otherwise error message string
 406  */
 407 string utf_decodeChar(const(char)[] s, ref size_t ridx, out dchar rresult)
 408 {
 409     // UTF-8 decoding errors
 410     static immutable string UTF8_DECODE_OK = null; // no error
 411     static immutable string UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
 412     static immutable string UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
 413     static immutable string UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
 414     static immutable string UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
 415     static immutable string UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
 416
 417     /* The following encodings are valid, except for the 5 and 6 byte
 418      * combinations:
 419      *      0xxxxxxx
 420      *      110xxxxx 10xxxxxx
 421      *      1110xxxx 10xxxxxx 10xxxxxx
 422      *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 423      *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 424      *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 425      */
 426     static immutable ubyte[256] UTF8_STRIDE =
 427     [
 428         1,1,1,1, 1,1,1,1,
 429         1,1,1,1, 1,1,1,1,
 430         1,1,1,1, 1,1,1,1,
 431         1,1,1,1, 1,1,1,1,
 432         1,1,1,1, 1,1,1,1,
 433         1,1,1,1, 1,1,1,1,
 434         1,1,1,1, 1,1,1,1,
 435         1,1,1,1, 1,1,1,1,
 436
 437         1,1,1,1, 1,1,1,1,
 438         1,1,1,1, 1,1,1,1,
 439         1,1,1,1, 1,1,1,1,
 440         1,1,1,1, 1,1,1,1,
 441         1,1,1,1, 1,1,1,1,
 442         1,1,1,1, 1,1,1,1,
 443         1,1,1,1, 1,1,1,1,
 444         1,1,1,1, 1,1,1,1,
 445
 446         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 447         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 448         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 449         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 450         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 451         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 452         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 453         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 454
 455         2,2,2,2, 2,2,2,2,
 456         2,2,2,2, 2,2,2,2,
 457         2,2,2,2, 2,2,2,2,
 458         2,2,2,2, 2,2,2,2,
 459
 460         3,3,3,3, 3,3,3,3,
 461         3,3,3,3, 3,3,3,3,
 462
 463         4,4,4,4, 4,4,4,4,
 464         5,5,5,5, 6,6,0xFF,0xFF
 465     ];
 466
 467     assert(s !is null);
 468     size_t i = ridx++;
 469
 470     const char u = s[i];
 471     // Pre-stage results for ASCII and error cases
 472     rresult = u;
 473     //printf("utf_decodeChar(s = %02x, %02x, %02x len = %d)\n", u, s[1], s[2], len);
 474     // Get expected sequence length
 475     const size_t n = UTF8_STRIDE[u];
 476     switch (n)
 477     {
 478     case 1:
 479         // ASCII
 480         return UTF8_DECODE_OK;
 481     case 2:
 482     case 3:
 483     case 4:
 484         // multi-byte UTF-8
 485         break;
 486     default:
 487         // 5- or 6-byte sequence
 488         return UTF8_DECODE_OUTSIDE_CODE_SPACE;
 489     }
 490     if (s.length < i + n) // source too short
 491         return UTF8_DECODE_TRUNCATED_SEQUENCE;
 492     // Pick off 7 - n low bits from first code unit
 493     dchar c = u & ((1 << (7 - n)) - 1);
 494     /* The following combinations are overlong, and illegal:
 495      *      1100000x (10xxxxxx)
 496      *      11100000 100xxxxx (10xxxxxx)
 497      *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
 498      *      11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
 499      *      11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
 500      */
 501     const char u2 = s[++i];
 502     // overlong combination
 503     if ((u & 0xFE) == 0xC0 || (u == 0xE0 && (u2 & 0xE0) == 0x80) || (u == 0xF0 && (u2 & 0xF0) == 0x80) || (u == 0xF8 && (u2 & 0xF8) == 0x80) || (u == 0xFC && (u2 & 0xFC) == 0x80))
 504         return UTF8_DECODE_OVERLONG;
 505     // Decode remaining bits
 506     for (const m = n + i - 1; i != m; ++i)
 507     {
 508         const u3 = s[i];
 509         if ((u3 & 0xC0) != 0x80) // trailing bytes are 10xxxxxx
 510             return UTF8_DECODE_INVALID_TRAILER;
 511         c = (c << 6) | (u3 & 0x3F);
 512     }
 513     if (!utf_isValidDchar(c))
 514         return UTF8_DECODE_INVALID_CODE_POINT;
 515     ridx = i;
 516     rresult = c;
 517     return UTF8_DECODE_OK;
 518 }
 519
 520 /********************************************
 521  * Decode a UTF-16 sequence as a single UTF-32 code point.
 522  * Params:
 523  *      s = UTF-16 sequence
 524  *      ridx = starting index in s[], updated to reflect number of code units decoded
 525  *      rresult = set to character decoded
 526  * Returns:
 527  *      null on success, otherwise error message string
 528  */
 529 string utf_decodeWchar(const(wchar)[] s, ref size_t ridx, out dchar rresult)
 530 {
 531     // UTF-16 decoding errors
 532     static immutable string UTF16_DECODE_OK = null; // no error
 533     static immutable string UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
 534     static immutable string UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
 535     static immutable string UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
 536     static immutable string UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
 537
 538     assert(s !is null);
 539     size_t i = ridx++;
 540
 541     // Pre-stage results for single wchar and error cases
 542     dchar u = rresult = s[i];
 543     if (u < 0xD800) // Single wchar codepoint
 544         return UTF16_DECODE_OK;
 545     if (0xD800 <= u && u <= 0xDBFF) // Surrogate pair
 546     {
 547         if (s.length <= i + 1)
 548             return UTF16_DECODE_TRUNCATED_SEQUENCE;
 549         wchar u2 = s[i + 1];
 550         if (u2 < 0xDC00 || 0xDFFF < u)
 551             return UTF16_DECODE_INVALID_SURROGATE;
 552         u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
 553         ++ridx;
 554     }
 555     else if (0xDC00 <= u && u <= 0xDFFF)
 556         return UTF16_DECODE_UNPAIRED_SURROGATE;
 557     if (!utf_isValidDchar(u))
 558         return UTF16_DECODE_INVALID_CODE_POINT;
 559     rresult = u;
 560     return UTF16_DECODE_OK;
 561 }