binutils/winduni.c

   1 /* winduni.c -- unicode support for the windres program.
   2    Copyright (C) 1997-2015 Free Software Foundation, Inc.
   3    Written by Ian Lance Taylor, Cygnus Support.
   4    Rewritten by Kai Tietz, Onevision.
   5
   6    This file is part of GNU Binutils.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  21    02110-1301, USA.  */
  22
  23
  24 /* This file contains unicode support routines for the windres
  25    program.  Ideally, we would have generic unicode support which
  26    would work on all systems.  However, we don't.  Instead, on a
  27    Windows host, we are prepared to call some Windows routines.  This
  28    means that we will generate different output on Windows and Unix
  29    hosts, but that seems better than not really supporting unicode at
  30    all.  */
  31
  32 #include "sysdep.h"
  33 #include "bfd.h"
  34 #include "libiberty.h" /* for xstrdup */
  35 #include "bucomm.h"
  36 /* Must be include before windows.h and winnls.h.  */
  37 #if defined (_WIN32) || defined (__CYGWIN__)
  38 #include <windows.h>
  39 #include <winnls.h>
  40 #endif
  41 #include "winduni.h"
  42 #include "safe-ctype.h"
  43
  44 #if HAVE_ICONV
  45 #include <iconv.h>
  46 #endif
  47
  48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
  49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
  50 static int unichar_isascii (const unichar *, rc_uint_type);
  51
/* Lookup tables used on hosts without the Windows NLS API: Windows code
   page numbers are mapped to iconv encoding names, and language ids to
   their DOS and Windows code pages.  */
  54
  55 #if !defined (_WIN32) && !defined (__CYGWIN__)
  56
  57 /* Codepages mapped.  */
  58 static local_iconv_map codepages[] =
  59 {
  60   { 0, "MS-ANSI" },
  61   { 1, "WINDOWS-1252" },
  62   { 437, "MS-ANSI" },
  63   { 737, "MS-GREEK" },
  64   { 775, "WINBALTRIM" },
  65   { 850, "MS-ANSI" },
  66   { 852, "MS-EE" },
  67   { 857, "MS-TURK" },
  68   { 862, "CP862" },
  69   { 864, "CP864" },
  70   { 866, "MS-CYRL" },
  71   { 874, "WINDOWS-874" },
  72   { 932, "CP932" },
  73   { 936, "CP936" },
  74   { 949, "CP949" },
  75   { 950, "CP950" },
  76   { 1250, "WINDOWS-1250" },
  77   { 1251, "WINDOWS-1251" },
  78   { 1252, "WINDOWS-1252" },
  79   { 1253, "WINDOWS-1253" },
  80   { 1254, "WINDOWS-1254" },
  81   { 1255, "WINDOWS-1255" },
  82   { 1256, "WINDOWS-1256" },
  83   { 1257, "WINDOWS-1257" },
  84   { 1258, "WINDOWS-1258" },
  85   { CP_UTF7, "UTF-7" },
  86   { CP_UTF8, "UTF-8" },
  87   { CP_UTF16, "UTF-16LE" },
  88   { (rc_uint_type) -1, NULL }
  89 };
  90
  91 /* Languages supported.  */
  92 static const wind_language_t languages[] =
  93 {
  94   { 0x0000, 437, 1252, "Neutral", "Neutral" },
  95   { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  96   { 0x0403, 850, 1252, "Catalan", "Spain" },          { 0x0404, 950,  950, "Chinese", "Taiwan" },
  97   { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
  98   { 0x0407, 850, 1252, "German", "Germany" },         { 0x0408, 737, 1253, "Greek", "Greece" },
  99   { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
 100   { 0x040B, 850, 1252, "Finnish", "Finland" },        { 0x040C, 850, 1252, "French", "France" },
 101   { 0x040D, 862, 1255, "Hebrew", "Israel" },          { 0x040E, 852, 1250, "Hungarian", "Hungary" },
 102   { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
 103   { 0x0411, 932,  932, "Japanese", "Japan" },         { 0x0412, 949,  949, "Korean", "Korea (south)" },
 104   { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
 105   { 0x0415, 852, 1250, "Polish", "Poland" },          { 0x0416, 850, 1252, "Portuguese", "Brazil" },
 106   { 0x0418, 852, 1250, "Romanian", "Romania" },       { 0x0419, 866, 1251, "Russian", "Russia" },
 107   { 0x041A, 852, 1250, "Croatian", "Croatia" },       { 0x041B, 852, 1250, "Slovak", "Slovakia" },
 108   { 0x041C, 852, 1250, "Albanian", "Albania" },       { 0x041D, 850, 1252, "Swedish", "Sweden" },
 109   { 0x041E, 874,  874, "Thai", "Thailand" },          { 0x041F, 857, 1254, "Turkish", "Turkey" },
 110   { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
 111   { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
 112   { 0x0425, 775, 1257, "Estonian", "Estonia" },       { 0x0426, 775, 1257, "Latvian", "Latvia" },
 113   { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
 114   { 0x0429, 864, 1256, "Arabic", "Farsi" },           { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
 115   { 0x042D, 850, 1252, "Basque", "Spain" },
 116   { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
 117   { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
 118   { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
 119   { 0x043C, 437, 1252, "Irish", "Ireland" },
 120   { 0x043E, 850, 1252, "Malay", "Malaysia" },
 121   { 0x0801, 864, 1256, "Arabic", "Iraq" },
 122   { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
 123   { 0x0807, 850, 1252, "German", "Switzerland" },
 124   { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
 125   { 0x080C, 850, 1252, "French", "Belgium" },
 126   { 0x0810, 850, 1252, "Italian", "Switzerland" },
 127   { 0x0813, 850, 1252, "Dutch", "Belgium" },          { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
 128   { 0x0816, 850, 1252, "Portuguese", "Portugal" },
 129   { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
 130   { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
 131   { 0x0C01, 864, 1256, "Arabic", "Egypt" },
 132   { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
 133   { 0x0C07, 850, 1252, "German", "Austria" },
 134   { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
 135   { 0x0C0C, 850, 1252, "French", "Canada"},
 136   { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
 137   { 0x1001, 864, 1256, "Arabic", "Libya" },
 138   { 0x1004, 936,  936, "Chinese", "Singapore" },
 139   { 0x1007, 850, 1252, "German", "Luxembourg" },
 140   { 0x1009, 850, 1252, "English", "Canada" },
 141   { 0x100A, 850, 1252, "Spanish", "Guatemala" },
 142   { 0x100C, 850, 1252, "French", "Switzerland" },
 143   { 0x1401, 864, 1256, "Arabic", "Algeria" },
 144   { 0x1407, 850, 1252, "German", "Liechtenstein" },
 145   { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
 146   { 0x140C, 850, 1252, "French", "Luxembourg" },
 147   { 0x1801, 864, 1256, "Arabic", "Morocco" },
 148   { 0x1809, 850, 1252, "English", "Ireland" },        { 0x180A, 850, 1252, "Spanish", "Panama" },
 149   { 0x180C, 850, 1252, "French", "Monaco" },
 150   { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
 151   { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
 152   { 0x2001, 864, 1256, "Arabic", "Oman" },
 153   { 0x2009, 850, 1252, "English", "Jamaica" },        { 0x200A, 850, 1252, "Spanish", "Venezuela" },
 154   { 0x2401, 864, 1256, "Arabic", "Yemen" },
 155   { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
 156   { 0x2801, 864, 1256, "Arabic", "Syria" },
 157   { 0x2809, 850, 1252, "English", "Belize" },         { 0x280A, 850, 1252, "Spanish", "Peru" },
 158   { 0x2C01, 864, 1256, "Arabic", "Jordan" },
 159   { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
 160   { 0x3001, 864, 1256, "Arabic", "Lebanon" },
 161   { 0x3009, 437, 1252, "English", "Zimbabwe" },       { 0x300A, 850, 1252, "Spanish", "Ecuador" },
 162   { 0x3401, 864, 1256, "Arabic", "Kuwait" },
 163   { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
 164   { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
 165   { 0x380A, 850, 1252, "Spanish", "Uruguay" },
 166   { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
 167   { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
 168   { 0x4001, 864, 1256, "Arabic", "Qatar" },
 169   { 0x400A, 850, 1252, "Spanish", "Bolivia" },
 170   { 0x440A, 850, 1252, "Spanish", "El Salvador" },
 171   { 0x480A, 850, 1252, "Spanish", "Honduras" },
 172   { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
 173   { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
 174   { (unsigned) -1,  0,      0, NULL, NULL }
 175 };
 176
 177 #endif
 178
/* Specifies the default codepage to be used for unicode
   transformations.  It is initialised to CP_ACP.  Nothing else in this
   file reads or writes it; it is exported for the rest of the
   program.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode transformations.
   It is initialised to CP_ACP and is the code page that
   unicode_from_ascii, unicode_from_ascii_len and ascii_from_unicode
   pass to the conversion routines.  */
rc_uint_type wind_current_codepage = CP_ACP;
 186
/* Convert the NUL terminated string ASCII to a unicode string using
   the current codepage (wind_current_codepage).  This is a thin
   wrapper around unicode_from_codepage: the converted string is stored
   in *UNICODE and, if LENGTH is not NULL, its length in unichar
   elements (without the terminator) is stored in *LENGTH.  */

void
unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
{
  unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
}
 195
 196 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
 197    copy it, expanding chars to shorts, rather than doing something intelligent.
 198    This routine converts also \0 within a string.  */
 199
 200 void
 201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
 202 {
 203   char *tmp, *p;
 204   rc_uint_type tlen, elen, idx = 0;
 205
 206   *unicode = NULL;
 207
 208   if (!a_length)
 209     {
 210       if (length)
 211         *length = 0;
 212       return;
 213     }
 214
 215   /* Make sure we have zero terminated string.  */
 216   p = tmp = (char *) alloca (a_length + 1);
 217   memcpy (tmp, ascii, a_length);
 218   tmp[a_length] = 0;
 219
 220   while (a_length > 0)
 221     {
 222       unichar *utmp, *up;
 223
 224       tlen = strlen (p);
 225
 226       if (tlen > a_length)
 227         tlen = a_length;
 228       if (*p == 0)
 229         {
 230           /* Make room for one more character.  */
 231           utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
 232           if (idx > 0)
 233             {
 234               memcpy (utmp, *unicode, idx * sizeof (unichar));
 235             }
 236           *unicode = utmp;
 237           utmp[idx++] = 0;
 238           --a_length;
 239           p++;
 240           continue;
 241         }
 242       utmp = NULL;
 243       elen = 0;
 244       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
 245       if (elen)
 246         {
 247           utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
 248           wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
 249           elen /= sizeof (unichar);
 250           elen --;
 251         }
 252       else
 253         {
 254           /* Make room for one more character.  */
 255           utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
 256           if (idx > 0)
 257             {
 258               memcpy (utmp, *unicode, idx * sizeof (unichar));
 259             }
 260           *unicode = utmp;
 261           utmp[idx++] = ((unichar) *p) & 0xff;
 262           --a_length;
 263           p++;
 264           continue;
 265         }
 266       p += tlen;
 267       a_length -= tlen;
 268
 269       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
 270       if (idx > 0)
 271         memcpy (up, *unicode, idx * sizeof (unichar));
 272
 273       *unicode = up;
 274       if (elen)
 275         memcpy (&up[idx], utmp, sizeof (unichar) * elen);
 276
 277       idx += elen;
 278     }
 279
 280   if (length)
 281     *length = idx;
 282 }
 283
/* Convert the NUL terminated unicode string UNICODE to a string in the
   current codepage (wind_current_codepage).  This is a thin wrapper
   around codepage_from_unicode: the converted string is stored in
   *ASCII and, if LENGTH is not NULL, its length in bytes (without the
   terminator) is stored in *LENGTH.  When neither the Windows API nor
   iconv is available, elements outside the 7-bit range are replaced
   by '_'.  */

void
ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
{
  codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
}
 293
 294 /* Print the unicode string UNICODE to the file E.  LENGTH is the
 295    number of characters to print, or -1 if we should print until the
 296    end of the string.  FIXME: On a Windows host, we should be calling
 297    some Windows function, probably WideCharToMultiByte.  */
 298
 299 void
 300 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
 301 {
 302   while (1)
 303     {
 304       unichar ch;
 305
 306       if (length == 0)
 307         return;
 308       if ((bfd_signed_vma) length > 0)
 309         --length;
 310
 311       ch = *unicode;
 312
 313       if (ch == 0 && (bfd_signed_vma) length < 0)
 314         return;
 315
 316       ++unicode;
 317
 318       if ((ch & 0x7f) == ch)
 319         {
 320           if (ch == '\\')
 321             fputs ("\\\\", e);
 322           else if (ch == '"')
 323             fputs ("\"\"", e);
 324           else if (ISPRINT (ch))
 325             putc (ch, e);
 326           else
 327             {
 328               switch (ch)
 329                 {
 330                 case ESCAPE_A:
 331                   fputs ("\\a", e);
 332                   break;
 333
 334                 case ESCAPE_B:
 335                   fputs ("\\b", e);
 336                   break;
 337
 338                 case ESCAPE_F:
 339                   fputs ("\\f", e);
 340                   break;
 341
 342                 case ESCAPE_N:
 343                   fputs ("\\n", e);
 344                   break;
 345
 346                 case ESCAPE_R:
 347                   fputs ("\\r", e);
 348                   break;
 349
 350                 case ESCAPE_T:
 351                   fputs ("\\t", e);
 352                   break;
 353
 354                 case ESCAPE_V:
 355                   fputs ("\\v", e);
 356                   break;
 357
 358                 default:
 359                   fprintf (e, "\\%03o", (unsigned int) ch);
 360                   break;
 361                 }
 362             }
 363         }
 364       else if ((ch & 0xff) == ch)
 365         fprintf (e, "\\%03o", (unsigned int) ch);
 366       else
 367         fprintf (e, "\\x%04x", (unsigned int) ch);
 368     }
 369 }
 370
 371 /* Print a unicode string to a file.  */
 372
 373 void
 374 ascii_print (FILE *e, const char *s, rc_uint_type length)
 375 {
 376   while (1)
 377     {
 378       char ch;
 379
 380       if (length == 0)
 381         return;
 382       if ((bfd_signed_vma) length > 0)
 383         --length;
 384
 385       ch = *s;
 386
 387       if (ch == 0 && (bfd_signed_vma) length < 0)
 388         return;
 389
 390       ++s;
 391
 392       if ((ch & 0x7f) == ch)
 393         {
 394           if (ch == '\\')
 395             fputs ("\\\\", e);
 396           else if (ch == '"')
 397             fputs ("\"\"", e);
 398           else if (ISPRINT (ch))
 399             putc (ch, e);
 400           else
 401             {
 402               switch (ch)
 403                 {
 404                 case ESCAPE_A:
 405                   fputs ("\\a", e);
 406                   break;
 407
 408                 case ESCAPE_B:
 409                   fputs ("\\b", e);
 410                   break;
 411
 412                 case ESCAPE_F:
 413                   fputs ("\\f", e);
 414                   break;
 415
 416                 case ESCAPE_N:
 417                   fputs ("\\n", e);
 418                   break;
 419
 420                 case ESCAPE_R:
 421                   fputs ("\\r", e);
 422                   break;
 423
 424                 case ESCAPE_T:
 425                   fputs ("\\t", e);
 426                   break;
 427
 428                 case ESCAPE_V:
 429                   fputs ("\\v", e);
 430                   break;
 431
 432                 default:
 433                   fprintf (e, "\\%03o", (unsigned int) ch);
 434                   break;
 435                 }
 436             }
 437         }
 438       else
 439         fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
 440     }
 441 }
 442
 443 rc_uint_type
 444 unichar_len (const unichar *unicode)
 445 {
 446   rc_uint_type r = 0;
 447
 448   if (unicode)
 449     while (unicode[r] != 0)
 450       r++;
 451   else
 452     --r;
 453   return r;
 454 }
 455
 456 unichar *
 457 unichar_dup (const unichar *unicode)
 458 {
 459   unichar *r;
 460   int len;
 461
 462   if (! unicode)
 463     return NULL;
 464   for (len = 0; unicode[len] != 0; ++len)
 465     ;
 466   ++len;
 467   r = ((unichar *) res_alloc (len * sizeof (unichar)));
 468   memcpy (r, unicode, len * sizeof (unichar));
 469   return r;
 470 }
 471
 472 unichar *
 473 unichar_dup_uppercase (const unichar *u)
 474 {
 475   unichar *r = unichar_dup (u);
 476   int i;
 477
 478   if (! r)
 479     return NULL;
 480
 481   for (i = 0; r[i] != 0; ++i)
 482     {
 483       if (r[i] >= 'a' && r[i] <= 'z')
 484         r[i] &= 0xdf;
 485     }
 486   return r;
 487 }
 488
 489 static int
 490 unichar_isascii (const unichar *u, rc_uint_type len)
 491 {
 492   rc_uint_type i;
 493
 494   if ((bfd_signed_vma) len < 0)
 495     {
 496       if (u)
 497         len = (rc_uint_type) unichar_len (u);
 498       else
 499         len = 0;
 500     }
 501
 502   for (i = 0; i < len; i++)
 503     if ((u[i] & 0xff80) != 0)
 504       return 0;
 505   return 1;
 506 }
 507
 508 void
 509 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
 510 {
 511   if (! unichar_isascii (u, len))
 512     fputc ('L', e);
 513   fputc ('"', e);
 514   unicode_print (e, u, len);
 515   fputc ('"', e);
 516 }
 517
 518 int
 519 unicode_is_valid_codepage (rc_uint_type cp)
 520 {
 521   if ((cp & 0xffff) != cp)
 522     return 0;
 523   if (cp == CP_UTF16 || cp == CP_ACP)
 524     return 1;
 525
 526 #if !defined (_WIN32) && !defined (__CYGWIN__)
 527   if (! wind_find_codepage_info (cp))
 528     return 0;
 529   return 1;
 530 #else
 531   return !! IsValidCodePage ((UINT) cp);
 532 #endif
 533 }
 534
 535 #if defined (_WIN32) || defined (__CYGWIN__)
 536
 537 #define max_cp_string_len 6
 538
 539 static unsigned int
 540 codepage_from_langid (unsigned short langid)
 541 {
 542   char cp_string [max_cp_string_len];
 543   int c;
 544
 545   memset (cp_string, 0, max_cp_string_len);
 546   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
 547      but is unavailable on Win95.  */
 548   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 549                       LOCALE_IDEFAULTANSICODEPAGE,
 550                       cp_string, max_cp_string_len);
 551   /* If codepage data for an LCID is not installed on users's system,
 552      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 553      default. */
 554   if (c == 0)
 555     return CP_ACP;
 556   return strtoul (cp_string, 0, 10);
 557 }
 558
 559 static unsigned int
 560 wincodepage_from_langid (unsigned short langid)
 561 {
 562   char cp_string [max_cp_string_len];
 563   int c;
 564
 565   memset (cp_string, 0, max_cp_string_len);
 566   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
 567      but is unavailable on Win95.  */
 568   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 569                       LOCALE_IDEFAULTCODEPAGE,
 570                       cp_string, max_cp_string_len);
 571   /* If codepage data for an LCID is not installed on users's system,
 572      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 573      default. */
 574   if (c == 0)
 575     return CP_OEM;
 576   return strtoul (cp_string, 0, 10);
 577 }
 578
 579 static char *
 580 lang_from_langid (unsigned short langid)
 581 {
 582   char cp_string[261];
 583   int c;
 584
 585   memset (cp_string, 0, 261);
 586   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 587                       LOCALE_SENGLANGUAGE,
 588                       cp_string, 260);
 589   /* If codepage data for an LCID is not installed on users's system,
 590      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 591      default. */
 592   if (c == 0)
 593     strcpy (cp_string, "Neutral");
 594   return xstrdup (cp_string);
 595 }
 596
 597 static char *
 598 country_from_langid (unsigned short langid)
 599 {
 600   char cp_string[261];
 601   int c;
 602
 603   memset (cp_string, 0, 261);
 604   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 605                       LOCALE_SENGCOUNTRY,
 606                       cp_string, 260);
 607   /* If codepage data for an LCID is not installed on users's system,
 608      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 609      default. */
 610   if (c == 0)
 611     strcpy (cp_string, "Neutral");
 612   return xstrdup (cp_string);
 613 }
 614
 615 #endif
 616
 617 const wind_language_t *
 618 wind_find_language_by_id (unsigned id)
 619 {
 620 #if !defined (_WIN32) && !defined (__CYGWIN__)
 621   int i;
 622
 623   if (! id)
 624     return NULL;
 625   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
 626     ;
 627   if (languages[i].id == id)
 628     return &languages[i];
 629   return NULL;
 630 #else
 631   static wind_language_t wl;
 632
 633   wl.id = id;
 634   wl.doscp = codepage_from_langid ((unsigned short) id);
 635   wl.wincp = wincodepage_from_langid ((unsigned short) id);
 636   wl.name = lang_from_langid ((unsigned short) id);
 637   wl.country = country_from_langid ((unsigned short) id);
 638
 639   return & wl;
 640 #endif
 641 }
 642
 643 const local_iconv_map *
 644 wind_find_codepage_info (unsigned cp)
 645 {
 646 #if !defined (_WIN32) && !defined (__CYGWIN__)
 647   int i;
 648
 649   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
 650     ;
 651   if (codepages[i].codepage == (rc_uint_type) -1)
 652     return NULL;
 653   return &codepages[i];
 654 #else
 655   static local_iconv_map lim;
 656   if (!unicode_is_valid_codepage (cp))
 657         return NULL;
 658   lim.codepage = cp;
 659   lim.iconv_name = "";
 660   return & lim;
 661 #endif
 662 }
 663
 664 /* Convert an Codepage string to a unicode string.  */
 665
 666 void
 667 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
 668 {
 669   rc_uint_type len;
 670
 671   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
 672   if (len)
 673     {
 674       *u = ((unichar *) res_alloc (len));
 675       wind_MultiByteToWideChar (cp, src, *u, len);
 676     }
 677   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
 678      this will set *length to -1.  */
 679   len -= sizeof (unichar);
 680
 681   if (length != NULL)
 682     *length = len / sizeof (unichar);
 683 }
 684
 685 /* Convert an unicode string to an codepage string.  */
 686
 687 void
 688 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
 689 {
 690   rc_uint_type len;
 691
 692   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
 693   if (len)
 694     {
 695       *ascii = (char *) res_alloc (len * sizeof (char));
 696       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
 697     }
 698   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
 699      this will set *length to -1.  */
 700   len--;
 701
 702   if (length != NULL)
 703     *length = len;
 704 }
 705
 706 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
 707 static int
 708 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
 709 {
 710   int i;
 711
 712   for (i = 1; i <= 32; i++)
 713     {
 714       char *tmp_d = d;
 715       ICONV_CONST char *tmp_s = s;
 716       size_t ret;
 717       size_t s_left = (size_t) i;
 718       size_t d_left = (size_t) d_len;
 719
 720       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
 721
 722       if (ret != (size_t) -1)
 723         {
 724           *n_s = tmp_s;
 725           *n_d = tmp_d;
 726           return 0;
 727         }
 728     }
 729
 730   return 1;
 731 }
 732
 733 static const char *
 734 wind_iconv_cp (rc_uint_type cp)
 735 {
 736   const local_iconv_map *lim = wind_find_codepage_info (cp);
 737
 738   if (!lim)
 739     return NULL;
 740   return lim->iconv_name;
 741 }
 742 #endif /* HAVE_ICONV */
 743
 744 static rc_uint_type
 745 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
 746                           unichar *u, rc_uint_type u_len)
 747 {
 748   rc_uint_type ret = 0;
 749
 750 #if defined (_WIN32) || defined (__CYGWIN__)
 751   rc_uint_type conv_flags = MB_PRECOMPOSED;
 752
 753   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
 754      MultiByteToWideChar will set the last error to
 755      ERROR_INVALID_FLAGS if we do. */
 756   if (cp == CP_UTF8 || cp == CP_UTF7)
 757     conv_flags = 0;
 758
 759   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
 760                                             mb, -1, u, u_len);
 761   /* Convert to bytes. */
 762   ret *= sizeof (unichar);
 763
 764 #elif defined (HAVE_ICONV)
 765   int first = 1;
 766   char tmp[32];
 767   char *p_tmp;
 768   const char *iconv_name = wind_iconv_cp (cp);
 769
 770   if (!mb || !iconv_name)
 771     return 0;
 772   iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
 773
 774   while (1)
 775     {
 776       int iret;
 777       const char *n_mb = "";
 778       char *n_tmp = "";
 779
 780       p_tmp = tmp;
 781       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
 782       if (first)
 783         {
 784           first = 0;
 785           continue;
 786         }
 787       if (!iret)
 788         {
 789           size_t l_tmp = (size_t) (n_tmp - p_tmp);
 790
 791           if (u)
 792             {
 793               if ((size_t) u_len < l_tmp)
 794                 break;
 795               memcpy (u, tmp, l_tmp);
 796               u += l_tmp/2;
 797               u_len -= l_tmp;
 798             }
 799           ret += l_tmp;
 800         }
 801       else
 802         break;
 803       if (tmp[0] == 0 && tmp[1] == 0)
 804         break;
 805       mb = n_mb;
 806     }
 807   iconv_close (cd);
 808 #else
 809   if (cp)
 810     ret = 0;
 811   ret = strlen (mb) + 1;
 812   ret *= sizeof (unichar);
 813   if (u != NULL && u_len != 0)
 814     {
 815       do
 816         {
 817           *u++ = ((unichar) *mb) & 0xff;
 818           --u_len; mb++;
 819         }
 820       while (u_len != 0 && mb[-1] != 0);
 821     }
 822   if (u != NULL && u_len != 0)
 823     *u = 0;
 824 #endif
 825   return ret;
 826 }
 827
 828 static rc_uint_type
 829 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
 830 {
 831   rc_uint_type ret = 0;
 832 #if defined (_WIN32) || defined (__CYGWIN__)
 833   WINBOOL used_def = FALSE;
 834
 835   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
 836                                             NULL, & used_def);
 837 #elif defined (HAVE_ICONV)
 838   int first = 1;
 839   char tmp[32];
 840   char *p_tmp;
 841   const char *iconv_name = wind_iconv_cp (cp);
 842
 843   if (!u || !iconv_name)
 844     return 0;
 845   iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
 846
 847   while (1)
 848     {
 849       int iret;
 850       const char *n_u = "";
 851       char *n_tmp = "";
 852
 853       p_tmp = tmp;
 854       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
 855       if (first)
 856         {
 857           first = 0;
 858           continue;
 859         }
 860       if (!iret)
 861         {
 862           size_t l_tmp = (size_t) (n_tmp - p_tmp);
 863
 864           if (mb)
 865             {
 866               if ((size_t) mb_len < l_tmp)
 867                 break;
 868               memcpy (mb, tmp, l_tmp);
 869               mb += l_tmp;
 870               mb_len -= l_tmp;
 871             }
 872           ret += l_tmp;
 873         }
 874       else
 875         break;
 876       if (u[0] == 0)
 877         break;
 878       u = (const unichar *) n_u;
 879     }
 880   iconv_close (cd);
 881 #else
 882   if (cp)
 883     ret = 0;
 884
 885   while (u[ret] != 0)
 886     ++ret;
 887
 888   ++ret;
 889
 890   if (mb)
 891     {
 892       while (*u != 0 && mb_len != 0)
 893         {
 894           if (u[0] == (u[0] & 0x7f))
 895             *mb++ = (char) u[0];
 896           else
 897             *mb++ = '_';
 898           ++u; --mb_len;
 899         }
 900       if (mb_len != 0)
 901         *mb = 0;
 902     }
 903 #endif
 904   return ret;
 905 }