binutils/mclex.c

   1 /* mclex.c -- lexer for Windows mc files parser.
   2    Copyright (C) 2007-2024 Free Software Foundation, Inc.
   3
   4    Written by Kai Tietz, Onevision.
   5
   6    This file is part of GNU Binutils.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  21    02110-1301, USA.  */
  22
/* This is a lexer used by the Windows mc file parser.
   It basically just recognizes a bunch of keywords.  */
  25
  26 #include "sysdep.h"
  27 #include "bfd.h"
  28 #include "bucomm.h"
  29 #include "libiberty.h"
  30 #include "safe-ctype.h"
  31 #include "windmc.h"
  32 #include "mcparse.h"
  33
  34 #include <assert.h>
  35
/* Exported globals.  These flags select a special scanning mode for
   yylex.  */

/* When set, yylex returns NL for the next newline it skips and clears
   the flag again.  */
bool mclex_want_nl = false;
/* While set, yylex hands back whole input lines as MCLINE tokens; yylex
   clears the flag when it meets a line consisting of a single '.'.  */
bool mclex_want_line = false;
/* When set, yylex scans the next token as a (possibly quoted) file name,
   returns MCFILENAME and clears the flag.  */
bool mclex_want_filename = false;

/* Local globals.  */

/* Copy of the text being scanned, as installed by mc_set_content.  */
static unichar *input_stream = NULL;
/* Current read position inside input_stream; NULL until content is set.  */
static unichar *input_stream_pos = NULL;
/* Line number reported in diagnostics; bumped for each newline consumed.  */
static int input_line = 1;
/* File name reported in diagnostics, as set by mc_set_inputfile.  */
static const char *input_filename = NULL;
  46
  47 void
  48 mc_set_content (const unichar *src)
  49 {
  50   if (!src)
  51     return;
  52   input_stream = input_stream_pos = unichar_dup (src);
  53 }
  54
  55 void
  56 mc_set_inputfile (const char *name)
  57 {
  58   if (! name || *name == 0)
  59     input_filename = "-";
  60   else
  61     {
  62       const char *s1 = strrchr (name, '/');
  63       const char *s2 = strrchr (name, '\\');
  64
  65       if (! s1)
  66         s1 = s2;
  67       if (s1 && s2 && s1 < s2)
  68         s1 = s2;
  69       if (! s1)
  70         s1 = name;
  71       else
  72         s1++;
  73       s1 = xstrdup (s1);
  74       input_filename = s1;
  75     }
  76 }
  77
  78 static void
  79 show_msg (const char *kind, const char *msg, va_list argp)
  80 {
  81   fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
  82   vfprintf (stderr, msg, argp);
  83   fprintf (stderr, ".\n");
  84 }
  85
  86 void
  87 mc_warn (const char *s, ...)
  88 {
  89   va_list argp;
  90   va_start (argp, s);
  91   show_msg ("warning", s, argp);
  92   va_end (argp);
  93 }
  94
  95 void
  96 mc_fatal (const char *s, ...)
  97 {
  98   va_list argp;
  99   va_start (argp, s);
 100   show_msg ("fatal", s, argp);
 101   va_end (argp);
 102   xexit (1);
 103 }
 104
 105
 106 static void
 107 mc_error (const char *s, ...)
 108 {
 109   va_list argp;
 110   va_start (argp, s);
 111   show_msg ("parser", s, argp);
 112   va_end (argp);
 113 }
 114
 115 void
 116 yyerror (const char *s)
 117 {
 118   mc_error (s);
 119 }
 120
 121 static unichar *
 122 get_diff (unichar *end, unichar *start)
 123 {
 124   unichar *ret;
 125   unichar save = *end;
 126
 127   *end = 0;
 128   ret = unichar_dup (start);
 129   *end = save;
 130   return ret;
 131 }
 132
 133 static rc_uint_type
 134 parse_digit (unichar ch)
 135 {
 136   rc_uint_type base = 10, v = 0, c;
 137
 138   if (ch == '0')
 139     {
 140       base = 8;
 141       switch (input_stream_pos[0])
 142         {
 143         case 'x': case 'X': base = 16; input_stream_pos++; break;
 144         case 'o': case 'O': base = 8; input_stream_pos++; break;
 145         case 'b': case 'B': base = 2; input_stream_pos++; break;
 146         }
 147     }
 148   else
 149     v = (rc_uint_type) (ch - '0');
 150
 151   while ((ch = input_stream_pos[0]) != 0)
 152     {
 153       if (ch >= 'A' && ch <= 'F')
 154         c = (rc_uint_type) (ch - 'A') + 10;
 155       else if (ch >= 'a' && ch <= 'f')
 156         c = (rc_uint_type) (ch - 'a') + 10;
 157       else if (ch >= '0' && ch <= '9')
 158         c = (rc_uint_type) (ch - '0');
 159       else
 160         break;
 161       v *= base;
 162       v += c;
 163       ++input_stream_pos;
 164     }
 165   if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
 166     input_stream_pos++;
 167   if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
 168     input_stream_pos++;
 169   if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
 170     input_stream_pos++;
 171   return v;
 172 }
 173
/* Head of the singly linked list of known keywords.  mc_add_keyword
   keeps the list ordered by keyword length first and by memcmp order of
   the keyword text second.  */
static mc_keyword *keyword_top = NULL;
 175
 176 const mc_keyword *
 177 enum_facility (int e)
 178 {
 179   mc_keyword *h = keyword_top;
 180
 181   while (h != NULL)
 182     {
 183       while (h && strcmp (h->group_name, "facility") != 0)
 184         h = h->next;
 185       if (e == 0)
 186         return h;
 187       --e;
 188       if (h)
 189         h = h->next;
 190     }
 191   return h;
 192 }
 193
 194 const mc_keyword *
 195 enum_severity (int e)
 196 {
 197   mc_keyword *h = keyword_top;
 198
 199   while (h != NULL)
 200     {
 201       while (h && strcmp (h->group_name, "severity") != 0)
 202         h = h->next;
 203       if (e == 0)
 204         return h;
 205       --e;
 206       if (h)
 207         h = h->next;
 208     }
 209   return h;
 210 }
 211
 212 static void
 213 mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
 214 {
 215   unichar *usz = NULL, *usv = NULL;
 216   rc_uint_type usz_len;
 217
 218   unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
 219   if (sv)
 220     unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
 221   mc_add_keyword (usz, rid, grp, nv, usv);
 222 }
 223
 224 void
 225 mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
 226 {
 227   mc_keyword *p, *c, *n;
 228   size_t len = unichar_len (usz);
 229
 230   c = keyword_top;
 231   p = NULL;
 232   while (c != NULL)
 233     {
 234       if (c->len > len)
 235         break;
 236       if (c->len == len)
 237         {
 238           int e = memcmp (usz, c->usz, len * sizeof (unichar));
 239
 240           if (e < 0)
 241             break;
 242           if (! e)
 243             {
 244               if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
 245                 fatal (_("Duplicate symbol entered into keyword list."));
 246               c->rid = rid;
 247               c->nval = nv;
 248               c->sval = (!sv ? NULL : unichar_dup (sv));
 249               if (! strcmp (grp, "language"))
 250                 {
 251                   const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
 252
 253                   if (lag == NULL)
 254                     fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
 255                   memcpy (&c->lang_info, lag, sizeof (*lag));
 256                 }
 257               return;
 258             }
 259         }
 260       c = (p = c)->next;
 261     }
 262   n = xmalloc (sizeof (mc_keyword));
 263   n->next = c;
 264   n->len = len;
 265   n->group_name = grp;
 266   n->usz = usz;
 267   n->rid = rid;
 268   n->nval = nv;
 269   n->sval = (!sv ? NULL : unichar_dup (sv));
 270   if (! strcmp (grp, "language"))
 271     {
 272       const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
 273       if (lag == NULL)
 274         fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
 275       memcpy (&n->lang_info, lag, sizeof (*lag));
 276     }
 277   if (! p)
 278     keyword_top = n;
 279   else
 280     p->next = n;
 281 }
 282
 283 static int
 284 mc_token (const unichar *t, size_t len)
 285 {
 286   static int was_init = 0;
 287   mc_keyword *k;
 288
 289   if (! was_init)
 290     {
 291       was_init = 1;
 292       mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
 293       mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
 294       mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
 295       mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
 296       mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
 297       mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
 298       mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
 299       mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
 300       mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
 301       mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
 302       mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
 303       mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
 304       mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
 305       mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
 306       mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
 307       mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
 308       mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
 309   }
 310   k = keyword_top;
 311   if (!len || !t || *t == 0)
 312     return -1;
 313   while (k != NULL)
 314     {
 315       if (k->len > len)
 316         break;
 317       if (k->len == len)
 318         {
 319           if (! memcmp (k->usz, t, len * sizeof (unichar)))
 320             {
 321               if (k->rid == MCTOKEN)
 322                 yylval.tok = k;
 323               return k->rid;
 324             }
 325         }
 326       k = k->next;
 327     }
 328   return -1;
 329 }
 330
 331 /* Skip characters in input_stream_pos up to and including a newline
 332    character.  Returns non-zero if the newline was found, zero otherwise.  */
 333
 334 static int
 335 skip_until_eol (void)
 336 {
 337   while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
 338     ++input_stream_pos;
 339   if (input_stream_pos[0] == 0)
 340     return 0;
 341   if (input_stream_pos[0] == '\n')
 342     {
 343       ++input_stream_pos;
 344       input_line += 1;
 345     }
 346   return 1;
 347 }
 348
 349 int
 350 yylex (void)
 351 {
 352   unichar *start_token;
 353   unichar ch;
 354
 355   if (! input_stream_pos)
 356     {
 357       fatal ("Input stream not setuped.\n");
 358       return -1;
 359     }
 360
 361   if (mclex_want_line)
 362     {
 363       start_token = input_stream_pos;
 364       if (input_stream_pos[0] == 0)
 365         return -1;
 366       /* PR 26082: Reject a period followed by EOF.  */
 367       if (input_stream_pos[0] == '.' && input_stream_pos[1] == 0)
 368         return -1;
 369       if (input_stream_pos[0] == '.'
 370           && (input_stream_pos[1] == '\n'
 371               || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
 372         {
 373           mclex_want_line = false;
 374           return skip_until_eol () ? MCENDLINE : -1;
 375         }
 376       if (!skip_until_eol ())
 377         return -1;
 378       yylval.ustr = get_diff (input_stream_pos, start_token);
 379       return MCLINE;
 380     }
 381
 382   while ((ch = input_stream_pos[0]) <= 0x20)
 383     {
 384       if (ch == 0)
 385         return -1;
 386       ++input_stream_pos;
 387       if (ch == '\n')
 388         input_line += 1;
 389       if (mclex_want_nl && ch == '\n')
 390         {
 391           mclex_want_nl = false;
 392           return NL;
 393         }
 394     }
 395   start_token = input_stream_pos;
 396   ++input_stream_pos;
 397   if (mclex_want_filename)
 398     {
 399       mclex_want_filename = false;
 400       if (ch == '"')
 401         {
 402           start_token++;
 403           while ((ch = input_stream_pos[0]) != 0)
 404             {
 405               if (ch == '"')
 406                 break;
 407               ++input_stream_pos;
 408             }
 409           yylval.ustr = get_diff (input_stream_pos, start_token);
 410           if (ch == '"')
 411             ++input_stream_pos;
 412         }
 413       else
 414         {
 415           while ((ch = input_stream_pos[0]) != 0)
 416             {
 417               if (ch <= 0x20 || ch == ')')
 418                 break;
 419               ++input_stream_pos;
 420             }
 421           yylval.ustr = get_diff (input_stream_pos, start_token);
 422         }
 423       return MCFILENAME;
 424     }
 425   switch (ch)
 426   {
 427   case ';':
 428     ++start_token;
 429     if (!skip_until_eol ())
 430       return -1;
 431     yylval.ustr = get_diff (input_stream_pos, start_token);
 432     return MCCOMMENT;
 433   case '=':
 434     return '=';
 435   case '(':
 436     return '(';
 437   case ')':
 438     return ')';
 439   case '+':
 440     return '+';
 441   case ':':
 442     return ':';
 443   case '0': case '1': case '2': case '3': case '4':
 444   case '5': case '6': case '7': case '8': case '9':
 445     yylval.ival = parse_digit (ch);
 446     return MCNUMBER;
 447   default:
 448     if (ch >= 0x40)
 449       {
 450         int ret;
 451         while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
 452           ++input_stream_pos;
 453         ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
 454         if (ret != -1)
 455           return ret;
 456         yylval.ustr = get_diff (input_stream_pos, start_token);
 457         return MCIDENT;
 458       }
 459     mc_error ("illegal character 0x%x.", ch);
 460   }
 461   return -1;
 462 }