contrib/binutils/binutils/mclex.c

   1 /* mclex.c -- lexer for Windows mc files parser.
   2    Copyright 2007
   3    Free Software Foundation, Inc.
   4
   5    Written by Kai Tietz, Onevision.
   6
   7    This file is part of GNU Binutils.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  22    02110-1301, USA.  */
  23
   24 /* This is a lexer used by the Windows mc file parser.
   25    It basically just recognizes a bunch of keywords.  */
  26
  27 #include "sysdep.h"
  28 #include "bfd.h"
  29 #include "bucomm.h"
  30 #include "libiberty.h"
  31 #include "safe-ctype.h"
  32 #include "windmc.h"
  33 #include "mcparse.h"
  34
  35 #include <assert.h>
  36
/* Exported globals.  These are mode switches consulted by yylex.  */
/* When set, the next '\n' met while skipping white space is returned
   as NL and the flag is cleared.  */
bfd_boolean mclex_want_nl = FALSE;
/* When set, yylex returns whole input lines as MCLINE.  A line that
   consists of a single '.' yields MCENDLINE and clears the flag.  */
bfd_boolean mclex_want_line = FALSE;
/* When set, the next token is scanned as an (optionally double-quoted)
   MCFILENAME and the flag is cleared.  */
bfd_boolean mclex_want_filename = FALSE;

/* Local globals.  */
/* Private copy of the text handed to mc_set_content.  */
static unichar *input_stream = NULL;
/* Current read position inside input_stream; NULL until
   mc_set_content has been called with a non-NULL source.  */
static unichar *input_stream_pos = NULL;
/* Line number printed in diagnostics by show_msg.  */
static int input_line = 1;
/* File name printed in diagnostics; assigned by mc_set_inputfile.  */
static const char *input_filename = NULL;
  47
  48 void
  49 mc_set_content (const unichar *src)
  50 {
  51   if (!src)
  52     return;
  53   input_stream = input_stream_pos = unichar_dup (src);
  54 }
  55
  56 void
  57 mc_set_inputfile (const char *name)
  58 {
  59   if (! name || *name == 0)
  60     input_filename = "-";
  61   else
  62     {
  63       const char *s1 = strrchr (name, '/');
  64       const char *s2 = strrchr (name, '\\');
  65
  66       if (! s1)
  67         s1 = s2;
  68       if (s1 && s2 && s1 < s2)
  69         s1 = s2;
  70       if (! s1)
  71         s1 = name;
  72       else
  73         s1++;
  74       s1 = xstrdup (s1);
  75       input_filename = s1;
  76     }
  77 }
  78
  79 static void
  80 show_msg (const char *kind, const char *msg, va_list argp)
  81 {
  82   fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
  83   vfprintf (stderr, msg, argp);
  84   fprintf (stderr, ".\n");
  85 }
  86
  87 void
  88 mc_warn (const char *s, ...)
  89 {
  90   va_list argp;
  91   va_start (argp, s);
  92   show_msg ("warning", s, argp);
  93   va_end (argp);
  94 }
  95
  96 void
  97 mc_fatal (const char *s, ...)
  98 {
  99   va_list argp;
 100   va_start (argp, s);
 101   show_msg ("fatal", s, argp);
 102   va_end (argp);
 103   xexit (1);
 104 }
 105
 106
 107 int
 108 yyerror (const char *s, ...)
 109 {
 110   va_list argp;
 111   va_start (argp, s);
 112   show_msg ("parser", s, argp);
 113   va_end (argp);
 114   return 1;
 115 }
 116
 117 static unichar *
 118 get_diff (unichar *end, unichar *start)
 119 {
 120   unichar *ret;
 121   unichar save = *end;
 122
 123   *end = 0;
 124   ret = unichar_dup (start);
 125   *end = save;
 126   return ret;
 127 }
 128
 129 static rc_uint_type
 130 parse_digit (unichar ch)
 131 {
 132   rc_uint_type base = 10, v = 0, c;
 133
 134   if (ch == '0')
 135     {
 136       base = 8;
 137       switch (input_stream_pos[0])
 138         {
 139         case 'x': case 'X': base = 16; input_stream_pos++; break;
 140         case 'o': case 'O': base = 8; input_stream_pos++; break;
 141         case 'b': case 'B': base = 2; input_stream_pos++; break;
 142         }
 143     }
 144   else
 145     v = (rc_uint_type) (ch - '0');
 146
 147   while ((ch = input_stream_pos[0]) != 0)
 148     {
 149       if (ch >= 'A' && ch <= 'F')
 150         c = (rc_uint_type) (ch - 'A') + 10;
 151       else if (ch >= 'a' && ch <= 'f')
 152         c = (rc_uint_type) (ch - 'a') + 10;
 153       else if (ch >= '0' && ch <= '9')
 154         c = (rc_uint_type) (ch - '0');
 155       else
 156         break;
 157       v *= base;
 158       v += c;
 159       ++input_stream_pos;
 160     }
 161   if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
 162     input_stream_pos++;
 163   if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
 164     input_stream_pos++;
 165   if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
 166     input_stream_pos++;
 167   return v;
 168 }
 169
/* Head of the singly linked list of known keywords.  mc_add_keyword
   keeps it ordered by name length and, within one length, by memcmp
   order of the name.  */
static mc_keyword *keyword_top = NULL;
 171
 172 const mc_keyword *
 173 enum_facility (int e)
 174 {
 175   mc_keyword *h = keyword_top;
 176
 177   while (h != NULL)
 178     {
 179       while (h && strcmp (h->group_name, "facility") != 0)
 180         h = h->next;
 181       if (e == 0)
 182         return h;
 183       --e;
 184       if (h)
 185         h = h->next;
 186     }
 187   return h;
 188 }
 189
 190 const mc_keyword *
 191 enum_severity (int e)
 192 {
 193   mc_keyword *h = keyword_top;
 194
 195   while (h != NULL)
 196     {
 197       while (h && strcmp (h->group_name, "severity") != 0)
 198         h = h->next;
 199       if (e == 0)
 200         return h;
 201       --e;
 202       if (h)
 203         h = h->next;
 204     }
 205   return h;
 206 }
 207
 208 static void
 209 mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
 210 {
 211   unichar *usz, *usv = NULL;
 212   rc_uint_type usz_len;
 213
 214   unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
 215   if (sv)
 216     unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
 217   mc_add_keyword (usz, rid, grp, nv, usv);
 218 }
 219
 220 void
 221 mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
 222 {
 223   mc_keyword *p, *c, *n;
 224   size_t len = unichar_len (usz);
 225
 226   c = keyword_top;
 227   p = NULL;
 228   while (c != NULL)
 229     {
 230       if (c->len > len)
 231         break;
 232       if (c->len == len)
 233         {
 234           int e = memcmp (usz, c->usz, len * sizeof (unichar));
 235
 236           if (e < 0)
 237             break;
 238           if (! e)
 239             {
 240               if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
 241                 fatal (_("Duplicate symbol entered into keyword list."));
 242               c->rid = rid;
 243               c->nval = nv;
 244               c->sval = (!sv ? NULL : unichar_dup (sv));
 245               if (! strcmp (grp, "language"))
 246                 {
 247                   const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
 248
 249                   if (lag == NULL)
 250                     fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
 251                   memcpy (&c->lang_info, lag, sizeof (*lag));
 252                 }
 253               return;
 254             }
 255         }
 256       c = (p = c)->next;
 257     }
 258   n = xmalloc (sizeof (mc_keyword));
 259   n->next = c;
 260   n->len = len;
 261   n->group_name = grp;
 262   n->usz = usz;
 263   n->rid = rid;
 264   n->nval = nv;
 265   n->sval = (!sv ? NULL : unichar_dup (sv));
 266   if (! strcmp (grp, "language"))
 267     {
 268       const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
 269       if (lag == NULL)
 270         fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
 271       memcpy (&n->lang_info, lag, sizeof (*lag));
 272     }
 273   if (! p)
 274     keyword_top = n;
 275   else
 276     p->next = n;
 277 }
 278
 279 static int
 280 mc_token (const unichar *t, size_t len)
 281 {
 282   static int was_init = 0;
 283   mc_keyword *k;
 284
 285   if (! was_init)
 286     {
 287       was_init = 1;
 288       mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
 289       mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
 290       mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
 291       mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
 292       mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
 293       mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
 294       mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
 295       mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
 296       mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
 297       mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
 298       mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
 299       mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
 300       mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
 301       mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
 302       mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
 303       mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
 304       mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
 305   }
 306   k = keyword_top;
 307   if (!len || !t || *t == 0)
 308     return -1;
 309   while (k != NULL)
 310     {
 311       if (k->len > len)
 312         break;
 313       if (k->len == len)
 314         {
 315           if (! memcmp (k->usz, t, len * sizeof (unichar)))
 316             {
 317               if (k->rid == MCTOKEN)
 318                 yylval.tok = k;
 319               return k->rid;
 320             }
 321         }
 322       k = k->next;
 323     }
 324   return -1;
 325 }
 326
 327 int
 328 yylex (void)
 329 {
 330   unichar *start_token;
 331   unichar ch;
 332
 333   if (! input_stream_pos)
 334     {
 335       fatal ("Input stream not setuped.\n");
 336       return -1;
 337     }
 338   if (mclex_want_line)
 339     {
 340       start_token = input_stream_pos;
 341       if (input_stream_pos[0] == '.'
 342           && (input_stream_pos[1] == '\n'
 343               || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
 344       {
 345         mclex_want_line = FALSE;
 346         while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
 347           ++input_stream_pos;
 348         if (input_stream_pos[0] == '\n')
 349           ++input_stream_pos;
 350         return MCENDLINE;
 351       }
 352       while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
 353         ++input_stream_pos;
 354       if (input_stream_pos[0] == '\n')
 355         ++input_stream_pos;
 356       yylval.ustr = get_diff (input_stream_pos, start_token);
 357       return MCLINE;
 358     }
 359   while ((ch = input_stream_pos[0]) <= 0x20)
 360     {
 361       if (ch == 0)
 362         return -1;
 363       ++input_stream_pos;
 364       if (ch == '\n')
 365         input_line += 1;
 366       if (mclex_want_nl && ch == '\n')
 367         {
 368           mclex_want_nl = FALSE;
 369           return NL;
 370         }
 371     }
 372   start_token = input_stream_pos;
 373   ++input_stream_pos;
 374   if (mclex_want_filename)
 375     {
 376       mclex_want_filename = FALSE;
 377       if (ch == '"')
 378         {
 379           start_token++;
 380           while ((ch = input_stream_pos[0]) != 0)
 381             {
 382               if (ch == '"')
 383                 break;
 384               ++input_stream_pos;
 385             }
 386           yylval.ustr = get_diff (input_stream_pos, start_token);
 387           if (ch == '"')
 388             ++input_stream_pos;
 389         }
 390       else
 391         {
 392           while ((ch = input_stream_pos[0]) != 0)
 393             {
 394               if (ch <= 0x20 || ch == ')')
 395                 break;
 396               ++input_stream_pos;
 397             }
 398           yylval.ustr = get_diff (input_stream_pos, start_token);
 399         }
 400       return MCFILENAME;
 401     }
 402   switch (ch)
 403   {
 404   case ';':
 405     ++start_token;
 406     while (input_stream_pos[0] != '\n' && input_stream_pos[0] != 0)
 407       ++input_stream_pos;
 408     if (input_stream_pos[0] == '\n')
 409       input_stream_pos++;
 410     yylval.ustr = get_diff (input_stream_pos, start_token);
 411     return MCCOMMENT;
 412   case '=':
 413     return '=';
 414   case '(':
 415     return '(';
 416   case ')':
 417     return ')';
 418   case '+':
 419     return '+';
 420   case ':':
 421     return ':';
 422   case '0': case '1': case '2': case '3': case '4':
 423   case '5': case '6': case '7': case '8': case '9':
 424     yylval.ival = parse_digit (ch);
 425     return MCNUMBER;
 426   default:
 427     if (ch >= 0x40)
 428       {
 429         int ret;
 430         while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
 431           ++input_stream_pos;
 432         ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
 433         if (ret != -1)
 434           return ret;
 435         yylval.ustr = get_diff (input_stream_pos, start_token);
 436         return MCIDENT;
 437       }
 438     yyerror ("illegal character 0x%x.", ch);
 439   }
 440   return -1;
 441 }