]> git.ipfire.org Git - thirdparty/gettext.git/commitdiff
desktop: Simplify the parsing logic
author Daiki Ueno <ueno@gnu.org>
Mon, 8 Dec 2014 10:22:16 +0000 (19:22 +0900)
committer Daiki Ueno <ueno@gnu.org>
Mon, 8 Dec 2014 22:09:04 +0000 (07:09 +0900)
* read-desktop.h (desktop_reader_class_ty): Rename 'handle_text'
to 'handle_blank'.
(desktop_reader_handle_blank): Rename from
'desktop_reader_handle_text'.
* read-desktop.c (SIZEOF): New macro.
(desktop_reader_handle_blank): Rename from
'desktop_reader_handle_text'.
(read_until_newline, read_group_name, read_key_name): Remove.
Merge into...
(desktop_lex): ...here.
(desktop_parse): Call 'desktop_lex' instead of read_*.  Don't
normalize whitespace.
(enum token_type_ty): New enum.
(struct token_ty): New struct.
(free_token): New function.
* write-desktop.c (msgfmt_desktop_handle_blank): Rename from
'msgfmt_desktop_handle_text'.
* x-desktop.c: Include "c-ctype.h".
(extract_desktop_handle_comment): Normalize whitespace here.
(extract_desktop_handle_blank): Rename from
'extract_desktop_handle_text'.

gettext-tools/src/ChangeLog
gettext-tools/src/read-desktop.c
gettext-tools/src/read-desktop.h
gettext-tools/src/write-desktop.c
gettext-tools/src/x-desktop.c

index 8baf37c216c76baefa2877e36f0d08d1d48db209..53768f85c3ffa542d81cc1fb7adc62cad561f214 100644 (file)
@@ -1,3 +1,28 @@
+2014-12-09  Daiki Ueno  <ueno@gnu.org>
+
+       desktop: Simplify the parsing logic
+       * read-desktop.h (desktop_reader_class_ty): Rename 'handle_text'
+       to 'handle_blank'.
+       (desktop_reader_handle_blank): Rename from
+       'desktop_reader_handle_text'.
+       * read-desktop.c (SIZEOF): New macro.
+       (desktop_reader_handle_blank): Rename from
+       'desktop_reader_handle_text'.
+       (read_until_newline, read_group_name, read_key_name): Remove.
+       Merge into...
+       (desktop_lex): ...here.
+       (desktop_parse): Call 'desktop_lex' instead of read_*.  Don't
+       normalize whitespace.
+       (enum token_type_ty): New enum.
+       (struct token_ty): New struct.
+       (free_token): New function.
+       * write-desktop.c (msgfmt_desktop_handle_blank): Rename from
+       'msgfmt_desktop_handle_text'.
+       * x-desktop.c: Include "c-ctype.h".
+       (extract_desktop_handle_comment): Normalize whitespace here.
+       (extract_desktop_handle_blank): Rename from
+       'extract_desktop_handle_text'.
+
 2014-12-07  Daiki Ueno  <ueno@gnu.org>
 
        vala: Make regex literal handling robuster
index c1665d2472171510c4380a4dfaae2db513422b53..37c557a80b8ee7fe62d7872eaecf46fe35ec63c0 100644 (file)
@@ -42,6 +42,8 @@
 
 #define _(str) gettext (str)
 
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
 /* The syntax of a Desktop Entry file is defined at
    http://standards.freedesktop.org/desktop-entry-spec/latest/index.html.  */
 
@@ -91,10 +93,10 @@ desktop_reader_handle_comment (desktop_reader_ty *reader, const char *s)
 }
 
 void
-desktop_reader_handle_text (desktop_reader_ty *reader, const char *s)
+desktop_reader_handle_blank (desktop_reader_ty *reader, const char *s)
 {
-  if (reader->methods->handle_text)
-    reader->methods->handle_text (reader, s);
+  if (reader->methods->handle_blank)
+    reader->methods->handle_blank (reader, s);
 }
 
 /* Real filename, used in error messages about the input file.  */
@@ -178,124 +180,302 @@ phase2_ungetc (int c)
     phase2_pushback[phase2_pushback_length++] = c;
 }
 
-static char *
-read_until_newline (void)
+enum token_type_ty
 {
-  char *buffer = NULL;
-  size_t bufmax = 0;
-  size_t buflen;
-
-  buflen = 0;
-  for (;;)
-    {
-      int c;
-
-      c = phase2_getc ();
-
-      if (buflen >= bufmax)
-        {
-          bufmax += 100;
-          buffer = xrealloc (buffer, bufmax);
-        }
-
-      if (c == EOF || c == '\n')
-        break;
+  token_type_eof,
+  token_type_group,
+  token_type_pair,
+  /* Unlike other scanners, preserve comments and blank lines for
+     merging translations back into a desktop file, with msgfmt.  */
+  token_type_comment,
+  token_type_blank,
+  token_type_other
+};
+typedef enum token_type_ty token_type_ty;
+
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;
+  const char *value;
+  const char *locale;
+};
 
-      buffer[buflen++] = c;
-    }
-  buffer[buflen] = '\0';
-  return buffer;
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_group || tp->type == token_type_pair
+      || tp->type == token_type_comment || tp->type == token_type_blank)
+    free (tp->string);
 }
 
-static char *
-read_group_name (void)
+static void
+desktop_lex (token_ty *tp)
 {
-  char *buffer = NULL;
-  size_t bufmax = 0;
-  size_t buflen;
-
-  buflen = 0;
+  static char *buffer;
+  static size_t bufmax;
+  size_t bufpos;
+
+#define APPEND(c)                               \
+  do                                            \
+    {                                           \
+      if (bufpos >= bufmax)                     \
+        {                                       \
+          bufmax += 100;                        \
+          buffer = xrealloc (buffer, bufmax);   \
+        }                                       \
+      buffer[bufpos++] = c;                     \
+    }                                           \
+  while (0)
+
+  bufpos = 0;
   for (;;)
     {
       int c;
 
       c = phase2_getc ();
 
-      if (buflen >= bufmax)
+      switch (c)
         {
-          bufmax += 100;
-          buffer = xrealloc (buffer, bufmax);
-        }
+        case EOF:
+          tp->type = token_type_eof;
+          return;
 
-      if (c == EOF || c == '\n' || c == ']')
-        break;
+        case '[':
+          {
+            bool non_blank = false;
 
-      buffer[buflen++] = c;
-    }
-  buffer[buflen] = '\0';
-  return buffer;
-}
+            for (;;)
+              {
+                c = phase2_getc ();
+                switch (c)
+                  {
+                  default:
+                    /* Group names may contain all ASCII characters
+                       except for '[' and ']' and control characters.  */
+                    if (!(c_isascii (c) && c != '[') && !c_iscntrl (c))
+                      break;
+                    APPEND (c);
+                    continue;
+                  case '\n':
+                    po_xerror (PO_SEVERITY_WARNING, NULL,
+                               real_file_name, gram_pos.line_number, 0, false,
+                               _("unterminated group name"));
+                    break;
+                  case EOF: case ']':
+                    break;
+                  }
+                break;
+              }
+            /* Skip until newline.  */
+            if (c != '\n')
+              {
+                for (;;)
+                  {
+                    if (c == '\n' || c == EOF)
+                      break;
+                    if (!c_isspace (c))
+                      non_blank = true;
+                    c = phase2_getc ();
+                  }
+              }
+            if (non_blank)
+              po_xerror (PO_SEVERITY_WARNING, NULL,
+                         real_file_name, gram_pos.line_number, 0, false,
+                         _("invalid non-blank character"));
+            APPEND (0);
+            tp->type = token_type_group;
+            tp->string = xstrdup (buffer);
+            return;
+          }
+
+        case '#':
+          {
+            /* Read until newline.  */
+            for (;;)
+              {
+                c = phase2_getc ();
+                switch (c)
+                  {
+                  default:
+                    APPEND (c);
+                    continue;
+                  case EOF: case '\n':
+                    break;
+                  }
+                break;
+              }
+            APPEND (0);
+            tp->type = token_type_comment;
+            tp->string = xstrdup (buffer);
+            return;
+          }
+
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+        case 'Y': case 'Z':
+        case '-':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+        case 'y': case 'z':
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          {
+            const char *locale = NULL;
+            const char *value = NULL;
+            for (;;)
+              {
+                APPEND (c);
 
-static char *
-read_key_name (const char **locale)
-{
-  char *buffer = NULL;
-  size_t bufmax = 0;
-  size_t buflen;
-  const char *locale_start = NULL;
+                c = phase2_getc ();
+                switch (c)
+                  {
+                  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                  case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+                  case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+                  case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+                  case 'Y': case 'Z':
+                  case '-':
+                  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+                  case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+                  case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+                  case 'y': case 'z':
+                  case '0': case '1': case '2': case '3': case '4':
+                  case '5': case '6': case '7': case '8': case '9':
+                    continue;
+
+                  case '[':
+                    /* Finish the key part and start the locale part.  */
+                    APPEND (0);
+                    locale = &buffer[bufpos];
+
+                    for (;;)
+                      {
+                        int c2 = phase2_getc ();
+                        switch (c2)
+                          {
+                          default:
+                            APPEND (c2);
+                            continue;
+                          case EOF: case ']':
+                            break;
+                          }
+                        break;
+                      }
+                    break;
+
+                  default:
+                    phase2_ungetc (c);
+                    break;
+                  }
+                break;
+              }
+            APPEND (0);
 
-  buflen = 0;
-  for (;;)
-    {
-      int c;
+            /* Skip any whitespace before '='.  */
+            for (;;)
+              {
+                c = phase2_getc ();
+                switch (c)
+                  {
+                  default:
+                    if (c_isspace (c))
+                      continue;
+                    phase2_ungetc (c);
+                    break;
+                  case EOF: case '\n':
+                    break;
+                  }
+                break;
+              }
 
-      c = phase2_getc ();
+            c = phase2_getc ();
+            if (c != '=')
+              {
+                po_xerror (PO_SEVERITY_WARNING, NULL,
+                           real_file_name, gram_pos.line_number, 0, false,
+                           xasprintf (_("missing '=' after \"%s\""), buffer));
+                for (;;)
+                  {
+                    c = phase2_getc ();
+                    if (c == EOF || c == '\n')
+                      break;
+                  }
+                tp->type = token_type_other;
+                return;
+              }
+
+            /* Skip any whitespace after '='.  */
+            for (;;)
+              {
+                c = phase2_getc ();
+                switch (c)
+                  {
+                  default:
+                    if (c_isspace (c))
+                      continue;
+                    phase2_ungetc (c);
+                    break;
+                  case EOF: case '\n':
+                    break;
+                  }
+                break;
+              }
 
-      if (buflen >= bufmax)
-        {
-          bufmax += 100;
-          buffer = xrealloc (buffer, bufmax);
-        }
+            value = &buffer[bufpos];
+            for (;;)
+              {
+                c = phase2_getc ();
+                if (c == EOF || c == '\n')
+                  break;
+                APPEND (c);
+              }
+            APPEND (0);
+            tp->type = token_type_pair;
+            tp->string = xmemdup (buffer, bufpos);
+            tp->locale = locale;
+            tp->value = value;
+            return;
+          }
+        default:
+          {
+            bool non_blank = false;
 
-      if (c == EOF || c == '\n')
-        break;
+            for (;;)
+              {
+                if (c == '\n' || c == EOF)
+                  break;
 
-      if (!locale_start)
-        {
-          if (c == '[')
-            {
-              buffer[buflen++] = '\0';
-              locale_start = &buffer[buflen];
-              continue;
-            }
-          else if (!c_isalnum (c) && c != '-')
-            {
-              phase2_ungetc (c);
-              break;
-            }
-        }
-      else
-        {
-          if (c == ']')
-            {
-              buffer[buflen++] = '\0';
-              break;
-            }
-          else if (!c_isascii (c))
-            {
-              phase2_ungetc (c);
-              break;
-            }
-        }
+                if (!c_isspace (c))
+                  non_blank = true;
+                else
+                  APPEND (c);
 
-      buffer[buflen++] = c;
+                c = phase2_getc ();
+              }
+            if (non_blank)
+              {
+                po_xerror (PO_SEVERITY_WARNING, NULL,
+                           real_file_name, gram_pos.line_number, 0, false,
+                           _("invalid non-blank line"));
+                tp->type = token_type_other;
+                return;
+              }
+            APPEND (0);
+            tp->type = token_type_blank;
+            tp->string = xstrdup (buffer);
+            return;
+          }
+        }
     }
-  buffer[buflen] = '\0';
-
-  if (locale_start)
-    *locale = locale_start;
-
-  return buffer;
+#undef APPEND
 }
 
 void
@@ -309,96 +489,30 @@ desktop_parse (desktop_reader_ty *reader, FILE *file,
 
   for (;;)
     {
-      int c;
-
-      c = phase2_getc ();
-
-      if (c == EOF)
-        break;
-
-      if (c == '[')
-        {
-          /* A group header.  */
-          char *group_name;
-
-          group_name = read_group_name ();
-
-          do
-            c = phase2_getc ();
-          while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
-
-          if (c == EOF)
-            break;
-
-          phase2_ungetc (c);
-
-          desktop_reader_handle_group (reader, group_name);
-          free (group_name);
-        }
-      else if (c == '#')
-        {
-          /* A comment line.  */
-          char *comment;
-
-          comment = read_until_newline ();
-          desktop_reader_handle_comment (reader, comment);
-          free (comment);
-        }
-      else if (c_isalnum (c) || c == '-')
-        {
-          /* A key/value pair.  */
-          char *key_name;
-          const char *locale;
-
-          phase2_ungetc (c);
-
-          locale = NULL;
-          key_name = read_key_name (&locale);
-          do
-            c = phase2_getc ();
-          while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
-
-          if (c == EOF)
-            break;
-
-          if (c != '=')
-            {
-              po_xerror (PO_SEVERITY_FATAL_ERROR, NULL,
-                         real_filename, gram_pos.line_number, 0, false,
-                         xasprintf (_("missing '=' after \"%s\""), key_name));
-            }
-          else
-            {
-              char *value;
-
-              do
-                c = phase2_getc ();
-              while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
-
-              if (c == EOF)
-                break;
-
-              phase2_ungetc (c);
-
-              value = read_until_newline ();
-              desktop_reader_handle_pair (reader, &gram_pos,
-                                          key_name, locale, value);
-              free (value);
-            }
-          free (key_name);
-        }
-      else
+      struct token_ty token;
+      desktop_lex (&token);
+      switch (token.type)
         {
-          char *text;
-
-          phase2_ungetc (c);
-
-          text = read_until_newline ();
-          desktop_reader_handle_text (reader, text);
-          free (text);
+        case token_type_eof:
+          goto out;
+        case token_type_group:
+          desktop_reader_handle_group (reader, token.string);
+          break;
+        case token_type_comment:
+          desktop_reader_handle_comment (reader, token.string);
+          break;
+        case token_type_pair:
+          desktop_reader_handle_pair (reader, &gram_pos,
+                                      token.string, token.locale, token.value);
+          break;
+        case token_type_blank:
+          desktop_reader_handle_blank (reader, token.string);
+          break;
         }
+      free_token (&token);
     }
 
+ out:
   fp = NULL;
   real_file_name = NULL;
   gram_pos.line_number = 0;
index c36cc6b066f486e9feb403fbc85e3a1a55f33a2a..19ad8d8baf0fb17b6bd9ddad760aa0a29bdc8d36 100644 (file)
@@ -63,8 +63,8 @@ struct desktop_reader_class_ty
   /* what to do with a comment */
   void (*handle_comment) (struct desktop_reader_ty *pop, const char *s);
 
-  /* what to do with other lines */
-  void (*handle_text) (struct desktop_reader_ty *pop, const char *s);
+  /* what to do with a blank line */
+  void (*handle_blank) (struct desktop_reader_ty *pop, const char *s);
 };
 
 /* This next structure defines the base class passed to the methods.
@@ -99,8 +99,8 @@ void desktop_reader_handle_pair (desktop_reader_ty *reader,
 void desktop_reader_handle_comment (desktop_reader_ty *reader,
                                     const char *s);
 
-void desktop_reader_handle_text (desktop_reader_ty *reader,
-                                 const char *s);
+void desktop_reader_handle_blank (desktop_reader_ty *reader,
+                                  const char *s);
 
 
 void desktop_parse (desktop_reader_ty *reader, FILE *file,
index cb953d0e8e9a6c883e5d1381ba2964870b96cd67..dd3fb60649c76ea75cbbfd055ad73c5fe7f45f9e 100644 (file)
@@ -117,7 +117,7 @@ msgfmt_desktop_handle_comment (struct desktop_reader_ty *reader, const char *s)
 }
 
 static void
-msgfmt_desktop_handle_text (struct desktop_reader_ty *reader, const char *s)
+msgfmt_desktop_handle_blank (struct desktop_reader_ty *reader, const char *s)
 {
   msgfmt_desktop_reader_ty *msgfmt_reader = (msgfmt_desktop_reader_ty *) reader;
 
@@ -133,7 +133,7 @@ desktop_reader_class_ty msgfmt_methods =
     msgfmt_desktop_handle_group,
     msgfmt_desktop_handle_pair,
     msgfmt_desktop_handle_comment,
-    msgfmt_desktop_handle_text
+    msgfmt_desktop_handle_blank
   };
 
 int
index 8484004c9776c108509bb59426d52f1535cf2d47..320266e9cd9f5935d278c3384de22501e9837595 100644 (file)
@@ -39,6 +39,7 @@
 #include "gettext.h"
 #include "read-desktop.h"
 #include "po-charset.h"
+#include "c-ctype.h"
 
 #define _(s) gettext(s)
 
@@ -132,14 +133,29 @@ extract_desktop_handle_pair (struct desktop_reader_ty *reader,
 
 static void
 extract_desktop_handle_comment (struct desktop_reader_ty *reader,
-                                const char *s)
+                                const char *buffer)
 {
-  savable_comment_add (s);
+  size_t buflen = strlen (buffer);
+  size_t bufpos = 0;
+
+  while (bufpos < buflen
+         && c_isspace (buffer[bufpos]))
+    ++bufpos;
+  while (buflen >= bufpos
+         && c_isspace (buffer[buflen - 1]))
+    --buflen;
+  if (bufpos < buflen)
+    {
+      char *comment = xstrdup (buffer);
+      comment[buflen] = 0;
+      savable_comment_add (&comment[bufpos]);
+      free (comment);
+    }
 }
 
 static void
-extract_desktop_handle_text (struct desktop_reader_ty *reader,
-                             const char *s)
+extract_desktop_handle_blank (struct desktop_reader_ty *reader,
+                              const char *s)
 {
   savable_comment_reset ();
 }
@@ -152,7 +168,7 @@ desktop_reader_class_ty extract_methods =
     extract_desktop_handle_group,
     extract_desktop_handle_pair,
     extract_desktop_handle_comment,
-    extract_desktop_handle_text
+    extract_desktop_handle_blank
   };
 
 void