Update copyright dates with scripts/update-copyrights.

[thirdparty/glibc.git] / catgets / gencat.c
diff --git a/catgets/gencat.c b/catgets/gencat.c

index 7fe159e5b9e601fdf29ed354d5fb187ed41ec0a0..52fa0c48f37c3082da2e4d578236219ac13a7bc6 100644 (file)
--- a/catgets/gencat.c
+++ b/catgets/gencat.c
@@ -1,41 +1,44 @@
-/* Copyright (C) 1996 Free Software Foundation, Inc.
-This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
  
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published
+   by the Free Software Foundation; version 2 of the License, or
+   (at your option) any later version.
  
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
  
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  
  #ifdef HAVE_CONFIG_H
-# include <config.h>
+# include "config.h"
  #endif
  
+#include <argp.h>
+#include <assert.h>
  #include <ctype.h>
  #include <endian.h>
  #include <errno.h>
  #include <error.h>
  #include <fcntl.h>
-#include <getopt.h>
+#include <iconv.h>
+#include <langinfo.h>
  #include <locale.h>
  #include <libintl.h>
  #include <limits.h>
  #include <nl_types.h>
  #include <obstack.h>
+#include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
+#include <wchar.h>
  
  #include "version.h"
  
@@ -78,7 +81,7 @@ struct catalog
    struct set_list *all_sets;
    struct set_list *current_set;
    size_t total_messages;
-  char quote_char;
+  wint_t quote_char;
    int last_set;
  
    struct obstack mem_pool;
@@ -88,42 +91,74 @@ struct catalog
  /* If non-zero force creation of new file, not using existing one.  */
  static int force_new;
  
-/* Long options.  */
-static const struct option long_options[] =
+/* Name of output file.  */
+static const char *output_name;
+
+/* Name of generated C header file.  */
+static const char *header_name;
+
+/* Name and version of program.  */
+static void print_version (FILE *stream, struct argp_state *state);
+void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
+
+#define OPT_NEW 1
+
+/* Definitions of arguments for argp functions.  */
+static const struct argp_option options[] =
+{
+  { "header", 'H', N_("NAME"), 0,
+    N_("Create C header file NAME containing symbol definitions") },
+  { "new", OPT_NEW, NULL, 0,
+    N_("Do not use existing catalog, force new output file") },
+  { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
+  { NULL, 0, NULL, 0, NULL }
+};
+
+/* Short description of program.  */
+static const char doc[] = N_("Generate message catalog.\
+\vIf INPUT-FILE is -, input is read from standard input.  If OUTPUT-FILE\n\
+is -, output is written to standard output.\n");
+
+/* Strings for arguments in help texts.  */
+static const char args_doc[] = N_("\
+-o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
+
+/* Prototype for option handler.  */
+static error_t parse_opt (int key, char *arg, struct argp_state *state);
+
+/* Function to print some extra text in the help message.  */
+static char *more_help (int key, const char *text, void *input);
+
+/* Data structure to communicate with argp functions.  */
+static struct argp argp =
  {
-  { "header", required_argument, NULL, 'H' },
-  { "help", no_argument, NULL, 'h' },
-  { "new", no_argument, &force_new, 1 },
-  { "output", required_argument, NULL, 'o' },
-  { "version", no_argument, NULL, 'V' },
-  { NULL, 0, NULL, 0 }
+  options, parse_opt, args_doc, doc, NULL, more_help
  };
  
+
  /* Wrapper functions with error checking for standard functions.  */
-extern void *xmalloc (size_t n);
+#include <programs/xmalloc.h>
  
  /* Prototypes for local functions.  */
-static void usage (int status) __attribute__ ((noreturn));
  static void error_print (void);
  static struct catalog *read_input_file (struct catalog *current,
                                         const char *fname);
  static void write_out (struct catalog *result, const char *output_name,
                        const char *header_name);
  static struct set_list *find_set (struct catalog *current, int number);
-static void normalize_line (const char *fname, size_t line, char *string,
-                           char quote_char);
+static void normalize_line (const char *fname, size_t line, iconv_t cd,
+                           wchar_t *string, wchar_t quote_char,
+                           wchar_t escape_char);
  static void read_old (struct catalog *catalog, const char *file_name);
+static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
+                           iconv_t *cd_tombp, wchar_t *escape_charp);
  
  
  int
  main (int argc, char *argv[])
  {
    struct catalog *result;
-  const char *output_name;
-  const char *header_name;
-  int do_help;
-  int do_version;
-  int opt;
+  int remaining;
  
    /* Set program name for messages.  */
    error_print_progname = error_print;
@@ -135,62 +170,21 @@ main (int argc, char *argv[])
    textdomain (PACKAGE);
  
    /* Initialize local variables.  */
-  do_help = 0;
-  do_version = 0;
-  output_name = NULL;
-  header_name = NULL;
    result = NULL;
  
-  while ((opt = getopt_long (argc, argv, "hH:o:V", long_options, NULL)) != EOF)
-    switch (opt)
-      {
-      case '\0':       /* Long option.  */
-       break;
-      case 'h':
-       do_help = 1;
-       break;
-      case 'H':
-       header_name = optarg;
-       break;
-      case 'o':
-       output_name = optarg;
-       break;
-      case 'V':
-       do_version = 1;
-       break;
-      default:
-       usage (EXIT_FAILURE);
-      }
-
-  /* Version information is requested.  */
-  if (do_version)
-    {
-      fprintf (stderr, "gencat (GNU %s) %s\n", PACKAGE, VERSION);
-      fprintf (stderr, _("\
-Copyright (C) %s Free Software Foundation, Inc.\n\
-This is free software; see the source for copying conditions.  There is NO\n\
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "1996");
-      fprintf (stderr, _("Written by %s.\n"),
-              "Ulrich Drepper");
-
-      exit (EXIT_SUCCESS);
-    }
-
-  /* Help is requested.  */
-  if (do_help)
-    usage (EXIT_SUCCESS);
+  /* Parse and process arguments.  */
+  argp_parse (&argp, argc, argv, 0, &remaining, NULL);
  
    /* Determine output file.  */
    if (output_name == NULL)
-    output_name = optind < argc ? argv[optind++] : "-";
+    output_name = remaining < argc ? argv[remaining++] : "-";
  
    /* Process all input files.  */
    setlocale (LC_CTYPE, "C");
-  if (optind < argc)
+  if (remaining < argc)
      do
-      result = read_input_file (result, argv[optind]);
-    while (++optind < argc);
+      result = read_input_file (result, argv[remaining]);
+    while (++remaining < argc);
    else
      result = read_input_file (NULL, "-");
  
@@ -198,42 +192,69 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
    if (result != NULL)
      write_out (result, output_name, header_name);
  
-  exit (EXIT_SUCCESS);
+  return error_message_count != 0;
  }
  
  
-static void
-usage (int status)
+/* Handle program arguments.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
  {
-  if (status != EXIT_SUCCESS)
-    fprintf (stderr, gettext ("Try `%s --help' for more information.\n"),
-             program_invocation_name);
-  else
+  switch (key)
+    {
+    case 'H':
+      header_name = arg;
+      break;
+    case OPT_NEW:
+      force_new = 1;
+      break;
+    case 'o':
+      output_name = arg;
+      break;
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+  return 0;
+}
+
+
+static char *
+more_help (int key, const char *text, void *input)
+{
+  char *tp = NULL;
+  switch (key)
      {
-      printf(gettext ("\
-Usage: %s [OPTION]... -o OUTPUT-FILE [INPUT-FILE]...\n\
-       %s [OPTION]... [OUTPUT-FILE [INPUT-FILE]...]\n\
-Mandatory arguments to long options are mandatory for short options too.\n\
-  -H, --header        create C header file containing symbol definitions\n\
-  -h, --help          display this help and exit\n\
-      --new           do not use existing catalog, force new output file\n\
-  -o, --output=NAME   write output to file NAME\n\
-  -V, --version       output version information and exit\n\
-If INPUT-FILE is -, input is read from standard input.  If OUTPUT-FILE\n\
-is -, output is written to standard output.\n"),
-          program_invocation_name, program_invocation_name);
-      fputs (gettext ("Report bugs to <bug-glibc@prep.ai.mit.edu>.\n"),
-            stdout);
+    case ARGP_KEY_HELP_EXTRA:
+      /* We print some extra information.  */
+      if (asprintf (&tp, gettext ("\
+For bug reporting instructions, please see:\n\
+%s.\n"), REPORT_BUGS_TO) < 0)
+       return NULL;
+      return tp;
+    default:
+      break;
      }
+  return (char *) text;
+}
  
-  exit (status);
+/* Print the version information.  */
+static void
+print_version (FILE *stream, struct argp_state *state)
+{
+  fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION);
+  fprintf (stream, gettext ("\
+Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions.  There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"), "2014");
+  fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
  }
  
  
  /* The address of this function will be assigned to the hook in the
     error functions.  */
  static void
-error_print ()
+error_print (void)
  {
    /* We don't want the program name to be printed in messages.  Emacs'
       compile.el does not like this.  */
@@ -247,6 +268,12 @@ read_input_file (struct catalog *current, const char *fname)
    char *buf;
    size_t len;
    size_t line_number;
+  wchar_t *wbuf;
+  size_t wbufsize;
+  iconv_t cd_towc = (iconv_t) -1;
+  iconv_t cd_tomb = (iconv_t) -1;
+  wchar_t escape_char = L'\\';
+  char *codeset = NULL;
  
    if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
      {
@@ -264,21 +291,22 @@ read_input_file (struct catalog *current, const char *fname)
    /* If we haven't seen anything yet, allocate result structure.  */
    if (current == NULL)
      {
-      current = (struct catalog *) xmalloc (sizeof (*current));
+      current = (struct catalog *) xcalloc (1, sizeof (*current));
  
-      current->all_sets = NULL;
-      current->total_messages = 0;
-      current->last_set = 0;
-      current->current_set = find_set (current, NL_SETD);
-
-#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_alloc malloc
  #define obstack_chunk_free free
        obstack_init (&current->mem_pool);
+
+      current->current_set = find_set (current, NL_SETD);
      }
  
    buf = NULL;
    len = 0;
    line_number = 0;
+
+  wbufsize = 1024;
+  wbuf = (wchar_t *) xmalloc (wbufsize);
+
    while (!feof (fp))
      {
        int continued;
@@ -296,15 +324,29 @@ read_input_file (struct catalog *current, const char *fname)
           ++line_number;
  
           /* It the line continued?  */
+         continued = 0;
           if (buf[act_len - 1] == '\n')
             {
               --act_len;
-             continued = buf[act_len - 1] == '\\';
-             if (continued)
-               --act_len;
+
+             /* There might be more than one backslash at the end of
+                the line.  Only if there is an odd number of them is
+                the line continued.  */
+             if (act_len > 0 && buf[act_len - 1] == '\\')
+               {
+                 int temp_act_len = act_len;
+
+                 do
+                   {
+                     --temp_act_len;
+                     continued = !continued;
+                   }
+                 while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
+
+                 if (continued)
+                   --act_len;
+               }
             }
-         else
-           continued = 0;
  
           /* Append to currently selected line.  */
           obstack_grow (&current->mem_pool, buf, act_len);
@@ -317,11 +359,33 @@ read_input_file (struct catalog *current, const char *fname)
        used = 0;
        if (this_line[0] == '$')
         {
-         if (isspace (this_line[1]))
-           /* This is a comment line.  Do nothing.  */;
+         if (isblank (this_line[1]))
+           {
+             int cnt = 1;
+             while (isblank (this_line[cnt]))
+               ++cnt;
+             if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
+               /* This is a comment line. Do nothing.  */;
+             else if (codeset != NULL)
+               /* Ignore repeated codeset definitions.  */;
+             else
+               {
+                 int start = cnt + 8;
+                 cnt = start;
+                 while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
+                   ++cnt;
+                 if (cnt != start)
+                   {
+                     int len = cnt - start;
+                     codeset = xmalloc (len + 1);
+                     *((char *) mempcpy (codeset, &this_line[start], len))
+                       = '\0';
+                   }
+               }
+           }
           else if (strncmp (&this_line[1], "set", 3) == 0)
             {
-             int cnt = sizeof ("cnt");
+             int cnt = sizeof ("set");
               int set_number;
               const char *symbol = NULL;
               while (isspace (this_line[cnt]))
@@ -354,7 +418,7 @@ read_input_file (struct catalog *current, const char *fname)
                     }
                   else
                     {
-                     /* We have found seomthing which looks like a
+                     /* We have found something that looks like a
                          correct identifier.  */
                       struct set_list *runp;
  
@@ -402,7 +466,6 @@ this is the first definition"));
           else if (strncmp (&this_line[1], "delset", 6) == 0)
             {
               int cnt = sizeof ("delset");
-             size_t set_number;
               while (isspace (this_line[cnt]))
                 ++cnt;
  
@@ -424,11 +487,8 @@ this is the first definition"));
                     ++cnt;
  
                   if (cnt == start)
-                   {
-                     error_at_line (0, 0, fname, start_line,
-                                    gettext ("illegal set number"));
-                     set_number = 0;
-                   }
+                   error_at_line (0, 0, fname, start_line,
+                                  gettext ("illegal set number"));
                   else
                     {
                       const char *symbol;
@@ -441,7 +501,6 @@ this is the first definition"));
                       /* We have a symbolic set name.  This name must
                          appear somewhere else in the catalogs read so
                          far.  */
-                     set_number = 0;
                       for (runp = current->all_sets; runp != NULL;
                            runp = runp->next)
                         {
@@ -460,12 +519,45 @@ this is the first definition"));
             }
           else if (strncmp (&this_line[1], "quote", 5) == 0)
             {
-             int cnt = sizeof ("quote");
+             char buf[2];
+             char *bufptr;
+             size_t buflen;
+             char *wbufptr;
+             size_t wbuflen;
+             int cnt;
+
+             cnt = sizeof ("quote");
               while (isspace (this_line[cnt]))
                 ++cnt;
+
+             /* We need the conversion.  */
+             if (cd_towc == (iconv_t) -1
+                 && open_conversion (codeset, &cd_towc, &cd_tomb,
+                                     &escape_char) != 0)
+               /* Something is wrong.  */
+               goto out;
+
               /* Yes, the quote char can be '\0'; this means no quote
-                char.  */
-             current->quote_char = this_line[cnt];
+                char.  The function using the information works on
+                wide characters so we have to convert it here.  */
+             buf[0] = this_line[cnt];
+             buf[1] = '\0';
+             bufptr = buf;
+             buflen = 2;
+
+             wbufptr = (char *) wbuf;
+             wbuflen = wbufsize;
+
+             /* Flush the state.  */
+             iconv (cd_towc, NULL, NULL, NULL, NULL);
+
+             iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
+             if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
+               error_at_line (0, 0, fname, start_line,
+                              gettext ("invalid quote character"));
+             else
+               /* Use the converted wide character.  */
+               current->quote_char = wbuf[0];
             }
           else
             {
@@ -482,40 +574,73 @@ this is the first definition"));
        else if (isalnum (this_line[0]) || this_line[0] == '_')
         {
           const char *ident = this_line;
+         char *line = this_line;
           int message_number;
  
           do
-           ++this_line;
-         while (this_line[0] != '\0' && !isspace (this_line[0]));;
-         this_line[0] = '\0';  /* Terminate the identifier.  */
+           ++line;
+         while (line[0] != '\0' && !isspace (line[0]));
+         if (line[0] != '\0')
+           *line++ = '\0';     /* Terminate the identifier.  */
  
-         do
-           ++this_line;
-         while (isspace (this_line[0]));
           /* Now we found the beginning of the message itself.  */
  
           if (isdigit (ident[0]))
             {
               struct message_list *runp;
+             struct message_list *lastp;
  
               message_number = atoi (ident);
  
               /* Find location to insert the new message.  */
               runp = current->current_set->messages;
+             lastp = NULL;
               while (runp != NULL)
                 if (runp->number == message_number)
                   break;
                 else
-                 runp = runp->next;
+                 {
+                   lastp = runp;
+                   runp = runp->next;
+                 }
               if (runp != NULL)
                 {
                   /* Oh, oh.  There is already a message with this
-                    number is the message set.  */
-                 error_at_line (0, 0, fname, start_line,
-                                gettext ("duplicated message number"));
-                 error_at_line (0, 0, runp->fname, runp->line,
-                                gettext ("this is the first definition"));
-                 message_number = 0;
+                    number in the message set.  */
+                 if (runp->symbol == NULL)
+                   {
+                     /* The existing message had its number specified
+                        by the user.  This collision is an error.  */
+                     error_at_line (0, 0, fname, start_line,
+                                    gettext ("duplicated message number"));
+                     error_at_line (0, 0, runp->fname, runp->line,
+                                    gettext ("this is the first definition"));
+                     message_number = 0;
+                   }
+                 else
+                   {
+                     /* Collision was with a number auto-assigned to a
+                        symbolic name.  Renumber that existing message and
+                        move it to the end of the list (if not already there).  */
+                     runp->number = ++current->current_set->last_message;
+
+                     if (runp->next != NULL)
+                       {
+                         struct message_list *endp;
+
+                         if (lastp == NULL)
+                           current->current_set->messages=runp->next;
+                         else
+                           lastp->next=runp->next;
+
+                         endp = runp->next;
+                         while (endp->next != NULL)
+                           endp = endp->next;
+
+                         endp->next = runp;
+                         runp->next = NULL;
+                       }
+                   }
                 }
               ident = NULL;     /* We don't have a symbol.  */
  
@@ -526,10 +651,10 @@ this is the first definition"));
           else if (ident[0] != '\0')
             {
               struct message_list *runp;
-             runp = current->current_set->messages;
  
               /* Test whether the symbolic name was not used for
                  another message in this message set.  */
+             runp = current->current_set->messages;
               while (runp != NULL)
                 if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0)
                   break;
@@ -538,8 +663,8 @@ this is the first definition"));
               if (runp != NULL)
                 {
                   /* The name is already used.  */
-                 error_at_line (0, 0, fname, start_line,
-                                gettext ("duplicated message identifier"));
+                 error_at_line (0, 0, fname, start_line, gettext ("\
+duplicated message identifier"));
                   error_at_line (0, 0, runp->fname, runp->line,
                                  gettext ("this is the first definition"));
                   message_number = 0;
@@ -553,20 +678,105 @@ this is the first definition"));
  
           if (message_number != 0)
             {
+             char *inbuf;
+             size_t inlen;
+             char *outbuf;
+             size_t outlen;
               struct message_list *newp;
+             size_t line_len = strlen (line) + 1;
+             size_t ident_len = 0;
+
+             /* We need the conversion.  */
+             if (cd_towc == (iconv_t) -1
+                 && open_conversion (codeset, &cd_towc, &cd_tomb,
+                                     &escape_char) != 0)
+               /* Something is wrong.  */
+               goto out;
+
+             /* Convert to a wide character string.  We have to
+                interpret escape sequences which will be impossible
+                without doing the conversion if the codeset of the
+                message is stateful.  */
+             while (1)
+               {
+                 inbuf = line;
+                 inlen = line_len;
+                 outbuf = (char *) wbuf;
+                 outlen = wbufsize;
  
-             used = 1; /* Yes, we use the line.  */
+                 /* Flush the state.  */
+                 iconv (cd_towc, NULL, NULL, NULL, NULL);
+
+                 iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
+                 if (inlen == 0)
+                   {
+                     /* The string is converted.  */
+                     assert (outlen < wbufsize);
+                     assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
+                             == L'\0');
+                     break;
+                   }
+
+                 if (outlen != 0)
+                   {
+                     /* Something is wrong with this string, we ignore it.  */
+                     error_at_line (0, 0, fname, start_line, gettext ("\
+invalid character: message ignored"));
+                     goto ignore;
+                   }
+
+                 /* The output buffer is too small.  */
+                 wbufsize *= 2;
+                 wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
+               }
  
               /* Strip quote characters, change escape sequences into
                  correct characters etc.  */
-             normalize_line (fname, start_line, this_line,
-                             current->quote_char);
+             normalize_line (fname, start_line, cd_towc, wbuf,
+                             current->quote_char, escape_char);
+
+             if (ident)
+               ident_len = line - this_line;
+
+             /* Now the string is free of escape sequences.  Convert it
+                back into a multibyte character string.  First free the
+                memory allocated for the original string.  */
+             obstack_free (&current->mem_pool, this_line);
+
+             used = 1; /* Yes, we use the line.  */
+
+             /* Now fill in the new string.  It should never happen that
+                the replaced string is longer than the original.  */
+             inbuf = (char *) wbuf;
+             inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
+
+             outlen = obstack_room (&current->mem_pool);
+             obstack_blank (&current->mem_pool, outlen);
+             this_line = (char *) obstack_base (&current->mem_pool);
+             outbuf = this_line + ident_len;
+             outlen -= ident_len;
+
+             /* Flush the state.  */
+             iconv (cd_tomb, NULL, NULL, NULL, NULL);
+
+             iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
+             if (inlen != 0)
+               {
+                 error_at_line (0, 0, fname, start_line,
+                                gettext ("invalid line"));
+                 goto ignore;
+               }
+             assert (outbuf[-1] == '\0');
+
+             /* Free the memory in the obstack we don't use.  */
+             obstack_blank (&current->mem_pool, -(int) outlen);
+             line = obstack_finish (&current->mem_pool);
  
               newp = (struct message_list *) xmalloc (sizeof (*newp));
               newp->number = message_number;
-             newp->message = this_line;
+             newp->message = line + ident_len;
               /* Remember symbolic name; is NULL if no is given.  */
-             newp->symbol = ident;
+             newp->symbol = ident ? line : NULL;
               /* Remember where we found the character.  */
               newp->fname = fname;
               newp->line = start_line;
@@ -610,11 +820,20 @@ this is the first definition"));
                            gettext ("malformed line ignored"));
         }
  
+    ignore:
        /* We can save the memory for the line if it was not used.  */
        if (!used)
         obstack_free (&current->mem_pool, this_line);
      }
  
+  /* Close the conversion modules.  */
+  iconv_close (cd_towc);
+  iconv_close (cd_tomb);
+  free (codeset);
+
+ out:
+  free (wbuf);
+
    if (fp != stdin)
      fclose (fp);
    return current;
@@ -633,7 +852,7 @@ write_out (struct catalog *catalog, const char *output_name,
    struct obstack string_pool;
    const char *strings;
    size_t strings_size;
-  u_int32_t *array1, *array2;
+  uint32_t *array1, *array2;
    size_t cnt;
    int fd;
  
@@ -711,10 +930,10 @@ write_out (struct catalog *catalog, const char *output_name,
  
    /* Allocate room for all needed arrays.  */
    array1 =
-    (u_int32_t *) alloca (best_size * best_depth * sizeof (u_int32_t) * 3);
-  memset (array1, '\0', best_size * best_depth * sizeof (u_int32_t) * 3);
+    (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
+  memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3);
    array2
-    = (u_int32_t *) alloca (best_size * best_depth * sizeof (u_int32_t) * 3);
+    = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
    obstack_init (&string_pool);
  
    set_run = catalog->all_sets;
@@ -772,11 +991,11 @@ write_out (struct catalog *catalog, const char *output_name,
    /* We always write out the little endian version of the index
       arrays.  */
  #if __BYTE_ORDER == __LITTLE_ENDIAN
-  write (fd, array1, best_size * best_depth * sizeof (u_int32_t) * 3);
-  write (fd, array2, best_size * best_depth * sizeof (u_int32_t) * 3);
+  write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
+  write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
  #elif __BYTE_ORDER == __BIG_ENDIAN
-  write (fd, array2, best_size * best_depth * sizeof (u_int32_t) * 3);
-  write (fd, array1, best_size * best_depth * sizeof (u_int32_t) * 3);
+  write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
+  write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
  #else
  # error Cannot handle __BYTE_ORDER byte order
  #endif
@@ -827,16 +1046,18 @@ write_out (struct catalog *catalog, const char *output_name,
                  #define out.  But we have to take care for the set
                  not having a symbolic name.  */
               if (message_run->symbol != NULL)
-               if (set_run->symbol == NULL)
-                 fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
-                          set_run->number, message_run->symbol,
-                          message_run->number, message_run->fname,
-                          message_run->line);
-               else
-                 fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n",
-                          set_run->symbol, message_run->symbol,
-                          message_run->number, message_run->fname,
-                          message_run->line);
+               {
+                 if (set_run->symbol == NULL)
+                   fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
+                            set_run->number, message_run->symbol,
+                            message_run->number, message_run->fname,
+                            message_run->line);
+                 else
+                   fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n",
+                            set_run->symbol, message_run->symbol,
+                            message_run->number, message_run->fname,
+                            message_run->line);
+               }
  
               message_run = message_run->next;
             }
@@ -866,10 +1087,8 @@ find_set (struct catalog *current, int number)
        result = result->next;
  
    /* Prepare new message set.  */
-  result = (struct set_list *) xmalloc (sizeof (*result));
+  result = (struct set_list *) xcalloc (1, sizeof (*result));
    result->number = number;
-  result->deleted = 0;
-  result->messages = NULL;
    result->next = current->all_sets;
    current->all_sets = result;
  
@@ -880,13 +1099,14 @@ find_set (struct catalog *current, int number)
  /* Normalize given string *in*place* by processing escape sequences
     and quote characters.  */
  static void
-normalize_line (const char *fname, size_t line, char *string, char quote_char)
+normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
+               wchar_t quote_char, wchar_t escape_char)
  {
    int is_quoted;
-  char *rp = string;
-  char *wp = string;
+  wchar_t *rp = string;
+  wchar_t *wp = string;
  
-  if (quote_char != '\0' && *rp == quote_char)
+  if (quote_char != L'\0' && *rp == quote_char)
      {
        is_quoted = 1;
        ++rp;
@@ -894,62 +1114,89 @@ normalize_line (const char *fname, size_t line, char *string, char quote_char)
    else
      is_quoted = 0;
  
-  while (*rp != '\0')
+  while (*rp != L'\0')
      if (*rp == quote_char)
        /* We simply end the string the first time we find an
          unescaped quote character.  */
         break;
-    else if (*rp == '\\')
+    else if (*rp == escape_char)
        {
         ++rp;
-       if (quote_char != '\0' && *rp == quote_char)
+       if (quote_char != L'\0' && *rp == quote_char)
           /* This is an extension to XPG.  */
           *wp++ = *rp++;
         else
           /* Recognize escape sequences.  */
           switch (*rp)
             {
-           case 'n':
-             *wp++ = '\n';
+           case L'n':
+             *wp++ = L'\n';
               ++rp;
               break;
-           case 't':
-             *wp++ = '\t';
+           case L't':
+             *wp++ = L'\t';
               ++rp;
               break;
-           case 'v':
-             *wp++ = '\v';
+           case L'v':
+             *wp++ = L'\v';
               ++rp;
               break;
-           case 'b':
-             *wp++ = '\b';
+           case L'b':
+             *wp++ = L'\b';
               ++rp;
               break;
-           case 'r':
-             *wp++ = '\r';
+           case L'r':
+             *wp++ = L'\r';
               ++rp;
               break;
-           case 'f':
-             *wp++ = '\f';
+           case L'f':
+             *wp++ = L'\f';
               ++rp;
               break;
-           case '\\':
-             *wp++ = '\\';
-             ++rp;
-             break;
-           case '0' ... '7':
+           case L'0' ... L'7':
               {
-               int number = *rp++ - '0';
-               while (number <= (255 / 8) && *rp >= '0' && *rp <= '7')
+               int number;
+               char cbuf[2];
+               char *cbufptr;
+               size_t cbufin;
+               wchar_t wcbuf[2];
+               char *wcbufptr;
+               size_t wcbufin;
+
+               number = *rp++ - L'0';
+               while (number <= (255 / 8) && *rp >= L'0' && *rp <= L'7')
                   {
                     number *= 8;
-                   number += *rp++ - '0';
+                   number += *rp++ - L'0';
                   }
-               *wp++ = (char) number;
+
+               cbuf[0] = (char) number;
+               cbuf[1] = '\0';
+               cbufptr = cbuf;
+               cbufin = 2;
+
+               wcbufptr = (char *) wcbuf;
+               wcbufin = sizeof (wcbuf);
+
+               /* Flush the state.  */
+               iconv (cd, NULL, NULL, NULL, NULL);
+
+               iconv (cd, &cbufptr, &cbufin, &wcbufptr, &wcbufin);
+               if (cbufptr != &cbuf[2] || (wchar_t *) wcbufptr != &wcbuf[2])
+                 error_at_line (0, 0, fname, line,
+                                gettext ("invalid escape sequence"));
+               else
+                 *wp++ = wcbuf[0];
               }
               break;
             default:
-             /* Simply ignore the backslash character.  */
+             if (*rp == escape_char)
+               {
+                 *wp++ = escape_char;
+                 ++rp;
+               }
+             else
+               /* Simply ignore the backslash character.  */;
               break;
             }
        }
@@ -959,10 +1206,10 @@ normalize_line (const char *fname, size_t line, char *string, char quote_char)
    /* If we saw a quote character at the beginning we expect another
       one at the end.  */
    if (is_quoted && *rp != quote_char)
-    error (0, 0, fname, line, gettext ("unterminated message"));
+    error_at_line (0, 0, fname, line, gettext ("unterminated message"));
  
    /* Terminate string.  */
-  *wp = '\0';
+  *wp = L'\0';
    return;
  }
  
@@ -975,22 +1222,21 @@ read_old (struct catalog *catalog, const char *file_name)
    int last_set = -1;
    size_t cnt;
  
-  old_cat_obj.status = closed;
-  old_cat_obj.cat_name = file_name;
-
    /* Try to open catalog, but don't look through the NLSPATH.  */
-  __open_catalog (&old_cat_obj, 0);
-
-  if (old_cat_obj.status != mmaped && old_cat_obj.status != malloced)
-    if (errno == ENOENT)
-      /* No problem, the catalog simply does not exist.  */
-      return;
-    else
-      error (EXIT_FAILURE, errno, gettext ("while opening old catalog file"));
+  if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0)
+    {
+      if (errno == ENOENT)
+       /* No problem, the catalog simply does not exist.  */
+       return;
+      else
+       error (EXIT_FAILURE, errno,
+              gettext ("while opening old catalog file"));
+    }
  
    /* OK, we have the catalog loaded.  Now read all messages and merge
       them.  When set and message number clash for any message the new
-     one is used.  */
+     one is used.  If the new one is empty it indicates that the
+     message should be deleted.  */
    for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
      {
        struct message_list *message, *last;
@@ -999,7 +1245,7 @@ read_old (struct catalog *catalog, const char *file_name)
         /* No message in this slot.  */
         continue;
  
-      if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (u_int32_t) last_set)
+      if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set)
         {
           last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1;
           set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1);
@@ -1009,14 +1255,14 @@ read_old (struct catalog *catalog, const char *file_name)
        message = set->messages;
        while (message != NULL)
         {
-         if ((u_int32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
+         if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
             break;
           last = message;
           message = message->next;
         }
  
        if (message == NULL
-         || (u_int32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
+         || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
         {
           /* We have found a message which is not yet in the catalog.
              Insert it at the right position.  */
@@ -1038,5 +1284,74 @@ read_old (struct catalog *catalog, const char *file_name)
  
           ++catalog->total_messages;
         }
+      else if (*message->message == '\0')
+       {
+         /* The new empty message has overridden the old one thus
+            "deleting" it as required.  Now remove the empty remains. */
+         if (last == NULL)
+           set->messages = message->next;
+         else
+           last->next = message->next;
+       }
+    }
+}
+
+/* Open the iconv descriptors used to convert between CODESET and
+   wchar_t: *CD_TOWCP converts from CODESET to "WCHAR_T", *CD_TOMBP
+   the other way round.  If CODESET is NULL the codeset reported by
+   nl_langinfo for the locale selected with setlocale (LC_ALL, "") is
+   used.  *ESCAPE_CHARP receives the wide character the byte 0x5c is
+   converted to, or L'\\' if that conversion fails.  Returns 0 on
+   success (even if the escape character had to fall back) and 1 if
+   one of the conversion descriptors could not be opened.  */
+static int
+open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
+                wchar_t *escape_charp)
+{
+  char buf[2];
+  char *bufptr;
+  size_t bufsize;
+  wchar_t wbuf[2];
+  char *wbufptr;
+  size_t wbufsize;
+
+  /* If the input file does not specify the codeset use the locale's.
+     The locale is switched only long enough to query the codeset name
+     and is set back to "C" right afterwards.  */
+  if (codeset == NULL)
+    {
+      setlocale (LC_ALL, "");
+      codeset = nl_langinfo (CODESET);
+      setlocale (LC_ALL, "C");
+    }
+
+  /* Get the conversion modules, one for each direction.  If either
+     one is missing, report it and fail with return value 1.
+     NOTE(review): on that path only *CD_TOWCP is closed; a descriptor
+     successfully opened in *CD_TOMBP is left open -- confirm whether
+     callers rely on this or whether it is a leak.  */
+  *cd_towcp = iconv_open ("WCHAR_T", codeset);
+  *cd_tombp = iconv_open (codeset, "WCHAR_T");
+  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
+    {
+      error (0, 0, gettext ("conversion modules not available"));
+      if (*cd_towcp != (iconv_t) -1)
+       iconv_close (*cd_towcp);
+
+      return 1;
      }
+
+  /* One special case for historical reasons is the backslash
+     character.  In some codesets the byte value 0x5c is not mapped to
+     U005c in Unicode.  These charsets then don't have a backslash
+     character at all.  Therefore we have to live with whatever the
+     codeset provides and recognize, instead of the U005c, the character
+     the byte value 0x5c is mapped to.  */
+  buf[0] = '\\';
+  buf[1] = '\0';
+  bufptr = buf;
+  bufsize = 2;
+
+  wbufptr = (char *) wbuf;
+  wbufsize = sizeof (wbuf);
+  /* Convert both bytes (0x5c and the terminating NUL).  iconv takes
+     byte pointers and byte counts, hence the cast of WBUF above.  The
+     conversion is accepted only if no input bytes and no output space
+     remain, i.e. the two bytes produced exactly two wide characters.  */
+  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
+  if (bufsize != 0 || wbufsize != 0)
+    {
+      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
+        on with using U005c.  This is only reported, it does not make
+        the function fail.  */
+      error (0, 0, gettext ("cannot determine escape character"));
+      *escape_charp = L'\\';
+    }
+  else
+    *escape_charp = wbuf[0];
+
+  return 0;
  }