Apply big patch from Ørn E. Hansen.

author Jim Meyering <jim@meyering.net>

Mon, 13 Oct 1997 02:08:01 +0000 (02:08 +0000)

committer Jim Meyering <jim@meyering.net>

Mon, 13 Oct 1997 02:08:01 +0000 (02:08 +0000)
author Jim Meyering <jim@meyering.net>
Mon, 13 Oct 1997 02:08:01 +0000 (02:08 +0000)
committer Jim Meyering <jim@meyering.net>
Mon, 13 Oct 1997 02:08:01 +0000 (02:08 +0000)
diff --git a/src/sort.c b/src/sort.c

index c094bc5fafa27ac62ff6c49c1f806376c3ebdd85..67b9a4541f16fd5be56b4758f14ed514a22a79d3 100644 (file)
--- a/src/sort.c
+++ b/src/sort.c
@@ -19,6 +19,16 @@
     The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
     or (US mail) as Mike Haertel c/o Free Software Foundation. */
  
+/* NLS addition added 1997 by Ørn E. Hansen.
+
+   Who can be reached at (e-mail)  oehansen@daimi.aau.dk,
+                                  oe.hansen@halmstad.mail.telia.com
+
+   The additions made to allow NLS for sorting, is free software
+   and can be freely distributed or modified, under the GNU general
+   public licence as published by the Free Software Foundation. */
+
+
  #include <config.h>
  
  /* Get isblank from GNU libc.  */
@@ -36,6 +46,13 @@
  #include "error.h"
  #include "xstrtod.h"
  
+#ifdef ENABLE_NLS
+/* This include may need extra care on other systems; it is known to */
+/* apply to Debian Linux.  It is needed for _NL_ITEM, which is used   */
+/* to get the abbreviated month names.                                */
+#include <langinfo.h>
+#endif   /* NLS */
+
  #ifdef HAVE_LIMITS_H
  # include <limits.h>
  #else
@@ -52,6 +69,8 @@ void free ();
  /* Undefine, to avoid warning about redefinition on some systems.  */
  #undef min
  #define min(a, b) ((a) < (b) ? (a) : (b))
+#undef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
  
  #define UCHAR_LIM (UCHAR_MAX + 1)
  #define UCHAR(c) ((unsigned char) (c))
@@ -67,6 +86,52 @@ void free ();
     status code greater than 1.  */
  #define SORT_FAILURE 2
  
+/* Some character constants used in the program.  It is better to   */
+/* define these globally; it makes the program a little more readable. */
+static unsigned char decimal_point = '.';
+static unsigned char th_sep        = ',';
+static unsigned char *nls_grouping = "\003\003";
+
+#define FLOATING_POINT  '.'
+#define FLOATING_COMMA  ','
+#define NEGATIVE_SIGN   '-'
+#define NUMERIC_ZERO    '0'
+
+#define CHARS_IN_ABM    3
+
+static int need_locale = 0;            /* This is "C" locale, need another? */
+static int nls_fraction_found = 1;     /* Should we look for decimal point? */
+static int nls_month_found = 1;        /* Look for month notations in text? */
+
+/* If native language support is requested, make a 1-1 map to the */
+/* locale character map, otherwise ensure normal behaviour        */
+#ifdef ENABLE_NLS
+
+#define NLS_KEY_LIMIT            30        /* Keys have limited length */
+#define NLS_NUM_MONTHS           12        /* 12 months in a year      */
+#define NLS_MAX_GROUPS            8        /* Maximum number of groups */
+
+/* A string with one character, to enforce char collation         */
+#define NLS_ONE_CHARACTER_STRING " "
+
+/* Two buffers, specifically used to get a one-to-one map of the table */
+/* used under inittables.                                              */
+unsigned char *nls_temp_buf1, *nls_temp_buf2;
+
+/* Create a map, that maps the characters in the "C" locale */
+/* 1 - 1 to the locale view of character order              */
+unsigned char nls_locale_map[UCHAR_LIM];
+
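+/* A definition to map each character through the above translation */
+/* table, during sort.  NOTE: as defined below it only converts its */
+/* argument to unsigned char and does not consult nls_locale_map.   */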
+#define NLS_MAP(c)  UCHAR(c)
+
+#else
+
+/* No NLS the character value itself, represents the sorting order */
+#define NLS_MAP(c)  UCHAR(c)
+#endif
+
  /* The kind of blanks for '-b' to skip in various options. */
  enum blanktype { bl_start, bl_end, bl_both };
  
@@ -143,7 +208,7 @@ static char fold_toupper[UCHAR_LIM];
  
  /* Table mapping 3-letter month names to integers.
     Alphabetic order allows binary search. */
-static struct month const monthtab[] =
+static struct month us_monthtab[] =
  {
    {"APR", 4},
    {"AUG", 8},
@@ -159,6 +224,23 @@ static struct month const monthtab[] =
    {"SEP", 9}
  };
  
+#ifdef ENABLE_NLS
+
+/* Locale may have a different idea of month names   */
+static struct month nls_monthtab[NLS_NUM_MONTHS];
+static int nls_months_collide[NLS_NUM_MONTHS+1];
+
+/* Numeric keys, to search for numeric format.
+   Each node refers to one sort key and the nodes are chained through
+   `next'; nls_keyhead is the entry point of the chain and stays NULL
+   until a node has been added.  */
+static struct nls_keyfield {
+  struct keyfield *key;         /* the sort key this node refers to */
+  struct nls_keyfield *next;    /* following node in the chain */
+} *nls_keyhead = NULL;
+
+#endif
+
+/* Which month table to use in the program, default C */
+static struct month *monthtab = us_monthtab;
+
  /* During the merge phase, the number of files to merge at once. */
  #define NMERGE 16
  
@@ -246,7 +328,7 @@ for that key.  If no key given, use the entire line as key.  With no\n\
  FILE, or when FILE is -, read standard input.\n\
  ")
               , DEFAULT_TMPDIR);
-      puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
+      puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu"));
      }
    /* Don't use EXIT_FAILURE here in case it is defined to be 1.
       POSIX requires that sort return 1 IFF invoked with -c and
@@ -445,8 +527,39 @@ zaptemp (char *name)
      }
  }
  
+#ifdef ENABLE_NLS
  /* Initialize the character class tables. */
  
+/* qsort comparison function for the locale month table: order two
+   `struct month' entries by collating their names with strcoll.
+   The parameters are `const void *', as qsort requires, so that the
+   call qsort makes through the comparator pointer has a matching
+   function type; each argument must point to a `struct month'.
+   Returns <0, 0 or >0 in the manner of strcoll.  */
+static int nls_sort_month_comp(const void *m1, const void *m2)
+{
+  const struct month *left = (const struct month *) m1;
+  const struct month *right = (const struct month *) m2;
+
+  return strcoll(left->name, right->name);
+}
+
+/* strncoll(s1, s2, l)
+   Compare the first L bytes of S1 and S2.  Callers pass the smaller
+   of the two string lengths as L.
+   When need_locale is set there is no length-limited strcoll to call,
+   so both strings are temporarily NUL-terminated at offset L, compared
+   with strcoll, and then restored.  Consequently S1[L] and S2[L] must
+   be writable bytes.  Otherwise the bytes are compared with memcmp.
+   Returns <0, 0 or >0 like strcoll/memcmp.  */
+static int strncoll(unsigned char *s1, unsigned char *s2, int l)
+{
+  register int diff = 0;
+
+  if (need_locale) {
+    /* Remember the bytes that the temporary terminators overwrite. */
+    register unsigned char n1=s1[l],n2=s2[l];
+
+    s1[l]=s2[l]=0;
+    /* strcoll takes `const char *'; convert explicitly instead of   */
+    /* relying on an implicit unsigned char * -> char * conversion.  */
+    diff = strcoll((const char *) s1, (const char *) s2);
+    s1[l]=n1;
+    s2[l]=n2;
+  } else
+    diff = memcmp(s1, s2, l);
+  return diff;
+}
+
+#endif  /* NLS */
+
  static void
  inittables (void)
  {
@@ -465,6 +578,33 @@ inittables (void)
        else
         fold_toupper[i] = i;
      }
+
+#ifdef ENABLE_NLS
+  /* If We're not in the "C" locale, we gotta read in different */
+  /* names for months.                                          */
+  if (need_locale) {
+    unsigned char *s;
+    int j;
+    int (*comp)() = nls_sort_month_comp;
+
+    nls_months_collide[0] = 1;  /* if an error, look again       */
+    for (i = 0; i < NLS_NUM_MONTHS; i++) {
+      s = nl_langinfo(_NL_ITEM(LC_TIME, ABMON_1+us_monthtab[i].val-1));
+      nls_monthtab[i].name = strdup(s);
+      nls_monthtab[i].val  = us_monthtab[i].val;
+
+      /* It has been pointed out, that abreviated month names    */
+      /* may be longer than the usual 3 characters               */
+      for(j=0;j<strlen(s);j++) nls_monthtab[i].name[j] = fold_toupper[s[j]];
+      nls_months_collide[nls_monthtab[i].val] = (strncmp(nls_monthtab[i].name, us_monthtab[i].name, CHARS_IN_ABM) == 0);
+    }
+    /* Now quicksort the month table (should be sorted already!) */
+    /* However, another locale doesn't rule out the possibility  */
+    /* of a different order of month names.                      */
+    qsort((void *)nls_monthtab, NLS_NUM_MONTHS, sizeof(struct month), comp);
+    monthtab = nls_monthtab;
+  }
+#endif     /* NLS */
  }
  
  /* Initialize BUF, allocating ALLOC bytes initially. */
@@ -754,13 +894,86 @@ findlines (struct buffer *buf, struct lines *lines)
     should begin with a decimal point followed immediately by the digits
     of the fraction.  Strings not of this form are considered to be zero. */
  
+/* The goal here, is to take two numbers a and b... compare these
+   in parallel.  Instead of converting each, and then comparing the
+   outcome.  Most likely stopping the comparison before the conversion
+   is complete.  The algorithm used, in the old sort:
+
+   Algorithm: fraccompare
+   Action   : compare two decimal fractions
+   accepts  : char *a, char *b
+   returns  : -1 if a<b, 0 if a=b, 1 if a>b.
+   implement:
+
+   if *a == decimal_point AND *b == decimal_point
+     find first character different in a and b.
+     if both are digits, return the difference *a - *b.
+     if *a is a digit
+       skip past zeroes
+       if digit return 1, else 0
+     if *b is a digit
+       skip past zeroes
+       if digit return -1, else 0
+   if *a is a decimal_point
+     skip past decimal_point and zeroes
+     if digit return 1, else 0
+   if *b is a decimal_point
+     skip past decimal_point and zeroes
+     if digit return -1, else 0
+   return 0
+
+   As can be clearly seen, the above implementation duplicates code,
+   and thus there is room for improvement:
+      the difference in the code for a and b is solved by using a
+      reference s, assigned to either a or b, and using n
+      to denote the return value.
+      the difference between the start being a digit or
+      the decimal point is solved by testing whether either is
+      a decimal point, or whether the other is a digit...
+
+   if *a or *b is a decimal_point
+      skip all chars where *a == *b
+      if *a and *b are digits return *a - *b
+      s is b, and return code is -1
+      if *a is a digit or *a is a decimal_point, then s is a, return code 1
+      skip decimal_point in s
+      skip zeroes in s
+      if *s is a digit, return n
+    return 0                                                          */
+
+#ifdef ENABLE_NLS
+
+/* Compare the fractional parts of two numbers (NLS version).
+   Unless *A or *B is the decimal point, 0 is returned at once.
+   Returns a negative value if A's fraction is the smaller one, 0 if
+   the fractions are equal and a positive value if A's is the larger.
+   Bytes are converted with UCHAR before being compared with
+   decimal_point, which is an unsigned char: a plain (possibly signed)
+   char holding an 8-bit decimal point would otherwise never compare
+   equal to it.
+   Side effect: nls_fraction_found is set to 1 if it was 0.  */
+static int fraccompare(register const char *a, register const char *b)
+{
+  register const char *s;
+  int n = -1;
+
+  if (!nls_fraction_found) nls_fraction_found=1;
+  if (UCHAR(*a) == decimal_point || UCHAR(*b) == decimal_point) {
+    /* Both start with the same character (the decimal point): skip it
+       and every digit the two fractions have in common.  */
+    if (*a == *b)
+      do {
+       ++a, ++b;
+      } while (*a == *b && ISDIGIT(*a));
+    /* Two different digits at the first differing position decide.  */
+    if (ISDIGIT(*a) && ISDIGIT(*b))
+      return (*a) - (*b);
+    /* At most one side has a digit here.  S is the side to examine and
+       N the result to give if S still holds a nonzero digit.  */
+    s = b;
+    if (UCHAR(*a)==decimal_point || (ISDIGIT(*a) && UCHAR(*b)!=decimal_point))
+      s = a, n=1;
+    if (UCHAR(*s) == decimal_point) ++s;
+    while (*s == NUMERIC_ZERO) ++s;
+    if (ISDIGIT(*s)) return n;
+  }
+  return 0;
+}
+
+#else
  static int
  fraccompare (register const char *a, register const char *b)
  {
    register int tmpa = *a;
    register int tmpb = *b;
  
-  if (tmpa == '.' && tmpb == '.')
+  if (tmpa == decimal_point && tmpb == decimal_point)
      {
        do
         tmpa = *++a, tmpb = *++b;
@@ -769,15 +982,15 @@ fraccompare (register const char *a, register const char *b)
         return tmpa - tmpb;
        if (ISDIGIT (tmpa))
         {
-         while (tmpa == '0')
+         while (tmpa == NUMERIC_ZERO)
             tmpa = *++a;
           if (ISDIGIT (tmpa))
             return 1;
           return 0;
         }
-      if (ISDIGIT (tmpb))
+      if (digits[tmpb])
         {
-         while (tmpb == '0')
+         while (tmpb == NUMERIC_ZERO)
             tmpb = *++b;
           if (ISDIGIT (tmpb))
             return -1;
@@ -785,31 +998,224 @@ fraccompare (register const char *a, register const char *b)
         }
        return 0;
      }
-  else if (tmpa == '.')
+  else if (tmpa == decimal_point)
      {
        do
         tmpa = *++a;
-      while (tmpa == '0');
+      while (tmpa == NUMERIC_ZERO);
        if (ISDIGIT (tmpa))
         return 1;
        return 0;
      }
-  else if (tmpb == '.')
+  else if (tmpb == decimal_point)
      {
        do
         tmpb = *++b;
-      while (tmpb == '0');
+      while (tmpb == NUMERIC_ZERO);
        if (ISDIGIT (tmpb))
         return -1;
        return 0;
      }
    return 0;
  }
+#endif
  
  /* Compare strings A and B as numbers without explicitly converting them to
     machine numbers.  Comparatively slow for short strings, but asymptotically
     hideously fast. */
  
+/* The code here is like the above... continuous recurrence of the
+   same code... improved 15-JAN-1997 in connection with native language
+   support */
+
+#ifdef ENABLE_NLS
+
+/* Decide the kind of fraction the program will use.
+   CH is the separator that has been recognised as a decimal point.
+   Nothing is changed when the decision was already made or when CH is
+   the current decimal_point.  Otherwise decimal_point becomes CH and
+   th_sep becomes its counterpart ('.' <-> ','), or NUL when CH is
+   neither of the two.
+   Always marks the decision as made and returns 1.  */
+static int nls_set_fraction(register unsigned char ch)
+{
+  if (!nls_fraction_found && ch != decimal_point)
+    {
+      switch (ch)
+        {
+        case FLOATING_POINT:            /* US style */
+          th_sep = FLOATING_COMMA;
+          break;
+        case FLOATING_COMMA:            /* EU style */
+          th_sep = FLOATING_POINT;
+          break;
+        default:                        /* Alien    */
+          th_sep = '\0';
+          break;
+        }
+      decimal_point = ch;
+    }
+  nls_fraction_found = 1;
+  return nls_fraction_found;
+}
+
+/* Look for a fraction
+   It ain't as simple as it looks... however, consider a number:
+      1.234,00
+      1,234.00
+   It's easy to tell which is a decimal point, and which isn't.  We use
+   the grouping information to find out how many digits are grouped together
+   for the thousands separator.
+
+   The idea here is to use the grouping information... but not to
+   spend time verifying the groups... not too much time, anyway.
+   So, a number presented to us as:
+      1.234.567,89
+   will be taken and separated into different groups, separated by a
+   separator character (decimal point or thousands separator).
+      {1,234,567}
+   These are the groups of digits that lead to a separator character,
+   and then the trailing group is added:
+      {1,234,567,89}
+   resulting in 4 groups of numbers.  If the resulting number of groups
+   is none, or just 1... this is not enough to decide anything about
+   the decimal point.  We need at least two for that.  With two groups
+   we have at least one separator.  That separator can be a decimal
+   point, or a thousands separator... if it is a thousands separator
+   the number of digits in the last group will comply with the first
+   rule in the grouping rule for numeric values, i.e.
+      |{89}| = grouping[0]
+   If so, and there are only two groups of numbers, the value cannot
+   be determined.  If there are three or more numbers, the separator
+   separating the groups is checked.  If these are the same, the
+   character is determined to be a thousands separator.  If they are
+   not the same, the last separator is determined to be a decimal
+   point.  If, when checking the grouping rules, we find out that there
+   are no grouping rules defined (either the grouping rules pointer is
+   NULL or the first grouping number is 0), then the locale format is used.
+
+   We try to take advantage of a special situation.  If the trailing
+   group, the one that normally should be the fractional part, turns
+   out to have the same length as the thousands separator rule says,
+   casting doubt on whether it follows a decimal point, we look at the
+   group before that, i.e. with a two group form:
+     {1234,567}
+   where the grouping rule is 3;3... we take a look at group 1, and if
+   we find that |{1234}| > the larger of the first two grouping rules, then
+   the separator has to be a decimal point...
+   */
+
+/* Examine the number in the text S..E (E is one past the last byte)
+   and try to decide from it which separator is the decimal point.
+   groups[k] holds the number of digits in front of separator k and
+   the final entry holds the digits after the last separator.
+   Returns 0 when the text does not allow a decision (fewer than two
+   groups, too many groups, or an ambiguous two-group number).
+   Otherwise the decision is recorded, through nls_set_fraction or
+   directly in nls_fraction_found, and 1 is returned.  */
+static int look_for_fraction(unsigned char *s, unsigned char *e)
+{
+  register unsigned char *p;
+  register unsigned char n=0;
+  /* Entry 0 is never used and one further entry is needed for the
+     trailing group, hence NLS_MAX_GROUPS + 1 elements.  */
+  unsigned short groups[NLS_MAX_GROUPS + 1];
+
+  /* skip blanks and signs, without running past the end of the text */
+  while(s < e && (blanks[*s] || *s == NEGATIVE_SIGN)) s++;
+  /* groups = {}, n = 0.  The scan starts at the first character of the
+     number itself; starting at a skipped blank would end it at once.  */
+  for(p=s;p < e;p++) {
+    /* groups[n]={number of digits leading to separator n}
+              n = number of separators so far */
+    if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT) {
+      if (++n >= NLS_MAX_GROUPS) return 0; /* WOW! BIG Number... */
+      groups[n] = (short)(p - s), s=p+1;
+    } else if (!ISDIGIT(*p)) break;
+    /* mem[s..p]=digits only */
+  }
+  /* n = number of separators in s..e */
+  groups[++n]=(short)(p - s);
+  /* n = groups in the number */
+  if (n <= 1) return 0;   /* Only one group of numbers... not enough */
+  p = nls_grouping;
+  /* p = address of group rules
+     s = address of next character after separator */
+  s = s - 1;  /* s = address of last separator */
+  if (p && *p) {
+    /* a legal trailing group, iff groups[n] == first rule */
+    if (groups[n] != (short)*p) return nls_set_fraction(*s);
+    if (n == 2) { /* Only two groups */
+      if (groups[n-1] > max(p[0],p[1]))
+       return nls_set_fraction(*s);
+      return 0;
+    }
+    /* if the separators are the same, it's a thousands separator.  The
+       previous separator sits in front of the groups[n-1] digits that
+       precede the last separator.  */
+    if (*s != *(s - groups[n-1] - 1))
+      return nls_set_fraction(*s);
+    /* s[0] = thousands separator */
+    if (*s == FLOATING_COMMA)
+      return nls_set_fraction(FLOATING_POINT);
+    return nls_fraction_found=1;
+  } else { /* no grouping allowed here, last separator IS decimal point */
+    return nls_set_fraction(*s);
+  }
+}
+
+/* Compare the numbers at the start of the strings A and B without
+   converting them to machine numbers (NLS version).  Leading blanks
+   are skipped, a '-' sign is honoured, thousands separators (th_sep)
+   met in the integer part are skipped and the part after
+   decimal_point is compared by fraccompare.
+   Returns a negative value if A < B, 0 if they are equal and a
+   positive value if A > B.  */
+static int
+numcompare (register const unsigned char *a, register const unsigned char *b)
+{
+  int ret_code = 1;  /* normal return status, see later in code */
+  int diff     = 0;  /* difference between two digits           */
+
+  while (blanks[*a]) ++a;
+  while (blanks[*b]) ++b;
+
+  /* next character in a,b is non-blank */
+  if ((*a == NEGATIVE_SIGN || *b == NEGATIVE_SIGN) && *a != *b) {
+    /* a < 0, or b < 0, but not both */
+    if (*a == NEGATIVE_SIGN)      ret_code = -1, ++a; /* a looks < b */
+    else if (*b == NEGATIVE_SIGN) ret_code =  1, ++b; /* b looks < a */
+    /* bypass zeroes, decimal points, and thousand sep in a & b */
+    while (*a == NUMERIC_ZERO ||(th_sep && *a == th_sep)|| *a == decimal_point)
+      ++a;
+    while (*b == NUMERIC_ZERO ||(th_sep && *b == th_sep)|| *b == decimal_point)
+      ++b;
+    if (ISDIGIT(*a) || ISDIGIT(*b))
+      /* here, either a or b or both are digits
+        if a and b are digits, the signed one is the lesser.
+        if a is a digit, and not b.. it means b==0, and if b==0
+        then either is signed: if b is signed then -0 < a
+        or if a is signed then -a < 0.  The ret_code is already set
+        to mark that the signed number is the lesser, so we just
+        return that number here.                                    */
+      return ret_code;
+
+    /* *a and *b are neither digits, they are equal -0 == +0 */
+    return 0;
+  } else {
+    /* either both numbers are signed, or both are not-signed */
+    if (*a == NEGATIVE_SIGN) ++a, ++b, ret_code=-1;
+    /* if both are signed, then remember -100 < -10 (ret_code reversed!) */
+
+    /* Skip any leading zeroes */
+    while (*a == NUMERIC_ZERO) ++a;
+    while (*b == NUMERIC_ZERO) ++b;
+
+continue_thousands:
+
+    /* skip all equal digits */
+    while (ISDIGIT(*a) && ISDIGIT(*b) && *a == *b)
+      a++, b++;
+
+    /* Here, we have either different digits, or possible fractions
+       or thousand separators. */
+
+    if (ISDIGIT(*a) && ISDIGIT(*b)) {
+      /* Only the first (most significant) differing digit counts; the
+         scan goes on because the longer integer part still wins.  */
+      if (diff == 0)
+       diff = ((*a) - (*b));
+      a++, b++;
+      goto continue_thousands;
+    }
+
+    /* now, here either may be a fraction, or a thousand separator...
+       or both.                                                        */
+    /* We've decided what are decimal_points, and what are thousands sep */
+    if ((th_sep != 0) && (*a == th_sep || *b == th_sep)) {
+      if (*a == th_sep) ++a;
+      if (*b == th_sep) ++b;
+      goto continue_thousands;     /* Ugly, but better than a while(1) */
+    }
+
+    if (ISDIGIT(*a)) return ret_code *  1; /* a has more digits than b */
+    if (ISDIGIT(*b)) return ret_code * -1; /* b has more digits than a */
+
+    /* now, we should have the fractions solved */
+    if ((diff == 0) && (*a == decimal_point || *b == decimal_point))
+      return ret_code * fraccompare((const char *) a, (const char *) b);
+
+    /* Integer parts of equal length: the first differing digit decides,
+       reversed when both numbers are negative (-12 > -13).  */
+    return ret_code * diff;
+  }
+}
+#else
  static int
  numcompare (register const char *a, register const char *b)
  {
@@ -823,48 +1229,47 @@ numcompare (register const char *a, register const char *b)
    while (blanks[tmpb])
      tmpb = UCHAR (*++b);
  
-  if (tmpa == '-')
+  if (tmpa == NEGATIVE_SIGN)
      {
        do
-       tmpa = *++a;
-      while (tmpa == '0');
-      if (tmpb != '-')
+       tmpa = UCHAR (*++a);
+      while (tmpa == NUMERIC_ZERO);
+      if (tmpb != NEGATIVE_SIGN)
         {
-         if (tmpa == '.')
+         if (tmpa == decimal_point)
             do
               tmpa = *++a;
-           while (tmpa == '0');
+           while (tmpa == NUMERIC_ZERO);
           if (ISDIGIT (tmpa))
             return -1;
-         while (tmpb == '0')
-           tmpb = *++b;
-         if (tmpb == '.')
+         while (tmpb == NUMERIC_ZERO)
+           tmpb = UCHAR (*++b);
+         if (tmpb == decimal_point)
             do
               tmpb = *++b;
-           while (tmpb == '0');
+           while (tmpb == NUMERIC_ZERO);
           if (ISDIGIT (tmpb))
             return -1;
           return 0;
         }
        do
-       tmpb = *++b;
-      while (tmpb == '0');
+       tmpb = UCHAR (*++b);
+      while (tmpb == NUMERIC_ZERO);
  
-      while (tmpa == tmpb && ISDIGIT (tmpa))
-       tmpa = *++a, tmpb = *++b;
+      while (tmpa == tmpb && digits[tmpa])
+       tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
  
-      if ((tmpa == '.' && !ISDIGIT (tmpb))
-         || (tmpb == '.' && !ISDIGIT (tmpa)))
+      if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa)))
         return -fraccompare (a, b);
  
        if (ISDIGIT (tmpa))
-       for (loga = 1; ISDIGIT (*++a); ++loga)
+       for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
           ;
        else
         loga = 0;
  
        if (ISDIGIT (tmpb))
-       for (logb = 1; ISDIGIT (*++b); ++logb)
+       for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
           ;
        else
         logb = 0;
@@ -877,49 +1282,48 @@ numcompare (register const char *a, register const char *b)
  
        return tmpb - tmpa;
      }
-  else if (tmpb == '-')
+  else if (tmpb == NEGATIVE_SIGN)
      {
        do
-       tmpb = *++b;
-      while (tmpb == '0');
-      if (tmpb == '.')
+       tmpb = UCHAR (*++b);
+      while (tmpb == NUMERIC_ZERO);
+      if (tmpb == decimal_point)
         do
           tmpb = *++b;
-       while (tmpb == '0');
+       while (tmpb == NUMERIC_ZERO);
        if (ISDIGIT (tmpb))
         return 1;
-      while (tmpa == '0')
-       tmpa = *++a;
-      if (tmpa == '.')
+      while (tmpa == NUMERIC_ZERO)
+       tmpa = UCHAR (*++a);
+      if (tmpa == decimal_point)
         do
-         tmpa = *++a;
-       while (tmpa == '0');
+         tmpa = UCHAR (*++a);
+       while (tmpa == NUMERIC_ZERO);
        if (ISDIGIT (tmpa))
         return 1;
        return 0;
      }
    else
      {
-      while (tmpa == '0')
-       tmpa = *++a;
-      while (tmpb == '0')
-       tmpb = *++b;
+      while (tmpa == NUMERIC_ZERO)
+       tmpa = UCHAR (*++a);
+      while (tmpb == NUMERIC_ZERO)
+       tmpb = UCHAR (*++b);
  
        while (tmpa == tmpb && ISDIGIT (tmpa))
-       tmpa = *++a, tmpb = *++b;
+       tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
  
-      if ((tmpa == '.' && !ISDIGIT (tmpb))
-         || (tmpb == '.' && !ISDIGIT (tmpa)))
+      if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa)))
         return fraccompare (a, b);
  
        if (ISDIGIT (tmpa))
-       for (loga = 1; ISDIGIT (*++a); ++loga)
+       for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
           ;
        else
         loga = 0;
  
        if (ISDIGIT (tmpb))
-       for (logb = 1; ISDIGIT (*++b); ++logb)
+       for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
           ;
        else
         logb = 0;
@@ -933,6 +1337,7 @@ numcompare (register const char *a, register const char *b)
        return tmpa - tmpb;
      }
  }
+#endif
  
  static int
  general_numcompare (const char *sa, const char *sb)
@@ -967,20 +1372,42 @@ getmonth (const char *s, int len)
    if (len < 3)
      return 0;
  
-  for (i = 0; i < 3; ++i)
+  for (i = 0; i < CHARS_IN_ABM; ++i)
      month[i] = fold_toupper[UCHAR (s[i])];
    month[3] = '\0';
  
-  while (hi - lo > 1)
+  while (hi - lo > 1) {
+#ifdef ENABLE_NLS
+    if (strcoll (month, monthtab[(lo+hi)/2].name) < 0)
+#else
      if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
+#endif
        hi = (lo + hi) / 2;
      else
        lo = (lo + hi) / 2;
+  }
    if (!strcmp (month, monthtab[lo].name))
      return monthtab[lo].val;
    return 0;
  }
  
+#ifdef ENABLE_NLS
+/* Return the month number for the LEN bytes at S, trying the locale's
+   month table first and the US table second; 0 if neither knows it.
+   Side effect: monthtab is left pointing at the table tried last,
+   i.e. nls_monthtab if that one matched, us_monthtab otherwise.  */
+static int nls_month_is_either_locale(const char *s, int len)
+{
+  int month;
+
+  monthtab = nls_monthtab;
+  month = getmonth(s, len);
+  if (month != 0)
+    return month;
+
+  monthtab = us_monthtab;
+  return getmonth(s, len);
+}
+#endif
+
  /* Compare two lines A and B trying every key in sequence until there
     are no more keys or a difference is found. */
  
@@ -1082,11 +1509,60 @@ keycompare (const struct line *a, const struct line *b)
         }
        else if (key->month)
         {
+#ifdef ENABLE_NLS
+
+         /* if we haven't decided which locale to go with, we get the
+            month name from either.  If either month name is fully
+            solved and the month name doesn't collide with the other
+            locale... then use that table from there forward */
+         if (!nls_month_found) {
+           int x;
+
+           x = nls_month_is_either_locale(texta, lena);
+           if (nls_month_found = !nls_months_collide[x])
+             diff = x - getmonth(textb, lenb);
+           else {
+             diff = nls_month_is_either_locale(textb, lenb);
+             nls_month_found = !nls_months_collide[diff];
+             diff = x - diff;
+           }
+         } else
+#endif
           diff = getmonth (texta, lena) - getmonth (textb, lenb);
           if (diff)
             return key->reverse ? -diff : diff;
           continue;
         }
+#ifdef ENABLE_NLS
+
+      /* This sorting may become slow, so in a simple locale */
+      /* The user can select a faster sort, that is similar  */
+      /* to ascii sort, but 8-bit instead of 7-bit.  But     */
+      /* can't handle more complex, combined, character sets */
+      else if (need_locale) {
+       unsigned char copy_a[lena+1], copy_b[lenb+1];
+       int la, lb, i;
+
+       /* we can't just go strcoll() the two strings, but   */
+       /* must extract the text for the key, and do the     */
+       /* proper 'ignore' and 'translate' before comparing  */
+       for(la=lb=i=0;i<max(lena,lenb);i++) {
+         if (i < lena) {
+           copy_a[la]=translate?translate[UCHAR(texta[i])]:texta[i];
+           la = ignore?(ignore[UCHAR(texta[i])]?la:la+1):la+1;
+         }
+         if (i < lenb) {
+           copy_b[lb]=translate?translate[UCHAR(textb[i])]:textb[i];
+           lb = ignore?(ignore[UCHAR(textb[i])]?lb:lb+1):lb+1;
+         }
+       }
+       copy_a[la]=copy_b[lb]=0;
+       diff = strcoll(copy_a, copy_b);
+       if (diff)
+         return key->reverse? -diff:diff;
+       continue;
+      }
+#endif
        else if (ignore && translate)
  
  #define CMP_WITH_IGNORE(A, B)                                          \
@@ -1102,7 +1578,7 @@ keycompare (const struct line *a, const struct line *b)
                 {                                                       \
                   if ((A) != (B))                                       \
                     {                                                   \
-                     diff = (A) - (B);                                 \
+                     diff = NLS_MAP(A) - NLS_MAP(B);                   \
                       break;                                            \
                     }                                                   \
                   ++texta;                                              \
@@ -1144,13 +1620,21 @@ keycompare (const struct line *a, const struct line *b)
           {
             if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
               {
-               diff = (translate[UCHAR (*--texta)]
-                       - translate[UCHAR (*--textb)]);
+               diff = (NLS_MAP(translate[UCHAR (*--texta)])
+                       - NLS_MAP(translate[UCHAR (*--textb)]));
                 break;
               }
           }
        else
+#ifndef ENABLE_NLS
         diff = memcmp (texta, textb, min (lena, lenb));
+#else
+      /* since we don't have a strncoll, should one be emulated? */
+      /* as the normal behaviour of the sort program, when two   */
+      /* equivalent keys are met, is to sort according to length */
+
+        diff = strncoll (texta, textb, min(lena, lenb));
+#endif
  
        if (diff)
         return key->reverse ? -diff : diff;
@@ -1191,10 +1675,18 @@ compare (register const struct line *a, register const struct line *b)
      {
        char *ap = a->text, *bp = b->text;
  
-      diff = UCHAR (*ap) - UCHAR (*bp);
+#ifdef ENABLE_NLS
+      if (need_locale)  /* want absolutely correct sorting */
+       return reverse ? -strcoll(ap, bp) : strcoll(ap, bp);
+#endif
+      diff = NLS_MAP (*ap) - NLS_MAP (*bp);
        if (diff == 0)
         {
+#ifdef ENABLE_NLS
+         diff = strncoll (ap, bp, mini);
+#else
           diff = memcmp (ap, bp, mini);
+#endif
           if (diff == 0)
             diff = tmpa - tmpb;
         }
@@ -1469,6 +1961,41 @@ mergefps (FILE **fps, register int nfps, FILE *ofp)
      }
  }
  
+#ifdef ENABLE_NLS
+
+/*
+ * Find the numeric format that this file presents to us for sorting.
+ *
+ * For each of the NLINES lines starting at LINE, walk once round the
+ * key list headed by nls_keyhead and hand each key's text to
+ * look_for_fraction, stopping as soon as nls_fraction_found is set.
+ * nls_keyhead must not be NULL when there are lines to examine.
+ * The format is treated as decided afterwards in any case: the
+ * function sets nls_fraction_found and always returns 1.
+ */
+static int
+nls_numeric_format(const struct line *line, int nlines)
+{
+  struct keyfield *key;
+  struct nls_keyfield *n_key = nls_keyhead;
+  int iter = 0;
+  unsigned char *text, *lim;
+
+  for(;!nls_fraction_found && nlines>0;line++,nlines--)
+    for(iter=0;!nls_fraction_found;++iter) {
+      key = n_key->key;
+      if (iter || line->keybeg == NULL) {
+       if (key->eword >= 0)
+         lim = (unsigned char *) limfield(line, key);
+       else
+         lim = (unsigned char *) (line->text + line->length);
+       if (key->sword >= 0)
+         text = (unsigned char *) begfield(line, key);
+       else
+         text = (unsigned char *) line->text;
+      } else
+       /* NOTE(review): the cached keybeg/keylim are used for the first
+          key visited; confirm that this is the key they were computed
+          for.  */
+       text = (unsigned char *) line->keybeg, lim = (unsigned char *) line->keylim;
+      look_for_fraction(text, lim);
+      /* Back at the head of the circular list: this line is done.  */
+      if ((n_key = n_key->next) == nls_keyhead) break;
+    }
+  return nls_fraction_found=1;
+}
+
+#endif
+
  /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
  
  static void
@@ -1603,6 +2130,12 @@ sort (char **files, int nfiles, FILE *ofp)
               tmp = (struct line *)
                 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
             }
+#ifdef ENABLE_NLS
+         if (nls_keyhead)
+           nls_keyhead = nls_keyhead->next;
+         if (!nls_fraction_found && nls_keyhead)
+           nls_numeric_format(lines.lines, lines.used);
+#endif
           sortlines (lines.lines, lines.used, tmp);
           if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
             tfp = ofp;
@@ -1650,6 +2183,18 @@ insertkey (struct keyfield *key)
      k = k->next;
    k->next = key;
    key->next = NULL;
+  if (key->numeric || key->general_numeric) {
+    struct nls_keyfield *nk;
+
+    nk = (struct nls_keyfield *)xmalloc(sizeof(struct nls_keyfield));
+    nk->key  = key;
+    if (nls_keyhead) {
+      nk->next = nls_keyhead->next;
+      nls_keyhead->next = nk;
+    } else
+      nk->next = nk;
+    nls_keyhead = nk;
+  }
  }
  
  static void
@@ -1746,7 +2291,30 @@ main (int argc, char **argv)
  #endif                         /* SA_INTERRUPT */
  
    program_name = argv[0];
-  setlocale (LC_ALL, "");
+
+#ifdef ENABLE_NLS
+
+  s = setlocale(LC_ALL, "");
+  if (strcmp(s, "C") && strcmp(s, "POSIX"))
+    need_locale = 1;  /* Neither C nor POSIX, we need to initialize it */
+
+  /* Let's get locale's representation of the decimal point */
+  decimal_point = *( localeconv() )->decimal_point;
+  th_sep        = *( localeconv() )->thousands_sep;
+  nls_grouping  =  ( localeconv() )->grouping;
+
+  /* if locale doesn't define a decimal point, we'll use the
+     US notation.                                            */
+  if (decimal_point == 0)
+    decimal_point = FLOATING_POINT;
+  else
+    nls_fraction_found = 0;  /* Figure out which decimal point to use  */
+  nls_month_found      = 0;  /* Figure out which month notation to use */
+
+  monthtab = nls_monthtab;
+
+#endif /* NLS */
+
    bindtextdomain (PACKAGE, LOCALEDIR);
    textdomain (PACKAGE);
author	Jim Meyering <jim@meyering.net>
	Mon, 13 Oct 1997 02:08:01 +0000 (02:08 +0000)
committer	Jim Meyering <jim@meyering.net>
	Mon, 13 Oct 1997 02:08:01 +0000 (02:08 +0000)