Remove unused Unicode functions

author Greg Hudson <ghudson@mit.edu>
Sat, 10 Dec 2022 06:26:36 +0000 (01:26 -0500)
committer Greg Hudson <ghudson@mit.edu>
Mon, 26 Dec 2022 07:30:31 +0000 (02:30 -0500)

diff --git a/src/include/k5-unicode.h b/src/include/k5-unicode.h

index 45c1788b2642f3b89f816bcf86d6b7ffeaddbe60..81c495f65694442ee9236bb5819985cb9a9bc38b 100644 (file)
--- a/src/include/k5-unicode.h
+++ b/src/include/k5-unicode.h
@@ -87,41 +87,12 @@
  
  typedef krb5_ucs4 krb5_unicode;
  
-int krb5int_ucstrncmp(
-    const krb5_unicode *,
-    const krb5_unicode *,
-    size_t);
-
-int krb5int_ucstrncasecmp(
-    const krb5_unicode *,
-    const krb5_unicode *,
-    size_t);
-
-krb5_unicode *krb5int_ucstrnchr(
-    const krb5_unicode *,
-    size_t,
-    krb5_unicode);
-
-krb5_unicode *krb5int_ucstrncasechr(
-    const krb5_unicode *,
-    size_t,
-    krb5_unicode);
-
-void krb5int_ucstr2upper(
-    krb5_unicode *,
-    size_t);
-
  #define KRB5_UTF8_NOCASEFOLD    0x0U
  #define KRB5_UTF8_CASEFOLD      0x1U
  #define KRB5_UTF8_ARG1NFC       0x2U
  #define KRB5_UTF8_ARG2NFC       0x4U
  #define KRB5_UTF8_APPROX        0x8U
  
-krb5_error_code krb5int_utf8_normalize(
-    const krb5_data *,
-    krb5_data **,
-    unsigned);
-
  int krb5int_utf8_normcmp(
      const krb5_data *,
      const krb5_data *,
diff --git a/src/include/k5-utf8.h b/src/include/k5-utf8.h

index 7cc8cda47eceaa609f3f8abffa3bc525e77d9f47..11949f9f3219986db69c8ad363897ba4f2d4d795 100644 (file)
--- a/src/include/k5-utf8.h
+++ b/src/include/k5-utf8.h
@@ -73,9 +73,6 @@
  typedef uint16_t krb5_ucs2;
  typedef uint32_t krb5_ucs4;
  
-int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
-size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
-
  int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
  size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
  
@@ -96,49 +93,6 @@ int k5_utf16le_to_utf8(const uint8_t *utf16bytes, size_t nbytes,
  int k5_utf8_to_utf16le(const char *utf8, uint8_t **utf16_out,
                         size_t *nbytes_out);
  
-/* returns the number of bytes in the UTF-8 string */
-size_t krb5int_utf8_bytes(const char *);
-/* returns the number of UTF-8 characters in the string */
-size_t krb5int_utf8_chars(const char *);
-/* returns the number of UTF-8 characters in the counted string */
-size_t krb5int_utf8c_chars(const char *, size_t);
-/* returns the length (in bytes) of the UTF-8 character */
-int krb5int_utf8_offset(const char *);
-/* returns the length (in bytes) indicated by the UTF-8 character */
-int krb5int_utf8_charlen(const char *);
-
-/* returns the length (in bytes) indicated by the UTF-8 character
- * also checks that shortest possible encoding was used
- */
-int krb5int_utf8_charlen2(const char *);
-
-/* copies a UTF-8 character and returning number of bytes copied */
-int krb5int_utf8_copy(char *, const char *);
-
-/* returns pointer of next UTF-8 character in string */
-char *krb5int_utf8_next( const char *);
-/* returns pointer of previous UTF-8 character in string */
-char *krb5int_utf8_prev( const char *);
-
-/* primitive ctype routines -- not aware of non-ascii characters */
-int krb5int_utf8_isascii( const char *);
-int krb5int_utf8_isalpha( const char *);
-int krb5int_utf8_isalnum( const char *);
-int krb5int_utf8_isdigit( const char *);
-int krb5int_utf8_isxdigit( const char *);
-int krb5int_utf8_isspace( const char *);
-
-/* span characters not in set, return bytes spanned */
-size_t krb5int_utf8_strcspn( const char* str, const char *set);
-/* span characters in set, return bytes spanned */
-size_t krb5int_utf8_strspn( const char* str, const char *set);
-/* return first occurrence of character in string */
-char *krb5int_utf8_strchr( const char* str, const char *chr);
-/* return first character of set in string */
-char *krb5int_utf8_strpbrk( const char* str, const char *set);
-/* reentrant tokenizer */
-char *krb5int_utf8_strtok( char* sp, const char* sep, char **last);
-
  /* Optimizations */
  extern const char krb5int_utf8_lentab[128];
  extern const char krb5int_utf8_mintab[32];
@@ -157,38 +111,10 @@ extern const char krb5int_utf8_mintab[32];
           (krb5int_utf8_mintab[KRB5_UTF8_BV(p) & 0x1f] & (p)[1])) ?      \
          l : 0)
  
-#define KRB5_UTF8_OFFSET(p) (KRB5_UTF8_ISASCII(p)               \
-                             ? 1 : krb5int_utf8_offset((p)) )
-
-#define KRB5_UTF8_COPY(d,s) (KRB5_UTF8_ISASCII(s)                       \
-                             ? (*(d) = *(s), 1) : krb5int_utf8_copy((d),(s)))
-
-#define KRB5_UTF8_NEXT(p) (KRB5_UTF8_ISASCII(p)                         \
-                           ? (char *)(p)+1 : krb5int_utf8_next((p)))
-
-#define KRB5_UTF8_INCR(p) ((p) = KRB5_UTF8_NEXT(p))
-
-/* For symmetry */
-#define KRB5_UTF8_PREV(p) (krb5int_utf8_prev((p)))
-#define KRB5_UTF8_DECR(p) ((p)=KRB5_UTF8_PREV((p)))
-
  /*
   * these macros assume 'x' is an ASCII x
   * and assume the "C" locale
   */
-#define KRB5_ASCII(c)           (!((c) & 0x80))
-#define KRB5_SPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
-#define KRB5_DIGIT(c)           ((c) >= '0' && (c) <= '9')
-#define KRB5_LOWER(c)           ((c) >= 'a' && (c) <= 'z')
  #define KRB5_UPPER(c)           ((c) >= 'A' && (c) <= 'Z')
-#define KRB5_ALPHA(c)           (KRB5_LOWER(c) || KRB5_UPPER(c))
-#define KRB5_ALNUM(c)           (KRB5_ALPHA(c) || KRB5_DIGIT(c))
-
-#define KRB5_LDH(c)             (KRB5_ALNUM(c) || (c) == '-')
-
-#define KRB5_HEXLOWER(c)        ((c) >= 'a' && (c) <= 'f')
-#define KRB5_HEXUPPER(c)        ((c) >= 'A' && (c) <= 'F')
-#define KRB5_HEX(c)             (KRB5_DIGIT(c) ||                       \
-                                 KRB5_HEXLOWER(c) || KRB5_HEXUPPER(c))
  
  #endif /* K5_UTF8_H */
diff --git a/src/lib/krb5/unicode/Makefile.in b/src/lib/krb5/unicode/Makefile.in

index e23028df81e9d750194004fb261b9ddc1cd8d88a..d7dc0f5f53c21ba65c271eaaad8b76b3049305eb 100644 (file)
--- a/src/lib/krb5/unicode/Makefile.in
+++ b/src/lib/krb5/unicode/Makefile.in
@@ -6,19 +6,15 @@ BUILDTOP=$(REL)..$(S)..$(S)..
  ##DOS##OBJFILE=..\$(OUTPRE)$(PREFIXDIR).lst
  
  XXDIR = $(srcdir)/ucdata/
-XXHEADERS = ucdata.h ure.h uctable.h
-XXSRCS  = ucdata.c ucgendat.c ure.c urestubs.c
+XXHEADERS = ucdata.h uctable.h
+XXSRCS  = ucdata.c ucgendat.c
  
  STLIBOBJS= \
         ucdata.o        \
-       ure.o           \
-       urestubs.o      \
         ucstr.o         
  
  OBJS= \
         $(OUTPRE)ucdata.$(OBJEXT)       \
-       $(OUTPRE)ure.$(OBJEXT)          \
-       $(OUTPRE)urestubs.$(OBJEXT)     \
         $(OUTPRE)ucstr.$(OBJEXT)        
  
  SRCS= \
@@ -57,9 +53,6 @@ ucgendat: ucgendat.o
  ##DOS##                $(CP) $(srcdir)\ucdata\ucdata.c ucdata.c
  ##DOS##                $(CP) $(srcdir)\ucdata\ucgendat.c ucgendat.c
  ##DOS##                $(CP) $(srcdir)\ucdata\uctable.h uctable.h
-##DOS##                $(CP) $(srcdir)\ure\ure.h ure.h
-##DOS##                $(CP) $(srcdir)\ure\ure.c ure.c
-##DOS##                $(CP) $(srcdir)\ure\urestubs.c urestubs.c
  ##DOS##                $(CP) nul .links
  
  $(XXSRCS) $(XXHEADERS) : .links
diff --git a/src/lib/krb5/unicode/ucdata/bidiapi.txt b/src/lib/krb5/unicode/ucdata/bidiapi.txt

deleted file mode 100644 (file)

index dffd12e..0000000
--- a/src/lib/krb5/unicode/ucdata/bidiapi.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-#
-# $Id: bidiapi.txt,v 1.2 1999/11/19 15:24:29 mleisher Exp $
-#
-
-                       "Pretty Good Bidi Algorithm" API
-
-The PGBA (Pretty Good Bidi Algorithm) is an effective alternative to the
-Unicode BiDi algorithm.  It currently provides only implicit reordering and
-does not yet support explicit reordering codes that the Unicode BiDi algorithm
-supports.  In addition to reordering, the PGBA includes cursor movement
-support for both visual and logical navigation.
-
------------------------------------------------------------------------------
-
-#define UCPGBA_LTR 0
-#define UCPGBA_RTL 1
-
-  These macros appear in the `direction' field of the data structures.
-
-#define UCPGBA_CURSOR_VISUAL  0
-#define UCPGBA_CURSOR_LOGICAL 1
-
-  These macros are used to set the cursor movement for each reordered string.
-
------------------------------------------------------------------------------
-
-ucstring_t *ucstring_create(unsigned long *source, unsigned long start,
-                            unsigned long end, int default_direction,
-                            int cursor_motion)
-
-  This function will create a reordered string by using the implicit
-  directionality of the characters in the specified substring.
-
-  The `default_direction' parameter should be one of UCPGBA_LTR or UCPGBA_RTL
-  and is used only in cases where a string contains no characters with strong
-  directionality.
-
-  The `cursor_motion' parameter should be one of UCPGBA_CURSOR_VISUAL or
-  UCPGBA_CURSOR_LOGICAL, and is used to specify the initial cursor motion
-  behavior.  This behavior can be switched at any time using
-  ustring_set_cursor_motion().
-
------------------------------------------------------------------------------
-
-void ucstring_free(ucstring_t *string)
-
-  This function will deallocate the memory used by the string, incuding the
-  string itself.
-
------------------------------------------------------------------------------
-
-void ucstring_cursor_info(ustring_t *string, int *direction,
-                          unsigned long *position)
-
-  This function will return the text position of the internal cursor and the
-  directionality of the text at that position.  The position returned is the
-  original text position of the character.
-
------------------------------------------------------------------------------
-
-int ucstring_set_cursor_motion(ucstring_t *string, int cursor_motion)
-
-  This function will change the cursor motion type and return the previous
-  cursor motion type.
-
------------------------------------------------------------------------------
-
-int ucstring_cursor_right(ucstring_t *string, int count)
-
-  This function will move the internal cursor to the right according to the
-  type of cursor motion set for the string.
-
-  If no cursor motion is performed, it returns 0.  Otherwise it will return a
-  1.
-
------------------------------------------------------------------------------
-
-int ucstring_cursor_left(ucstring_t *string, int count)
-
-  This function will move the internal cursor to the left according to the
-  type of cursor motion set for the string.
-
-  If no cursor motion is performed, it returns 0.  Otherwise it will return a
-  1.
diff --git a/src/lib/krb5/unicode/ucdata/ucpgba.c b/src/lib/krb5/unicode/ucdata/ucpgba.c

deleted file mode 100644 (file)

index 5190703..0000000
--- a/src/lib/krb5/unicode/ucdata/ucpgba.c
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 2001 Computing Research Labs, New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.c,v 1.9 2008/01/07 23:20:05 kurt Exp $
- * $Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $
- */
-
-#include "k5-int.h"
-#include "k5-utf8.h"
-#include "k5-unicode.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "ucdata.h"
-#include "ucpgba.h"
-
-/*
- * These macros are used while reordering of RTL runs of text for the
- * special case of non-spacing characters being in runs of weakly
- * directional text.  They check for weak and non-spacing, and digits and
- * non-spacing.
- */
-#define ISWEAKSPECIAL(cc)  ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
-#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)
-
-/*
- * These macros are used while breaking a string into runs of text in
- * different directions.  Descriptions:
- *
- * ISLTR_LTR - Test for members of an LTR run in an LTR context.  This looks
- *             for characters with ltr, non-spacing, weak, and neutral
- *             properties.
- *
- * ISRTL_RTL - Test for members of an RTL run in an RTL context.  This looks
- *             for characters with rtl, non-spacing, weak, and neutral
- *             properties.
- *
- * ISRTL_NEUTRAL  - Test for RTL or neutral characters.
- *
- * ISWEAK_NEUTRAL - Test for weak or neutral characters.
- */
-#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
-                               UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
-
-#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
-                               UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
-
-#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
-#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
-                                    UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
-
-/*
- * This table is temporarily hard-coded here until it can be constructed
- * automatically somehow.
- */
-static unsigned long _symmetric_pairs[] = {
-    0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
-    0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
-    0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
-    0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
-    0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
-    0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
-    0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
-    0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
-    0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
-    0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
-    0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
-    0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
-};
-
-static int _symmetric_pairs_size =
-sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);
-
-/*
- * This routine looks up the other form of a symmetric pair.
- */
-static unsigned long
-_ucsymmetric_pair(unsigned long c)
-{
-    int i;
-
-    for (i = 0; i < _symmetric_pairs_size; i += 2) {
-        if (_symmetric_pairs[i] == c)
-          return _symmetric_pairs[i+1];
-    }
-    return c;
-}
-
-/*
- * This routine creates a new run, copies the text into it, links it into the
- * logical text order chain and returns it to the caller to be linked into
- * the visual text order chain.
- */
-static ucrun_t *
-_add_run(ucstring_t *str, unsigned long *src,
-         unsigned long start, unsigned long end, int direction)
-{
-    long i, t;
-    ucrun_t *run;
-
-    run = (ucrun_t *) malloc(sizeof(ucrun_t));
-    run->visual_next = run->visual_prev = 0;
-    run->direction = direction;
-
-    run->cursor = ~0;
-
-    run->chars = (unsigned long *)
-        malloc(sizeof(unsigned long) * ((end - start) << 1));
-    run->positions = run->chars + (end - start);
-
-    run->source = src;
-    run->start = start;
-    run->end = end;
-
-    if (direction == UCPGBA_RTL) {
-        /*
-         * Copy the source text into the run in reverse order and select
-         * replacements for the pairwise punctuation and the <> characters.
-         */
-        for (i = 0, t = end - 1; start < end; start++, t--, i++) {
-            run->positions[i] = t;
-            if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>')
-              run->chars[i] = _ucsymmetric_pair(src[t]);
-            else
-              run->chars[i] = src[t];
-        }
-    } else {
-        /*
-         * Copy the source text into the run directly.
-         */
-        for (i = start; i < end; i++) {
-            run->positions[i - start] = i;
-            run->chars[i - start] = src[i];
-        }
-    }
-
-    /*
-     * Add the run to the logical list for cursor traversal.
-     */
-    if (str->logical_first == 0)
-      str->logical_first = str->logical_last = run;
-    else {
-        run->logical_prev = str->logical_last;
-        str->logical_last->logical_next = run;
-        str->logical_last = run;
-    }
-
-    return run;
-}
-
-static void
-_ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start,
-                   unsigned long end)
-{
-    unsigned long s, e;
-    ucrun_t *run, *lrun;
-
-    /*
-     * This is used to splice runs into strings with overall LTR direction.
-     * The `lrun' variable will never be NULL because at least one LTR run was
-     * added before this RTL run.
-     */
-    lrun = str->visual_last;
-
-    for (e = s = start; s < end;) {
-        for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ;
-
-        if (e > s) {
-            run = _add_run(str, source, s, e, UCPGBA_RTL);
-
-            /*
-             * Add the run to the visual list for cursor traversal.
-             */
-            if (str->visual_first != 0) {
-                if (str->direction == UCPGBA_LTR) {
-                    run->visual_prev = lrun;
-                    run->visual_next = lrun->visual_next;
-                    if (lrun->visual_next != 0)
-                      lrun->visual_next->visual_prev = run;
-                    lrun->visual_next = run;
-                    if (lrun == str->visual_last)
-                      str->visual_last = run;
-                } else {
-                    run->visual_next = str->visual_first;
-                    str->visual_first->visual_prev = run;
-                    str->visual_first = run;
-                }
-            } else
-              str->visual_first = str->visual_last = run;
-        }
-
-        /*
-         * Handle digits in a special way.  This makes sure the weakly
-         * directional characters appear on the expected sides of a number
-         * depending on whether that number is Arabic or not.
-         */
-        for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) {
-            if (!ISDIGITSPECIAL(source[e]) &&
-                (e + 1 == end || !ISDIGITSPECIAL(source[e + 1])))
-              break;
-        }
-
-        if (e > s) {
-            run = _add_run(str, source, s, e, UCPGBA_LTR);
-
-            /*
-             * Add the run to the visual list for cursor traversal.
-             */
-            if (str->visual_first != 0) {
-                if (str->direction == UCPGBA_LTR) {
-                    run->visual_prev = lrun;
-                    run->visual_next = lrun->visual_next;
-                    if (lrun->visual_next != 0)
-                      lrun->visual_next->visual_prev = run;
-                    lrun->visual_next = run;
-                    if (lrun == str->visual_last)
-                      str->visual_last = run;
-                } else {
-                    run->visual_next = str->visual_first;
-                    str->visual_first->visual_prev = run;
-                    str->visual_first = run;
-                }
-            } else
-              str->visual_first = str->visual_last = run;
-        }
-
-        /*
-         * Collect all weak non-digit sequences for an RTL segment.  These
-         * will appear as part of the next RTL segment or will be added as
-         * an RTL segment by themselves.
-         */
-        for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]);
-             e++) ;
-    }
-
-    /*
-     * Capture any weak non-digit sequences that occur at the end of the RTL
-     * run.
-     */
-    if (e > s) {
-        run = _add_run(str, source, s, e, UCPGBA_RTL);
-
-        /*
-         * Add the run to the visual list for cursor traversal.
-         */
-        if (str->visual_first != 0) {
-            if (str->direction == UCPGBA_LTR) {
-                run->visual_prev = lrun;
-                run->visual_next = lrun->visual_next;
-                if (lrun->visual_next != 0)
-                  lrun->visual_next->visual_prev = run;
-                lrun->visual_next = run;
-                if (lrun == str->visual_last)
-                  str->visual_last = run;
-            } else {
-                run->visual_next = str->visual_first;
-                str->visual_first->visual_prev = run;
-                str->visual_first = run;
-            }
-        } else
-          str->visual_first = str->visual_last = run;
-    }
-}
-
-static void
-_ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start,
-                   unsigned long end)
-{
-    ucrun_t *run;
-
-    run = _add_run(str, source, start, end, UCPGBA_LTR);
-
-    /*
-     * Add the run to the visual list for cursor traversal.
-     */
-    if (str->visual_first != 0) {
-        if (str->direction == UCPGBA_LTR) {
-            run->visual_prev = str->visual_last;
-            str->visual_last->visual_next = run;
-            str->visual_last = run;
-        } else {
-            run->visual_next = str->visual_first;
-            str->visual_first->visual_prev = run;
-            str->visual_first = run;
-        }
-    } else
-      str->visual_first = str->visual_last = run;
-}
-
-ucstring_t *
-ucstring_create(unsigned long *source, unsigned long start, unsigned long end,
-                int default_direction, int cursor_motion)
-{
-    int rtl_first;
-    unsigned long s, e, ld;
-    ucstring_t *str;
-
-    str = (ucstring_t *) malloc(sizeof(ucstring_t));
-
-    /*
-     * Set the initial values.
-     */
-    str->cursor_motion = cursor_motion;
-    str->logical_first = str->logical_last = 0;
-    str->visual_first = str->visual_last = str->cursor = 0;
-    str->source = source;
-    str->start = start;
-    str->end = end;
-
-    /*
-     * If the length of the string is 0, then just return it at this point.
-     */
-    if (start == end)
-      return str;
-
-    /*
-     * This flag indicates whether the collection loop for RTL is called
-     * before the LTR loop the first time.
-     */
-    rtl_first = 0;
-
-    /*
-     * Look for the first character in the string that has strong
-     * directionality.
-     */
-    for (s = start; s < end && !ucisstrong(source[s]); s++) ;
-
-    if (s == end)
-      /*
-       * If the string contains no characters with strong directionality, use
-       * the default direction.
-       */
-      str->direction = default_direction;
-    else
-      str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR;
-
-    if (str->direction == UCPGBA_RTL)
-      /*
-       * Set the flag that causes the RTL collection loop to run first.
-       */
-      rtl_first = 1;
-
-    /*
-     * This loop now separates the string into runs based on directionality.
-     */
-    for (s = e = 0; s < end; s = e) {
-        if (!rtl_first) {
-            /*
-             * Determine the next run of LTR text.
-             */
-
-            ld = s;
-            while (e < end && ISLTR_LTR(source[e])) {
-                if (ucisdigit(source[e]) &&
-                    !(0x660 <= source[e] && source[e] <= 0x669))
-                  ld = e;
-                e++;
-            }
-            if (str->direction != UCPGBA_LTR) {
-                while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
-                  e--;
-            }
-
-            /*
-             * Add the LTR segment to the string.
-             */
-            if (e > s)
-              _ucadd_ltr_segment(str, source, s, e);
-        }
-
-        /*
-         * Determine the next run of RTL text.
-         */
-        ld = s = e;
-        while (e < end && ISRTL_RTL(source[e])) {
-            if (ucisdigit(source[e]) &&
-                !(0x660 <= source[e] && source[e] <= 0x669))
-              ld = e;
-            e++;
-        }
-        if (str->direction != UCPGBA_RTL) {
-            while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
-              e--;
-        }
-
-        /*
-         * Add the RTL segment to the string.
-         */
-        if (e > s)
-          _ucadd_rtl_segment(str, source, s, e);
-
-        /*
-         * Clear the flag that allowed the RTL collection loop to run first
-         * for strings with overall RTL directionality.
-         */
-        rtl_first = 0;
-    }
-
-    /*
-     * Set up the initial cursor run.
-     */
-    str->cursor = str->logical_first;
-    if (str != 0)
-      str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ?
-          str->cursor->end - str->cursor->start : 0;
-
-    return str;
-}
-
-void
-ucstring_free(ucstring_t *s)
-{
-    ucrun_t *l, *r;
-
-    if (s == 0)
-      return;
-
-    for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) {
-        if (r->end > r->start)
-          free((char *) r->chars);
-        if (l)
-          free((char *) l);
-        l = r;
-    }
-    if (l)
-      free((char *) l);
-
-    free((char *) s);
-}
-
-int
-ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion)
-{
-    int n;
-
-    if (str == 0)
-      return -1;
-
-    n = str->cursor_motion;
-    str->cursor_motion = cursor_motion;
-    return n;
-}
-
-static int
-_ucstring_visual_cursor_right(ucstring_t *str, int count)
-{
-    int cnt = count;
-    unsigned long size;
-    ucrun_t *cursor;
-
-    if (str == 0)
-      return 0;
-
-    cursor = str->cursor;
-    while (cnt > 0) {
-        size = cursor->end - cursor->start;
-        if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) ||
-            cursor->cursor + 1 > size) {
-            /*
-             * If the next run is NULL, then the cursor is already on the
-             * far right end already.
-             */
-            if (cursor->visual_next == 0)
-              /*
-               * If movement occured, then report it.
-               */
-              return (cnt != count);
-
-            /*
-             * Move to the next run.
-             */
-            str->cursor = cursor = cursor->visual_next;
-            cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0;
-            size = cursor->end - cursor->start;
-        } else
-          cursor->cursor++;
-        cnt--;
-    }
-    return 1;
-}
-
-static int
-_ucstring_logical_cursor_right(ucstring_t *str, int count)
-{
-    int cnt = count;
-    unsigned long size;
-    ucrun_t *cursor;
-
-    if (str == 0)
-      return 0;
-
-    cursor = str->cursor;
-    while (cnt > 0) {
-        size = cursor->end - cursor->start;
-        if (str->direction == UCPGBA_RTL) {
-            if (cursor->direction == UCPGBA_RTL) {
-                if (cursor->cursor + 1 == size) {
-                    if (cursor == str->logical_first)
-                      /*
-                       * Already at the beginning of the string.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_prev;
-                    size = cursor->end - cursor->start;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        size : 0;
-                } else
-                  cursor->cursor++;
-            } else {
-                if (cursor->cursor == 0) {
-                    if (cursor == str->logical_first)
-                      /*
-                       * At the beginning of the string already.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_prev;
-                    size = cursor->end - cursor->start;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        size : 0;
-                } else
-                  cursor->cursor--;
-            }
-        } else {
-            if (cursor->direction == UCPGBA_RTL) {
-                if (cursor->cursor == 0) {
-                    if (cursor == str->logical_last)
-                      /*
-                       * Already at the end of the string.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_next;
-                    size = cursor->end - cursor->start;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        0 : size - 1;
-                } else
-                  cursor->cursor--;
-            } else {
-                if (cursor->cursor + 1 > size) {
-                    if (cursor == str->logical_last)
-                      /*
-                       * Already at the end of the string.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_next;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        0 : size - 1;
-                } else
-                  cursor->cursor++;
-            }
-        }
-        cnt--;
-    }
-    return 1;
-}
-
-int
-ucstring_cursor_right(ucstring_t *str, int count)
-{
-    if (str == 0)
-      return 0;
-    return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
-        _ucstring_visual_cursor_right(str, count) :
-        _ucstring_logical_cursor_right(str, count);
-}
-
-static int
-_ucstring_visual_cursor_left(ucstring_t *str, int count)
-{
-    int cnt = count;
-    unsigned long size;
-    ucrun_t *cursor;
-
-    if (str == 0)
-      return 0;
-
-    cursor = str->cursor;
-    while (cnt > 0) {
-        size = cursor->end - cursor->start;
-        if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) ||
-            cursor->cursor - 1 < -1) {
-            /*
-             * If the preceding run is NULL, then the cursor is already on the
-             * far left end already.
-             */
-            if (cursor->visual_prev == 0)
-              /*
-               * If movement occured, then report it.
-               */
-              return (cnt != count);
-
-            /*
-             * Move to the previous run.
-             */
-            str->cursor = cursor = cursor->visual_prev;
-            size = cursor->end - cursor->start;
-            cursor->cursor = (cursor->direction == UCPGBA_RTL) ?
-                size : size - 1;
-        } else
-          cursor->cursor--;
-        cnt--;
-    }
-    return 1;
-}
-
-static int
-_ucstring_logical_cursor_left(ucstring_t *str, int count)
-{
-    int cnt = count;
-    unsigned long size;
-    ucrun_t *cursor;
-
-    if (str == 0)
-      return 0;
-
-    cursor = str->cursor;
-    while (cnt > 0) {
-        size = cursor->end - cursor->start;
-        if (str->direction == UCPGBA_RTL) {
-            if (cursor->direction == UCPGBA_RTL) {
-                if (cursor->cursor == -1) {
-                    if (cursor == str->logical_last)
-                      /*
-                       * Already at the end of the string.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_next;
-                    size = cursor->end - cursor->start;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        0 : size - 1;
-                } else
-                  cursor->cursor--;
-            } else {
-                if (cursor->cursor + 1 > size) {
-                    if (cursor == str->logical_last)
-                      /*
-                       * At the end of the string already.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_next;
-                    size = cursor->end - cursor->start;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        0 : size - 1;
-                } else
-                  cursor->cursor++;
-            }
-        } else {
-            if (cursor->direction == UCPGBA_RTL) {
-                if (cursor->cursor + 1 == size) {
-                    if (cursor == str->logical_first)
-                      /*
-                       * Already at the beginning of the string.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_prev;
-                    size = cursor->end - cursor->start;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        size : 0;
-                } else
-                  cursor->cursor++;
-            } else {
-                if (cursor->cursor == 0) {
-                    if (cursor == str->logical_first)
-                      /*
-                       * Already at the beginning of the string.
-                       */
-                      return (cnt != count);
-
-                    str->cursor = cursor = cursor->logical_prev;
-                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
-                        size : 0;
-                } else
-                  cursor->cursor--;
-            }
-        }
-        cnt--;
-    }
-    return 1;
-}
-
-int
-ucstring_cursor_left(ucstring_t *str, int count)
-{
-    if (str == 0)
-      return 0;
-    return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
-        _ucstring_visual_cursor_left(str, count) :
-        _ucstring_logical_cursor_left(str, count);
-}
-
-void
-ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position)
-{
-    long c;
-    unsigned long size;
-    ucrun_t *cursor;
-
-    if (str == 0 || direction == 0 || position == 0)
-      return;
-
-    cursor = str->cursor;
-
-    *direction = cursor->direction;
-
-    c = cursor->cursor;
-    size = cursor->end - cursor->start;
-
-    if (c == size)
-      *position = (cursor->direction == UCPGBA_RTL) ?
-          cursor->start : cursor->positions[c - 1];
-    else if (c == -1)
-      *position = (cursor->direction == UCPGBA_RTL) ?
-          cursor->end : cursor->start;
-    else
-      *position = cursor->positions[c];
-}
diff --git a/src/lib/krb5/unicode/ucdata/ucpgba.h b/src/lib/krb5/unicode/ucdata/ucpgba.h

deleted file mode 100644 (file)

index 7e1d570..0000000
--- a/src/lib/krb5/unicode/ucdata/ucpgba.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1999 Computing Research Labs, New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.h,v 1.10 2008/01/07 23:20:05 kurt Exp $
- * $Id: ucpgba.h,v 1.4 1999/11/19 15:24:30 mleisher Exp $
- */
-
-#ifndef _h_ucpgba
-#define _h_ucpgba
-
-#include "k5-int.h"
-
-/***************************************************************************
- *
- * Macros and types.
- *
- ***************************************************************************/
-
-/*
- * These are the direction values that can appear in render runs and render
- * strings.
- */
-#define UCPGBA_LTR 0
-#define UCPGBA_RTL 1
-
-/*
- * These are the flags for cursor motion.
- */
-#define UCPGBA_CURSOR_VISUAL  0
-#define UCPGBA_CURSOR_LOGICAL 1
-
-/*
- * This structure is used to contain runs of text in a particular direction.
- */
-typedef struct _ucrun_t {
-    struct _ucrun_t *visual_prev;  /* Pointer to the previous visual run.    */
-    struct _ucrun_t *visual_next;  /* Pointer to the next visual run.        */
-
-    struct _ucrun_t *logical_prev; /* Pointer to the previous logical run.   */
-    struct _ucrun_t *logical_next; /* Pointer to the next logical run.       */
-
-    int direction;                 /* Direction of the run.                  */
-
-    long cursor;                   /* Position of "cursor" in the string.    */
-
-    unsigned long *chars;          /* List of characters for the run.        */
-    unsigned long *positions;      /* List of original positions in source.  */
-
-    unsigned long *source;         /* The source string.                     */
-    unsigned long start;           /* Beginning offset in the source string. */
-    unsigned long end;             /* Ending offset in the source string.    */
-} ucrun_t;
-
-/*
- * This represents a string of runs rendered up to a point that is not
- * platform specific.
- */
-typedef struct _ucstring_t {
-    int direction;                /* Overall direction of the string.       */
-
-    int cursor_motion;            /* Logical or visual cursor motion flag.  */
-
-    ucrun_t *cursor;              /* The run containing the "cursor."       */
-
-    ucrun_t *logical_first;       /* First run in the logical order.        */
-    ucrun_t *logical_last;        /* Last run in the logical order.         */
-
-    ucrun_t *visual_first;        /* First run in the visual order.         */
-    ucrun_t *visual_last;         /* Last run in the visual order.          */
-
-    unsigned long *source;        /* The source string.                     */
-    unsigned long start;          /* The beginning offset in the source.    */
-    unsigned long end;            /* The ending offset in the source.       */
-} ucstring_t;
-
-/***************************************************************************
- *
- * API
- *
- ***************************************************************************/
-
-/*
- * This creates and reorders the specified substring using the
- * "Pretty Good Bidi Algorithm."  A default direction is provided for cases
- * of a string containing no strong direction characters and the default
- * cursor motion should be provided.
- */
-ucstring_t *
-ucstring_create (unsigned long *source,
-                       unsigned long start,
-                       unsigned long end,
-                       int default_direction,
-                       int cursor_motion);
-/*
- * This releases the string.
- */
-void ucstring_free (ucstring_t *string);
-
-/*
- * This changes the cursor motion flag for the string.
- */
-int
-ucstring_set_cursor_motion (ucstring_t *string,
-                                  int cursor_motion);
-
-/*
- * This function will move the cursor to the right depending on the
- * type of cursor motion that was specified for the string.
- *
- * A 0 is returned if no cursor motion is performed, otherwise a
- * 1 is returned.
- */
-int
-ucstring_cursor_right (ucstring_t *string, int count);
-
-/*
- * This function will move the cursor to the left depending on the
- * type of cursor motion that was specified for the string.
- *
- * A 0 is returned if no cursor motion is performed, otherwise a
- * 1 is returned.
- */
-int
-ucstring_cursor_left (ucstring_t *string, int count);
-
-/*
- * This routine retrieves the direction of the run containing the cursor
- * and the actual position in the original text string.
- */
-void
-ucstring_cursor_info (ucstring_t *string, int *direction,
-                            unsigned long *position);
-
-#endif /* _h_ucpgba */
diff --git a/src/lib/krb5/unicode/ucdata/ucpgba.man b/src/lib/krb5/unicode/ucdata/ucpgba.man

deleted file mode 100644 (file)

index 4486509..0000000
--- a/src/lib/krb5/unicode/ucdata/ucpgba.man
+++ /dev/null
@@ -1,97 +0,0 @@
-.\"
-.\" $Id: ucpgba.man,v 1.1 1999/11/19 16:08:34 mleisher Exp $
-.\"
-.TH ucpgba 3 "19 November 1999"
-.SH NAME 
-ucpgba \- functions for doing bidirectional reordering of Unicode text and
-logical and visual cursor motion
-
-.SH SYNOPSIS
-.nf
-#include <ucdata.h>
-#include <ucpgba.h>
-
-ucstring_t *ucstring_create(unsigned long *source, unsigned long start,
-                            unsigned long end, int default_direction,
-                            int cursor_motion)
-.sp
-void ucstring_free(ucstring_t *string)
-.sp
-int ucstring_set_cursor_motion(ucstring_t *string, int cursor_motion)
-.sp
-int ucstring_cursor_right(ucstring_t *string, int count)
-.sp
-int ucstring_cursor_left(ucstring_t *string, int count)
-.sp
-void ucstring_cursor_info(ucstring_t *string, int *direction,
-                          unsigned long *position)
-
-.SH DESCRIPTION
-.TP 4
-.BR Macros
-UCPGBA_LTR
-.br
-UCPGBA_RTL
-.br
-UCPGBA_CURSOR_VISUAL
-.br
-UCPGBA_CURSOR_LOGICAL
-
-.TP 4
-.BR ucstring_create()
-This function will create a reordered string by using the implicit
-directionality of the characters in the specified substring.
-.sp
-The `default_direction' parameter should be one of UCPGBA_LTR or UCPGBA_RTL
-and is used only in cases where a string contains no characters with strong
-directionality.
-.sp
-The `cursor_motion' parameter should be one of UCPGBA_CURSOR_VISUAL or
-UCPGBA_CURSOR_LOGICAL, and is used to specify the initial cursor motion
-behavior.  This behavior can be switched at any time using
-ustring_set_cursor_motion().
-
-.TP 4
-.BR ucstring_free()
-This function will deallocate the memory used by the string, incuding the
-string itself.
-
-.TP 4
-.BR ucstring_cursor_info()
-This function will return the text position of the internal cursor and the
-directionality of the text at that position.  The position returned is the
-original text position of the character.
-
-.TP 4
-.BR ucstring_set_cursor_motion()
-This function will change the cursor motion type and return the previous
-cursor motion type.
-
-.TP 4
-.BR ucstring_cursor_right()
-This function will move the internal cursor to the right according to the
-type of cursor motion set for the string.
-.sp
-If no cursor motion is performed, it returns 0.  Otherwise it will return a 1.
-
-.TP 4
-.BR ucstring_cursor_left()
-This function will move the internal cursor to the left according to the
-type of cursor motion set for the string.
-.sp
-If no cursor motion is performed, it returns 0.  Otherwise it will return a 1.
-
-.SH "SEE ALSO"
-ucdata(3)
-
-.SH ACKNOWLEDGMENTS
-These are people who have helped with patches or alerted me about problems.
-
-.SH AUTHOR
-Mark Leisher
-.br
-Computing Research Lab
-.br
-New Mexico State University
-.br
-Email: mleisher@crl.nmsu.edu
diff --git a/src/lib/krb5/unicode/ucstr.c b/src/lib/krb5/unicode/ucstr.c

index 0257882cd7e7a3798500a321a666f799cb23910c..0a2e5ab41c1bc92726ac3640402c99dd9f651d9d 100644 (file)
--- a/src/lib/krb5/unicode/ucstr.c
+++ b/src/lib/krb5/unicode/ucstr.c
@@ -23,7 +23,7 @@
  
  #include <ctype.h>
  
-int
+static int
  krb5int_ucstrncmp(
                   const krb5_unicode * u1,
                   const krb5_unicode * u2,
@@ -40,7 +40,7 @@ krb5int_ucstrncmp(
      return 0;
  }
  
-int
+static int
  krb5int_ucstrncasecmp(
                       const krb5_unicode * u1,
                       const krb5_unicode * u2,
@@ -60,47 +60,6 @@ krb5int_ucstrncasecmp(
      return 0;
  }
  
-krb5_unicode *
-krb5int_ucstrnchr(
-                 const krb5_unicode * u,
-                 size_t n,
-                 krb5_unicode c)
-{
-    for (; 0 < n; ++u, --n) {
-       if (*u == c) {
-           return (krb5_unicode *) u;
-       }
-    }
-
-    return NULL;
-}
-
-krb5_unicode *
-krb5int_ucstrncasechr(
-                     const krb5_unicode * u,
-                     size_t n,
-                     krb5_unicode c)
-{
-    c = uctolower(c);
-    for (; 0 < n; ++u, --n) {
-       if ((krb5_unicode) uctolower(*u) == c) {
-           return (krb5_unicode *) u;
-       }
-    }
-
-    return NULL;
-}
-
-void
-krb5int_ucstr2upper(
-                   krb5_unicode * u,
-                   size_t n)
-{
-    for (; 0 < n; ++u, --n) {
-       *u = uctoupper(*u);
-    }
-}
-
  /* Return true if data contains valid UTF-8 sequences. */
  krb5_boolean
  k5_utf8_validate(const krb5_data *data)
@@ -127,211 +86,8 @@ k5_utf8_validate(const krb5_data *data)
      return !in.status;
  }
  
-#define TOUPPER(c)  (islower(c) ? toupper(c) : (c))
  #define TOLOWER(c)  (isupper(c) ? tolower(c) : (c))
  
-krb5_error_code
-krb5int_utf8_normalize(
-                      const krb5_data * data,
-                      krb5_data ** newdataptr,
-                      unsigned flags)
-{
-    int i, j, len, clen, outpos = 0, ucsoutlen, outsize;
-    char *out = NULL, *outtmp, *s;
-    krb5_ucs4 *ucs = NULL, *p, *ucsout = NULL;
-    krb5_data *newdata;
-    krb5_error_code retval = 0;
-
-    static unsigned char mask[] = {
-    0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
-
-    unsigned casefold = flags & KRB5_UTF8_CASEFOLD;
-    unsigned approx = flags & KRB5_UTF8_APPROX;
-
-    *newdataptr = NULL;
-
-    s = data->data;
-    len = data->length;
-
-    newdata = malloc(sizeof(*newdata));
-    if (newdata == NULL)
-       return ENOMEM;
-
-    /*
-     * Should first check to see if string is already in proper normalized
-     * form. This is almost as time consuming as the normalization though.
-     */
-
-    /* finish off everything up to character before first non-ascii */
-    if (KRB5_UTF8_ISASCII(s)) {
-       if (casefold) {
-           outsize = len + 7;
-           out = malloc(outsize);
-           if (out == NULL) {
-               retval = ENOMEM;
-               goto cleanup;
-           }
-
-           for (i = 1; (i < len) && KRB5_UTF8_ISASCII(s + i); i++) {
-               out[outpos++] = TOLOWER(s[i - 1]);
-           }
-           if (i == len) {
-               out[outpos++] = TOLOWER(s[len - 1]);
-               goto cleanup;
-           }
-       } else {
-           for (i = 1; (i < len) && KRB5_UTF8_ISASCII(s + i); i++) {
-               /* empty */
-           }
-
-           if (i == len) {
-               newdata->length = len;
-               newdata->data = k5memdup0(s, len, &retval);
-               if (newdata->data == NULL)
-                   goto cleanup;
-               *newdataptr = newdata;
-               return 0;
-           }
-           outsize = len + 7;
-           out = malloc(outsize);
-           if (out == NULL) {
-               retval = ENOMEM;
-               goto cleanup;
-           }
-           outpos = i - 1;
-           memcpy(out, s, outpos);
-       }
-    } else {
-       outsize = len + 7;
-       out = malloc(outsize);
-       if (out == NULL) {
-           retval = ENOMEM;
-           goto cleanup;
-       }
-       i = 0;
-    }
-
-    p = ucs = malloc(len * sizeof(*ucs));
-    if (ucs == NULL) {
-       retval = ENOMEM;
-       goto cleanup;
-    }
-    /* convert character before first non-ascii to ucs-4 */
-    if (i > 0) {
-       *p = casefold ? TOLOWER(s[i - 1]) : s[i - 1];
-       p++;
-    }
-    /* s[i] is now first non-ascii character */
-    for (;;) {
-       /* s[i] is non-ascii */
-       /* convert everything up to next ascii to ucs-4 */
-       while (i < len) {
-           /* KRB5_UTF8_CHARLEN only looks at the first byte; use it to guard
-            * against small read overruns. */
-           if (KRB5_UTF8_CHARLEN(s + i) > len - i) {
-               retval = KRB5_ERR_INVALID_UTF8;
-               goto cleanup;
-           }
-           clen = KRB5_UTF8_CHARLEN2(s + i, clen);
-           if (clen == 0) {
-               retval = KRB5_ERR_INVALID_UTF8;
-               goto cleanup;
-           }
-           if (clen == 1) {
-               /* ascii */
-               break;
-           }
-           *p = s[i] & mask[clen];
-           i++;
-           for (j = 1; j < clen; j++) {
-               if ((s[i] & 0xc0) != 0x80) {
-                   retval = KRB5_ERR_INVALID_UTF8;
-                   goto cleanup;
-               }
-               *p <<= 6;
-               *p |= s[i] & 0x3f;
-               i++;
-           }
-           if (casefold) {
-               *p = uctolower(*p);
-           }
-           p++;
-       }
-       /* normalize ucs of length p - ucs */
-       uccompatdecomp(ucs, p - ucs, &ucsout, &ucsoutlen);
-       if (approx) {
-           for (j = 0; j < ucsoutlen; j++) {
-               if (ucsout[j] < 0x80) {
-                   out[outpos++] = ucsout[j];
-               }
-           }
-       } else {
-           ucsoutlen = uccanoncomp(ucsout, ucsoutlen);
-           /* convert ucs to utf-8 and store in out */
-           for (j = 0; j < ucsoutlen; j++) {
-               /*
-                * allocate more space if not enough room for 6 bytes and
-                * terminator
-                */
-               if (outsize - outpos < 7) {
-                   outsize = ucsoutlen - j + outpos + 6;
-                   outtmp = realloc(out, outsize);
-                   if (outtmp == NULL) {
-                       retval = ENOMEM;
-                       goto cleanup;
-                   }
-                   out = outtmp;
-               }
-               outpos += krb5int_ucs4_to_utf8(ucsout[j], &out[outpos]);
-           }
-       }
-
-       free(ucsout);
-       ucsout = NULL;
-
-       if (i == len) {
-           break;
-       }
-
-       /* Allocate more space in out if necessary */
-       if (len - i >= outsize - outpos) {
-           outsize += 1 + ((len - i) - (outsize - outpos));
-           outtmp = realloc(out, outsize);
-           if (outtmp == NULL) {
-               retval = ENOMEM;
-               goto cleanup;
-           }
-           out = outtmp;
-       }
-       /* s[i] is ascii */
-       /* finish off everything up to char before next non-ascii */
-       for (i++; (i < len) && KRB5_UTF8_ISASCII(s + i); i++) {
-           out[outpos++] = casefold ? TOLOWER(s[i - 1]) : s[i - 1];
-       }
-       if (i == len) {
-           out[outpos++] = casefold ? TOLOWER(s[len - 1]) : s[len - 1];
-           break;
-       }
-       /* convert character before next non-ascii to ucs-4 */
-       *ucs = casefold ? TOLOWER(s[i - 1]) : s[i - 1];
-       p = ucs + 1;
-    }
-
-cleanup:
-    free(ucs);
-    free(ucsout);
-    if (retval) {
-       free(out);
-       free(newdata);
-       return retval;
-    }
-    out[outpos] = '\0';
-    newdata->data = out;
-    newdata->length = outpos;
-    *newdataptr = newdata;
-    return 0;
-}
-
  /* compare UTF8-strings, optionally ignore casing */
  /* slow, should be optimized */
  int
diff --git a/src/lib/krb5/unicode/ure/README b/src/lib/krb5/unicode/ure/README

deleted file mode 100644 (file)

index c9918f5..0000000
--- a/src/lib/krb5/unicode/ure/README
+++ /dev/null
@@ -1,212 +0,0 @@
-#
-# $Id: README,v 1.3 1999/09/21 15:47:43 mleisher Exp $
-#
-# Copyright 1997, 1998, 1999 Computing Research Labs,
-# New Mexico State University
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-
-                       Unicode and Regular Expressions
-                                 Version 0.5
-
-This is a simple regular expression package for matching against Unicode text
-in UCS2 form.  The implementation of this URE package is a variation on the
-RE->DFA algorithm done by Mark Hopkins (markh@csd4.csd.uwm.edu).  Mark
-Hopkins' algorithm had the virtue of being very simple, so it was used as a
-model.
-
----------------------------------------------------------------------------
-
-Assumptions:
-
-  o  Regular expression and text already normalized.
-
-  o  Conversion to lower case assumes a 1-1 mapping.
-
-Definitions:
-
-  Separator - any one of U+2028, U+2029, '\n', '\r'.
-
-Operators:
-  .   - match any character.
-  *   - match zero or more of the last subexpression.
-  +   - match one or more of the last subexpression.
-  ?   - match zero or one of the last subexpression.
-  ()  - subexpression grouping.
-
-  Notes:
-
-    o  The "." operator normally does not match separators, but a flag is
-       available for the ure_exec() function that will allow this operator to
-       match a separator.
-
-Literals and Constants:
-
-  c       - literal UCS2 character.
-  \x....  - hexadecimal number of up to 4 digits.
-  \X....  - hexadecimal number of up to 4 digits.
-  \u....  - hexadecimal number of up to 4 digits.
-  \U....  - hexadecimal number of up to 4 digits.
-
-Character classes:
-
-  [...]           - Character class.
-  [^...]          - Negated character class.
-  \pN1,N2,...,Nn  - Character properties class.
-  \PN1,N2,...,Nn  - Negated character properties class.
-
-  POSIX character classes recognized:
-
-    :alnum:
-    :alpha:
-    :cntrl:
-    :digit:
-    :graph:
-    :lower:
-    :print:
-    :punct:
-    :space:
-    :upper:
-    :xdigit:
-
-  Notes:
-
-    o  Character property classes are \p or \P followed by a comma separated
-       list of integers between 1 and 32.  These integers are references to
-       the following character properties:
-
-        N      Character Property
-        --------------------------
-        1      _URE_NONSPACING
-        2      _URE_COMBINING
-        3      _URE_NUMDIGIT
-        4      _URE_NUMOTHER
-        5      _URE_SPACESEP
-        6      _URE_LINESEP
-        7      _URE_PARASEP
-        8      _URE_CNTRL
-        9      _URE_PUA
-        10     _URE_UPPER
-        11     _URE_LOWER
-        12     _URE_TITLE
-        13     _URE_MODIFIER
-        14     _URE_OTHERLETTER
-        15     _URE_DASHPUNCT
-        16     _URE_OPENPUNCT
-        17     _URE_CLOSEPUNCT
-        18     _URE_OTHERPUNCT
-        19     _URE_MATHSYM
-        20     _URE_CURRENCYSYM
-        21     _URE_OTHERSYM
-        22     _URE_LTR
-        23     _URE_RTL
-        24     _URE_EURONUM
-        25     _URE_EURONUMSEP
-        26     _URE_EURONUMTERM
-        27     _URE_ARABNUM
-        28     _URE_COMMONSEP
-        29     _URE_BLOCKSEP
-        30     _URE_SEGMENTSEP
-        31     _URE_WHITESPACE
-        32     _URE_OTHERNEUT
-
-    o  Character classes can contain literals, constants, and character
-       property classes. Example:
-
-       [abc\U10A\p1,3,4]
-
----------------------------------------------------------------------------
-
-Before using URE
-----------------
-Before URE is used, two functions need to be created.  One to check if a
-character matches a set of URE character properties, and one to convert a
-character to lower case.
-
-Stubs for these function are located in the urestubs.c file.
-
-Using URE
----------
-
-Sample pseudo-code fragment.
-
-  ure_buffer_t rebuf;
-  ure_dfa_t dfa;
-  ucs2_t *re, *text;
-  unsigned long relen, textlen;
-  unsigned long match_start, match_end;
-
-  /*
-   * Allocate the dynamic storage needed to compile regular expressions.
-   */
-  rebuf = ure_buffer_create();
-
-  for each regular expression in a list {
-      re = next regular expression;
-      relen = length(re);
-
-      /*
-       * Compile the regular expression with the case insensitive flag
-       * turned on.
-       */
-      dfa = ure_compile(re, relen, 1, rebuf);
-
-      /*
-       * Look for the first match in some text.  The matching will be done
-       * in a case insensitive manner because the expression was compiled
-       * with the case insensitive flag on.
-       */
-      if (ure_exec(dfa, 0, text, textlen, &match_start, &match_end))
-        printf("MATCH: %ld %ld\n", match_start, match_end);
-
-      /*
-       * Look for the first match in some text, ignoring non-spacing
-       * characters.
-       */
-      if (ure_exec(dfa, URE_IGNORE_NONSPACING, text, textlen,
-                   &match_start, &match_end))
-        printf("MATCH: %ld %ld\n", match_start, match_end);
-
-      /*
-       * Free the DFA.
-       */
-      ure_free_dfa(dfa);
-  }
-
-  /*
-   * Free the dynamic storage used for compiling the expressions.
-   */
-  ure_free_buffer(rebuf);
-
----------------------------------------------------------------------------
-
-Mark Leisher <mleisher@crl.nmsu.edu>
-29 March 1997
-
-===========================================================================
-
-CHANGES
--------
-
-Version: 0.5
-Date   : 21 September 1999
-==========================
-  1. Added copyright stuff and put in CVS.
diff --git a/src/lib/krb5/unicode/ure/ure.c b/src/lib/krb5/unicode/ure/ure.c

deleted file mode 100644 (file)

index 7b30487..0000000
--- a/src/lib/krb5/unicode/ure/ure.c
+++ /dev/null
@@ -1,2139 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/ure.c,v 1.19 2008/01/07 23:20:05 kurt Exp $
- * $Id: ure.c,v 1.2 1999/09/21 15:47:43 mleisher Exp $"
- */
-
-#include <k5-int.h>
-
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include "ure.h"
-
-/*
- * Flags used internally in the DFA.
- */
-#define _URE_DFA_CASEFOLD  0x01
-#define _URE_DFA_BLANKLINE 0x02
-
-static unsigned long cclass_flags[] = {
-    0,
-    _URE_NONSPACING,
-    _URE_COMBINING,
-    _URE_NUMDIGIT,
-    _URE_NUMOTHER,
-    _URE_SPACESEP,
-    _URE_LINESEP,
-    _URE_PARASEP,
-    _URE_CNTRL,
-    _URE_PUA,
-    _URE_UPPER,
-    _URE_LOWER,
-    _URE_TITLE,
-    _URE_MODIFIER,
-    _URE_OTHERLETTER,
-    _URE_DASHPUNCT,
-    _URE_OPENPUNCT,
-    _URE_CLOSEPUNCT,
-    _URE_OTHERPUNCT,
-    _URE_MATHSYM,
-    _URE_CURRENCYSYM,
-    _URE_OTHERSYM,
-    _URE_LTR,
-    _URE_RTL,
-    _URE_EURONUM,
-    _URE_EURONUMSEP,
-    _URE_EURONUMTERM,
-    _URE_ARABNUM,
-    _URE_COMMONSEP,
-    _URE_BLOCKSEP,
-    _URE_SEGMENTSEP,
-    _URE_WHITESPACE,
-    _URE_OTHERNEUT,
-};
-
-/*
- * Symbol types for the DFA.
- */
-#define _URE_ANY_CHAR   1
-#define _URE_CHAR       2
-#define _URE_CCLASS     3
-#define _URE_NCCLASS    4
-#define _URE_BOL_ANCHOR 5
-#define _URE_EOL_ANCHOR 6
-
-/*
- * Op codes for converting the NFA to a DFA.
- */
-#define _URE_SYMBOL     10
-#define _URE_PAREN      11
-#define _URE_QUEST      12
-#define _URE_STAR       13
-#define _URE_PLUS       14
-#define _URE_ONE        15
-#define _URE_AND        16
-#define _URE_OR         17
-
-#define _URE_NOOP       0xffff
-
-#define _URE_REGSTART 0x8000
-#define _URE_REGEND   0x4000
-
-/*
- * Structure used to handle a compacted range of characters.
- */
-typedef struct {
-    ucs4_t min_code;
-    ucs4_t max_code;
-} _ure_range_t;
-
-typedef struct {
-    _ure_range_t *ranges;
-    ucs2_t ranges_used;
-    ucs2_t ranges_size;
-} _ure_ccl_t;
-
-typedef union {
-    ucs4_t chr;
-    _ure_ccl_t ccl;
-} _ure_sym_t;
-
-/*
- * This is a general element structure used for expressions and stack
- * elements.
- */
-typedef struct {
-    ucs2_t reg;
-    ucs2_t onstack;
-    ucs2_t type;
-    ucs2_t lhs;
-    ucs2_t rhs;
-} _ure_elt_t;
-
-/*
- * This is a structure used to track a list or a stack of states.
- */
-typedef struct {
-    ucs2_t *slist;
-    ucs2_t slist_size;
-    ucs2_t slist_used;
-} _ure_stlist_t;
-
-/*
- * Structure to track the list of unique states for a symbol
- * during reduction.
- */
-typedef struct {
-    ucs2_t id;
-    ucs2_t type;
-    unsigned long mods;
-    unsigned long props;
-    _ure_sym_t sym;
-    _ure_stlist_t states;
-} _ure_symtab_t;
-
-/*
- * Structure to hold a single state.
- */
-typedef struct {
-    ucs2_t id;
-    ucs2_t accepting;
-    ucs2_t pad;
-    _ure_stlist_t st;
-    _ure_elt_t *trans;
-    ucs2_t trans_size;
-    ucs2_t trans_used;
-} _ure_state_t;
-
-/*
- * Structure used for keeping lists of states.
- */
-typedef struct {
-    _ure_state_t *states;
-    ucs2_t states_size;
-    ucs2_t states_used;
-} _ure_statetable_t;
-
-/*
- * Structure to track pairs of DFA states when equivalent states are
- * merged.
- */
-typedef struct {
-    ucs2_t l;
-    ucs2_t r;
-} _ure_equiv_t;
-
-/*
- * Structure used for constructing the NFA and reducing to a minimal DFA.
- */
-typedef struct _ure_buffer_t {
-    int reducing;
-    int error;
-    unsigned long flags;
-
-    _ure_stlist_t stack;
-
-    /*
-     * Table of unique symbols encountered.
-     */
-    _ure_symtab_t *symtab;
-    ucs2_t symtab_size;
-    ucs2_t symtab_used;
-
-    /*
-     * Tracks the unique expressions generated for the NFA and when the NFA is
-     * reduced.
-     */
-    _ure_elt_t *expr;
-    ucs2_t expr_used;
-    ucs2_t expr_size;
-
-    /*
-     * The reduced table of unique groups of NFA states.
-     */
-    _ure_statetable_t states;
-
-    /*
-     * Tracks states when equivalent states are merged.
-     */
-    _ure_equiv_t *equiv;
-    ucs2_t equiv_used;
-    ucs2_t equiv_size;
-} _ure_buffer_t;
-
-typedef struct {
-    ucs2_t symbol;
-    ucs2_t next_state;
-} _ure_trans_t;
-
-typedef struct {
-    ucs2_t accepting;
-    ucs2_t ntrans;
-    _ure_trans_t *trans;
-} _ure_dstate_t;
-
-typedef struct _ure_dfa_t {
-    unsigned long flags;
-
-    _ure_symtab_t *syms;
-    ucs2_t nsyms;
-
-    _ure_dstate_t *states;
-    ucs2_t nstates;
-
-    _ure_trans_t *trans;
-    ucs2_t ntrans;
-} _ure_dfa_t;
-
-/*************************************************************************
- *
- * Functions.
- *
- *************************************************************************/
-
-static void
-_ure_memmove(char *dest, char *src, unsigned long bytes)
-{
-    long i, j;
-
-    i = (long) bytes;
-    j = i & 7;
-    i = (i + 7) >> 3;
-
-    /*
-     * Do a memmove using Ye Olde Duff's Device for efficiency.
-     */
-    if (src < dest) {
-        src += bytes;
-        dest += bytes;
-
-        switch (j) {
-          case 0: do {
-              *--dest = *--src;
-            case 7: *--dest = *--src;
-            case 6: *--dest = *--src;
-            case 5: *--dest = *--src;
-            case 4: *--dest = *--src;
-            case 3: *--dest = *--src;
-            case 2: *--dest = *--src;
-            case 1: *--dest = *--src;
-          } while (--i > 0);
-        }
-    } else if (src > dest) {
-        switch (j) {
-          case 0: do {
-              *dest++ = *src++;
-            case 7: *dest++ = *src++;
-            case 6: *dest++ = *src++;
-            case 5: *dest++ = *src++;
-            case 4: *dest++ = *src++;
-            case 3: *dest++ = *src++;
-            case 2: *dest++ = *src++;
-            case 1: *dest++ = *src++;
-          } while (--i > 0);
-        }
-    }
-}
-
-static void
-_ure_push(ucs2_t v, _ure_buffer_t *b)
-{
-    _ure_stlist_t *s;
-
-    if (b == 0)
-      return;
-
-    /*
-     * If the `reducing' parameter is non-zero, check to see if the value
-     * passed is already on the stack.
-     */
-    if (b->reducing != 0 && b->expr[v].onstack != 0)
-      return;
-
-    s = &b->stack;
-    if (s->slist_used == s->slist_size) {
-        if (s->slist_size == 0)
-          s->slist = (ucs2_t *) malloc(sizeof(ucs2_t) << 3);
-        else
-          s->slist = (ucs2_t *) realloc((char *) s->slist,
-                                        sizeof(ucs2_t) * (s->slist_size + 8));
-        s->slist_size += 8;
-    }
-    s->slist[s->slist_used++] = v;
-
-    /*
-     * If the `reducing' parameter is non-zero, flag the element as being on
-     * the stack.
-     */
-    if (b->reducing != 0)
-      b->expr[v].onstack = 1;
-}
-
-static ucs2_t
-_ure_peek(_ure_buffer_t *b)
-{
-    if (b == 0 || b->stack.slist_used == 0)
-      return _URE_NOOP;
-
-    return b->stack.slist[b->stack.slist_used - 1];
-}
-
-static ucs2_t
-_ure_pop(_ure_buffer_t *b)
-{
-    ucs2_t v;
-
-    if (b == 0 || b->stack.slist_used == 0)
-      return _URE_NOOP;
-
-    v = b->stack.slist[--b->stack.slist_used];
-    if (b->reducing)
-      b->expr[v].onstack = 0;
-
-    return v;
-}
-
-/*************************************************************************
- *
- * Start symbol parse functions.
- *
- *************************************************************************/
-
-/*
- * Parse a comma-separated list of integers that represent character
- * properties.  Combine them into a mask that is returned in the `mask'
- * variable, and return the number of characters consumed.
- */
-static unsigned long
-_ure_prop_list(ucs2_t *pp, unsigned long limit, unsigned long *mask,
-               _ure_buffer_t *b)
-{
-    unsigned long n, m;
-    ucs2_t *sp, *ep;
-
-    sp = pp;
-    ep = sp + limit;
-
-    for (m = n = 0; b->error == _URE_OK && sp < ep; sp++) {
-        if (*sp == ',') {
-            /*
-             * Encountered a comma, so select the next character property flag
-             * and reset the number.
-             */
-            m |= cclass_flags[n];
-            n = 0;
-        } else if (*sp >= '0' && *sp <= '9')
-          /*
-           * Encountered a digit, so start or continue building the cardinal
-           * that represents the character property flag.
-           */
-          n = (n * 10) + (*sp - '0');
-        else
-          /*
-           * Encountered something that is not part of the property list.
-           * Indicate that we are done.
-           */
-          break;
-
-        /*
-         * If a property number greater than 32 occurs, then there is a
-         * problem.  Most likely a missing comma separator.
-         */
-        if (n > 32)
-          b->error = _URE_INVALID_PROPERTY;
-    }
-
-    if (b->error == _URE_OK && n != 0)
-      m |= cclass_flags[n];
-
-    /*
-     * Set the mask that represents the group of character properties.
-     */
-    *mask = m;
-
-    /*
-     * Return the number of characters consumed.
-     */
-    return sp - pp;
-}
-
-/*
- * Collect a hex number with 1 to 4 digits and return the number
- * of characters used.
- */
-static unsigned long
-_ure_hex(ucs2_t *np, unsigned long limit, ucs4_t *n)
-{
-    ucs2_t i;
-    ucs2_t *sp, *ep;
-    ucs4_t nn;
-
-    sp = np;
-    ep = sp + limit;
-
-    for (nn = 0, i = 0; i < 4 && sp < ep; i++, sp++) {
-        if (*sp >= '0' && *sp <= '9')
-          nn = (nn << 4) + (*sp - '0');
-        else if (*sp >= 'A' && *sp <= 'F')
-          nn = (nn << 4) + ((*sp - 'A') + 10);
-        else if (*sp >= 'a' && *sp <= 'f')
-          nn = (nn << 4) + ((*sp - 'a') + 10);
-        else
-          /*
-           * Encountered something that is not a hex digit.
-           */
-          break;
-    }
-
-    /*
-     * Assign the character code collected and return the number of
-     * characters used.
-     */
-    *n = nn;
-
-    return sp - np;
-}
-
-/*
- * Insert a range into a character class, removing duplicates and ordering
- * them in increasing range-start order.
- */
-static void
-_ure_add_range(_ure_ccl_t *ccl, _ure_range_t *r, _ure_buffer_t *b)
-{
-    ucs2_t i;
-    ucs4_t tmp;
-    _ure_range_t *rp;
-
-    /*
-     * If the `casefold' flag is set, then make sure both endpoints of the
-     * range are converted to lower case.
-     */
-    if (b->flags & _URE_DFA_CASEFOLD) {
-        r->min_code = _ure_tolower(r->min_code);
-        r->max_code = _ure_tolower(r->max_code);
-    }
-
-    /*
-     * Swap the range endpoints if they are not in increasing order.
-     */
-    if (r->min_code > r->max_code) {
-        tmp = r->min_code;
-        r->min_code = r->max_code;
-        r->max_code = tmp;
-    }
-
-    for (i = 0, rp = ccl->ranges;
-         i < ccl->ranges_used && r->min_code < rp->min_code; i++, rp++) ;
-
-    /*
-     * Check for a duplicate.
-     */
-    if (i < ccl->ranges_used &&
-        r->min_code == rp->min_code && r->max_code == rp->max_code)
-      return;
-
-    if (ccl->ranges_used == ccl->ranges_size) {
-        if (ccl->ranges_size == 0)
-          ccl->ranges = (_ure_range_t *) malloc(sizeof(_ure_range_t) << 3);
-        else
-          ccl->ranges = (_ure_range_t *)
-              realloc((char *) ccl->ranges,
-                      sizeof(_ure_range_t) * (ccl->ranges_size + 8));
-        ccl->ranges_size += 8;
-    }
-
-    rp = ccl->ranges + ccl->ranges_used;
-
-    if (i < ccl->ranges_used)
-      _ure_memmove((char *) (rp + 1), (char *) rp,
-                   sizeof(_ure_range_t) * (ccl->ranges_used - i));
-
-    ccl->ranges_used++;
-    rp->min_code = r->min_code;
-    rp->max_code = r->max_code;
-}
-
-#define _URE_ALPHA_MASK  (_URE_UPPER|_URE_LOWER|_URE_OTHERLETTER|\
-_URE_MODIFIER|_URE_TITLE|_URE_NONSPACING|_URE_COMBINING)
-#define _URE_ALNUM_MASK  (_URE_ALPHA_MASK|_URE_NUMDIGIT)
-#define _URE_PUNCT_MASK  (_URE_DASHPUNCT|_URE_OPENPUNCT|_URE_CLOSEPUNCT|\
-_URE_OTHERPUNCT)
-#define _URE_GRAPH_MASK (_URE_NUMDIGIT|_URE_NUMOTHER|_URE_ALPHA_MASK|\
-_URE_MATHSYM|_URE_CURRENCYSYM|_URE_OTHERSYM)
-#define _URE_PRINT_MASK (_URE_GRAPH_MASK|_URE_SPACESEP)
-#define _URE_SPACE_MASK  (_URE_SPACESEP|_URE_LINESEP|_URE_PARASEP)
-
-typedef void (*_ure_cclsetup_t)(
-    _ure_symtab_t *sym,
-    unsigned long mask,
-    _ure_buffer_t *b
-);
-
-typedef struct {
-    ucs2_t key;
-    unsigned int len : 8;
-    unsigned int next : 8;
-    _ure_cclsetup_t func;
-    unsigned long mask;
-} _ure_trie_t;
-
-static void
-_ure_ccl_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b)
-{
-    sym->props |= mask;
-}
-
-static void
-_ure_space_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b)
-{
-    _ure_range_t range;
-
-    sym->props |= mask;
-
-    /*
-     * Add the additional characters needed for handling isspace().
-     */
-    range.min_code = range.max_code = '\t';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-    range.min_code = range.max_code = '\r';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-    range.min_code = range.max_code = '\n';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-    range.min_code = range.max_code = '\f';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-    range.min_code = range.max_code = 0xfeff;
-    _ure_add_range(&sym->sym.ccl, &range, b);
-}
-
-static void
-_ure_xdigit_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b)
-{
-    _ure_range_t range;
-
-    /*
-     * Add the additional characters needed for handling isxdigit().
-     */
-    range.min_code = '0';
-    range.max_code = '9';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-    range.min_code = 'A';
-    range.max_code = 'F';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-    range.min_code = 'a';
-    range.max_code = 'f';
-    _ure_add_range(&sym->sym.ccl, &range, b);
-}
-
-static const _ure_trie_t cclass_trie[] = {
-    {0x003a, 1, 1, 0, 0},
-    {0x0061, 9, 10, 0, 0},
-    {0x0063, 8, 19, 0, 0},
-    {0x0064, 7, 24, 0, 0},
-    {0x0067, 6, 29, 0, 0},
-    {0x006c, 5, 34, 0, 0},
-    {0x0070, 4, 39, 0, 0},
-    {0x0073, 3, 49, 0, 0},
-    {0x0075, 2, 54, 0, 0},
-    {0x0078, 1, 59, 0, 0},
-    {0x006c, 1, 11, 0, 0},
-    {0x006e, 2, 13, 0, 0},
-    {0x0070, 1, 16, 0, 0},
-    {0x0075, 1, 14, 0, 0},
-    {0x006d, 1, 15, 0, 0},
-    {0x003a, 1, 16, _ure_ccl_setup, _URE_ALNUM_MASK},
-    {0x0068, 1, 17, 0, 0},
-    {0x0061, 1, 18, 0, 0},
-    {0x003a, 1, 19, _ure_ccl_setup, _URE_ALPHA_MASK},
-    {0x006e, 1, 20, 0, 0},
-    {0x0074, 1, 21, 0, 0},
-    {0x0072, 1, 22, 0, 0},
-    {0x006c, 1, 23, 0, 0},
-    {0x003a, 1, 24, _ure_ccl_setup, _URE_CNTRL},
-    {0x0069, 1, 25, 0, 0},
-    {0x0067, 1, 26, 0, 0},
-    {0x0069, 1, 27, 0, 0},
-    {0x0074, 1, 28, 0, 0},
-    {0x003a, 1, 29, _ure_ccl_setup, _URE_NUMDIGIT},
-    {0x0072, 1, 30, 0, 0},
-    {0x0061, 1, 31, 0, 0},
-    {0x0070, 1, 32, 0, 0},
-    {0x0068, 1, 33, 0, 0},
-    {0x003a, 1, 34, _ure_ccl_setup, _URE_GRAPH_MASK},
-    {0x006f, 1, 35, 0, 0},
-    {0x0077, 1, 36, 0, 0},
-    {0x0065, 1, 37, 0, 0},
-    {0x0072, 1, 38, 0, 0},
-    {0x003a, 1, 39, _ure_ccl_setup, _URE_LOWER},
-    {0x0072, 2, 41, 0, 0},
-    {0x0075, 1, 45, 0, 0},
-    {0x0069, 1, 42, 0, 0},
-    {0x006e, 1, 43, 0, 0},
-    {0x0074, 1, 44, 0, 0},
-    {0x003a, 1, 45, _ure_ccl_setup, _URE_PRINT_MASK},
-    {0x006e, 1, 46, 0, 0},
-    {0x0063, 1, 47, 0, 0},
-    {0x0074, 1, 48, 0, 0},
-    {0x003a, 1, 49, _ure_ccl_setup, _URE_PUNCT_MASK},
-    {0x0070, 1, 50, 0, 0},
-    {0x0061, 1, 51, 0, 0},
-    {0x0063, 1, 52, 0, 0},
-    {0x0065, 1, 53, 0, 0},
-    {0x003a, 1, 54, _ure_space_setup, _URE_SPACE_MASK},
-    {0x0070, 1, 55, 0, 0},
-    {0x0070, 1, 56, 0, 0},
-    {0x0065, 1, 57, 0, 0},
-    {0x0072, 1, 58, 0, 0},
-    {0x003a, 1, 59, _ure_ccl_setup, _URE_UPPER},
-    {0x0064, 1, 60, 0, 0},
-    {0x0069, 1, 61, 0, 0},
-    {0x0067, 1, 62, 0, 0},
-    {0x0069, 1, 63, 0, 0},
-    {0x0074, 1, 64, 0, 0},
-    {0x003a, 1, 65, _ure_xdigit_setup, 0},
-};
-
-/*
- * Probe for one of the POSIX colon delimited character classes in the static
- * trie.
- */
-static unsigned long
-_ure_posix_ccl(ucs2_t *cp, unsigned long limit, _ure_symtab_t *sym,
-               _ure_buffer_t *b)
-{
-    int i;
-    unsigned long n;
-    const _ure_trie_t *tp;
-    ucs2_t *sp, *ep;
-
-    /*
-     * If the number of characters left is less than 7, then this cannot be
-     * interpreted as one of the colon delimited classes.
-     */
-    if (limit < 7)
-      return 0;
-
-    sp = cp;
-    ep = sp + limit;
-    tp = cclass_trie;
-    for (i = 0; sp < ep && i < 8; i++, sp++) {
-        n = tp->len;
-
-        for (; n > 0 && tp->key != *sp; tp++, n--) ;
-
-        if (n == 0)
-          return 0;
-
-        if (*sp == ':' && (i == 6 || i == 7)) {
-            sp++;
-            break;
-        }
-        if (sp + 1 < ep)
-          tp = cclass_trie + tp->next;
-    }
-    if (tp->func == 0)
-      return 0;
-
-    (*tp->func)(sym, tp->mask, b);
-
-    return sp - cp;
-}
-
-/*
- * Construct a list of ranges and return the number of characters consumed.
- */
-static unsigned long
-_ure_cclass(ucs2_t *cp, unsigned long limit, _ure_symtab_t *symp,
-            _ure_buffer_t *b)
-{
-    int range_end;
-    unsigned long n;
-    ucs2_t *sp, *ep;
-    ucs4_t c, last;
-    _ure_ccl_t *cclp;
-    _ure_range_t range;
-
-    sp = cp;
-    ep = sp + limit;
-
-    if (*sp == '^') {
-      symp->type = _URE_NCCLASS;
-      sp++;
-    } else
-      symp->type = _URE_CCLASS;
-
-    for (last = 0, range_end = 0;
-         b->error == _URE_OK && sp < ep && *sp != ']'; ) {
-        c = *sp++;
-        if (c == '\\') {
-            if (sp == ep) {
-                /*
-                 * The EOS was encountered when expecting the reverse solidus
-                 * to be followed by the character it is escaping.  Set an
-                 * error code and return the number of characters consumed up
-                 * to this point.
-                 */
-                b->error = _URE_UNEXPECTED_EOS;
-                return sp - cp;
-            }
-
-            c = *sp++;
-            switch (c) {
-              case 'a':
-                c = 0x07;
-                break;
-              case 'b':
-                c = 0x08;
-                break;
-              case 'f':
-                c = 0x0c;
-                break;
-              case 'n':
-                c = 0x0a;
-                break;
-              case 'r':
-                c = 0x0d;
-                break;
-              case 't':
-                c = 0x09;
-                break;
-              case 'v':
-                c = 0x0b;
-                break;
-              case 'p':
-              case 'P':
-                sp += _ure_prop_list(sp, ep - sp, &symp->props, b);
-                /*
-                 * Invert the bit mask of the properties if this is a negated
-                 * character class or if 'P' is used to specify a list of
-                 * character properties that should *not* match in a
-                 * character class.
-                 */
-                if (c == 'P')
-                  symp->props = ~symp->props;
-                continue;
-                break;
-              case 'x':
-              case 'X':
-              case 'u':
-              case 'U':
-                if (sp < ep &&
-                    ((*sp >= '0' && *sp <= '9') ||
-                     (*sp >= 'A' && *sp <= 'F') ||
-                     (*sp >= 'a' && *sp <= 'f')))
-                  sp += _ure_hex(sp, ep - sp, &c);
-            }
-        } else if (c == ':') {
-            /*
-             * Probe for a POSIX colon delimited character class.
-             */
-            sp--;
-            if ((n = _ure_posix_ccl(sp, ep - sp, symp, b)) == 0)
-              sp++;
-            else {
-                sp += n;
-                continue;
-            }
-        }
-
-        cclp = &symp->sym.ccl;
-
-        /*
-         * Check to see if the current character is a low surrogate that needs
-         * to be combined with a preceding high surrogate.
-         */
-        if (last != 0) {
-            if (c >= 0xdc00 && c <= 0xdfff)
-              /*
-               * Construct the UTF16 character code.
-               */
-              c = 0x10000 + (((last & 0x03ff) << 10) | (c & 0x03ff));
-            else {
-                /*
-                 * Add the isolated high surrogate to the range.
-                 */
-                if (range_end == 1)
-                  range.max_code = last & 0xffff;
-                else
-                  range.min_code = range.max_code = last & 0xffff;
-
-                _ure_add_range(cclp, &range, b);
-                range_end = 0;
-            }
-        }
-
-        /*
-         * Clear the last character code.
-         */
-        last = 0;
-
-        /*
-         * This slightly awkward code handles the different cases needed to
-         * construct a range.
-         */
-        if (c >= 0xd800 && c <= 0xdbff) {
-            /*
-             * If the high surrogate is followed by a range indicator, simply
-             * add it as the range start.  Otherwise, save it in case the next
-             * character is a low surrogate.
-             */
-            if (*sp == '-') {
-                sp++;
-                range.min_code = c;
-                range_end = 1;
-            } else
-              last = c;
-        } else if (range_end == 1) {
-            range.max_code = c;
-            _ure_add_range(cclp, &range, b);
-            range_end = 0;
-        } else {
-            range.min_code = range.max_code = c;
-            if (*sp == '-') {
-                sp++;
-                range_end = 1;
-            } else
-              _ure_add_range(cclp, &range, b);
-        }
-    }
-
-    if (sp < ep && *sp == ']')
-      sp++;
-    else
-      /*
-       * The parse was not terminated by the character class close symbol
-       * (']'), so set an error code.
-       */
-      b->error = _URE_CCLASS_OPEN;
-
-    return sp - cp;
-}
-
-/*
- * Probe for a low surrogate hex code.
- */
-static unsigned long
-_ure_probe_ls(ucs2_t *ls, unsigned long limit, ucs4_t *c)
-{
-    ucs4_t i, code;
-    ucs2_t *sp, *ep;
-
-    for (i = code = 0, sp = ls, ep = sp + limit; i < 4 && sp < ep; sp++) {
-        if (*sp >= '0' && *sp <= '9')
-          code = (code << 4) + (*sp - '0');
-        else if (*sp >= 'A' && *sp <= 'F')
-          code = (code << 4) + ((*sp - 'A') + 10);
-        else if (*sp >= 'a' && *sp <= 'f')
-          code = (code << 4) + ((*sp - 'a') + 10);
-        else
-          break;
-    }
-
-    *c = code;
-    return (0xdc00 <= code && code <= 0xdfff) ? sp - ls : 0;
-}
-
-static unsigned long
-_ure_compile_symbol(ucs2_t *sym, unsigned long limit, _ure_symtab_t *symp,
-                    _ure_buffer_t *b)
-{
-    ucs4_t c;
-    ucs2_t *sp, *ep;
-
-    sp = sym;
-    ep = sym + limit;
-
-    if ((c = *sp++) == '\\') {
-
-        if (sp == ep) {
-            /*
-             * The EOS was encountered when expecting the reverse solidus to
-             * be followed by the character it is escaping.  Set an error code
-             * and return the number of characters consumed up to this point.
-             */
-            b->error = _URE_UNEXPECTED_EOS;
-            return sp - sym;
-        }
-
-        c = *sp++;
-        switch (c) {
-          case 'p':
-          case 'P':
-            symp->type = (c == 'p') ? _URE_CCLASS : _URE_NCCLASS;
-            sp += _ure_prop_list(sp, ep - sp, &symp->props, b);
-            break;
-          case 'a':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x07;
-            break;
-          case 'b':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x08;
-            break;
-          case 'f':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x0c;
-            break;
-          case 'n':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x0a;
-            break;
-          case 'r':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x0d;
-            break;
-          case 't':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x09;
-            break;
-          case 'v':
-            symp->type = _URE_CHAR;
-            symp->sym.chr = 0x0b;
-            break;
-          case 'x':
-          case 'X':
-          case 'u':
-          case 'U':
-            /*
-             * Collect between 1 and 4 digits representing a UCS2 code.  Fall
-             * through to the next case.
-             */
-            if (sp < ep &&
-                ((*sp >= '0' && *sp <= '9') ||
-                 (*sp >= 'A' && *sp <= 'F') ||
-                 (*sp >= 'a' && *sp <= 'f')))
-              sp += _ure_hex(sp, ep - sp, &c);
-            /* FALLTHROUGH */
-          default:
-            /*
-             * Simply add an escaped character here.
-             */
-            symp->type = _URE_CHAR;
-            symp->sym.chr = c;
-        }
-    } else if (c == '^' || c == '$')
-      /*
-       * Handle the BOL and EOL anchors.  This actually consists simply of
-       * setting a flag that indicates that the user supplied anchor match
-       * function should be called.  This needs to be done instead of simply
-       * matching line/paragraph separators because beginning-of-text and
-       * end-of-text tests are needed as well.
-       */
-      symp->type = (c == '^') ? _URE_BOL_ANCHOR : _URE_EOL_ANCHOR;
-    else if (c == '[')
-      /*
-       * Construct a character class.
-       */
-      sp += _ure_cclass(sp, ep - sp, symp, b);
-    else if (c == '.')
-      symp->type = _URE_ANY_CHAR;
-    else {
-        symp->type = _URE_CHAR;
-        symp->sym.chr = c;
-    }
-
-    /*
-     * If the symbol type happens to be a character and is a high surrogate,
-     * then probe forward to see if it is followed by a low surrogate that
-     * needs to be added.
-     */
-    if (sp < ep && symp->type == _URE_CHAR &&
-        0xd800 <= symp->sym.chr && symp->sym.chr <= 0xdbff) {
-
-        if (0xdc00 <= *sp && *sp <= 0xdfff) {
-            symp->sym.chr = 0x10000 + (((symp->sym.chr & 0x03ff) << 10) |
-                                       (*sp & 0x03ff));
-            sp++;
-        } else if (*sp == '\\' && (*(sp + 1) == 'x' || *(sp + 1) == 'X' ||
-                                 *(sp + 1) == 'u' || *(sp + 1) == 'U')) {
-            sp += _ure_probe_ls(sp + 2, ep - (sp + 2), &c);
-            if (0xdc00 <= c && c <= 0xdfff) {
-                /*
-                 * Take into account the \[xu] in front of the hex code.
-                 */
-                sp += 2;
-                symp->sym.chr = 0x10000 + (((symp->sym.chr & 0x03ff) << 10) |
-                                           (c & 0x03ff));
-            }
-        }
-    }
-
-    /*
-     * Last, make sure any _URE_CHAR type symbols are changed to lower case if
-     * the `casefold' flag is set.
-     */
-    if ((b->flags & _URE_DFA_CASEFOLD) && symp->type == _URE_CHAR)
-      symp->sym.chr = _ure_tolower(symp->sym.chr);
-
-    /*
-     * If the symbol constructed is anything other than one of the anchors,
-     * make sure the _URE_DFA_BLANKLINE flag is removed.
-     */
-    if (symp->type != _URE_BOL_ANCHOR && symp->type != _URE_EOL_ANCHOR)
-      b->flags &= ~_URE_DFA_BLANKLINE;
-
-    /*
-     * Return the number of characters consumed.
-     */
-    return sp - sym;
-}
-
-static int
-_ure_sym_neq(_ure_symtab_t *a, _ure_symtab_t *b)
-{
-    if (a->type != b->type || a->mods != b->mods || a->props != b->props)
-      return 1;
-
-    if (a->type == _URE_CCLASS || a->type == _URE_NCCLASS) {
-        if (a->sym.ccl.ranges_used != b->sym.ccl.ranges_used)
-          return 1;
-        if (a->sym.ccl.ranges_used > 0 &&
-            memcmp((char *) a->sym.ccl.ranges, (char *) b->sym.ccl.ranges,
-                   sizeof(_ure_range_t) * a->sym.ccl.ranges_used) != 0)
-          return 1;
-    } else if (a->type == _URE_CHAR && a->sym.chr != b->sym.chr)
-      return 1;
-    return 0;
-}
-
-/*
- * Construct a symbol, but only keep unique symbols.
- */
-static ucs2_t
-_ure_make_symbol(ucs2_t *sym, unsigned long limit, unsigned long *consumed,
-                 _ure_buffer_t *b)
-{
-    ucs2_t i;
-    _ure_symtab_t *sp, symbol;
-
-    /*
-     * Build the next symbol so we can test to see if it is already in the
-     * symbol table.
-     */
-    (void) memset((char *) &symbol, '\0', sizeof(_ure_symtab_t));
-    *consumed = _ure_compile_symbol(sym, limit, &symbol, b);
-
-    /*
-     * Check to see if the symbol exists.
-     */
-    for (i = 0, sp = b->symtab;
-         i < b->symtab_used && _ure_sym_neq(&symbol, sp); i++, sp++) ;
-
-    if (i < b->symtab_used) {
-        /*
-         * Free up any ranges used for the symbol.
-         */
-        if ((symbol.type == _URE_CCLASS || symbol.type == _URE_NCCLASS) &&
-            symbol.sym.ccl.ranges_size > 0)
-          free((char *) symbol.sym.ccl.ranges);
-
-        return b->symtab[i].id;
-    }
-
-    /*
-     * Need to add the new symbol.
-     */
-    if (b->symtab_used == b->symtab_size) {
-        if (b->symtab_size == 0)
-          b->symtab = (_ure_symtab_t *) malloc(sizeof(_ure_symtab_t) << 3);
-        else
-          b->symtab = (_ure_symtab_t *)
-              realloc((char *) b->symtab,
-                      sizeof(_ure_symtab_t) * (b->symtab_size + 8));
-        sp = b->symtab + b->symtab_size;
-        (void) memset((char *) sp, '\0', sizeof(_ure_symtab_t) << 3);
-        b->symtab_size += 8;
-    }
-
-    symbol.id = b->symtab_used++;
-    (void) memmove((char *) &b->symtab[symbol.id], (char *) &symbol,
-                   sizeof(_ure_symtab_t));
-
-    return symbol.id;
-}
-
-/*************************************************************************
- *
- * End symbol parse functions.
- *
- *************************************************************************/
-
-static ucs2_t
-_ure_make_expr(ucs2_t type, ucs2_t lhs, ucs2_t rhs, _ure_buffer_t *b)
-{
-    ucs2_t i;
-
-    if (b == 0)
-      return _URE_NOOP;
-
-    /*
-     * Determine if the expression already exists or not.
-     */
-    for (i = 0; i < b->expr_used; i++) {
-        if (b->expr[i].type == type && b->expr[i].lhs == lhs &&
-            b->expr[i].rhs == rhs)
-          break;
-    }
-    if (i < b->expr_used)
-      return i;
-
-    /*
-     * Need to add a new expression.
-     */
-    if (b->expr_used == b->expr_size) {
-        if (b->expr_size == 0)
-          b->expr = (_ure_elt_t *) malloc(sizeof(_ure_elt_t) << 3);
-        else
-          b->expr = (_ure_elt_t *)
-              realloc((char *) b->expr,
-                      sizeof(_ure_elt_t) * (b->expr_size + 8));
-        b->expr_size += 8;
-    }
-
-    b->expr[b->expr_used].onstack = 0;
-    b->expr[b->expr_used].type = type;
-    b->expr[b->expr_used].lhs = lhs;
-    b->expr[b->expr_used].rhs = rhs;
-
-    return b->expr_used++;
-}
-
-static unsigned char spmap[] = {
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
-#define _ure_isspecial(cc) ((cc) > 0x20 && (cc) < 0x7f && \
-                            (spmap[(cc) >> 3] & (1 << ((cc) & 7))))
-
-/*
- * Convert the regular expression into an NFA in a form that will be easy to
- * reduce to a DFA.  The starting state for the reduction will be returned.
- */
-static ucs2_t
-_ure_re2nfa(ucs2_t *re, unsigned long relen, _ure_buffer_t *b)
-{
-    ucs2_t c, state, top, sym, *sp, *ep;
-    unsigned long used;
-
-    state = _URE_NOOP;
-
-    sp = re;
-    ep = sp + relen;
-    while (b->error == _URE_OK && sp < ep) {
-        c = *sp++;
-        switch (c) {
-          case '(':
-            _ure_push(_URE_PAREN, b);
-            break;
-          case ')':
-            /*
-             * Check for the case of too many close parentheses.
-             */
-            if (_ure_peek(b) == _URE_NOOP) {
-                b->error = _URE_UNBALANCED_GROUP;
-                break;
-            }
-
-            while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)
-              /*
-               * Make an expression with the AND or OR operator and its right
-               * hand side.
-               */
-              state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);
-
-            /*
-             * Remove the _URE_PAREN off the stack.
-             */
-            (void) _ure_pop(b);
-            break;
-          case '*':
-            state = _ure_make_expr(_URE_STAR, state, _URE_NOOP, b);
-            break;
-          case '+':
-            state = _ure_make_expr(_URE_PLUS, state, _URE_NOOP, b);
-            break;
-          case '?':
-            state = _ure_make_expr(_URE_QUEST, state, _URE_NOOP, b);
-            break;
-          case '|':
-            while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)
-              /*
-               * Make an expression with the AND or OR operator and its right
-               * hand side.
-               */
-              state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);
-
-            _ure_push(state, b);
-            _ure_push(_URE_OR, b);
-            break;
-          default:
-            sp--;
-            sym = _ure_make_symbol(sp, ep - sp, &used, b);
-            sp += used;
-            state = _ure_make_expr(_URE_SYMBOL, sym, _URE_NOOP, b);
-            break;
-        }
-
-        if (c != '(' && c != '|' && sp < ep &&
-            (!_ure_isspecial(*sp) || *sp == '(')) {
-            _ure_push(state, b);
-            _ure_push(_URE_AND, b);
-        }
-    }
-    while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)
-      /*
-       * Make an expression with the AND or OR operator and its right
-       * hand side.
-       */
-      state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);
-
-    if (b->stack.slist_used > 0)
-      b->error = _URE_UNBALANCED_GROUP;
-
-    return (b->error == _URE_OK) ? state : _URE_NOOP;
-}
-
-static void
-_ure_add_symstate(ucs2_t sym, ucs2_t state, _ure_buffer_t *b)
-{
-    ucs2_t i, *stp;
-    _ure_symtab_t *sp;
-
-    /*
-     * Locate the symbol in the symbol table so the state can be added.
-     * If the symbol doesn't exist, then a real problem exists.
-     */
-    for (i = 0, sp = b->symtab; i < b->symtab_used && sym != sp->id;
-         i++, sp++) ;
-
-    /*
-     * Now find out if the state exists in the symbol's state list.
-     */
-    for (i = 0, stp = sp->states.slist;
-         i < sp->states.slist_used && state > *stp; i++, stp++) ;
-
-    if (i == sp->states.slist_used || state < *stp) {
-        /*
-         * Need to add the state in order.
-         */
-        if (sp->states.slist_used == sp->states.slist_size) {
-            if (sp->states.slist_size == 0)
-              sp->states.slist = (ucs2_t *) malloc(sizeof(ucs2_t) << 3);
-            else
-              sp->states.slist = (ucs2_t *)
-                  realloc((char *) sp->states.slist,
-                          sizeof(ucs2_t) * (sp->states.slist_size + 8));
-            sp->states.slist_size += 8;
-        }
-        if (i < sp->states.slist_used)
-          (void) _ure_memmove((char *) (sp->states.slist + i + 1),
-                              (char *) (sp->states.slist + i),
-                              sizeof(ucs2_t) * (sp->states.slist_used - i));
-        sp->states.slist[i] = state;
-        sp->states.slist_used++;
-    }
-}
-
-static ucs2_t
-_ure_add_state(ucs2_t nstates, ucs2_t *states, _ure_buffer_t *b)
-{
-    ucs2_t i;
-    _ure_state_t *sp;
-
-    for (i = 0, sp = b->states.states; i < b->states.states_used; i++, sp++) {
-        if (sp->st.slist_used == nstates &&
-            memcmp((char *) states, (char *) sp->st.slist,
-                   sizeof(ucs2_t) * nstates) == 0)
-          break;
-    }
-
-    if (i == b->states.states_used) {
-        /*
-         * Need to add a new DFA state (set of NFA states).
-         */
-        if (b->states.states_used == b->states.states_size) {
-            if (b->states.states_size == 0)
-              b->states.states = (_ure_state_t *)
-                  malloc(sizeof(_ure_state_t) << 3);
-            else
-              b->states.states = (_ure_state_t *)
-                  realloc((char *) b->states.states,
-                          sizeof(_ure_state_t) * (b->states.states_size + 8));
-            sp = b->states.states + b->states.states_size;
-            (void) memset((char *) sp, '\0', sizeof(_ure_state_t) << 3);
-            b->states.states_size += 8;
-        }
-
-        sp = b->states.states + b->states.states_used++;
-        sp->id = i;
-
-        if (sp->st.slist_used + nstates > sp->st.slist_size) {
-            if (sp->st.slist_size == 0)
-              sp->st.slist = (ucs2_t *)
-                  malloc(sizeof(ucs2_t) * (sp->st.slist_used + nstates));
-            else
-              sp->st.slist = (ucs2_t *)
-                  realloc((char *) sp->st.slist,
-                          sizeof(ucs2_t) * (sp->st.slist_used + nstates));
-            sp->st.slist_size = sp->st.slist_used + nstates;
-        }
-        sp->st.slist_used = nstates;
-        (void) memmove((char *) sp->st.slist, (char *) states,
-                       sizeof(ucs2_t) * nstates);
-    }
-
-    /*
-     * Return the ID of the DFA state representing a group of NFA states.
-     */
-    return i;
-}
-
-static void
-_ure_reduce(ucs2_t start, _ure_buffer_t *b)
-{
-    ucs2_t i, j, state, eval, syms, rhs;
-    ucs2_t s1, s2, ns1, ns2;
-    _ure_state_t *sp;
-    _ure_symtab_t *smp;
-
-    b->reducing = 1;
-
-    /*
-     * Add the starting state for the reduction.
-     */
-    _ure_add_state(1, &start, b);
-
-    /*
-     * Process each set of NFA states that get created.
-     */
-    for (i = 0; i < b->states.states_used; i++) {
-        sp = b->states.states + i;
-
-        /*
-         * Push the current states on the stack.
-         */
-        for (j = 0; j < sp->st.slist_used; j++)
-          _ure_push(sp->st.slist[j], b);
-
-        /*
-         * Reduce the NFA states.
-         */
-        for (j = sp->accepting = syms = 0; j < b->stack.slist_used; j++) {
-            state = b->stack.slist[j];
-            eval = 1;
-
-            /*
-             * This inner loop is the iterative equivalent of recursively
-             * reducing subexpressions generated as a result of a reduction.
-             */
-            while (eval) {
-                switch (b->expr[state].type) {
-                  case _URE_SYMBOL:
-                    ns1 = _ure_make_expr(_URE_ONE, _URE_NOOP, _URE_NOOP, b);
-                    _ure_add_symstate(b->expr[state].lhs, ns1, b);
-                    syms++;
-                    eval = 0;
-                    break;
-                  case _URE_ONE:
-                    sp->accepting = 1;
-                    eval = 0;
-                    break;
-                  case _URE_QUEST:
-                    s1 = b->expr[state].lhs;
-                    ns1 = _ure_make_expr(_URE_ONE, _URE_NOOP, _URE_NOOP, b);
-                    state = _ure_make_expr(_URE_OR, ns1, s1, b);
-                    break;
-                  case _URE_PLUS:
-                    s1 = b->expr[state].lhs;
-                    ns1 = _ure_make_expr(_URE_STAR, s1, _URE_NOOP, b);
-                    state = _ure_make_expr(_URE_AND, s1, ns1, b);
-                    break;
-                  case _URE_STAR:
-                    s1 = b->expr[state].lhs;
-                    ns1 = _ure_make_expr(_URE_ONE, _URE_NOOP, _URE_NOOP, b);
-                    ns2 = _ure_make_expr(_URE_PLUS, s1, _URE_NOOP, b);
-                    state = _ure_make_expr(_URE_OR, ns1, ns2, b);
-                    break;
-                  case _URE_OR:
-                    s1 = b->expr[state].lhs;
-                    s2 = b->expr[state].rhs;
-                    _ure_push(s1, b);
-                    _ure_push(s2, b);
-                    eval = 0;
-                    break;
-                  case _URE_AND:
-                    s1 = b->expr[state].lhs;
-                    s2 = b->expr[state].rhs;
-                    switch (b->expr[s1].type) {
-                      case _URE_SYMBOL:
-                        _ure_add_symstate(b->expr[s1].lhs, s2, b);
-                        syms++;
-                        eval = 0;
-                        break;
-                      case _URE_ONE:
-                        state = s2;
-                        break;
-                      case _URE_QUEST:
-                        ns1 = b->expr[s1].lhs;
-                        ns2 = _ure_make_expr(_URE_AND, ns1, s2, b);
-                        state = _ure_make_expr(_URE_OR, s2, ns2, b);
-                        break;
-                      case _URE_PLUS:
-                        ns1 = b->expr[s1].lhs;
-                        ns2 = _ure_make_expr(_URE_OR, s2, state, b);
-                        state = _ure_make_expr(_URE_AND, ns1, ns2, b);
-                        break;
-                      case _URE_STAR:
-                        ns1 = b->expr[s1].lhs;
-                        ns2 = _ure_make_expr(_URE_AND, ns1, state, b);
-                        state = _ure_make_expr(_URE_OR, s2, ns2, b);
-                        break;
-                      case _URE_OR:
-                        ns1 = b->expr[s1].lhs;
-                        ns2 = b->expr[s1].rhs;
-                        ns1 = _ure_make_expr(_URE_AND, ns1, s2, b);
-                        ns2 = _ure_make_expr(_URE_AND, ns2, s2, b);
-                        state = _ure_make_expr(_URE_OR, ns1, ns2, b);
-                        break;
-                      case _URE_AND:
-                        ns1 = b->expr[s1].lhs;
-                        ns2 = b->expr[s1].rhs;
-                        ns2 = _ure_make_expr(_URE_AND, ns2, s2, b);
-                        state = _ure_make_expr(_URE_AND, ns1, ns2, b);
-                        break;
-                    }
-                }
-            }
-        }
-
-        /*
-         * Clear the state stack.
-         */
-        while (_ure_pop(b) != _URE_NOOP) ;
-
-        /*
-         * Reset the state pointer because the reduction may have moved it
-         * during a reallocation.
-         */
-        sp = b->states.states + i;
-
-        /*
-         * Generate the DFA states for the symbols collected during the
-         * current reduction.
-         */
-        if (sp->trans_used + syms > sp->trans_size) {
-            if (sp->trans_size == 0)
-              sp->trans = (_ure_elt_t *)
-                  malloc(sizeof(_ure_elt_t) * (sp->trans_used + syms));
-            else
-              sp->trans = (_ure_elt_t *)
-                  realloc((char *) sp->trans,
-                          sizeof(_ure_elt_t) * (sp->trans_used + syms));
-            sp->trans_size = sp->trans_used + syms;
-        }
-
-        /*
-         * Go through the symbol table and generate the DFA state transitions
-         * for each symbol that has collected NFA states.
-         */
-        for (j = syms = 0, smp = b->symtab; j < b->symtab_used; j++, smp++) {
-            sp = b->states.states + i;
-
-            if (smp->states.slist_used > 0) {
-                sp->trans[syms].lhs = smp->id;
-                rhs = _ure_add_state(smp->states.slist_used,
-                                     smp->states.slist, b);
-                /*
-                 * Reset the state pointer in case the reallocation moves it
-                 * in memory.
-                 */
-                sp = b->states.states + i;
-                sp->trans[syms].rhs = rhs;
-
-                smp->states.slist_used = 0;
-                syms++;
-            }
-        }
-
-        /*
-         * Set the number of transitions actually used.
-         */
-        sp->trans_used = syms;
-    }
-    b->reducing = 0;
-}
-
-static void
-_ure_add_equiv(ucs2_t l, ucs2_t r, _ure_buffer_t *b)
-{
-    ucs2_t tmp;
-
-    l = b->states.states[l].id;
-    r = b->states.states[r].id;
-
-    if (l == r)
-      return;
-
-    if (l > r) {
-        tmp = l;
-        l = r;
-        r = tmp;
-    }
-
-    /*
-     * Check to see if the equivalence pair already exists.
-     */
-    for (tmp = 0; tmp < b->equiv_used &&
-             (b->equiv[tmp].l != l || b->equiv[tmp].r != r);
-         tmp++) ;
-
-    if (tmp < b->equiv_used)
-      return;
-
-    if (b->equiv_used == b->equiv_size) {
-        if (b->equiv_size == 0)
-          b->equiv = (_ure_equiv_t *) malloc(sizeof(_ure_equiv_t) << 3);
-        else
-          b->equiv = (_ure_equiv_t *) realloc((char *) b->equiv,
-                                              sizeof(_ure_equiv_t) *
-                                              (b->equiv_size + 8));
-        b->equiv_size += 8;
-    }
-    b->equiv[b->equiv_used].l = l;
-    b->equiv[b->equiv_used].r = r;
-    b->equiv_used++;
-}
-
-/*
- * Merge the DFA states that are equivalent.
- */
-static void
-_ure_merge_equiv(_ure_buffer_t *b)
-{
-    ucs2_t i, j, k, eq, done;
-    _ure_state_t *sp1, *sp2, *ls, *rs;
-
-    for (i = 0; i < b->states.states_used; i++) {
-        sp1 = b->states.states + i;
-        if (sp1->id != i)
-          continue;
-        for (j = 0; j < i; j++) {
-            sp2 = b->states.states + j;
-            if (sp2->id != j)
-              continue;
-            b->equiv_used = 0;
-            _ure_add_equiv(i, j, b);
-            for (eq = 0, done = 0; eq < b->equiv_used; eq++) {
-                ls = b->states.states + b->equiv[eq].l;
-                rs = b->states.states + b->equiv[eq].r;
-                if (ls->accepting != rs->accepting ||
-                    ls->trans_used != rs->trans_used) {
-                    done = 1;
-                    break;
-                }
-                for (k = 0; k < ls->trans_used &&
-                         ls->trans[k].lhs == rs->trans[k].lhs; k++) ;
-                if (k < ls->trans_used) {
-                    done = 1;
-                    break;
-                }
-
-                for (k = 0; k < ls->trans_used; k++)
-                  _ure_add_equiv(ls->trans[k].rhs, rs->trans[k].rhs, b);
-            }
-            if (done == 0)
-              break;
-        }
-        for (eq = 0; j < i && eq < b->equiv_used; eq++)
-          b->states.states[b->equiv[eq].r].id =
-              b->states.states[b->equiv[eq].l].id;
-    }
-
-    /*
-     * Renumber the states appropriately.
-     */
-    for (i = eq = 0, sp1 = b->states.states; i < b->states.states_used;
-         sp1++, i++)
-      sp1->id = (sp1->id == i) ? eq++ : b->states.states[sp1->id].id;
-}
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-ure_buffer_t
-ure_buffer_create(void)
-{
-    ure_buffer_t b;
-
-    b = (ure_buffer_t) calloc(1, sizeof(_ure_buffer_t));
-
-    return b;
-}
-
-void
-ure_buffer_free(ure_buffer_t buf)
-{
-    unsigned long i;
-
-    if (buf == 0)
-      return;
-
-    if (buf->stack.slist_size > 0)
-      free((char *) buf->stack.slist);
-
-    if (buf->expr_size > 0)
-      free((char *) buf->expr);
-
-    for (i = 0; i < buf->symtab_size; i++) {
-        if (buf->symtab[i].states.slist_size > 0)
-          free((char *) buf->symtab[i].states.slist);
-    }
-
-    if (buf->symtab_size > 0)
-      free((char *) buf->symtab);
-
-    for (i = 0; i < buf->states.states_size; i++) {
-        if (buf->states.states[i].trans_size > 0)
-          free((char *) buf->states.states[i].trans);
-        if (buf->states.states[i].st.slist_size > 0)
-          free((char *) buf->states.states[i].st.slist);
-    }
-
-    if (buf->states.states_size > 0)
-      free((char *) buf->states.states);
-
-    if (buf->equiv_size > 0)
-      free((char *) buf->equiv);
-
-    free((char *) buf);
-}
-
-ure_dfa_t
-ure_compile(ucs2_t *re, unsigned long relen, int casefold, ure_buffer_t buf)
-{
-    ucs2_t i, j, state;
-    _ure_state_t *sp;
-    _ure_dstate_t *dsp;
-    _ure_trans_t *tp;
-    ure_dfa_t dfa;
-
-    if (re == 0 || *re == 0 || relen == 0 || buf == 0)
-      return 0;
-
-    /*
-     * Reset the various fields of the compilation buffer.  Default the flags
-     * to indicate the presense of the "^$" pattern.  If any other pattern
-     * occurs, then this flag will be removed.  This is done to catch this
-     * special pattern and handle it specially when matching.
-     */
-    buf->flags = _URE_DFA_BLANKLINE | ((casefold) ? _URE_DFA_CASEFOLD : 0);
-    buf->reducing = 0;
-    buf->stack.slist_used = 0;
-    buf->expr_used = 0;
-
-    for (i = 0; i < buf->symtab_used; i++)
-      buf->symtab[i].states.slist_used = 0;
-    buf->symtab_used = 0;
-
-    for (i = 0; i < buf->states.states_used; i++) {
-        buf->states.states[i].st.slist_used = 0;
-        buf->states.states[i].trans_used = 0;
-    }
-    buf->states.states_used = 0;
-
-    /*
-     * Construct the NFA.  If this stage returns a 0, then an error occurred or
-     * an empty expression was passed.
-     */
-    if ((state = _ure_re2nfa(re, relen, buf)) == _URE_NOOP)
-      return 0;
-
-    /*
-     * Do the expression reduction to get the initial DFA.
-     */
-    _ure_reduce(state, buf);
-
-    /*
-     * Merge all the equivalent DFA states.
-     */
-    _ure_merge_equiv(buf);
-
-    /*
-     * Construct the minimal DFA.
-     */
-    dfa = (ure_dfa_t) malloc(sizeof(_ure_dfa_t));
-    (void) memset((char *) dfa, '\0', sizeof(_ure_dfa_t));
-
-    dfa->flags = buf->flags & (_URE_DFA_CASEFOLD|_URE_DFA_BLANKLINE);
-
-    /*
-     * Free up the NFA state groups and transfer the symbols from the buffer
-     * to the DFA.
-     */
-    for (i = 0; i < buf->symtab_size; i++) {
-        if (buf->symtab[i].states.slist_size > 0)
-          free((char *) buf->symtab[i].states.slist);
-    }
-    dfa->syms = buf->symtab;
-    dfa->nsyms = buf->symtab_used;
-
-    buf->symtab_used = buf->symtab_size = 0;
-
-    /*
-     * Collect the total number of states and transitions needed for the DFA.
-     */
-    for (i = state = 0, sp = buf->states.states; i < buf->states.states_used;
-         i++, sp++) {
-        if (sp->id == state) {
-            dfa->nstates++;
-            dfa->ntrans += sp->trans_used;
-            state++;
-        }
-    }
-
-    /*
-     * Allocate enough space for the states and transitions.
-     */
-    dfa->states = (_ure_dstate_t *) malloc(sizeof(_ure_dstate_t) *
-                                           dfa->nstates);
-    dfa->trans = (_ure_trans_t *) malloc(sizeof(_ure_trans_t) * dfa->ntrans);
-
-    /*
-     * Actually transfer the DFA states from the buffer.
-     */
-    dsp = dfa->states;
-    tp = dfa->trans;
-    for (i = state = 0, sp = buf->states.states; i < buf->states.states_used;
-         i++, sp++) {
-        if (sp->id == state) {
-            dsp->trans = tp;
-            dsp->ntrans = sp->trans_used;
-            dsp->accepting = sp->accepting;
-
-            /*
-             * Add the transitions for the state.
-             */
-            for (j = 0; j < dsp->ntrans; j++, tp++) {
-                tp->symbol = sp->trans[j].lhs;
-                tp->next_state = buf->states.states[sp->trans[j].rhs].id;
-            }
-
-            dsp++;
-            state++;
-        }
-    }
-
-    return dfa;
-}
-
-void
-ure_dfa_free(ure_dfa_t dfa)
-{
-    ucs2_t i;
-
-    if (dfa == 0)
-      return;
-
-    for (i = 0; i < dfa->nsyms; i++) {
-        if ((dfa->syms[i].type == _URE_CCLASS ||
-             dfa->syms[i].type == _URE_NCCLASS) &&
-            dfa->syms[i].sym.ccl.ranges_size > 0)
-          free((char *) dfa->syms[i].sym.ccl.ranges);
-    }
-    if (dfa->nsyms > 0)
-      free((char *) dfa->syms);
-
-    if (dfa->nstates > 0)
-      free((char *) dfa->states);
-    if (dfa->ntrans > 0)
-      free((char *) dfa->trans);
-    free((char *) dfa);
-}
-
-void
-ure_write_dfa(ure_dfa_t dfa, FILE *out)
-{
-    ucs2_t i, j, k, h, l;
-    _ure_dstate_t *sp;
-    _ure_symtab_t *sym;
-    _ure_range_t *rp;
-
-    if (dfa == 0 || out == 0)
-      return;
-
-    /*
-     * Write all the different character classes.
-     */
-    for (i = 0, sym = dfa->syms; i < dfa->nsyms; i++, sym++) {
-        if (sym->type == _URE_CCLASS || sym->type == _URE_NCCLASS) {
-            fprintf(out, "C%hd = ", sym->id);
-            if (sym->sym.ccl.ranges_used > 0) {
-                putc('[', out);
-                if (sym->type == _URE_NCCLASS)
-                  putc('^', out);
-            }
-            if (sym->props != 0) {
-                if (sym->type == _URE_NCCLASS)
-                  fprintf(out, "\\P");
-                else
-                  fprintf(out, "\\p");
-                for (k = h = 0; k < 32; k++) {
-                    if (sym->props & (1 << k)) {
-                        if (h != 0)
-                          putc(',', out);
-                        fprintf(out, "%d", k + 1);
-                        h = 1;
-                    }
-                }
-            }
-            /*
-             * Dump the ranges.
-             */
-            for (k = 0, rp = sym->sym.ccl.ranges;
-                 k < sym->sym.ccl.ranges_used; k++, rp++) {
-                /*
-                 * Check for UTF16 characters.
-                 */
-                if (0x10000 <= rp->min_code &&
-                    rp->min_code <= 0x10ffff) {
-                    h = (ucs2_t) (((rp->min_code - 0x10000) >> 10) + 0xd800);
-                    l = (ucs2_t) (((rp->min_code - 0x10000) & 1023) + 0xdc00);
-                    fprintf(out, "\\x%04hX\\x%04hX", h, l);
-                } else
-                    fprintf(out, "\\x%04lX",
-                            (unsigned long)(rp->min_code & 0xffff));
-                if (rp->max_code != rp->min_code) {
-                    putc('-', out);
-                    if (rp->max_code >= 0x10000 &&
-                        rp->max_code <= 0x10ffff) {
-                        h = (ucs2_t) (((rp->max_code - 0x10000) >> 10) + 0xd800);
-                        l = (ucs2_t) (((rp->max_code - 0x10000) & 1023) + 0xdc00);
-                        fprintf(out, "\\x%04hX\\x%04hX", h, l);
-                    } else
-                        fprintf(out, "\\x%04lX",
-                                (unsigned long)(rp->max_code & 0xffff));
-                }
-            }
-            if (sym->sym.ccl.ranges_used > 0)
-              putc(']', out);
-            putc('\n', out);
-        }
-    }
-
-    for (i = 0, sp = dfa->states; i < dfa->nstates; i++, sp++) {
-        fprintf(out, "S%hd = ", i);
-        if (sp->accepting) {
-            fprintf(out, "1 ");
-            if (sp->ntrans)
-              fprintf(out, "| ");
-        }
-        for (j = 0; j < sp->ntrans; j++) {
-            if (j > 0)
-              fprintf(out, "| ");
-
-            sym = dfa->syms + sp->trans[j].symbol;
-            switch (sym->type) {
-              case _URE_CHAR:
-                if (0x10000 <= sym->sym.chr && sym->sym.chr <= 0x10ffff) {
-                    /*
-                     * Take care of UTF16 characters.
-                     */
-                    h = (ucs2_t) (((sym->sym.chr - 0x10000) >> 10) + 0xd800);
-                    l = (ucs2_t) (((sym->sym.chr - 0x10000) & 1023) + 0xdc00);
-                    fprintf(out, "\\x%04hX\\x%04hX ", h, l);
-                } else
-                    fprintf(out, "\\x%04lX ",
-                            (unsigned long)(sym->sym.chr & 0xffff));
-                break;
-              case _URE_ANY_CHAR:
-                fprintf(out, "<any> ");
-                break;
-              case _URE_BOL_ANCHOR:
-                fprintf(out, "<bol-anchor> ");
-                break;
-              case _URE_EOL_ANCHOR:
-                fprintf(out, "<eol-anchor> ");
-                break;
-              case _URE_CCLASS:
-              case _URE_NCCLASS:
-                fprintf(out, "[C%hd] ", sym->id);
-                break;
-            }
-            fprintf(out, "S%hd", sp->trans[j].next_state);
-            if (j + 1 < sp->ntrans)
-              putc(' ', out);
-        }
-        putc('\n', out);
-    }
-}
-
-#define _ure_issep(cc) ((cc) == '\n' || (cc) == '\r' || (cc) == 0x2028 ||\
-                        (cc) == 0x2029)
-
-int
-ure_exec(ure_dfa_t dfa, int flags, ucs2_t *text, unsigned long textlen,
-         unsigned long *match_start, unsigned long *match_end)
-{
-    int i, j, matched, found;
-    unsigned long ms, me;
-    ucs4_t c;
-    ucs2_t *sp, *ep, *lp;
-    _ure_dstate_t *stp;
-    _ure_symtab_t *sym;
-    _ure_range_t *rp;
-
-    if (dfa == 0 || text == 0)
-      return 0;
-
-    /*
-     * Handle the special case of an empty string matching the "^$" pattern.
-     */
-    if (textlen == 0 && (dfa->flags & _URE_DFA_BLANKLINE)) {
-        *match_start = *match_end = 0;
-        return 1;
-    }
-
-    sp = text;
-    ep = sp + textlen;
-
-    ms = me = ~0;
-
-    stp = dfa->states;
-
-    for (found = 0; found == 0 && sp < ep; ) {
-        lp = sp;
-        c = *sp++;
-
-        /*
-         * Check to see if this is a high surrogate that should be
-         * combined with a following low surrogate.
-         */
-        if (sp < ep && 0xd800 <= c && c <= 0xdbff &&
-            0xdc00 <= *sp && *sp <= 0xdfff)
-          c = 0x10000 + (((c & 0x03ff) << 10) | (*sp++ & 0x03ff));
-
-        /*
-         * Determine if the character is non-spacing and should be skipped.
-         */
-        if (_ure_matches_properties(_URE_NONSPACING, c) &&
-            (flags & URE_IGNORE_NONSPACING)) {
-            sp++;
-            continue;
-        }
-
-        if (dfa->flags & _URE_DFA_CASEFOLD)
-          c = _ure_tolower(c);
-
-        /*
-         * See if one of the transitions matches.
-         */
-        for (i = 0, matched = 0; matched == 0 && i < stp->ntrans; i++) {
-            sym = dfa->syms + stp->trans[i].symbol;
-            switch (sym->type) {
-              case _URE_ANY_CHAR:
-                if ((flags & URE_DOT_MATCHES_SEPARATORS) ||
-                    !_ure_issep(c))
-                  matched = 1;
-                break;
-              case _URE_CHAR:
-                if (c == sym->sym.chr)
-                  matched = 1;
-                break;
-              case _URE_BOL_ANCHOR:
-                if (lp == text) {
-                    sp = lp;
-                    matched = 1;
-                } else if (_ure_issep(c)) {
-                    if (c == '\r' && sp < ep && *sp == '\n')
-                      sp++;
-                    lp = sp;
-                    matched = 1;
-                }
-                break;
-              case _URE_EOL_ANCHOR:
-                if (_ure_issep(c)) {
-                    /*
-                     * Put the pointer back before the separator so the match
-                     * end position will be correct.  This case will also
-                     * cause the `sp' pointer to be advanced over the current
-                     * separator once the match end point has been recorded.
-                     */
-                    sp = lp;
-                    matched = 1;
-                }
-                break;
-              case _URE_CCLASS:
-              case _URE_NCCLASS:
-                if (sym->props != 0)
-                  matched = _ure_matches_properties(sym->props, c);
-                for (j = 0, rp = sym->sym.ccl.ranges;
-                     j < sym->sym.ccl.ranges_used; j++, rp++) {
-                    if (rp->min_code <= c && c <= rp->max_code)
-                      matched = 1;
-                }
-                if (sym->type == _URE_NCCLASS)
-                  matched = !matched;
-                break;
-            }
-
-            if (matched) {
-                if (ms == ~0UL)
-                  ms = lp - text;
-                else
-                  me = sp - text;
-                stp = dfa->states + stp->trans[i].next_state;
-
-                /*
-                 * If the match was an EOL anchor, adjust the pointer past the
-                 * separator that caused the match.  The correct match
-                 * position has been recorded already.
-                 */
-                if (sym->type == _URE_EOL_ANCHOR) {
-                    /*
-                     * Skip the character that caused the match.
-                     */
-                    sp++;
-
-                    /*
-                     * Handle the infamous CRLF situation.
-                     */
-                    if (sp < ep && c == '\r' && *sp == '\n')
-                      sp++;
-                }
-            }
-        }
-
-        if (matched == 0) {
-            if (stp->accepting == 0) {
-                /*
-                 * If the last state was not accepting, then reset
-                 * and start over.
-                 */
-                stp = dfa->states;
-                ms = me = ~0;
-            } else
-              /*
-               * The last state was accepting, so terminate the matching
-               * loop to avoid more work.
-               */
-              found = 1;
-        } else if (sp == ep) {
-            if (!stp->accepting) {
-                /*
-                 * This ugly hack is to make sure the end-of-line anchors
-                 * match when the source text hits the end.  This is only done
-                 * if the last subexpression matches.
-                 */
-                for (i = 0; found == 0 && i < stp->ntrans; i++) {
-                    sym = dfa->syms + stp->trans[i].symbol;
-                    if (sym->type ==_URE_EOL_ANCHOR) {
-                        stp = dfa->states + stp->trans[i].next_state;
-                        if (stp->accepting) {
-                            me = sp - text;
-                            found = 1;
-                        } else
-                          break;
-                    }
-                }
-            } else {
-                /*
-                 * Make sure any conditions that match all the way to the end
-                 * of the string match.
-                 */
-                found = 1;
-                me = sp - text;
-            }
-        }
-    }
-
-    if (found == 0)
-      ms = me = ~0;
-
-    *match_start = ms;
-    *match_end = me;
-
-    return (ms != ~0UL) ? 1 : 0;
-}
diff --git a/src/lib/krb5/unicode/ure/ure.h b/src/lib/krb5/unicode/ure/ure.h

deleted file mode 100644 (file)

index b83c97e..0000000
--- a/src/lib/krb5/unicode/ure/ure.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <http://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <http://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/ure.h,v 1.15 2008/01/07 23:20:05 kurt Exp $
- * $Id: ure.h,v 1.2 1999/09/21 15:47:44 mleisher Exp $
- */
-
-#ifndef _h_ure
-#define _h_ure
-
-#include "k5-int.h"
-
-#include <stdio.h>
-
-/*
- * Set of character class flags.
- */
-#define _URE_NONSPACING  0x00000001
-#define _URE_COMBINING   0x00000002
-#define _URE_NUMDIGIT    0x00000004
-#define _URE_NUMOTHER    0x00000008
-#define _URE_SPACESEP    0x00000010
-#define _URE_LINESEP     0x00000020
-#define _URE_PARASEP     0x00000040
-#define _URE_CNTRL       0x00000080
-#define _URE_PUA         0x00000100
-
-#define _URE_UPPER       0x00000200
-#define _URE_LOWER       0x00000400
-#define _URE_TITLE       0x00000800
-#define _URE_MODIFIER    0x00001000
-#define _URE_OTHERLETTER 0x00002000
-#define _URE_DASHPUNCT   0x00004000
-#define _URE_OPENPUNCT   0x00008000
-#define _URE_CLOSEPUNCT  0x00010000
-#define _URE_OTHERPUNCT  0x00020000
-#define _URE_MATHSYM     0x00040000
-#define _URE_CURRENCYSYM 0x00080000
-#define _URE_OTHERSYM    0x00100000
-
-#define _URE_LTR         0x00200000
-#define _URE_RTL         0x00400000
-
-#define _URE_EURONUM     0x00800000
-#define _URE_EURONUMSEP  0x01000000
-#define _URE_EURONUMTERM 0x02000000
-#define _URE_ARABNUM     0x04000000
-#define _URE_COMMONSEP   0x08000000
-
-#define _URE_BLOCKSEP    0x10000000
-#define _URE_SEGMENTSEP  0x20000000
-
-#define _URE_WHITESPACE  0x40000000
-#define _URE_OTHERNEUT   0x80000000
-
-/*
- * Error codes.
- */
-#define _URE_OK               0
-#define _URE_UNEXPECTED_EOS   -1
-#define _URE_CCLASS_OPEN      -2
-#define _URE_UNBALANCED_GROUP -3
-#define _URE_INVALID_PROPERTY -4
-
-/*
- * Options that can be combined for searching.
- */
-#define URE_IGNORE_NONSPACING      0x01
-#define URE_DOT_MATCHES_SEPARATORS 0x02
-
-typedef krb5_ui_4 ucs4_t;
-typedef krb5_ui_2 ucs2_t;
-
-/*
- * Opaque type for memory used when compiling expressions.
- */
-typedef struct _ure_buffer_t *ure_buffer_t;
-
-/*
- * Opaque type for the minimal DFA used when matching.
- */
-typedef struct _ure_dfa_t *ure_dfa_t;
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-ure_buffer_t ure_buffer_create (void);
-
-void ure_buffer_free (ure_buffer_t buf);
-
-ure_dfa_t
-ure_compile (ucs2_t *re, unsigned long relen,
-                   int casefold, ure_buffer_t buf);
-
-void ure_dfa_free (ure_dfa_t dfa);
-
-void ure_write_dfa (ure_dfa_t dfa, FILE *out);
-
-int
-ure_exec (ure_dfa_t dfa, int flags, ucs2_t *text,
-                unsigned long textlen, unsigned long *match_start,
-                unsigned long *match_end);
-
-/*************************************************************************
- *
- * Prototypes for stub functions used for URE.  These need to be rewritten to
- * use the Unicode support available on the system.
- *
- *************************************************************************/
-
-ucs4_t _ure_tolower (ucs4_t c);
-
-int
-_ure_matches_properties (unsigned long props, ucs4_t c);
-
-#endif /* _h_ure */
diff --git a/src/lib/krb5/unicode/ure/urestubs.c b/src/lib/krb5/unicode/ure/urestubs.c

deleted file mode 100644 (file)

index 0f17951..0000000
--- a/src/lib/krb5/unicode/ure/urestubs.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/*
- * Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/urestubs.c,v 1.16 2008/01/07 23:20:05 kurt Exp $
- * $Id: urestubs.c,v 1.2 1999/09/21 15:47:44 mleisher Exp $"
- */
-
-#include "k5-int.h"
-
-#include "ure.h"
-
-#include "ucdata.h"
-
-/*
- * This file contains stub routines needed by the URE package to test
- * character properties and other Unicode implementation specific details.
- */
-
-/*
- * This routine should return the lower case equivalent for the character or,
- * if there is no lower case quivalent, the character itself.
- */
-ucs4_t _ure_tolower(ucs4_t c)
-{
-    return uctoupper(c);
-}
-
-static struct ucmaskmap {
-       unsigned long mask1;
-       unsigned long mask2;
-} masks[32] = {
-       { UC_MN, 0 },   /* _URE_NONSPACING */
-       { UC_MC, 0 },   /* _URE_COMBINING */
-       { UC_ND, 0 },   /* _URE_NUMDIGIT */
-       { UC_NL|UC_NO, 0 },     /* _URE_NUMOTHER */
-       { UC_ZS, 0 },   /* _URE_SPACESEP */
-       { UC_ZL, 0 },   /* _URE_LINESEP */
-       { UC_ZP, 0 },   /* _URE_PARASEP */
-       { UC_CC, 0 },   /* _URE_CNTRL */
-       { UC_CO, 0 },   /* _URE_PUA */
-
-       { UC_LU, 0 },   /* _URE_UPPER */
-       { UC_LL, 0 },   /* _URE_LOWER */
-       { UC_LT, 0 },   /* _URE_TITLE */
-       { UC_LM, 0 },   /* _URE_MODIFIER */
-       { UC_LO, 0 },   /* _URE_OTHERLETTER */
-       { UC_PD, 0 },   /* _URE_DASHPUNCT */
-       { UC_PS, 0 },   /* _URE_OPENPUNCT */
-       { UC_PC, 0 },   /* _URE_CLOSEPUNCT */
-       { UC_PO, 0 },   /* _URE_OTHERPUNCT */
-       { UC_SM, 0 },   /* _URE_MATHSYM */
-       { UC_SC, 0 },   /* _URE_CURRENCYSYM */
-       { UC_SO, 0 },   /* _URE_OTHERSYM */
-
-       { UC_L, 0 },    /* _URE_LTR */
-       { UC_R, 0 },    /* _URE_RTL */
-
-       { 0, UC_EN },   /* _URE_EURONUM */
-       { 0, UC_ES },   /* _URE_EURONUMSEP */
-       { 0, UC_ET },   /* _URE_EURONUMTERM */
-       { 0, UC_AN },   /* _URE_ARABNUM */
-       { 0, UC_CS },   /* _URE_COMMONSEP */
-
-       { 0, UC_B },    /* _URE_BLOCKSEP */
-       { 0, UC_S },    /* _URE_SEGMENTSEP */
-
-       { 0, UC_WS },   /* _URE_WHITESPACE */
-       { 0, UC_ON }    /* _URE_OTHERNEUT */
-};
-
-
-/*
- * This routine takes a set of URE character property flags (see ure.h) along
- * with a character and tests to see if the character has one or more of those
- * properties.
- */
-int
-_ure_matches_properties(unsigned long props, ucs4_t c)
-{
-       int i;
-       unsigned long mask1=0, mask2=0;
-
-       for( i=0; i<32; i++ ) {
-               if( props & (1 << i) ) {
-                       mask1 |= masks[i].mask1;
-                       mask2 |= masks[i].mask2;
-               }
-       }
-
-       return ucisprop( mask1, mask2, c );
-}
diff --git a/src/lib/krb5/unicode/utbm/README b/src/lib/krb5/unicode/utbm/README

deleted file mode 100644 (file)

index 8c0212d..0000000
--- a/src/lib/krb5/unicode/utbm/README
+++ /dev/null
@@ -1,121 +0,0 @@
-#
-# $Id: README,v 1.1 1999/09/21 15:45:17 mleisher Exp $
-#
-# Copyright 1997, 1998, 1999 Computing Research Labs,
-# New Mexico State University
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-                       Unicode and Boyer-Moore Searching
-                                 Version 0.2
-
-UTBM (Unicode Tuned Boyer-Moore) is a simple package that provides tuned
-Boyer-Moore searches on Unicode UCS2 text (handles high and low surrogates).
-
----------------------------------------------------------------------------
-
-Assumptions:
-
-  o  Search pattern and text already normalized in some fasion.
-
-  o  Upper, lower, and title case conversions are one-to-one.
-
-  o  For conversions between upper, lower, and title case, UCS2 characters
-     always convert to other UCS2 characters, and UTF-16 characters always
-     convert to other UTF-16 characters.
-
-Flags:
-
-  UTBM provides three processing flags:
-
-  o  UTBM_CASEFOLD          - search in a case-insensitive manner.
-
-  o  UTBM_IGNORE_NONSPACING - ignore non-spacing characters in the pattern and
-                              the text.
-
-  o  UTBM_SPACE_COMPRESS    - view as a *single space*, sequential groups of
-                              U+2028, U+2029, '\n', '\r', '\t', and any
-                              character identified as a space by the Unicode
-                              support on the platform.
-
-                              This flag also causes all characters identified
-                              as control by the Unicode support on the
-                              platform to be ignored (except for '\n', '\r',
-                              and '\t').
-
----------------------------------------------------------------------------
-
-Before using UTBM
------------------
-Before UTBM is used, some functions need to be created.  The "utbmstub.c" file
-contains stubs that need to be rewritten so they work with the Unicode support
-on the platform on which this package is being used.
-
-Using UTBM
-----------
-
-Sample pseudo-code fragment.
-
-  utbm_pattern_t pat;
-  ucs2_t *pattern, *text;
-  unsigned long patternlen, textlen;
-  unsigned long flags, match_start, match_end;
-
-  /*
-   * Allocate the dynamic storage needed for a search pattern.
-   */
-  pat = utbm_create_pattern();
-
-  /*
-   * Set the search flags desired.
-   */
-  flags = UTBM_CASEFOLD|UTBM_IGNORE_NONSPACING;
-
-  /*
-   * Compile the search pattern.
-   */
-  utbm_compile(pattern, patternlen, flags, pat);
-
-  /*
-   * Find the first occurance of the search pattern in the text.
-   */
-  if (utbm_exec(pat, text, textlen, &match_start, &match_end))
-    printf("MATCH: %ld %ld\n", match_start, match_end);
-
-  /*
-   * Free the dynamic storage used for the search pattern.
-   */
-  ure_free_pattern(pat);
-
----------------------------------------------------------------------------
-
-Mark Leisher <mleisher@crl.nmsu.edu>
-2 May 1997
-
-===========================================================================
-
-CHANGES
--------
-
-Version: 0.2
-Date   : 21 September 1999
-==========================
-  1. Added copyright stuff and put in CVS.
-
diff --git a/src/lib/krb5/unicode/utbm/utbm.c b/src/lib/krb5/unicode/utbm/utbm.c

deleted file mode 100644 (file)

index cc895e5..0000000
--- a/src/lib/krb5/unicode/utbm/utbm.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/utbm/utbm.c,v 1.9 2008/01/07 23:20:05 kurt Exp $
- * $Id: utbm.c,v 1.1 1999/09/21 15:45:17 mleisher Exp $
- */
-
-/*
- * Assumptions:
- * 1. Case conversions of UTF-16 characters must also be UTF-16 characters.
- * 2. Case conversions are all one-to-one.
- * 3. Text and pattern have already been normalized in some fashion.
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include "utbm.h"
-
-/*
- * Single pattern character.
- */
-typedef struct {
-    ucs4_t lc;
-    ucs4_t uc;
-    ucs4_t tc;
-} _utbm_char_t;
-
-typedef struct {
-    _utbm_char_t *ch;
-    unsigned long skip;
-} _utbm_skip_t;
-
-typedef struct _utbm_pattern_t {
-    unsigned long flags;
-
-    _utbm_char_t *pat;
-    unsigned long pat_used;
-    unsigned long pat_size;
-    unsigned long patlen;
-
-    _utbm_skip_t *skip;
-    unsigned long skip_used;
-    unsigned long skip_size;
-
-    unsigned long md4;
-} _utbm_pattern_t;
-
-/*************************************************************************
- *
- * Support functions.
- *
- *************************************************************************/
-
-/*
- * Routine to look up the skip value for a character.
- */
-static unsigned long
-_utbm_skip(utbm_pattern_t p, ucs2_t *start, ucs2_t *end)
-{
-    unsigned long i;
-    ucs4_t c1, c2;
-    _utbm_skip_t *sp;
-
-    if (start >= end)
-      return 0;
-
-    c1 = *start;
-    c2 = (start + 1 < end) ? *(start + 1) : ~0;
-    if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff)
-      c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-
-    for (i = 0, sp = p->skip; i < p->skip_used; i++, sp++) {
-        if (!((c1 ^ sp->ch->uc) & (c1 ^ sp->ch->lc) & (c1 ^ sp->ch->tc))) {
-            return ((unsigned long) (end - start) < sp->skip) ?
-                end - start : sp->skip;
-        }
-    }
-    return p->patlen;
-}
-
-static int
-_utbm_match(utbm_pattern_t pat, ucs2_t *text, ucs2_t *start, ucs2_t *end,
-            unsigned long *match_start, unsigned long *match_end)
-{
-    int check_space;
-    ucs4_t c1, c2;
-    unsigned long count;
-    _utbm_char_t *cp;
-
-    /*
-     * Set the potential match endpoint first.
-     */
-    *match_end = (start - text) + 1;
-
-    c1 = *start;
-    c2 = (start + 1 < end) ? *(start + 1) : ~0;
-    if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff) {
-        c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-        /*
-         * Adjust the match end point to occur after the UTF-16 character.
-         */
-        *match_end = *match_end + 1;
-    }
-
-    if (pat->pat_used == 1) {
-        *match_start = start - text;
-        return 1;
-    }
-
-    /*
-     * Compare backward.
-     */
-    cp = pat->pat + (pat->pat_used - 1);
-
-    for (count = pat->patlen; start > text && count > 0;) {
-        /*
-         * Ignore non-spacing characters if indicated.
-         */
-        if (pat->flags & UTBM_IGNORE_NONSPACING) {
-            while (start > text && _utbm_nonspacing(c1)) {
-                c2 = *--start;
-                c1 = (start - 1 > text) ? *(start - 1) : ~0;
-                if (0xdc00 <= c2 && c2 <= 0xdfff &&
-                    0xd800 <= c1 && c1 <= 0xdbff) {
-                    c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-                    start--;
-                } else
-                  c1 = c2;
-            }
-        }
-
-        /*
-         * Handle space compression if indicated.
-         */
-        if (pat->flags & UTBM_SPACE_COMPRESS) {
-            check_space = 0;
-            while (start > text &&
-                   (_utbm_isspace(c1, 1) || _utbm_iscntrl(c1))) {
-                check_space = _utbm_isspace(c1, 1);
-                c2 = *--start;
-                c1 = (start - 1 > text) ? *(start - 1) : ~0;
-                if (0xdc00 <= c2 && c2 <= 0xdfff &&
-                    0xd800 <= c1 && c1 <= 0xdbff) {
-                    c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-                    start--;
-                } else
-                  c1 = c2;
-            }
-            /*
-             * Handle things if space compression was indicated and one or
-             * more member characters were found.
-             */
-            if (check_space) {
-                if (cp->uc != ' ')
-                  return 0;
-                cp--;
-                count--;
-            }
-        }
-
-        /*
-         * Handle the normal comparison cases.
-         */
-        if (count > 0 && ((c1 ^ cp->uc) & (c1 ^ cp->lc) & (c1 ^ cp->tc)))
-          return 0;
-
-        count -= (c1 >= 0x10000) ? 2 : 1;
-        if (count > 0) {
-            cp--;
-
-            /*
-             * Get the next preceding character.
-             */
-            if (start > text) {
-                c2 = *--start;
-                c1 = (start - 1 > text) ? *(start - 1) : ~0;
-                if (0xdc00 <= c2 && c2 <= 0xdfff &&
-                    0xd800 <= c1 && c1 <= 0xdbff) {
-                    c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-                    start--;
-                } else
-                  c1 = c2;
-            }
-        }
-    }
-
-    /*
-     * Set the match start position.
-     */
-    *match_start = start - text;
-    return 1;
-}
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-utbm_pattern_t
-utbm_create_pattern(void)
-{
-    utbm_pattern_t p;
-
-    p = (utbm_pattern_t) malloc(sizeof(_utbm_pattern_t));
-    (void) memset((char *) p, '\0', sizeof(_utbm_pattern_t));
-    return p;
-}
-
-void
-utbm_free_pattern(utbm_pattern_t pattern)
-{
-    if (pattern == 0)
-      return;
-
-    if (pattern->pat_size > 0)
-      free((char *) pattern->pat);
-
-    if (pattern->skip_size > 0)
-      free((char *) pattern->skip);
-
-    free((char *) pattern);
-}
-
-void
-utbm_compile(ucs2_t *pat, unsigned long patlen, unsigned long flags,
-             utbm_pattern_t p)
-{
-    int have_space;
-    unsigned long i, j, k, slen;
-    _utbm_char_t *cp;
-    _utbm_skip_t *sp;
-    ucs4_t c1, c2, sentinel;
-
-    if (p == 0 || pat == 0 || *pat == 0 || patlen == 0)
-      return;
-
-    /*
-     * Reset the pattern buffer.
-     */
-    p->patlen = p->pat_used = p->skip_used = 0;
-
-    /*
-     * Set the flags.
-     */
-    p->flags = flags;
-
-    /*
-     * Initialize the extra skip flag.
-     */
-    p->md4 = 1;
-
-    /*
-     * Allocate more storage if necessary.
-     */
-    if (patlen > p->pat_size) {
-        if (p->pat_size == 0) {
-            p->pat = (_utbm_char_t *) malloc(sizeof(_utbm_char_t) * patlen);
-            p->skip = (_utbm_skip_t *) malloc(sizeof(_utbm_skip_t) * patlen);
-        } else {
-            p->pat = (_utbm_char_t *)
-                realloc((char *) p->pat, sizeof(_utbm_char_t) * patlen);
-            p->skip = (_utbm_skip_t *)
-                realloc((char *) p->skip, sizeof(_utbm_skip_t) * patlen);
-        }
-        p->pat_size = p->skip_size = patlen;
-    }
-
-    /*
-     * Preprocess the pattern to remove controls (if specified) and determine
-     * case.
-     */
-    for (have_space = 0, cp = p->pat, i = 0; i < patlen; i++) {
-        c1 = pat[i];
-        c2 = (i + 1 < patlen) ? pat[i + 1] : ~0;
-        if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff)
-          c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-
-        /*
-         * Make sure the `have_space' flag is turned off if the character
-         * is not an appropriate one.
-         */
-        if (!_utbm_isspace(c1, flags & UTBM_SPACE_COMPRESS))
-          have_space = 0;
-
-        /*
-         * If non-spacing characters should be ignored, do it here.
-         */
-        if ((flags & UTBM_IGNORE_NONSPACING) && _utbm_nonspacing(c1))
-          continue;
-
-        /*
-         * Check if spaces and controls need to be compressed.
-         */
-        if (flags & UTBM_SPACE_COMPRESS) {
-            if (_utbm_isspace(c1, 1)) {
-                if (!have_space) {
-                    /*
-                     * Add a space and set the flag.
-                     */
-                    cp->uc = cp->lc = cp->tc = ' ';
-                    cp++;
-
-                    /*
-                     * Increase the real pattern length.
-                     */
-                    p->patlen++;
-                    sentinel = ' ';
-                    have_space = 1;
-                }
-                continue;
-            }
-
-            /*
-             * Ignore all control characters.
-             */
-            if (_utbm_iscntrl(c1))
-              continue;
-        }
-
-        /*
-         * Add the character.
-         */
-        if (flags & UTBM_CASEFOLD) {
-            cp->uc = _utbm_toupper(c1);
-            cp->lc = _utbm_tolower(c1);
-            cp->tc = _utbm_totitle(c1);
-        } else
-          cp->uc = cp->lc = cp->tc = c1;
-
-        /*
-         * Set the sentinel character.
-         */
-        sentinel = cp->uc;
-
-        /*
-         * Move to the next character.
-         */
-        cp++;
-
-        /*
-         * Increase the real pattern length appropriately.
-         */
-        p->patlen += (c1 >= 0x10000) ? 2 : 1;
-
-        /*
-         * Increment the loop index for UTF-16 characters.
-         */
-        i += (c1 >= 0x10000) ? 1 : 0;
-
-    }
-
-    /*
-     * Set the number of characters actually used.
-     */
-    p->pat_used = cp - p->pat;
-
-    /*
-     * Go through and construct the skip array and determine the actual length
-     * of the pattern in UCS2 terms.
-     */
-    slen = p->patlen - 1;
-    cp = p->pat;
-    for (i = k = 0; i < p->pat_used; i++, cp++) {
-        /*
-         * Locate the character in the skip array.
-         */
-        for (sp = p->skip, j = 0;
-             j < p->skip_used && sp->ch->uc != cp->uc; j++, sp++) ;
-
-        /*
-         * If the character is not found, set the new skip element and
-         * increase the number of skip elements.
-         */
-        if (j == p->skip_used) {
-            sp->ch = cp;
-            p->skip_used++;
-        }
-
-        /*
-         * Set the updated skip value.  If the character is UTF-16 and is
-         * not the last one in the pattern, add one to its skip value.
-         */
-        sp->skip = slen - k;
-        if (cp->uc >= 0x10000 && k + 2 < slen)
-          sp->skip++;
-
-        /*
-         * Set the new extra skip for the sentinel character.
-         */
-        if (((cp->uc >= 0x10000 && k + 2 <= slen) || k + 1 <= slen) &&
-            cp->uc == sentinel)
-          p->md4 = slen - k;
-
-        /*
-         * Increase the actual index.
-         */
-        k += (cp->uc >= 0x10000) ? 2 : 1;
-    }
-}
-
-int
-utbm_exec(utbm_pattern_t pat, ucs2_t *text, unsigned long textlen,
-          unsigned long *match_start, unsigned long *match_end)
-{
-    unsigned long k;
-    ucs2_t *start, *end;
-
-    if (pat == 0 || pat->pat_used == 0 || text == 0 || textlen == 0 ||
-        textlen < pat->patlen)
-      return 0;
-
-    start = text + pat->patlen;
-    end = text + textlen;
-
-    /*
-     * Adjust the start point if it points to a low surrogate.
-     */
-    if (0xdc00 <= *start && *start <= 0xdfff &&
-        0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
-      start--;
-
-    while (start < end) {
-        while ((k = _utbm_skip(pat, start, end))) {
-            start += k;
-            if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
-                0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
-              start--;
-        }
-
-        if (start < end &&
-            _utbm_match(pat, text, start, end, match_start, match_end))
-          return 1;
-
-        start += pat->md4;
-        if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
-            0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
-          start--;
-    }
-    return 0;
-}
diff --git a/src/lib/krb5/unicode/utbm/utbm.h b/src/lib/krb5/unicode/utbm/utbm.h

deleted file mode 100644 (file)

index 1ab8b91..0000000
--- a/src/lib/krb5/unicode/utbm/utbm.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/utbm/utbm.h,v 1.10 2008/01/07 23:20:05 kurt Exp $
- * $Id: utbm.h,v 1.1 1999/09/21 15:45:18 mleisher Exp $
- */
-
-#ifndef _h_utbm
-#define _h_utbm
-
-#include "k5-int.h"
-
-/*************************************************************************
- *
- * Types.
- *
- *************************************************************************/
-
-/*
- * Fundamental character types.
- */
-typedef krb5_ui_4 ucs4_t;
-typedef krb5_ui_2 ucs2_t;
-
-/*
- * An opaque type used for the search pattern.
- */
-typedef struct _utbm_pattern_t *utbm_pattern_t;
-
-/*************************************************************************
- *
- * Flags.
- *
- *************************************************************************/
-
-#define UTBM_CASEFOLD          0x01
-#define UTBM_IGNORE_NONSPACING 0x02
-#define UTBM_SPACE_COMPRESS    0x04
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-utbm_pattern_t utbm_create_pattern (void);
-
-void utbm_free_pattern (utbm_pattern_t pattern);
-
-void
-utbm_compile (ucs2_t *pat, unsigned long patlen,
-                    unsigned long flags, utbm_pattern_t pattern);
-
-int
-utbm_exec (utbm_pattern_t pat, ucs2_t *text,
-                 unsigned long textlen, unsigned long *match_start,
-                 unsigned long *match_end);
-
-/*************************************************************************
- *
- * Prototypes for the stub functions needed.
- *
- *************************************************************************/
-
-int _utbm_isspace (ucs4_t c, int compress);
-
-int _utbm_iscntrl (ucs4_t c);
-
-int _utbm_nonspacing (ucs4_t c);
-
-ucs4_t _utbm_tolower (ucs4_t c);
-
-ucs4_t _utbm_toupper (ucs4_t c);
-
-ucs4_t _utbm_totitle (ucs4_t c);
-
-#endif /* _h_utbm */
diff --git a/src/lib/krb5/unicode/utbm/utbmstub.c b/src/lib/krb5/unicode/utbm/utbmstub.c

deleted file mode 100644 (file)

index 9a6f60a..0000000
--- a/src/lib/krb5/unicode/utbm/utbmstub.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/utbm/utbmstub.c,v 1.8 2008/01/07 23:20:05 kurt Exp $
- * $Id: utbmstub.c,v 1.1 1999/09/21 15:45:18 mleisher Exp $
- */
-
-#include "utbm.h"
-
-/*
- * This should be redefined to use the `isspace' function available in the
- * Unicode support on the platform where this is being used.
- */
-#define _platform_isspace(x) 0
-
-/*
- * Return non-zero for any character that should be considered the equivalent
- * of a space character.  Return zero otherwise.
- */
-int
-_utbm_isspace(ucs4_t c, int compress)
-{
-    if (compress)
-      return (c == 0x09 || c == 0x0a || c == 0x0d ||
-              c == 0x2028 || c == 0x2029 || _platform_isspace(c)) ? 1 : 0;
-
-    return _platform_isspace(c);
-
-}
-
-/*
- * Return non-zero if the character is a control character, or zero otherwise.
- */
-int
-_utbm_iscntrl(ucs4_t c)
-{
-    return 0;
-}
-
-/*
- * Return non-zero if the character is a non-spacing character, or zero
- * otherwise.
- */
-int
-_utbm_nonspacing(ucs4_t c)
-{
-    return 0;
-}
-
-/*
- * Convert a character to lower case.
- */
-ucs4_t
-_utbm_tolower(ucs4_t c)
-{
-    return c;
-}
-
-/*
- * Convert a character to upper case.
- */
-ucs4_t
-_utbm_toupper(ucs4_t c)
-{
-    return c;
-}
-
-/*
- * Convert a character to title case.
- */
-ucs4_t
-_utbm_totitle(ucs4_t c)
-{
-    return c;
-}
diff --git a/src/util/support/libkrb5support-fixed.exports b/src/util/support/libkrb5support-fixed.exports

index df3c78f9e1a124b48b15fe42f01fd8c2060c0089..0bafe1c84c60ccbbe31ab87372a5121c09bd877e 100644 (file)
--- a/src/util/support/libkrb5support-fixed.exports
+++ b/src/util/support/libkrb5support-fixed.exports
@@ -95,5 +95,4 @@ krb5int_ucs4_to_utf8
  krb5int_utf8_to_ucs4
  krb5int_utf8_lentab
  krb5int_utf8_mintab
-krb5int_utf8_next
  krb5int_zap
diff --git a/src/util/support/t_utf8.c b/src/util/support/t_utf8.c

index 583270165a60fe95171aeb09d98c2278c8aa929b..6493bae3e94170a16203885b8145b8bbe0b945dd 100644 (file)
--- a/src/util/support/t_utf8.c
+++ b/src/util/support/t_utf8.c
@@ -49,13 +49,13 @@
  #endif
  
  /*
- * len is 0 for invalid encoding prefixes (krb5int_utf8_charlen2() partially
+ * len is 0 for invalid encoding prefixes (KRB5_UTF8_CHARLEN2() partially
   * enforces the validity of the first two bytes, based on masking the second
   * byte.  It doesn't check whether bit 6 is 0, though, and doesn't catch the
   * range between U+110000 and U+13FFFF).
   *
   * ucs is 0 for invalid encodings (including ones with valid prefixes according
- * to krb5int_utf8_charlen2(); krb5int_utf8_to_ucs4() will still fail on them
+ * to KRB5_UTF8_CHARLEN2(); krb5int_utf8_to_ucs4() will still fail on them
   * because it checks more things.)  Code points above U+10FFFF are excluded by
   * the actual test code and remain in the table for possibly testing the old
   * implementation that didn't exclude them.
@@ -129,7 +129,7 @@ test_decode(struct testcase *t, int high4)
      int len, status = 0;
      krb5_ucs4 u = 0;
  
-    len = krb5int_utf8_charlen2(t->p);
+    len = KRB5_UTF8_CHARLEN2(t->p, len);
      if (len != t->len) {
          printf("expected len=%d, got len=%d\n", t->len, len);
          status = 1;
diff --git a/src/util/support/utf8.c b/src/util/support/utf8.c

index dfbf12baa186d40cc7aa417f2aa543c36a3d608d..08bdcf9a32de0d8625bf9ab681a838191840fc45 100644 (file)
--- a/src/util/support/utf8.c
+++ b/src/util/support/utf8.c
@@ -52,50 +52,6 @@
  #include "k5-utf8.h"
  #include "supp-int.h"
  
-/*
- * return the number of bytes required to hold the
- * NULL-terminated UTF-8 string NOT INCLUDING the
- * termination.
- */
-size_t krb5int_utf8_bytes(const char *p)
-{
-    size_t bytes;
-
-    for (bytes = 0; p[bytes]; bytes++)
-        ;
-
-    return bytes;
-}
-
-size_t krb5int_utf8_chars(const char *p)
-{
-    /* could be optimized and could check for invalid sequences */
-    size_t chars = 0;
-
-    for ( ; *p ; KRB5_UTF8_INCR(p))
-        chars++;
-
-    return chars;
-}
-
-size_t krb5int_utf8c_chars(const char *p, size_t length)
-{
-    /* could be optimized and could check for invalid sequences */
-    size_t chars = 0;
-    const char *end = p + length;
-
-    for ( ; p < end; KRB5_UTF8_INCR(p))
-        chars++;
-
-    return chars;
-}
-
-/* return offset to next character */
-int krb5int_utf8_offset(const char *p)
-{
-    return KRB5_UTF8_NEXT(p) - p;
-}
-
  /*
   * Returns length indicated by first byte.
   */
@@ -109,14 +65,6 @@ const char krb5int_utf8_lentab[] = {
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  
-int krb5int_utf8_charlen(const char *p)
-{
-    if (!(*p & 0x80))
-        return 1;
-
-    return krb5int_utf8_lentab[*(const unsigned char *)p ^ 0x80];
-}
-
  /*
   * Make sure the UTF-8 char used the shortest possible encoding
   * returns charlen if valid, 0 if not.
@@ -147,18 +95,6 @@ c krb5int_utf8_mintab[] = {
      (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00 };
  #undef c
  
-int krb5int_utf8_charlen2(const char *p)
-{
-    int i = KRB5_UTF8_CHARLEN(p);
-
-    if (i > 2) {
-        if (!(krb5int_utf8_mintab[*p & 0x1f] & p[1]))
-            i = 0;
-    }
-
-    return i;
-}
-
  /*
   * Convert a UTF8 character to a UCS4 character.  Return 0 on success,
   * -1 on failure.
@@ -194,17 +130,6 @@ int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out)
      return 0;
  }
  
-int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out)
-{
-    krb5_ucs4 ch;
-
-    *out = 0;
-    if (krb5int_utf8_to_ucs4(p, &ch) == -1 || ch > 0xFFFF)
-        return -1;
-    *out = (krb5_ucs2) ch;
-    return 0;
-}
-
  /* conv UCS-4 to UTF-8 */
  size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf)
  {
@@ -241,271 +166,3 @@ size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf)
  
      return len;
  }
-
-size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf)
-{
-    return krb5int_ucs4_to_utf8((krb5_ucs4)c, buf);
-}
-
-/*
- * Advance to the next UTF-8 character
- *
- * Ignores length of multibyte character, instead rely on
- * continuation markers to find start of next character.
- * This allows for "resyncing" of when invalid characters
- * are provided provided the start of the next character
- * is appears within the 6 bytes examined.
- */
-char *krb5int_utf8_next(const char *p)
-{
-    int i;
-    const unsigned char *u = (const unsigned char *) p;
-
-    if (KRB5_UTF8_ISASCII(u)) {
-        return (char *) &p[1];
-    }
-
-    for (i = 1; i < 6; i++) {
-        if ((u[i] & 0xc0) != 0x80) {
-            return (char *) &p[i];
-        }
-    }
-
-    return (char *) &p[i];
-}
-
-/*
- * Advance to the previous UTF-8 character
- *
- * Ignores length of multibyte character, instead rely on
- * continuation markers to find start of next character.
- * This allows for "resyncing" of when invalid characters
- * are provided provided the start of the next character
- * is appears within the 6 bytes examined.
- */
-char *krb5int_utf8_prev(const char *p)
-{
-    int i;
-    const unsigned char *u = (const unsigned char *) p;
-
-    for (i = -1; i>-6 ; i--) {
-        if ((u[i] & 0xc0 ) != 0x80) {
-            return (char *) &p[i];
-        }
-    }
-
-    return (char *) &p[i];
-}
-
-/*
- * Copy one UTF-8 character from src to dst returning
- * number of bytes copied.
- *
- * Ignores length of multibyte character, instead rely on
- * continuation markers to find start of next character.
- * This allows for "resyncing" of when invalid characters
- * are provided provided the start of the next character
- * is appears within the 6 bytes examined.
- */
-int krb5int_utf8_copy(char* dst, const char *src)
-{
-    int i;
-    const unsigned char *u = (const unsigned char *) src;
-
-    dst[0] = src[0];
-
-    if (KRB5_UTF8_ISASCII(u)) {
-        return 1;
-    }
-
-    for (i=1; i<6; i++) {
-        if ((u[i] & 0xc0) != 0x80) {
-            return i;
-        }
-        dst[i] = src[i];
-    }
-
-    return i;
-}
-
-#ifndef UTF8_ALPHA_CTYPE
-/*
- * UTF-8 ctype routines
- * Only deals with characters < 0x80 (ie: US-ASCII)
- */
-
-int krb5int_utf8_isascii(const char * p)
-{
-    unsigned c = * (const unsigned char *) p;
-
-    return KRB5_ASCII(c);
-}
-
-int krb5int_utf8_isdigit(const char * p)
-{
-    unsigned c = * (const unsigned char *) p;
-
-    if (!KRB5_ASCII(c))
-        return 0;
-
-    return KRB5_DIGIT( c );
-}
-
-int krb5int_utf8_isxdigit(const char * p)
-{
-    unsigned c = * (const unsigned char *) p;
-
-    if (!KRB5_ASCII(c))
-        return 0;
-
-    return KRB5_HEX(c);
-}
-
-int krb5int_utf8_isspace(const char * p)
-{
-    unsigned c = * (const unsigned char *) p;
-
-    if (!KRB5_ASCII(c))
-        return 0;
-
-    switch(c) {
-    case ' ':
-    case '\t':
-    case '\n':
-    case '\r':
-    case '\v':
-    case '\f':
-        return 1;
-    }
-
-    return 0;
-}
-
-/*
- * These are not needed by the C SDK and are
- * not "good enough" for general use.
- */
-int krb5int_utf8_isalpha(const char * p)
-{
-    unsigned c = * (const unsigned char *) p;
-
-    if (!KRB5_ASCII(c))
-        return 0;
-
-    return KRB5_ALPHA(c);
-}
-
-int krb5int_utf8_isalnum(const char * p)
-{
-    unsigned c = * (const unsigned char *) p;
-
-    if (!KRB5_ASCII(c))
-        return 0;
-
-    return KRB5_ALNUM(c);
-}
-#endif
-
-
-/*
- * UTF-8 string routines
- */
-
-/* like strchr() */
-char *krb5int_utf8_strchr(const char *str, const char *chr)
-{
-    krb5_ucs4 chs, ch;
-
-    if (krb5int_utf8_to_ucs4(chr, &ch) == -1)
-        return NULL;
-    for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
-        if (krb5int_utf8_to_ucs4(str, &chs) == 0 && chs == ch)
-            return (char *)str;
-    }
-
-    return NULL;
-}
-
-/* like strcspn() but returns number of bytes, not characters */
-size_t krb5int_utf8_strcspn(const char *str, const char *set)
-{
-    const char *cstr, *cset;
-    krb5_ucs4 chstr, chset;
-
-    for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
-        for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
-            if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
-                && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
-                return cstr - str;
-        }
-    }
-
-    return cstr - str;
-}
-
-/* like strspn() but returns number of bytes, not characters */
-size_t krb5int_utf8_strspn(const char *str, const char *set)
-{
-    const char *cstr, *cset;
-    krb5_ucs4 chstr, chset;
-
-    for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
-        for (cset = set; ; KRB5_UTF8_INCR(cset)) {
-            if (*cset == '\0')
-                return cstr - str;
-            if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
-                && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
-                break;
-        }
-    }
-
-    return cstr - str;
-}
-
-/* like strpbrk(), replaces strchr() as well */
-char *krb5int_utf8_strpbrk(const char *str, const char *set)
-{
-    const char *cset;
-    krb5_ucs4 chstr, chset;
-
-    for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
-        for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
-            if (krb5int_utf8_to_ucs4(str, &chstr) == 0
-                && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
-                return (char *)str;
-        }
-    }
-
-    return NULL;
-}
-
-/* like strtok_r(), not strtok() */
-char *krb5int_utf8_strtok(char *str, const char *sep, char **last)
-{
-    char *begin;
-    char *end;
-
-    if (last == NULL)
-        return NULL;
-
-    begin = str ? str : *last;
-
-    begin += krb5int_utf8_strspn(begin, sep);
-
-    if (*begin == '\0') {
-        *last = NULL;
-        return NULL;
-    }
-
-    end = &begin[krb5int_utf8_strcspn(begin, sep)];
-
-    if (*end != '\0') {
-        char *next = KRB5_UTF8_NEXT(end);
-        *end = '\0';
-        end = next;
-    }
-
-    *last = end;
-
-    return begin;
-}
author	Greg Hudson <ghudson@mit.edu>
	Sat, 10 Dec 2022 06:26:36 +0000 (01:26 -0500)
committer	Greg Hudson <ghudson@mit.edu>
	Mon, 26 Dec 2022 07:30:31 +0000 (02:30 -0500)
src/include/k5-unicode.h		patch \| blob \| blame \| history
src/include/k5-utf8.h		patch \| blob \| blame \| history
src/lib/krb5/unicode/Makefile.in		patch \| blob \| blame \| history
src/lib/krb5/unicode/ucdata/bidiapi.txt	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ucdata/ucpgba.c	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ucdata/ucpgba.h	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ucdata/ucpgba.man	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ucstr.c		patch \| blob \| blame \| history
src/lib/krb5/unicode/ure/README	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ure/ure.c	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ure/ure.h	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/ure/urestubs.c	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/utbm/README	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/utbm/utbm.c	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/utbm/utbm.h	[deleted file]	patch \| blob \| blame \| history
src/lib/krb5/unicode/utbm/utbmstub.c	[deleted file]	patch \| blob \| blame \| history
src/util/support/libkrb5support-fixed.exports		patch \| blob \| blame \| history
src/util/support/t_utf8.c		patch \| blob \| blame \| history
src/util/support/utf8.c		patch \| blob \| blame \| history