* config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define.
* grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower.
(isupper): Use grub_isupper.
(isascii): New inline function.
* grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents.
* grub-core/lib/posix_wrap/wctype.h: Likewise.
* grub-core/normal/charset.c (grub_utf8_process): New function.
(grub_utf8_to_utf16): Use grub_utf8_process.
(grub_encode_utf8_character): New function.
(grub_ucs4_to_utf8): Use grub_encode_utf8_character.
* include/grub/charset.h (grub_utf8_process): New declaration.
(grub_encode_utf8_character): Likewise.
* include/grub/misc.h (grub_islower): New inline function.
(grub_isupper): Likewise.
(grub_strchrsub): Moved down to fix the definitions.
+2011-12-13 Vladimir Serbinenko <phcoder@gmail.com>
+
+ Enable UTF8 in gnulib regexp.
+
+ * config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define.
+ * grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower.
+ (isupper): Use grub_isupper.
+ (isascii): New inline function.
+ * grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents.
+ * grub-core/lib/posix_wrap/wctype.h: Likewise.
+ * grub-core/normal/charset.c (grub_utf8_process): New function.
+ (grub_utf8_to_utf16): Use grub_utf8_process.
+ (grub_encode_utf8_character): New function.
+ (grub_ucs4_to_utf8): Use grub_encode_utf8_character.
+ * include/grub/charset.h (grub_utf8_process): New declaration.
+ (grub_encode_utf8_character): Likewise.
+ * include/grub/misc.h (grub_islower): New inline function.
+ (grub_isupper): Likewise.
+ (grub_strchrsub): Moved down to fix the definitions.
+
2011-12-13 Vladimir Serbinenko <phcoder@gmail.com>
* grub-core/bus/usb/ohci.c (grub_ohci_check_transfer): Add an unsigned
/* Define to 1 to enable disk cache statistics. */
#define DISK_CACHE_STATS @DISK_CACHE_STATS@
+#define RE_ENABLE_I18N 1
+
#if defined(__i386__)
#define NESTED_FUNC_ATTR __attribute__ ((__regparm__ (1)))
#else
static inline int
islower (int c)
{
- return (c >= 'a' && c <= 'z');
+ return grub_islower (c);
+}
+
+static inline int
+isascii (int c)
+{
+ return !(c & ~0x7f);
}
static inline int
isupper (int c)
{
- return (c >= 'A' && c <= 'Z');
+ return grub_isupper (c);
}
static inline int
#ifndef GRUB_POSIX_WCHAR_H
#define GRUB_POSIX_WCHAR_H 1
+#include <grub/charset.h>
+
+/* UCS-4. */
+typedef grub_int32_t wint_t;
+enum
+ {
+ WEOF = -1 /* Returned by btowc for values with bits outside 0x7f. */
+ };
+
+#define MB_LEN_MAX 4
+
/* UCS-4. */
-typedef grub_uint32_t wchar_t;
+typedef grub_int32_t wchar_t;
+/* Incremental UTF-8 decoder state; mbrtowc hands both fields to
+   grub_utf8_process. */
+typedef struct mbstate {
+ grub_uint32_t code; /* Code point value accumulated so far. */
+ int count; /* Zero means no character is in progress (see mbsinit). */
+} mbstate_t;
+/* Decode one UTF-8 character from at most N bytes at S, keeping partial
+   progress in *PS.  The decoded value is stored through PWC when PWC is
+   non-null.  Returns 0 when the decoded character is NUL, otherwise the
+   number of bytes consumed by this call; (size_t) -1 when
+   grub_utf8_process rejects a byte; (size_t) -2 when the N bytes end
+   before a character is complete.  A null S is treated as a call on the
+   empty string with PWC ignored.
+   NOTE(review): PS is dereferenced unconditionally, so callers apparently
+   must always supply a state object -- confirm. */
+static inline size_t
+mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
+{
+ const char *ptr;
+ if (!s)
+ {
+ pwc = 0;
+ s = "";
+ n = 1;
+ }
+
+ for (ptr = s; ptr < s + n; ptr++)
+ {
+ if (!grub_utf8_process (*ptr, &ps->code, &ps->count))
+ return -1;
+ if (ps->count)
+ continue; /* More continuation bytes are still expected. */
+ if (pwc)
+ *pwc = ps->code;
+ if (ps->code == 0)
+ return 0;
+ return ptr - s + 1;
+ }
+ return -2; /* Ran out of input in the middle of a character. */
+}
+/* Return nonzero when *PS is in the initial state, i.e. PS->count is zero
+   and no multibyte character is partially decoded. */
+static inline int
+mbsinit(const mbstate_t *ps)
+{
+ return ps->count == 0;
+}
+/* Encode WC as UTF-8 into S by calling grub_encode_utf8_character with a
+   window of MB_LEN_MAX bytes starting at S, and return that result
+   converted to size_t.  A null S returns 1 without writing anything.
+   PS is unused. */
+static inline size_t
+wcrtomb (char *s, wchar_t wc, mbstate_t *ps __attribute__ ((unused)))
+{
+ if (s == 0)
+ return 1;
+ return grub_encode_utf8_character ((grub_uint8_t *) s,
+ (grub_uint8_t *) s + MB_LEN_MAX,
+ wc);
+}
+/* Widen a single byte value: any C with a bit set outside 0x7f yields
+   WEOF; every other value is returned unchanged. */
+static inline wint_t btowc (int c)
+{
+ if (c & ~0x7f)
+ return WEOF;
+ return c;
+}
+
+/* Compare two NUL-terminated wide strings element by element, without any
+   locale-specific collation.  Returns -1, 0 or +1 according to whether the
+   first differing element of S1 is smaller than, equal to, or greater
+   than the corresponding element of S2. */
+static inline int
+wcscoll (const wchar_t *s1, const wchar_t *s2)
+{
+ while (*s1 && *s2)
+ {
+ if (*s1 != *s2)
+ break; /* First mismatch decides the result below. */
+
+ s1++;
+ s2++;
+ }
+
+ if (*s1 < *s2)
+ return -1;
+ if (*s1 > *s2)
+ return +1;
+ return 0;
+}
#endif
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+ *
+ * GRUB is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GRUB is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GRUB_POSIX_WCTYPE_H
+#define GRUB_POSIX_WCTYPE_H 1
+
+#include <grub/misc.h>
+#include <wchar.h>
+/* Character-class identifiers returned by wctype and consumed by iswctype.
+   GRUB_CTYPE_INVALID is what wctype returns for an unknown name;
+   GRUB_CTYPE_MAX bounds the lookup loop in wctype. */
+typedef enum { GRUB_CTYPE_INVALID,
+ GRUB_CTYPE_ALNUM, GRUB_CTYPE_CNTRL, GRUB_CTYPE_LOWER,
+ GRUB_CTYPE_SPACE, GRUB_CTYPE_ALPHA, GRUB_CTYPE_DIGIT,
+ GRUB_CTYPE_PRINT, GRUB_CTYPE_UPPER, GRUB_CTYPE_BLANK,
+ GRUB_CTYPE_GRAPH, GRUB_CTYPE_PUNCT, GRUB_CTYPE_XDIGIT,
+ GRUB_CTYPE_MAX} wctype_t;
+/* Map a class name such as alnum or xdigit to its wctype_t value.
+   Returns GRUB_CTYPE_INVALID when NAME matches no known class. */
+static inline wctype_t
+wctype (const char *name)
+{
+ wctype_t i;
+ static const char names[][10] = { "", /* Same order as wctype_t. */
+ "alnum", "cntrl", "lower",
+ "space", "alpha", "digit",
+ "print", "upper", "blank",
+ "graph", "punct", "xdigit" };
+ for (i = GRUB_CTYPE_INVALID; i < GRUB_CTYPE_MAX; i++)
+ if (grub_strcmp (names[i], name) == 0)
+ return i; /* The table index is the enumerator value. */
+ return GRUB_CTYPE_INVALID;
+}
+
+/* Return nonzero when WC is a lowercase letter according to grub_islower.
+   FIXME: take into account international lowercase characters. */
+static inline int
+iswlower (wint_t wc)
+{
+ return grub_islower (wc);
+}
+/* Return C converted by grub_tolower. */
+static inline wint_t
+towlower (wint_t c)
+{
+ return grub_tolower (c);
+}
+/* Return C converted by grub_toupper. */
+static inline wint_t
+towupper (wint_t c)
+{
+ return grub_toupper (c);
+}
+/* Return nonzero when C satisfies grub_isalpha or grub_isdigit. */
+static inline int
+iswalnum (wint_t c)
+{
+ return grub_isalpha (c) || grub_isdigit (c);
+}
+/* Test whether WC belongs to the class DESC, such as a value returned by
+   wctype.  Most classes are delegated to the matching grub_is helper or to
+   a function defined in this header; blank and punct are computed inline.
+   Returns 0 for GRUB_CTYPE_INVALID or any other unhandled DESC. */
+static inline int
+iswctype (wint_t wc, wctype_t desc)
+{
+ switch (desc)
+ {
+ case GRUB_CTYPE_ALNUM:
+ return iswalnum (wc);
+ case GRUB_CTYPE_CNTRL:
+ return grub_iscntrl (wc);
+ case GRUB_CTYPE_LOWER:
+ return iswlower (wc);
+ case GRUB_CTYPE_SPACE:
+ return grub_isspace (wc);
+ case GRUB_CTYPE_ALPHA:
+ return grub_isalpha (wc);
+ case GRUB_CTYPE_DIGIT:
+ return grub_isdigit (wc);
+ case GRUB_CTYPE_PRINT:
+ return grub_isprint (wc);
+ case GRUB_CTYPE_UPPER:
+ return grub_isupper (wc);
+ case GRUB_CTYPE_BLANK:
+ return wc == ' ' || wc == '\t'; /* Space or horizontal tab only. */
+ case GRUB_CTYPE_GRAPH:
+ return grub_isgraph (wc);
+ case GRUB_CTYPE_PUNCT:
+ return grub_isprint (wc) && !grub_isspace (wc) && !iswalnum (wc); /* Printable, but neither space nor alphanumeric. */
+ case GRUB_CTYPE_XDIGIT:
+ return grub_isxdigit (wc);
+ default:
+ return 0;
+ }
+}
+
+#endif
#include "widthspec.h"
#endif
+int
+grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count)
+{ /* Feed one byte C into the UTF-8 decoder state (*CODE, *COUNT).
+     Returns 1 when C is accepted and 0 when it is rejected.  After an
+     accepted byte *COUNT is the number of continuation bytes still
+     expected, and *CODE is complete once *COUNT is zero.
+     NOTE(review): neither *CODE nor *COUNT is reset on the rejection
+     paths -- callers presumably abandon the state after a 0 return. */
+ if (*count) /* In the middle of a multi-byte sequence. */
+ {
+ if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
+ {
+ /* invalid */
+ return 0;
+ }
+ else
+ {
+ *code <<= 6; /* Append the six payload bits of this byte. */
+ *code |= (c & GRUB_UINT8_6_TRAILINGBITS);
+ (*count)--;
+ return 1;
+ }
+ }
+
+ if ((c & GRUB_UINT8_1_LEADINGBIT) == 0) /* C is a whole character by itself. */
+ {
+ *code = c;
+ return 1;
+ }
+ if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
+ {
+ *count = 1; /* Lead byte; one continuation byte follows. */
+ *code = c & GRUB_UINT8_5_TRAILINGBITS;
+ return 1;
+ }
+ if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
+ {
+ *count = 2; /* Lead byte; two continuation bytes follow. */
+ *code = c & GRUB_UINT8_4_TRAILINGBITS;
+ return 1;
+ }
+ if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
+ {
+ *count = 3; /* Lead byte; three continuation bytes follow. */
+ *code = c & GRUB_UINT8_3_TRAILINGBITS;
+ return 1;
+ }
+ return 0; /* C matches no accepted first-byte pattern. */
+}
+
grub_ssize_t
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize,
while (srcsize && destsize)
{
- grub_uint32_t c = *src++;
+ grub_uint8_t c = *src++;
if (srcsize != (grub_size_t)-1)
srcsize--;
- if (count)
+ if (!grub_utf8_process (c, &code, &count))
+ return -1;
+ if (count != 0)
+ continue;
+ if (code == 0)
+ break;
+ if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
+ break;
+ if (code >= GRUB_UCS2_LIMIT)
{
- if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
- {
- /* invalid */
- return -1;
- }
- else
- {
- code <<= 6;
- code |= (c & GRUB_UINT8_6_TRAILINGBITS);
- count--;
- }
+ *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
+ *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
+ destsize -= 2;
}
else
{
- if (c == 0)
- break;
-
- if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
- code = c;
- else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
- {
- count = 1;
- code = c & GRUB_UINT8_5_TRAILINGBITS;
- }
- else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
- {
- count = 2;
- code = c & GRUB_UINT8_4_TRAILINGBITS;
- }
- else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
- {
- count = 3;
- code = c & GRUB_UINT8_3_TRAILINGBITS;
- }
- else
- return -1;
- }
-
- if (count == 0)
- {
- if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
- break;
- if (code >= GRUB_UCS2_LIMIT)
- {
- *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
- *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
- destsize -= 2;
- }
- else
- {
- *p++ = code;
- destsize--;
- }
+ *p++ = code;
+ destsize--;
}
}
return p - dest;
}
+/* Encode the code point CODE as UTF-8 into the buffer [DEST, DESTEND).
+   Returns the number of bytes written (1 to 4), -2 if not enough space,
+   -1 on invalid character (a surrogate, 0xD800 through 0xDFFF).  Nothing
+   is written when a negative value is returned.
+   NOTE(review): every value of 0x10000 and above takes the four-byte
+   branch with no upper bound check -- confirm callers never pass more
+   than 0x10FFFF. */
+grub_ssize_t
+grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
+ grub_uint32_t code)
+{
+ if (dest >= destend)
+ return -2;
+ if (code <= 0x007F)
+ {
+ *dest++ = code;
+ return 1;
+ }
+ if (code <= 0x07FF)
+ {
+ if (dest + 1 >= destend) /* Need room for two bytes. */
+ return -2;
+ *dest++ = (code >> 6) | 0xC0;
+ *dest++ = (code & 0x3F) | 0x80;
+ return 2;
+ }
+ if ((code >= 0xDC00 && code <= 0xDFFF)
+ || (code >= 0xD800 && code <= 0xDBFF))
+ {
+ /* No surrogates in UCS-4... */
+ return -1;
+ }
+ if (code < 0x10000)
+ {
+ if (dest + 2 >= destend) /* Need room for three bytes. */
+ return -2;
+ *dest++ = (code >> 12) | 0xE0;
+ *dest++ = ((code >> 6) & 0x3F) | 0x80;
+ *dest++ = (code & 0x3F) | 0x80;
+ return 3;
+ }
+ {
+ if (dest + 3 >= destend) /* Need room for four bytes. */
+ return -2;
+ *dest++ = (code >> 18) | 0xF0;
+ *dest++ = ((code >> 12) & 0x3F) | 0x80;
+ *dest++ = ((code >> 6) & 0x3F) | 0x80;
+ *dest++ = (code & 0x3F) | 0x80;
+ return 4;
+ }
+
+}
+
/* Convert UCS-4 to UTF-8. */
void
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
while (size-- && dest < destend)
{
grub_uint32_t code = *src++;
-
- if (code <= 0x007F)
- *dest++ = code;
- else if (code <= 0x07FF)
- {
- if (dest + 1 >= destend)
- break;
- *dest++ = (code >> 6) | 0xC0;
- *dest++ = (code & 0x3F) | 0x80;
- }
- else if ((code >= 0xDC00 && code <= 0xDFFF)
- || (code >= 0xD800 && code <= 0xDBFF))
+ grub_ssize_t s;
+ s = grub_encode_utf8_character (dest, destend,
+ code);
+ if (s == -2)
+ break;
+ if (s == -1)
{
- /* No surrogates in UCS-4... */
*dest++ = '?';
+ continue;
}
- else if (code < 0x10000)
- {
- if (dest + 2 >= destend)
- break;
- *dest++ = (code >> 12) | 0xE0;
- *dest++ = ((code >> 6) & 0x3F) | 0x80;
- *dest++ = (code & 0x3F) | 0x80;
- }
- else
- {
- if (dest + 3 >= destend)
- break;
- *dest++ = (code >> 18) | 0xF0;
- *dest++ = ((code >> 12) & 0x3F) | 0x80;
- *dest++ = ((code >> 6) & 0x3F) | 0x80;
- *dest++ = (code & 0x3F) | 0x80;
- }
+ dest += s;
}
*dest = 0;
}
int grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
grub_uint32_t **last_position);
+
+/* Process one byte C of a UTF-8 sequence.
+ Before the first byte set *code = 0, *count = 0. Returns 0 on failure and
+ 1 on success. After a successful call *count holds the number of
+ continuation bytes still expected; *code is complete when *count is 0. */
+int
+grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count);
+
void
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
grub_uint8_t *dest, grub_size_t destsize);
grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize,
const grub_uint8_t **srcend);
+/* Encode CODE as UTF-8 into [dest, destend). Returns the number of bytes
+ written, -2 if not enough space, -1 on invalid character. */
+grub_ssize_t
+grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
+ grub_uint32_t code);
#endif
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
+static inline int
+grub_islower (int c)
+{
+ return (c >= 'a' && c <= 'z'); /* Nonzero only for the letters a through z. */
+}
+/* Return nonzero only when C is one of the letters A through Z. */
+static inline int
+grub_isupper (int c)
+{
+ return (c >= 'A' && c <= 'Z');
+}
+
static inline int
grub_isgraph (int c)
{
return (int) grub_tolower (*s1) - (int) grub_tolower (*s2);
}
-/* Replace all `ch' characters of `input' with `with' and copy the
- result into `output'; return EOS address of `output'. */
-static inline char *
-grub_strchrsub (char *output, const char *input, char ch, const char *with)
-{
- grub_size_t grub_strlen (const char *s);
- while (*input)
- {
- if (*input == ch)
- {
- grub_strcpy (output, with);
- output += grub_strlen (with);
- input++;
- continue;
- }
- *output++ = *input++;
- }
- *output = '\0';
- return output;
-}
-
unsigned long EXPORT_FUNC(grub_strtoul) (const char *str, char **end, int base);
unsigned long long EXPORT_FUNC(grub_strtoull) (const char *str, char **end, int base);
int EXPORT_FUNC(grub_printf) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
int EXPORT_FUNC(grub_printf_) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
+/* Copy INPUT to OUTPUT, replacing every occurrence of the character CH
+   with the string WITH.  Returns a pointer to the terminating NUL written
+   to OUTPUT.  No size for OUTPUT is passed in, so the caller must provide
+   a buffer large enough for the expanded result. */
+static inline char *
+grub_strchrsub (char *output, const char *input, char ch, const char *with)
+{
+ while (*input)
+ {
+ if (*input == ch)
+ {
+ grub_strcpy (output, with);
+ output += grub_strlen (with); /* Continue right after the inserted text. */
+ input++;
+ continue;
+ }
+ *output++ = *input++;
+ }
+ *output = '\0';
+ return output;
+}
+
extern void (*EXPORT_VAR (grub_xputs)) (const char *str);
static inline int