git.ipfire.org Git - thirdparty/grub.git/commitdiff
Improve Unicode compliance and unify some UTF-8 code paths.
author Vladimir 'phcoder' Serbinenko <phcoder@gmail.com>
Sun, 25 Dec 2011 14:57:50 +0000 (15:57 +0100)
committer Vladimir 'phcoder' Serbinenko <phcoder@gmail.com>
Sun, 25 Dec 2011 14:57:50 +0000 (15:57 +0100)
* grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat possibly
valid character.
(grub_is_valid_utf8): Use grub_utf8_process.
Check resulting code range.
(grub_utf8_to_ucs4): Use grub_utf8_process.
* include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly
valid character.

ChangeLog
grub-core/normal/charset.c
include/grub/charset.h

index ee7611cafb8102f7d71622cca5d3086f03504c1a..ada61af4c82e10c7e3c75a08bfde26a331048889 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2011-12-25  Vladimir Serbinenko  <phcoder@gmail.com>
+
+       Improve Unicode compliance and unify some UTF-8 code paths.
+
+       * grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat possibly
+       valid character.
+       (grub_is_valid_utf8): Use grub_utf8_process.
+       Check resulting code range.
+       (grub_utf8_to_ucs4): Use grub_utf8_process.
+       * include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly
+       valid character.
+
 2011-12-25  Vladimir Serbinenko  <phcoder@gmail.com>
 
        * grub-core/io/bufio.c (grub_bufio_read): Fix handling of corner cases.
index ee4a7ef5f37d5f1a75f37ed977a845f689d6b977..6ddd91827a58f563d259be37e16ac190330fbbb5 100644 (file)
@@ -119,11 +119,17 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
 
   while (srcsize && destsize)
     {
-      grub_uint8_t c = *src++;
+      int was_count = count;
       if (srcsize != (grub_size_t)-1)
        srcsize--;
-      if (!grub_utf8_process (c, &code, &count))
-       return -1;
+      if (!grub_utf8_process (*src++, &code, &count))
+       {
+         code = '?';
+         count = 0;
+         /* Character c may be valid, don't eat it.  */
+         if (was_count)
+           src--;
+       }
       if (count != 0)
        continue;
       if (code == 0)
@@ -263,53 +269,21 @@ grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size)
 int
 grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
 {
-  grub_uint32_t code = 0;
   int count = 0;
+  grub_uint32_t code = 0;
 
   while (srcsize)
     {
-      grub_uint32_t c = *src++;
       if (srcsize != (grub_size_t)-1)
        srcsize--;
-      if (count)
-       {
-         if ((c & 0xc0) != 0x80)
-           {
-             /* invalid */
-             return 0;
-           }
-         else
-           {
-             code <<= 6;
-             code |= (c & 0x3f);
-             count--;
-           }
-       }
-      else
-       {
-         if (c == 0)
-           break;
-
-         if ((c & 0x80) == 0x00)
-           code = c;
-         else if ((c & 0xe0) == 0xc0)
-           {
-             count = 1;
-             code = c & 0x1f;
-           }
-         else if ((c & 0xf0) == 0xe0)
-           {
-             count = 2;
-             code = c & 0x0f;
-           }
-         else if ((c & 0xf8) == 0xf0)
-           {
-             count = 3;
-             code = c & 0x07;
-           }
-         else
-           return 0;
-       }
+      if (!grub_utf8_process (*src++, &code, &count))
+       return 0;
+      if (count != 0)
+       continue;
+      if (code == 0)
+       return 1;
+      if (code > GRUB_UNICODE_LAST_VALID)
+       return 0;
     }
 
   return 1;
@@ -355,63 +329,23 @@ grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
 
   while (srcsize && destsize)
     {
-      grub_uint32_t c = *src++;
+      int was_count = count;
       if (srcsize != (grub_size_t)-1)
        srcsize--;
-      if (count)
-       {
-         if ((c & 0xc0) != 0x80)
-           {
-             /* invalid */
-             code = '?';
-             /* Character c may be valid, don't eat it.  */
-             src--;
-             if (srcsize != (grub_size_t)-1)
-               srcsize++;
-             count = 0;
-           }
-         else
-           {
-             code <<= 6;
-             code |= (c & 0x3f);
-             count--;
-           }
-       }
-      else
+      if (!grub_utf8_process (*src++, &code, &count))
        {
-         if (c == 0)
-           break;
-
-         if ((c & 0x80) == 0x00)
-           code = c;
-         else if ((c & 0xe0) == 0xc0)
-           {
-             count = 1;
-             code = c & 0x1f;
-           }
-         else if ((c & 0xf0) == 0xe0)
-           {
-             count = 2;
-             code = c & 0x0f;
-           }
-         else if ((c & 0xf8) == 0xf0)
-           {
-             count = 3;
-             code = c & 0x07;
-           }
-         else
-           {
-             /* invalid */
-             code = '?';
-             count = 0;
-           }
-       }
-
-      if (count == 0)
-       {
-         *p++ = code;
-         destsize--;
+         code = '?';
+         count = 0;
+         /* Character c may be valid, don't eat it.  */
+         if (was_count)
+           src--;
        }
+      if (count != 0)
+       continue;
+      if (code == 0)
+       break;
+      *p++ = code;
+      destsize--;
     }
 
   if (srcend)
index b0960c940e00d8a267781e357e79f84147356abe..82a005f32d97cf1a10bf8c1ba9eb1294b18e5c1e 100644 (file)
@@ -76,6 +76,8 @@ grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src,
            {
              /* Error...  */
              *dest++ = '?';
+             /* *src may be valid. Don't eat it.  */
+             src--;
            }
 
          code_high = 0;