Inline basic UTF-8 functions.

author Jeff Davis <jdavis@postgresql.org>

Wed, 20 Mar 2024 16:40:57 +0000 (09:40 -0700)

committer Jeff Davis <jdavis@postgresql.org>

Wed, 20 Mar 2024 16:40:57 +0000 (09:40 -0700)
author Jeff Davis <jdavis@postgresql.org>
Wed, 20 Mar 2024 16:40:57 +0000 (09:40 -0700)
committer Jeff Davis <jdavis@postgresql.org>
Wed, 20 Mar 2024 16:40:57 +0000 (09:40 -0700)
diff --git a/src/common/wchar.c b/src/common/wchar.c

index a238c0106c6efcd7b1940d6f965e7e2cc4ee2c0a..76b7dfdfcb668704646bd03a5a8eb58424af3aa0 100644 (file)
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -476,39 +476,6 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
  }
  
  
-/*
- * Map a Unicode code point to UTF-8.  utf8string must have at least
- * unicode_utf8len(c) bytes available.
- */
-unsigned char *
-unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
-{
-       if (c <= 0x7F)
-       {
-               utf8string[0] = c;
-       }
-       else if (c <= 0x7FF)
-       {
-               utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
-               utf8string[1] = 0x80 | (c & 0x3F);
-       }
-       else if (c <= 0xFFFF)
-       {
-               utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
-               utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
-               utf8string[2] = 0x80 | (c & 0x3F);
-       }
-       else
-       {
-               utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
-               utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
-               utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
-               utf8string[3] = 0x80 | (c & 0x3F);
-       }
-
-       return utf8string;
-}
-
  /*
   * Trivial conversion from pg_wchar to UTF-8.
   * caller should allocate enough space for "to"
@@ -670,34 +637,6 @@ ucs_wcwidth(pg_wchar ucs)
         return 1;
  }
  
-/*
- * Convert a UTF-8 character to a Unicode code point.
- * This is a one-character version of pg_utf2wchar_with_len.
- *
- * No error checks here, c must point to a long-enough string.
- */
-pg_wchar
-utf8_to_unicode(const unsigned char *c)
-{
-       if ((*c & 0x80) == 0)
-               return (pg_wchar) c[0];
-       else if ((*c & 0xe0) == 0xc0)
-               return (pg_wchar) (((c[0] & 0x1f) << 6) |
-                                                  (c[1] & 0x3f));
-       else if ((*c & 0xf0) == 0xe0)
-               return (pg_wchar) (((c[0] & 0x0f) << 12) |
-                                                  ((c[1] & 0x3f) << 6) |
-                                                  (c[2] & 0x3f));
-       else if ((*c & 0xf8) == 0xf0)
-               return (pg_wchar) (((c[0] & 0x07) << 18) |
-                                                  ((c[1] & 0x3f) << 12) |
-                                                  ((c[2] & 0x3f) << 6) |
-                                                  (c[3] & 0x3f));
-       else
-               /* that is an invalid code on purpose */
-               return 0xffffffff;
-}
-
  static int
  pg_utf_dsplen(const unsigned char *s)
  {
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h

index 69a55b66f44d23ed1bf2d76e1224720556d10715..249cd18a35701f32a479de54d0d60ba895cbe369 100644 (file)
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -555,6 +555,67 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
         return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
  }
  
+/*
+ * Convert a UTF-8 character to a Unicode code point.
+ * This is a one-character version of pg_utf2wchar_with_len.
+ * Returns 0xffffffff if the lead byte matches none of the patterns below.
+ * No error checks here; c must point to a long-enough string.
+ */
+static inline pg_wchar
+utf8_to_unicode(const unsigned char *c)
+{
+       if ((*c & 0x80) == 0)                   /* lead byte 0xxxxxxx: 1 byte */
+               return (pg_wchar) c[0];
+       else if ((*c & 0xe0) == 0xc0)           /* lead byte 110xxxxx: 2 bytes */
+               return (pg_wchar) (((c[0] & 0x1f) << 6) |
+                                                  (c[1] & 0x3f));      /* low 6 bits; byte is not validated */
+       else if ((*c & 0xf0) == 0xe0)           /* lead byte 1110xxxx: 3 bytes */
+               return (pg_wchar) (((c[0] & 0x0f) << 12) |
+                                                  ((c[1] & 0x3f) << 6) |
+                                                  (c[2] & 0x3f));
+       else if ((*c & 0xf8) == 0xf0)           /* lead byte 11110xxx: 4 bytes */
+               return (pg_wchar) (((c[0] & 0x07) << 18) |
+                                                  ((c[1] & 0x3f) << 12) |
+                                                  ((c[2] & 0x3f) << 6) |
+                                                  (c[3] & 0x3f));
+       else
+               /* unrecognized lead byte: return a deliberately invalid code */
+               return 0xffffffff;
+}
+
+/*
+ * Map a Unicode code point to UTF-8.  utf8string must have at least
+ * unicode_utf8len(c) bytes available.  Returns utf8string; no terminator is written.
+ */
+static inline unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+       if (c <= 0x7F)                          /* 1 byte: stored unchanged */
+       {
+               utf8string[0] = c;
+       }
+       else if (c <= 0x7FF)                    /* 2 bytes: 110xxxxx 10xxxxxx */
+       {
+               utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
+               utf8string[1] = 0x80 | (c & 0x3F);
+       }
+       else if (c <= 0xFFFF)                   /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
+       {
+               utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
+               utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
+               utf8string[2] = 0x80 | (c & 0x3F);
+       }
+       else                                    /* anything above 0xFFFF: 4 bytes, no range check */
+       {
+               utf8string[0] = 0xF0 | ((c >> 18) & 0x07);      /* only bits 18-20 of c are kept */
+               utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
+               utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
+               utf8string[3] = 0x80 | (c & 0x3F);
+       }
+
+       return utf8string;                      /* same pointer the caller passed in */
+}
+
  /*
   * Number of bytes needed to represent the given char in UTF8.
   */
author	Jeff Davis <jdavis@postgresql.org>
	Wed, 20 Mar 2024 16:40:57 +0000 (09:40 -0700)
committer	Jeff Davis <jdavis@postgresql.org>
	Wed, 20 Mar 2024 16:40:57 +0000 (09:40 -0700)
src/common/wchar.c		patch \| blob \| blame \| history
src/include/mb/pg_wchar.h		patch \| blob \| blame \| history