[thirdparty/systemd.git] / src / shared / gunicode.c

/* gunicode.c - Unicode manipulation functions
 *
 *  Copyright (C) 1999, 2000 Tom Tromey
 *  Copyright 2000, 2005 Red Hat, Inc.
 */

#include "gunicode.h"

#define unichar uint32_t

/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Walks backwards from @p one byte at a time and stops at the first byte
 * that is not a UTF-8 continuation byte (i.e. not of the form 10xxxxxx).
 *
 * @p may point into the middle of a character. The byte that is found is
 * not validated any further. The scan has no lower bound, so the caller
 * must guarantee that a non-continuation byte precedes @p; if @p might be
 * the first character of the string, use g_utf8_find_prev_char() instead.
 *
 * Return value: a pointer to the found character.
 **/
char *
utf8_prev_char (const char *p)
{
  /* Always step back at least once, then keep going while the byte under
   * the cursor is a continuation byte (top two bits are 10). */
  do
    p--;
  while ((*p & 0xc0) == 0x80);

  return (char *) p;
}

/* One entry of a code point range table. Both bounds are inclusive:
 * interval_compare() treats start <= c <= end as a match. */
struct Interval
{
  unichar start;   /* first code point in the range */
  unichar end;     /* last code point in the range */
};

static int
interval_compare (const void *key, const void *elt)
{
  unichar c = (unichar) (long) (key);
  struct Interval *interval = (struct Interval *)elt;

  if (c < interval->start)
    return -1;
  if (c > interval->end)
    return +1;

  return 0;
}

/*
 * NOTE:
 *
 * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
 * generated from the Unicode Character Database's file
 * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
 * in this way:
 *
 *   ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
 *
 * Last update for Unicode 6.0.
 */

/**
 * unichar_iswide:
 * @c: a Unicode character
 *
 * Reports whether @c falls into one of the wide ranges of the built-in
 * table, i.e. whether it is typically rendered in a double-width cell.
 *
 * Return value: true if the character is wide, false otherwise
 **/
bool
unichar_iswide (unichar c)
{
  /* Inclusive ranges in ascending order, as bsearch() requires.
   * See NOTE earlier for how to update this table. */
  static const struct Interval wide_ranges[] = {
    {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
    {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
    {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
    {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
    {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
    {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
    {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
    {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
    {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x20000, 0x2FFFD},
    {0x30000, 0x3FFFD}
  };
  const size_t n_ranges = sizeof wide_ranges / sizeof wide_ranges[0];

  /* The code point itself travels in the key "pointer"; interval_compare()
   * converts it back to an integer instead of dereferencing it. */
  return bsearch ((void *) (uintptr_t) c, wide_ranges, n_ranges,
                  sizeof wide_ranges[0], interval_compare) != NULL;
}

/* Per-byte skip counts, one entry for every byte value 0x00-0xFF.
 * The values follow the UTF-8 lead-byte layout, including the 5- and
 * 6-byte forms; every other byte value maps to 1.
 * NOTE(review): no consumer is visible in this file -- presumably the table
 * is indexed by the first byte of a sequence to find the next one; confirm
 * against the callers. */
const char utf8_skip_data[256] = {
  /* 0x00-0x1F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20-0x3F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40-0x5F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x60-0x7F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80-0x9F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xA0-0xBF */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0-0xDF */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0-0xEF */
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xF0-0xF7 */
  4,4,4,4,4,4,4,4,
  /* 0xF8-0xFB */
  5,5,5,5,
  /* 0xFC-0xFD */
  6,6,
  /* 0xFE-0xFF */
  1,1
};
Commit	Line	Data
f405e86d SL	1	/* gunicode.c - Unicode manipulation functions
	2	*
	3	* Copyright (C) 1999, 2000 Tom Tromey
	4	* Copyright 2000, 2005 Red Hat, Inc.
	5	*/
	6
	7	#include "gunicode.h"
	8
	9	#define unichar uint32_t
	10
	11	/**
	12	* g_utf8_prev_char:
	13	* @p: a pointer to a position within a UTF-8 encoded string
	14	*
	15	* Finds the previous UTF-8 character in the string before @p.
	16	*
	17	* @p does not have to be at the beginning of a UTF-8 character. No check
	18	* is made to see if the character found is actually valid other than
	19	* it starts with an appropriate byte. If @p might be the first
	20	* character of the string, you must use g_utf8_find_prev_char() instead.
	21	*
	22	* Return value: a pointer to the found character.
	23	**/
	24	char *
	25	utf8_prev_char (const char *p)
	26	{
	27	while (1)
	28	{
	29	p--;
	30	if ((*p & 0xc0) != 0x80)
	31	return (char *)p;
	32	}
	33	}
	34
	35	struct Interval
	36	{
	37	unichar start, end;
	38	};
	39
	40	static int
	41	interval_compare (const void key, const void elt)
	42	{
	43	unichar c = (unichar) (long) (key);
	44	struct Interval interval = (struct Interval )elt;
	45
	46	if (c < interval->start)
	47	return -1;
	48	if (c > interval->end)
	49	return +1;
	50
	51	return 0;
	52	}
	53
	54	/*
	55	* NOTE:
	56	*
	57	* The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
	58	* generated from the Unicode Character Database's file
	59	* extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
	60	* in this way:
	61	*
	62	* ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt \| fmt
	63	*
	64	* Last update for Unicode 6.0.
65	*/
66
67	/**
68	* g_unichar_iswide:
69	* @c: a Unicode character
70	*
71	* Determines if a character is typically rendered in a double-width
72	* cell.
73	*
74	* Return value: %TRUE if the character is wide
75	**/
76	bool
77	unichar_iswide (unichar c)
78	{
79	/* See NOTE earlier for how to update this table. */
80	static const struct Interval wide[] = {
81	{0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
82	{0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
83	{0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
84	{0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
85	{0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
86	{0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
87	{0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
88	{0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, {0x1F240,
89	0x1F248}, {0x1F250, 0x1F251}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}
90	};
91
92	if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
93	interval_compare))
94	return true;
95
96	return false;
97	}
98
99	const char utf8_skip_data[256] = {
100	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
101	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
102	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
103	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
104	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
105	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
106	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
107	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
108	};