[thirdparty/glibc.git] / stdlib / grouping.c

/* Internal function for checking correct grouping in strings of numbers.
   Copyright (C) 1995-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <limits.h>
#include <stddef.h>
#include <string.h>

#ifndef MAX
#define MAX(a,b)	({ typeof(a) _a = (a); typeof(b) _b = (b); \
			   _a > _b ? _a : _b; })
#endif

#ifdef USE_WIDE_CHAR
# include <wctype.h>
# define L_(Ch) L##Ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
#else
# define L_(Ch) Ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
#endif

#include "grouping.h"

/* Find the maximum prefix of the string between BEGIN and END which
   satisfies the grouping rules.  It is assumed that at least one digit
   follows BEGIN directly.

   BEGIN points to the first character of the number and END one past
   its last character.  THOUSANDS is the thousands separator: a single
   wide character in the wide-character variant, a NUL-terminated byte
   string otherwise.  GROUPING lists the group sizes, one `char' per
   group, starting with the group that ends at END.  A zero byte ends
   the list and makes the last size repeat; CHAR_MAX or a negative
   value means that no further separator may follow.

   The return value is END itself if GROUPING is NULL, if the string
   contains no separator at all, or if all groups are correct.
   Otherwise it is the end of a shorter prefix, but never a pointer
   before BEGIN.

   NOTE(review): after a matching first group the code reads GROUPING[1],
   and the multibyte variant indexes with THOUSANDS_LEN - 1, so this
   presumably relies on the caller passing non-empty GROUPING and
   THOUSANDS strings -- confirm against the callers.

   NOTE(review): in the multibyte variant THOUSANDS[CNT] is compared
   with CP[THOUSANDS_LEN - 1 - CNT], i.e. in reverse byte order relative
   to CP, and the group length arithmetic counts the separator as one
   character.  This looks right only for single-byte separators --
   confirm before relying on it for longer ones.  */

const STRING_TYPE *
#ifdef USE_WIDE_CHAR
__correctly_grouped_prefixwc (const STRING_TYPE *begin, const STRING_TYPE *end,
                              wchar_t thousands,
#else
__correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
                              const char *thousands,
#endif
                              const char *grouping)
{
#ifndef USE_WIDE_CHAR
  size_t thousands_len;
  int cnt;
#endif

  /* Without grouping rules there is nothing to check.  */
  if (grouping == NULL)
    return end;

#ifndef USE_WIDE_CHAR
  thousands_len = strlen (thousands);
#endif

  /* Each iteration checks the candidate prefix from BEGIN to END.  If it
     is not correctly grouped END is moved towards BEGIN and the check
     is repeated.  */
  while (end > begin)
    {
      const STRING_TYPE *cp = end - 1;
      const char *gp = grouping;

      /* Check first group: scan backwards from END for a separator.  */
      while (cp >= begin)
        {
#ifdef USE_WIDE_CHAR
          if (*cp == thousands)
            break;
#else
          if (cp[thousands_len - 1] == *thousands)
            {
              for (cnt = 1; thousands[cnt] != '\0'; ++cnt)
                if (thousands[cnt] != cp[thousands_len - 1 - cnt])
                  break;
              if (thousands[cnt] == '\0')
                break;
            }
#endif
          --cp;
        }

      /* We allow the representation to contain no grouping at all even if
         the locale specifies we can have grouping.  */
      if (cp < begin)
        return end;

      /* From here on CP >= BEGIN holds, so CP points to a thousands
         separator character.  */

      if (end - cp == (int) *gp + 1)
        {
          /* This group matches the specification.  */

          /* The preceding remainder of the string from BEGIN to NEW_END
             is the part we will consider if there is a grouping error
             in this trailing portion from CP to END.  */
          const STRING_TYPE *new_end = cp - 1;

          /* Loop while the grouping is correct.  */
          while (1)
            {
              /* Get the next grouping rule.  */
              ++gp;
              if (*gp == 0)
                /* If end is reached use last rule.  */
                --gp;

              /* Skip the thousands separator.  */
              --cp;

              if (*gp == CHAR_MAX
#if CHAR_MIN < 0
                  || *gp < 0
#endif
                  )
                {
                  /* No more thousands separators are allowed to follow.  */
                  while (cp >= begin)
                    {
#ifdef USE_WIDE_CHAR
                      if (*cp == thousands)
                        break;
#else
                      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
                        if (thousands[cnt] != cp[thousands_len - cnt - 1])
                          break;
                      if (thousands[cnt] == '\0')
                        break;
#endif
                      --cp;
                    }

                  if (cp < begin)
                    /* OK, only digits followed.  */
                    return end;

                  /* A separator was found.  The next iteration steps
                     over it and scans on with the same rule.  */
                }
              else
                {
                  /* Check the next group.  */
                  const STRING_TYPE *group_end = cp;

                  while (cp >= begin)
                    {
#ifdef USE_WIDE_CHAR
                      if (*cp == thousands)
                        break;
#else
                      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
                        if (thousands[cnt] != cp[thousands_len - cnt - 1])
                          break;
                      if (thousands[cnt] == '\0')
                        break;
#endif
                      --cp;
                    }

                  /* The leading group may be shorter than the rule says.  */
                  if (cp < begin && group_end - cp <= (int) *gp)
                    /* Final group is correct.  */
                    return end;

                  if (cp < begin || group_end - cp != (int) *gp)
                    /* Incorrect group.  Punt.  */
                    break;
                }
            }

          /* The trailing portion of the string starting at NEW_END
             contains a grouping error.  So we will look for a correctly
             grouped number in the preceding portion instead.  */
          end = new_end;
        }
      else
        {
          /* Even the first group was wrong; determine maximum shift.  */
          if (end - cp > (int) *gp + 1)
            /* Too many digits: keep as many as the first rule allows.  */
            end = cp + (int) *gp + 1;
          else
            /* Too few digits.  CP points to a thousands separator
               character; cut the string off right before it.  */
            end = cp;
        }
    }

  /* END may have been moved to just before BEGIN; never return that.  */
  return MAX (begin, end);
}
Commit	Line	Data
9c7ff11a	1	/* Internal header for proving correct grouping in strings of numbers.
688903eb	2	Copyright (C) 1995-2018 Free Software Foundation, Inc.
9c7ff11a UD	3	This file is part of the GNU C Library.
	4	Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	17	License along with the GNU C Library; if not, see
59ba27a6 PE	18	<http://www.gnu.org/licenses/>. */
9c7ff11a UD	19
	20	#include <limits.h>
	21	#include <stddef.h>
	22	#include <string.h>
	23
	24	#ifndef MAX
	25	#define MAX(a,b) ({ typeof(a) _a = (a); typeof(b) _b = (b); \
	26	_a > _b ? _a : _b; })
	27	#endif
	28
	29	#ifdef USE_WIDE_CHAR
	30	# include <wctype.h>
	31	# define L_(Ch) L##Ch
	32	# define UCHAR_TYPE wint_t
	33	# define STRING_TYPE wchar_t
	34	#else
	35	# define L_(Ch) Ch
	36	# define UCHAR_TYPE unsigned char
	37	# define STRING_TYPE char
	38	#endif
	39
	40	#include "grouping.h"
	41
	42	/* Find the maximum prefix of the string between BEGIN and END which
	43	satisfies the grouping rules. It is assumed that at least one digit
	44	follows BEGIN directly. */
	45
	46	const STRING_TYPE *
	47	#ifdef USE_WIDE_CHAR
	48	__correctly_grouped_prefixwc (const STRING_TYPE *begin, const STRING_TYPE *end,
	49	wchar_t thousands,
	50	#else
	51	__correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
	52	const char *thousands,
	53	#endif
	54	const char *grouping)
	55	{
	56	#ifndef USE_WIDE_CHAR
	57	size_t thousands_len;
	58	int cnt;
	59	#endif
	60
	61	if (grouping == NULL)
	62	return end;
	63
	64	#ifndef USE_WIDE_CHAR
	65	thousands_len = strlen (thousands);
	66	#endif
	67
	68	while (end > begin)
	69	{
	70	const STRING_TYPE *cp = end - 1;
	71	const char *gp = grouping;
	72
	73	/* Check first group. */
	74	while (cp >= begin)
	75	{
	76	#ifdef USE_WIDE_CHAR
	77	if (*cp == thousands)
	78	break;
	79	#else
	80	if (cp[thousands_len - 1] == *thousands)
	81	{
	82	for (cnt = 1; thousands[cnt] != '\0'; ++cnt)
83	if (thousands[cnt] != cp[thousands_len - 1 - cnt])
84	break;
85	if (thousands[cnt] == '\0')
86	break;
87	}
88	#endif
89	--cp;
90	}
91
92	/* We allow the representation to contain no grouping at all even if
93	the locale specifies we can have grouping. */
94	if (cp < begin)
95	return end;
96
97	if (end - cp == (int) *gp + 1)
98	{
99	/* This group matches the specification. */
100
101	const STRING_TYPE *new_end;
102
103	if (cp < begin)
104	/* There is just one complete group. We are done. */
105	return end;
106
107	/* CP points to a thousands separator character. The preceding
108	remainder of the string from BEGIN to NEW_END is the part we
109	will consider if there is a grouping error in this trailing
110	portion from CP to END. */
111	new_end = cp - 1;
112
113	/* Loop while the grouping is correct. */
114	while (1)
115	{
116	/* Get the next grouping rule. */
117	++gp;
118	if (*gp == 0)
119	/* If end is reached use last rule. */
120	--gp;
121
122	/* Skip the thousands separator. */
123	--cp;
124
125	if (*gp == CHAR_MAX
126	#if CHAR_MIN < 0
	127	|| *gp < 0
128	#endif
129	)
130	{
131	/* No more thousands separators are allowed to follow. */
132	while (cp >= begin)
133	{
134	#ifdef USE_WIDE_CHAR
135	if (*cp == thousands)
136	break;
137	#else
138	for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
139	if (thousands[cnt] != cp[thousands_len - cnt - 1])
140	break;
141	if (thousands[cnt] == '\0')
142	break;
143	#endif
144	--cp;
145	}
146
147	if (cp < begin)
148	/* OK, only digits followed. */
149	return end;
150	}
151	else
152	{
153	/* Check the next group. */
154	const STRING_TYPE *group_end = cp;
155
156	while (cp >= begin)
157	{
158	#ifdef USE_WIDE_CHAR
159	if (*cp == thousands)
160	break;
161	#else
162	for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
163	if (thousands[cnt] != cp[thousands_len - cnt - 1])
164	break;
165	if (thousands[cnt] == '\0')
166	break;
167	#endif
168	--cp;
169	}
170
171	if (cp < begin && group_end - cp <= (int) *gp)
172	/* Final group is correct. */
173	return end;
174
	175	if (cp < begin || group_end - cp != (int) *gp)
176	/* Incorrect group. Punt. */
177	break;
178	}
179	}
180
181	/* The trailing portion of the string starting at NEW_END
182	contains a grouping error. So we will look for a correctly
183	grouped number in the preceding portion instead. */
184	end = new_end;
185	}
186	else
187	{
188	/* Even the first group was wrong; determine maximum shift. */
189	if (end - cp > (int) *gp + 1)
190	end = cp + (int) *gp + 1;
191	else if (cp < begin)
192	/* This number does not fill the first group, but is correct. */
193	return end;
194	else
195	/* CP points to a thousands separator character. */
196	end = cp;
197	}
198	}
199
200	return MAX (begin, end);
201	}