]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - readline/mbutil.c
TUI window resize should not need invisibility
[thirdparty/binutils-gdb.git] / readline / mbutil.c
CommitLineData
9255ee31
EZ
1/* mbutil.c -- readline multibyte character utility functions */
2
cb41b9e7 3/* Copyright (C) 2001-2017 Free Software Foundation, Inc.
9255ee31 4
cc88a640
JK
5 This file is part of the GNU Readline Library (Readline), a library
6 for reading lines of text with interactive input and history editing.
9255ee31 7
cc88a640
JK
8 Readline is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
9255ee31
EZ
11 (at your option) any later version.
12
cc88a640
JK
13 Readline is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9255ee31
EZ
16 GNU General Public License for more details.
17
cc88a640
JK
18 You should have received a copy of the GNU General Public License
19 along with Readline. If not, see <http://www.gnu.org/licenses/>.
20*/
21
9255ee31
EZ
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25# include <config.h>
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33# include <unistd.h> /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37# include <stdlib.h>
38#else
39# include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50# include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* Defined in this file so that the readline and history libraries can
   both use it.  The initial value is 0 when the library is built with
   HANDLE_MULTIBYTE and 1 otherwise. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif

/* Likewise defined here so it can be shared between the two libraries.
   Starts out as 0. */
int _rl_utf8locale = 0;
69
9255ee31
EZ
70/* **************************************************************** */
71/* */
72/* Multibyte Character Utility Functions */
73/* */
74/* **************************************************************** */
75
76#if defined(HANDLE_MULTIBYTE)
77
cb41b9e7
TT
78/* **************************************************************** */
79/* */
80/* UTF-8 specific Character Utility Functions */
81/* */
82/* **************************************************************** */
83
/* Return the length in bytes of the possibly-multibyte character beginning
   at S, examining no more than N bytes.  Encoding is UTF-8.

   Returns 0 if S is a null pointer (UTF-8 has no shift states) or if S
   points at a NUL byte; the length (1 to 4) of a well-formed character;
   and -1 if N is 0 or the bytes at S form an invalid or incomplete
   sequence.

   Every byte is examined as an unsigned char.  Testing a plain char with
   `(b ^ 0x80) < 0x40' is wrong where char is signed: any byte >= 0x80
   sign-extends to a negative int and passes the test, so a lead byte
   followed by another lead byte would be accepted.  No byte at or beyond
   S[N] is read. */
static int
_rl_utf8_mblen (const char *s, size_t n)
{
  const unsigned char *u;
  unsigned char c, c1;

  if (s == 0)
    return (0);         /* no shift states */
  if (n == 0)
    return (-1);

  u = (const unsigned char *)s;
  c = u[0];
  if (c < 0x80)
    return (c != 0);

  /* 0x80-0xc1 can never start a character (continuation bytes and overlong
     two-byte leads), and every multibyte character needs a second byte. */
  if (c < 0xc2 || n < 2)
    return -1;

  c1 = u[1];
  if ((c1 ^ 0x80) >= 0x40)
    return -1;          /* second byte is not of the form 10xxxxxx */

  if (c < 0xe0)
    return 2;

  if (c < 0xf0)
    {
      if (n >= 3
          && (u[2] ^ 0x80) < 0x40
          && (c >= 0xe1 || c1 >= 0xa0)          /* no overlong forms */
          && (c != 0xed || c1 < 0xa0))          /* no UTF-16 surrogates */
        return 3;
    }
  else if (c < 0xf8)
    {
      if (n >= 4
          && (u[2] ^ 0x80) < 0x40
          && (u[3] ^ 0x80) < 0x40
          && (c >= 0xf1 || c1 >= 0x90)          /* no overlong forms */
          && (c < 0xf4 || (c == 0xf4 && c1 < 0x90)))    /* <= U+10FFFF */
        return 4;
    }

  /* invalid or incomplete multibyte character */
  return -1;
}
128
9255ee31 129static int
cb41b9e7 130_rl_find_next_mbchar_internal (char *string, int seed, int count, int find_non_zero)
9255ee31 131{
cc88a640 132 size_t tmp, len;
9255ee31 133 mbstate_t ps;
5bdf8622 134 int point;
9255ee31
EZ
135 wchar_t wc;
136
5bdf8622
DJ
137 tmp = 0;
138
9255ee31
EZ
139 memset(&ps, 0, sizeof (mbstate_t));
140 if (seed < 0)
141 seed = 0;
142 if (count <= 0)
143 return seed;
144
5bdf8622 145 point = seed + _rl_adjust_point (string, seed, &ps);
cb41b9e7
TT
146 /* if _rl_adjust_point returns -1, the character or string is invalid.
147 treat as a byte. */
148 if (point == seed - 1) /* invalid */
149 return seed + 1;
150
cc88a640
JK
151 /* if this is true, means that seed was not pointing to a byte indicating
152 the beginning of a multibyte character. Correct the point and consume
153 one char. */
9255ee31 154 if (seed < point)
5bdf8622 155 count--;
9255ee31
EZ
156
157 while (count > 0)
158 {
cc88a640
JK
159 len = strlen (string + point);
160 if (len == 0)
161 break;
cb41b9e7
TT
162 if (_rl_utf8locale && UTF8_SINGLEBYTE(string[point]))
163 {
164 tmp = 1;
165 wc = (wchar_t) string[point];
166 memset(&ps, 0, sizeof(mbstate_t));
167 }
168 else
169 tmp = mbrtowc (&wc, string+point, len, &ps);
5bdf8622 170 if (MB_INVALIDCH ((size_t)tmp))
9255ee31 171 {
cc88a640 172 /* invalid bytes. assume a byte represents a character */
9255ee31
EZ
173 point++;
174 count--;
175 /* reset states. */
176 memset(&ps, 0, sizeof(mbstate_t));
177 }
5bdf8622
DJ
178 else if (MB_NULLWCH (tmp))
179 break; /* found wide '\0' */
9255ee31
EZ
180 else
181 {
182 /* valid bytes */
183 point += tmp;
184 if (find_non_zero)
185 {
775e241e 186 if (WCWIDTH (wc) == 0)
9255ee31
EZ
187 continue;
188 else
189 count--;
190 }
191 else
192 count--;
193 }
194 }
195
196 if (find_non_zero)
197 {
198 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
775e241e 199 while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && WCWIDTH (wc) == 0)
9255ee31
EZ
200 {
201 point += tmp;
202 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
9255ee31
EZ
203 }
204 }
5bdf8622
DJ
205
206 return point;
9255ee31
EZ
207}
208
775e241e 209/*static*/ int
cb41b9e7 210_rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero)
9255ee31
EZ
211{
212 mbstate_t ps;
213 int prev, non_zero_prev, point, length;
214 size_t tmp;
215 wchar_t wc;
216
217 memset(&ps, 0, sizeof(mbstate_t));
218 length = strlen(string);
219
220 if (seed < 0)
221 return 0;
222 else if (length < seed)
223 return length;
224
225 prev = non_zero_prev = point = 0;
226 while (point < seed)
227 {
cb41b9e7
TT
228 if (_rl_utf8locale && UTF8_SINGLEBYTE(string[point]))
229 {
230 tmp = 1;
231 wc = (wchar_t) string[point];
232 memset(&ps, 0, sizeof(mbstate_t));
233 }
234 else
235 tmp = mbrtowc (&wc, string + point, length - point, &ps);
5bdf8622 236 if (MB_INVALIDCH ((size_t)tmp))
9255ee31 237 {
cb41b9e7 238 /* in this case, bytes are invalid or too short to compose
9255ee31
EZ
239 multibyte char, so assume that the first byte represents
240 a single character anyway. */
241 tmp = 1;
242 /* clear the state of the byte sequence, because
243 in this case effect of mbstate is undefined */
244 memset(&ps, 0, sizeof (mbstate_t));
5bdf8622
DJ
245
246 /* Since we're assuming that this byte represents a single
247 non-zero-width character, don't forget about it. */
248 prev = point;
9255ee31 249 }
5bdf8622 250 else if (MB_NULLWCH (tmp))
9255ee31
EZ
251 break; /* Found '\0' char. Can this happen? */
252 else
253 {
254 if (find_non_zero)
255 {
775e241e 256 if (WCWIDTH (wc) != 0)
9255ee31
EZ
257 prev = point;
258 }
259 else
260 prev = point;
261 }
262
263 point += tmp;
264 }
265
266 return prev;
267}
268
269/* return the number of bytes parsed from the multibyte sequence starting
270 at src, if a non-L'\0' wide character was recognized. It returns 0,
271 if a L'\0' wide character was recognized. It returns (size_t)(-1),
272 if an invalid multibyte sequence was encountered. It returns (size_t)(-2)
273 if it couldn't parse a complete multibyte character. */
274int
cb41b9e7 275_rl_get_char_len (char *src, mbstate_t *ps)
9255ee31 276{
cb41b9e7
TT
277 size_t tmp, l;
278 int mb_cur_max;
9255ee31 279
cb41b9e7
TT
280 /* Look at no more than MB_CUR_MAX characters */
281 l = (size_t)strlen (src);
282 if (_rl_utf8locale && l > 0 && UTF8_SINGLEBYTE(*src))
283 tmp = (*src != 0) ? 1 : 0;
284 else
285 {
286 mb_cur_max = MB_CUR_MAX;
287 tmp = mbrlen((const char *)src, (l < mb_cur_max) ? l : mb_cur_max, ps);
288 }
9255ee31
EZ
289 if (tmp == (size_t)(-2))
290 {
cb41b9e7 291 /* too short to compose multibyte char */
5af408ce
EZ
292 if (ps)
293 memset (ps, 0, sizeof(mbstate_t));
9255ee31
EZ
294 return -2;
295 }
296 else if (tmp == (size_t)(-1))
297 {
298 /* invalid to compose multibyte char */
299 /* initialize the conversion state */
5af408ce
EZ
300 if (ps)
301 memset (ps, 0, sizeof(mbstate_t));
9255ee31
EZ
302 return -1;
303 }
304 else if (tmp == (size_t)0)
305 return 0;
306 else
307 return (int)tmp;
308}
309
/* Compare the character at BUF1[POS1] with the character at BUF2[POS2].
   Return 1 if both have the same positive byte length (as reported by
   _rl_get_char_len using PS1 and PS2) and identical bytes; otherwise
   return 0. */
int
_rl_compare_chars (char *buf1, int pos1, mbstate_t *ps1, char *buf2, int pos2, mbstate_t *ps2)
{
  int k, len1, len2;

  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  /* Only measured once the first character turned out to be usable. */
  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0 || len1 != len2)
    return 0;

  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}
329
330/* adjust pointed byte and find mbstate of the point of string.
331 adjusted point will be point <= adjusted_point, and returns
332 differences of the byte(adjusted_point - point).
cb41b9e7 333 if point is invalid (point < 0 || more than string length),
9255ee31
EZ
334 it returns -1 */
335int
cb41b9e7 336_rl_adjust_point (char *string, int point, mbstate_t *ps)
9255ee31 337{
cb41b9e7
TT
338 size_t tmp;
339 int length, pos;
9255ee31 340
cb41b9e7
TT
341 tmp = 0;
342 pos = 0;
9255ee31
EZ
343 length = strlen(string);
344 if (point < 0)
345 return -1;
346 if (length < point)
347 return -1;
348
349 while (pos < point)
350 {
cb41b9e7
TT
351 if (_rl_utf8locale && UTF8_SINGLEBYTE(string[pos]))
352 tmp = 1;
353 else
354 tmp = mbrlen (string + pos, length - pos, ps);
5bdf8622 355 if (MB_INVALIDCH ((size_t)tmp))
9255ee31 356 {
cb41b9e7 357 /* in this case, bytes are invalid or too short to compose
9255ee31
EZ
358 multibyte char, so assume that the first byte represents
359 a single character anyway. */
360 pos++;
361 /* clear the state of the byte sequence, because
362 in this case effect of mbstate is undefined */
5af408ce
EZ
363 if (ps)
364 memset (ps, 0, sizeof (mbstate_t));
9255ee31 365 }
5bdf8622 366 else if (MB_NULLWCH (tmp))
5af408ce 367 pos++;
9255ee31
EZ
368 else
369 pos += tmp;
370 }
371
372 return (pos - point);
373}
374
/* Return 1 if the LENGTH bytes of MBCHAR equal the bytes of STRING
   starting at offset SEED, and 0 if they differ or if fewer than LENGTH
   bytes lie between SEED and END. */
int
_rl_is_mbchar_matched (char *string, int seed, int end, char *mbchar, int length)
{
  int k;

  if (length > (end - seed))
    return 0;

  k = 0;
  while (k < length)
    {
      if (string[seed + k] != mbchar[k])
        return 0;
      k++;
    }

  return 1;
}
5bdf8622
DJ
388
389wchar_t
cb41b9e7 390_rl_char_value (char *buf, int ind)
5bdf8622
DJ
391{
392 size_t tmp;
393 wchar_t wc;
394 mbstate_t ps;
395 int l;
396
397 if (MB_LEN_MAX == 1 || rl_byte_oriented)
398 return ((wchar_t) buf[ind]);
cb41b9e7
TT
399 if (_rl_utf8locale && UTF8_SINGLEBYTE(buf[ind]))
400 return ((wchar_t) buf[ind]);
5bdf8622
DJ
401 l = strlen (buf);
402 if (ind >= l - 1)
403 return ((wchar_t) buf[ind]);
cb41b9e7
TT
404 if (l < ind) /* Sanity check */
405 l = strlen (buf+ind);
5bdf8622
DJ
406 memset (&ps, 0, sizeof (mbstate_t));
407 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
408 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
409 return ((wchar_t) buf[ind]);
410 return wc;
411}
9255ee31
EZ
412#endif /* HANDLE_MULTIBYTE */
413
/* Return the byte offset reached by moving COUNT characters forward in
   STRING from byte offset SEED.  With HANDLE_MULTIBYTE the work is done by
   _rl_find_next_mbchar_internal, which receives FLAGS as its
   find-non-zero argument (pass MB_FIND_NONZERO to look for
   non-zero-width multibyte characters).  Without it every character is
   one byte and the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (char *string, int seed, int count, int flags)
{
  int next;

#if !defined (HANDLE_MULTIBYTE)
  next = seed + count;
#else
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#endif

  return next;
}
427
/* Return the byte offset of the character preceding byte offset SEED in
   STRING; the result is <= SEED.  With HANDLE_MULTIBYTE the work is done
   by _rl_find_prev_mbchar_internal, which receives FLAGS as its
   find-non-zero argument (pass MB_FIND_NONZERO to look for
   non-zero-width multibyte characters).  Without it the result is
   SEED - 1, or SEED itself when SEED is 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (char *string, int seed, int flags)
{
  int prev;

#if !defined (HANDLE_MULTIBYTE)
  prev = seed;
  if (seed != 0)
    prev = seed - 1;
#else
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#endif

  return prev;
}