]> git.ipfire.org Git - thirdparty/bash.git/blob - include/shmbutil.h
changes for multibyte characters with intermediate state; small change to readline...
[thirdparty/bash.git] / include / shmbutil.h
1 /* shmbutil.h -- utility functions for multibyte characters. */
2
3 /* Copyright (C) 2002-2022 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #if !defined (_SH_MBUTIL_H_)
22 #define _SH_MBUTIL_H_
23
24 #include "stdc.h"
25
26 /* Include config.h for HANDLE_MULTIBYTE */
27 #include <config.h>
28
#if defined (HANDLE_MULTIBYTE)
#include "shmbchar.h"

/* Wide-character/multibyte string helpers.  Only the prototypes are given
   here; the definitions live outside this header. */
extern size_t xwcsrtombs PARAMS((char *, const wchar_t **, size_t, mbstate_t *));
extern size_t xmbsrtowcs PARAMS((wchar_t *, const char **, size_t, mbstate_t *));
extern size_t xdupmbstowcs PARAMS((wchar_t **, char ***, const char *));

extern size_t mbstrlen PARAMS((const char *));

extern char *xstrchr PARAMS((const char *, int));

extern int locale_mb_cur_max;	/* XXX */
extern int locale_utf8locale;	/* XXX */

/* MB_INVALIDCH is true when X equals (size_t)-1 or (size_t)-2;
   MB_NULLWCH is true when X is 0. */
#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x)		((x) == (size_t)-1 || (x) == (size_t)-2)
#define MB_NULLWCH(x)		((x) == 0)
#endif

/* MBSLEN yields 0 for a null or empty string and 1 for a one-byte string,
   without calling mbstrlen; longer strings are passed to mbstrlen.
   MB_STRLEN falls back to STRLEN when MB_CUR_MAX is 1. */
#define MBSLEN(s)	(((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0)
#define MB_STRLEN(s)	((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s))

/* mblen/mbrlen wrappers that evaluate to 1 without calling the library
   when MB_CUR_MAX is 1. */
#define MBLEN(s, n)	((MB_CUR_MAX > 1) ? mblen ((s), (n)) : 1)
#define MBRLEN(s, n, p)	((MB_CUR_MAX > 1) ? mbrlen ((s), (n), (p)) : 1)

/* Bit tests on a single byte C: high bit clear, top two bits 11, and
   top two bits 10, respectively. */
#define UTF8_SINGLEBYTE(c)	(((c) & 0x80) == 0)
#define UTF8_MBFIRSTCHAR(c)	(((c) & 0xc0) == 0xc0)
#define UTF8_MBCHAR(c)		(((c) & 0xc0) == 0x80)

#else /* !HANDLE_MULTIBYTE */

/* Without multibyte support every character is exactly one byte. */
#undef MB_LEN_MAX
#undef MB_CUR_MAX

#define MB_LEN_MAX	1
#define MB_CUR_MAX	1

#undef xstrchr
#define xstrchr(s, c)	strchr(s, c)

#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x)		(0)
#define MB_NULLWCH(x)		(0)
#endif

#define MB_STRLEN(s)	(STRLEN(s))

#define MBLEN(s, n)	1
#define MBRLEN(s, n, p)	1

#ifndef wchar_t
#  define wchar_t	int
#endif

/* Constant answers for the UTF-8 byte tests.  UTF8_MBCHAR is provided here
   as well so that code using it builds whether or not HANDLE_MULTIBYTE is
   defined. */
#define UTF8_SINGLEBYTE(c)	(1)
#define UTF8_MBFIRSTCHAR(c)	(0)
#define UTF8_MBCHAR(c)		(0)

#endif /* !HANDLE_MULTIBYTE */
87
/* Declare a multibyte conversion state named `state' and clear it to all
   zero bytes.  The expansion does not end in a semicolon, so the use must
   be followed by `;'.  Expands to nothing without HANDLE_MULTIBYTE. */
#if !defined (HANDLE_MULTIBYTE)
#  define DECLARE_MBSTATE
#else
#  define DECLARE_MBSTATE \
        mbstate_t state; \
        memset (&state, 0, sizeof (state))
#endif /* HANDLE_MULTIBYTE */
97
/* Reset the existing multibyte state variable named `state' to all zero
   bytes.  The use must be followed by `;'.  Expands to nothing without
   HANDLE_MULTIBYTE. */
#if !defined (HANDLE_MULTIBYTE)
#  define INITIALIZE_MBSTATE
#else
#  define INITIALIZE_MBSTATE	memset (&state, 0, sizeof (mbstate_t))
#endif /* HANDLE_MULTIBYTE */
105
/* Move the index _I past one character of the string _STR, whose length is
   _STRSIZE.  The character may be several bytes long.  An mbstate_t named
   `state' must already be in scope where the macro is used. */
#if defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR(_str, _strsize, _i) \
    do \
      { \
        if (locale_mb_cur_max <= 1) \
          (_i)++; \
        else \
          { \
            mbstate_t _adv_saved; \
            size_t _adv_len; \
            \
            /* Try the single-byte shortcuts before calling mbrlen. */ \
            if (is_basic ((_str)[_i])) \
              _adv_len = 1; \
            else if (locale_utf8locale && (((_str)[_i] & 0x80) == 0)) \
              _adv_len = (_str)[_i] != 0; \
            else \
              { \
                _adv_saved = state; \
                _adv_len = mbrlen ((_str) + (_i), (_strsize) - (_i), &state); \
              } \
            \
            if (_adv_len == (size_t)-1 || _adv_len == (size_t)-2) \
              { \
                /* mbrlen failed: restore the state and step one byte. */ \
                state = _adv_saved; \
                (_i)++; \
              } \
            else if (_adv_len == 0) \
              (_i)++; \
            else \
              (_i) += _adv_len; \
          } \
      } \
    while (0)
#else
#  define ADVANCE_CHAR(_str, _strsize, _i)	(_i)++
#endif /* !HANDLE_MULTIBYTE */
146
/* Pointer flavor of the character-advance operation for the string _STR
   with _STRSIZE bytes available.
   SPECIAL: the caller is expected to increment _STR by 1 afterwards, so
   this macro only adds the character length minus one.  An mbstate_t named
   `state' must be in scope. */
#if defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR_P(_str, _strsize) \
    do \
      { \
        if (locale_mb_cur_max > 1) \
          { \
            mbstate_t _adv_saved; \
            size_t _adv_len; \
            \
            if (is_basic (*(_str))) \
              _adv_len = 1; \
            else if (locale_utf8locale && ((*(_str) & 0x80) == 0)) \
              _adv_len = *(_str) != 0; \
            else \
              { \
                _adv_saved = state; \
                _adv_len = mbrlen ((_str), (_strsize), &state); \
              } \
            \
            /* On failure only the state is restored; _STR stays put. */ \
            if (_adv_len == (size_t)-1 || _adv_len == (size_t)-2) \
              state = _adv_saved; \
            else if (_adv_len > 1) \
              (_str) += _adv_len - 1; \
          } \
      } \
    while (0)
#else
#  define ADVANCE_CHAR_P(_str, _strsize)
#endif /* !HANDLE_MULTIBYTE */
184
/* Move the index _I back to the start of the character that precedes it in
   the string _STR of length _STRSIZE.  The multibyte version rescans the
   string from index 0 with mbrlen, remembering where the last complete
   character began.  An mbstate_t named `state' must be in scope. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR(_str, _strsize, _i) \
    do \
      { \
        if (locale_mb_cur_max <= 1) \
          (_i)--; \
        else \
          { \
            mbstate_t _bk_saved; \
            size_t _bk_len; \
            int _bk_scan = 0;	/* current scan index */ \
            int _bk_prev = 0;	/* start of last character seen */ \
            \
            while (_bk_scan < (_i)) \
              { \
                _bk_saved = state; \
                _bk_len = mbrlen ((_str) + (_bk_scan), (_strsize) - (_bk_scan), &state); \
                \
                if (_bk_len == (size_t)-1 || _bk_len == (size_t)-2) \
                  { \
                    /* mbrlen failed: restore the state, skip a byte. */ \
                    state = _bk_saved; \
                    _bk_scan++; \
                  } \
                else if (_bk_len == 0) \
                  _bk_scan++; \
                else \
                  { \
                    _bk_prev = _bk_scan; \
                    _bk_scan += _bk_len; \
                  } \
              } \
            (_i) = _bk_prev; \
          } \
      } \
    while (0)
#else
#  define BACKUP_CHAR(_str, _strsize, _i)	(_i)--
#endif /* !HANDLE_MULTIBYTE */
225
/* Back up one (possibly multibyte) character in the string _BASE of length
   _STRSIZE starting at _STR (_BASE <= _STR <= (_BASE + _STRSIZE) ).
   On completion _STR points at the first byte of the character preceding
   its old position (or at _BASE if no complete character was found).
   SPECIAL: DO NOT assume that _STR will be decremented by 1 after this call.
   An mbstate_t named `state' must be in scope. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR_P(_base, _strsize, _str) \
    do \
      { \
        if (locale_mb_cur_max > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            /* Both are pointers: _x scans forward, _p trails one char. */ \
            char *_x, *_p; \
            \
            _x = _p = (_base); \
            while (_x < (_str)) \
              { \
                state_bak = state; \
                /* Bytes left = end of string minus the scan position. */ \
                mblength = mbrlen (_x, ((_base) + (_strsize)) - _x, &state); \
                \
                if (mblength == (size_t)-2 || mblength == (size_t)-1) \
                  { \
                    /* mbrlen failed: restore the state, skip a byte. */ \
                    state = state_bak; \
                    _x++; \
                  } \
                else if (mblength == 0) \
                  _x++; \
                else \
                  { \
                    _p = _x;	/* _p == start of prev mbchar */ \
                    _x += mblength; \
                  } \
              } \
            (_str) = _p; \
          } \
        else \
          (_str)--; \
      } \
    while (0)
#else
#  define BACKUP_CHAR_P(_base, _strsize, _str)	(_str)--
#endif /* !HANDLE_MULTIBYTE */
267
/* Copy one character, which may span several bytes, from _SRC to _DST and
   advance both pointers past it.  _SRCEND points to the end of _SRC.  At
   least one byte is always copied.  An mbstate_t named `state' must be in
   scope. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_P(_dst, _src, _srcend) \
    do \
      { \
        if (locale_mb_cur_max <= 1) \
          *(_dst)++ = *(_src)++; \
        else \
          { \
            mbstate_t _cp_saved; \
            size_t _cp_len; \
            \
            if (is_basic (*(_src))) \
              _cp_len = 1; \
            else if (locale_utf8locale && ((*(_src) & 0x80) == 0)) \
              _cp_len = *(_src) != 0; \
            else \
              { \
                _cp_saved = state; \
                _cp_len = mbrlen ((_src), (_srcend) - (_src), &state); \
              } \
            \
            if (_cp_len == (size_t)-1 || _cp_len == (size_t)-2) \
              { \
                /* mbrlen failed: restore the state, copy one byte. */ \
                state = _cp_saved; \
                _cp_len = 1; \
              } \
            else if (_cp_len < 1) \
              _cp_len = 1; \
            \
            while (_cp_len-- > 0) \
              *(_dst)++ = *(_src)++; \
          } \
      } \
    while (0)
#else
#  define COPY_CHAR_P(_dst, _src, _srcend)	*(_dst)++ = *(_src)++
#endif /* !HANDLE_MULTIBYTE */
308
/* Copy a single character from the string _SRC at index _SI to the string
   _DST at index _DI, advancing both indices past the bytes copied.
   _SRCEND is a pointer to the end of _SRC.  At least one byte is always
   copied.  An mbstate_t named `state' must be in scope.
   All parameters are parenthesized in the expansion so that expression
   arguments (e.g. `buf + 1') index correctly. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
    do \
      { \
        if (locale_mb_cur_max > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            int _k; \
            \
            _k = is_basic ((_src)[(_si)]); \
            if (_k) \
              mblength = 1; \
            else if (locale_utf8locale && ((_src)[(_si)] & 0x80) == 0) \
              mblength = (_src)[(_si)] != 0; \
            else \
              { \
                state_bak = state; \
                mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
              } \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                /* mbrlen failed: restore the state, copy one byte. */ \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
            \
            for (_k = 0; _k < mblength; _k++) \
              (_dst)[(_di)++] = (_src)[(_si)++]; \
          } \
        else \
          (_dst)[(_di)++] = (_src)[(_si)++]; \
      } \
    while (0)
#else
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
        (_dst)[(_di)++] = (_src)[(_si)++]
#endif /* !HANDLE_MULTIBYTE */
349
350 /****************************************************************
351 * *
352 * The following are only guaranteed to work in subst.c *
353 * *
354 ****************************************************************/
355
/* Build an escaped copy of the character at _SRC[_SI].

   When locale_mb_cur_max > 1: allocate a new string with xmalloc, store it
   in the caller's variable `temp' as _ESCCHAR followed by the bytes of the
   character and a terminating NUL, advance _SI past the character, and
   jump to the caller's `add_string' label.  _SLEN is the length of _SRC.

   Otherwise: store _ESCCHAR in _DST[0] and _SC in _DST[1]; _SI is left
   unchanged and control falls through.

   Requires `temp', `state', `xmalloc' and the label `add_string' at the
   point of use.  Every variant is a single statement (do ... while (0)),
   so it is safe as the body of an unbraced `if'. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
        if (locale_mb_cur_max > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            int _i; \
            \
            _i = is_basic ((_src)[(_si)]); \
            if (_i) \
              mblength = 1; \
            else if (locale_utf8locale && ((_src)[(_si)] & 0x80) == 0) \
              mblength = (_src)[(_si)] != 0; \
            else \
              { \
                state_bak = state; \
                mblength = mbrlen ((_src) + (_si), (_slen) - (_si), &state); \
              } \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                /* mbrlen failed: restore the state, take one byte. */ \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
            \
            /* One byte for the escape, one for the NUL. */ \
            temp = xmalloc (mblength + 2); \
            temp[0] = (_escchar); \
            for (_i = 0; _i < mblength; _i++) \
              temp[_i + 1] = (_src)[(_si)++]; \
            temp[mblength + 1] = '\0'; \
            \
            goto add_string; \
          } \
        else \
          { \
            (_dst)[0] = (_escchar); \
            (_dst)[1] = (_sc); \
          } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
        (_dst)[0] = (_escchar); \
        (_dst)[1] = (_sc); \
      } \
    while (0)
#endif /* !HANDLE_MULTIBYTE */
404
/* Copy the character at _SRC[_SI] to the location _DST points to, then
   advance the pointer _DST and the index _SI past the bytes copied.
   _SRCEND points to the end of _SRC.  At least one byte is always copied.
   The multibyte version copies with FASTCOPY and needs an mbstate_t named
   `state' in scope.  Every variant is a single statement
   (do ... while (0)), so it is safe as the body of an unbraced `if'. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
        if (locale_mb_cur_max > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            int _i; \
            \
            _i = is_basic (*((_src) + (_si))); \
            if (_i) \
              mblength = 1; \
            else if (locale_utf8locale && (((_src)[(_si)] & 0x80) == 0)) \
              mblength = (_src)[(_si)] != 0; \
            else \
              { \
                state_bak = state; \
                mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
              } \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                /* mbrlen failed: restore the state, copy one byte. */ \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
            \
            FASTCOPY(((_src) + (_si)), (_dst), mblength); \
            \
            (_dst) += mblength; \
            (_si) += mblength; \
          } \
        else \
          { \
            *(_dst)++ = (_src)[(_si)]; \
            (_si)++; \
          } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
        *(_dst)++ = (_src)[(_si)]; \
        (_si)++; \
      } \
    while (0)
#endif /* !HANDLE_MULTIBYTE */
450
/* When locale_mb_cur_max > 1: allocate a new NUL-terminated string with
   xmalloc holding the single (possibly multibyte) character found at
   _SRC[_SI], store it in _DST, advance _SI past the character, and jump to
   the caller's `add_string' label.  _SRCSIZE is the length of _SRC.
   When locale_mb_cur_max <= 1, or without HANDLE_MULTIBYTE, nothing
   happens and control falls through.

   Requires `state', `xmalloc' and the label `add_string' at the point of
   use.  The loop counter is named _i so that a caller argument called `i'
   is not captured by the macro's own local. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize) \
    do \
      { \
        if (locale_mb_cur_max > 1) \
          { \
            int _i; \
            mbstate_t state_bak; \
            size_t mblength; \
            \
            _i = is_basic (*((_src) + (_si))); \
            if (_i) \
              mblength = 1; \
            else if (locale_utf8locale && (((_src)[(_si)] & 0x80) == 0)) \
              mblength = (_src)[(_si)] != 0; \
            else \
              { \
                state_bak = state; \
                mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
              } \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                /* mbrlen failed: restore the state, take one byte. */ \
                state = state_bak; \
                mblength = 1; \
              } \
            if (mblength < 1) \
              mblength = 1; \
            \
            (_dst) = (char *)xmalloc (mblength + 1); \
            for (_i = 0; _i < mblength; _i++) \
              (_dst)[_i] = (_src)[(_si)++]; \
            (_dst)[mblength] = '\0'; \
            \
            goto add_string; \
          } \
      } \
    while (0)

#else
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize)
#endif
492
/* Watch out when using these -- they are just straight textual
   substitution.  Neither macro is wrapped in braces or do/while: each
   expands to three declarations (i, state_bak, mblength) followed by
   statements, and ends with `goto add_string' without a semicolon.  They
   require `state', `xmalloc' and the label `add_string' at the point of
   use; they do not test locale_mb_cur_max themselves. */
#if defined (HANDLE_MULTIBYTE)

/* Store in _DST a newly allocated string consisting of CTLESC, the
   (possibly multibyte) character at _SRC[_SI], and a terminating NUL.
   _SI is advanced past the character; _SRCSIZE is the length of _SRC. */
#  define SADD_MBQCHAR_BODY(_dst, _src, _si, _srcsize) \
\
            int i; \
            mbstate_t state_bak; \
            size_t mblength; \
            \
            i = is_basic (*((_src) + (_si))); \
            if (i) \
              mblength = 1; \
            else if (locale_utf8locale && (((_src)[_si] & 0x80) == 0)) \
              mblength = (_src)[_si] != 0; \
            else \
              { \
                state_bak = state; \
                mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
              } \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            if (mblength < 1) \
              mblength = 1; \
            \
            /* One extra byte for CTLESC, one for the NUL. */ \
            (_dst) = (char *)xmalloc (mblength + 2); \
            (_dst)[0] = CTLESC; \
            for (i = 0; i < mblength; i++) \
              (_dst)[i+1] = (_src)[(_si)++]; \
            (_dst)[mblength+1] = '\0'; \
            \
            goto add_string

/* Store in _DST a newly allocated string consisting of the (possibly
   multibyte) character at _SRC[_SI] and a terminating NUL.  _SI is
   advanced past the character; _SRCSIZE is the length of _SRC. */
#  define SADD_MBCHAR_BODY(_dst, _src, _si, _srcsize) \
\
            int i; \
            mbstate_t state_bak; \
            size_t mblength; \
            \
            i = is_basic (*((_src) + (_si))); \
            if (i) \
              mblength = 1; \
            else if (locale_utf8locale && (((_src)[_si] & 0x80) == 0)) \
              mblength = (_src)[_si] != 0; \
            else \
              { \
                state_bak = state; \
                mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
              } \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            if (mblength < 1) \
              mblength = 1; \
            \
            /* No escape byte here: the character starts at index 0 and \
               the NUL goes at index mblength, inside the allocation. */ \
            (_dst) = (char *)xmalloc (mblength + 1); \
            for (i = 0; i < mblength; i++) \
              (_dst)[i] = (_src)[(_si)++]; \
            (_dst)[mblength] = '\0'; \
            \
            goto add_string

#endif /* HANDLE_MULTIBYTE */
559 #endif /* _SH_MBUTIL_H_ */