]>
git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
2 * SPDX-License-Identifier: LGPL-2.1-or-later
4 * Align/Truncate a string in a given screen width
5 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
7 * This program is free software: you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by the
9 * Free Software Foundation, either version 2.1 of the License, or (at your
10 * option) any later version.
12 * Written by Pádraig Brady.
27 * Counts number of cells in multibyte string. All control and
28 * non-printable chars are ignored.
30 * Returns: number of cells.
32 size_t mbs_nwidth(const char *buf
, size_t bufsz
)
34 const char *p
= buf
, *last
= buf
;
39 memset(&st
, 0, sizeof(st
));
42 last
= p
+ (bufsz
- 1);
44 while (p
&& *p
&& p
<= last
) {
45 if (iscntrl((unsigned char) *p
)) {
48 /* try detect "\e[x;ym" and skip on success */
49 if (*p
&& *p
== '[') {
51 while (*e
&& e
< last
&& *e
!= 'm')
60 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
64 if (len
> 0 && iswprint(wc
)) {
68 } else if (len
== (size_t) -1 || len
== (size_t) -2)
72 if (isprint((unsigned char) *p
))
81 size_t mbs_width(const char *s
)
85 return mbs_nwidth(s
, strlen(s
));
89 * Counts number of cells in multibyte string. For all control and
90 * non-printable chars is the result width enlarged to store \x?? hex
91 * sequence. See mbs_safe_encode().
93 * Returns: number of cells, @sz returns number of bytes.
95 size_t mbs_safe_nwidth(const char *buf
, size_t bufsz
, size_t *sz
)
97 const char *p
= buf
, *last
= buf
;
98 size_t width
= 0, bytes
= 0;
102 memset(&st
, 0, sizeof(st
));
104 if (p
&& *p
&& bufsz
)
105 last
= p
+ (bufsz
- 1);
107 while (p
&& *p
&& p
<= last
) {
108 if ((p
< last
&& *p
== '\\' && *(p
+ 1) == 'x')
109 || iscntrl((unsigned char) *p
)) {
110 width
+= 4, bytes
+= 4; /* *p encoded to \x?? */
116 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
121 if (len
== (size_t) -1 || len
== (size_t) -2) {
123 if (isprint((unsigned char) *p
))
124 width
+= 1, bytes
+= 1;
126 width
+= 4, bytes
+= 4;
128 } else if (!iswprint(wc
)) {
129 width
+= len
* 4; /* hex encode whole sequence */
132 width
+= wcwidth(wc
); /* number of cells */
133 bytes
+= len
; /* number of bytes */
138 else if (!isprint((unsigned char) *p
)) {
139 width
+= 4, bytes
+= 4; /* *p encoded to \x?? */
153 size_t mbs_safe_width(const char *s
)
157 return mbs_safe_nwidth(s
, strlen(s
), NULL
);
161 * Copy @s to @buf and replace control and non-printable chars with
162 * \x?? hex sequence. The @width returns number of cells. The @safechars
165 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
168 char *mbs_safe_encode_to_buffer(const char *s
, size_t *width
, char *buf
, const char *safechars
)
172 size_t sz
= s
? strlen(s
) : 0;
176 memset(&st
, 0, sizeof(st
));
185 if (safechars
&& strchr(safechars
, *p
)) {
190 if ((*p
== '\\' && *(p
+ 1) == 'x')
191 || iscntrl((unsigned char) *p
)) {
192 sprintf(r
, "\\x%02x", (unsigned char) *p
);
200 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
203 break; /* end of string */
205 if (len
== (size_t) -1 || len
== (size_t) -2) {
208 * Not valid multibyte sequence -- maybe it's
209 * printable char according to the current locales.
211 if (!isprint((unsigned char) *p
)) {
212 sprintf(r
, "\\x%02x", (unsigned char) *p
);
219 } else if (!iswprint(wc
)) {
221 for (i
= 0; i
< len
; i
++) {
222 sprintf(r
, "\\x%02x", (unsigned char) p
[i
]);
229 *width
+= wcwidth(wc
);
234 else if (!isprint((unsigned char) *p
)) {
235 sprintf(r
, "\\x%02x", (unsigned char) *p
);
251 * Copy @s to @buf and replace broken sequences to \x?? hex sequence. The
252 * @width returns number of cells. The @safechars are not encoded.
254 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
257 char *mbs_invalid_encode_to_buffer(const char *s
, size_t *width
, char *buf
)
261 size_t sz
= s
? strlen(s
) : 0;
265 memset(&st
, 0, sizeof(st
));
276 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
282 break; /* end of string */
284 if (len
== (size_t) -1 || len
== (size_t) -2) {
287 * Not valid multibyte sequence -- maybe it's
288 * printable char according to the current locales.
290 if (!isprint((unsigned char) *p
)) {
291 sprintf(r
, "\\x%02x", (unsigned char) *p
);
298 } else if (*p
== '\\' && *(p
+ 1) == 'x') {
299 sprintf(r
, "\\x%02x", (unsigned char) *p
);
303 r
= mempcpy(r
, p
, len
);
304 *width
+= wcwidth(wc
);
316 size_t mbs_safe_encode_size(size_t bytes
)
318 return (bytes
* 4) + 1;
322 * Count size of the original string in bytes (count \x?? as one byte)
324 size_t mbs_safe_decode_size(const char *p
)
329 if (*p
== '\\' && *(p
+ 1) == 'x' &&
330 isxdigit(*(p
+ 2)) && isxdigit(*(p
+ 3)))
340 * Returns allocated string where all control and non-printable chars are
341 * replaced with \x?? hex sequence.
343 char *mbs_safe_encode(const char *s
, size_t *width
)
345 size_t sz
= s
? strlen(s
) : 0;
346 char *buf
, *ret
= NULL
;
350 buf
= malloc(mbs_safe_encode_size(sz
));
352 ret
= mbs_safe_encode_to_buffer(s
, width
, buf
, NULL
);
359 * Returns allocated string where all broken widechars chars are
360 * replaced with \x?? hex sequence.
362 char *mbs_invalid_encode(const char *s
, size_t *width
)
364 size_t sz
= s
? strlen(s
) : 0;
365 char *buf
, *ret
= NULL
;
369 buf
= malloc(mbs_safe_encode_size(sz
));
371 ret
= mbs_invalid_encode_to_buffer(s
, width
, buf
);
380 wc_ensure_printable (wchar_t *wchars
)
382 bool replaced
= false;
383 wchar_t *wc
= wchars
;
386 if (!iswprint ((wint_t) *wc
))
388 *wc
= 0xFFFD; /* L'\uFFFD' (replacement char) */
396 /* Truncate wchar string to width cells.
397 * Returns number of cells used. */
400 wc_truncate (wchar_t *wc
, size_t width
)
407 next_cells
= wcwidth (*wc
);
408 if (next_cells
== -1) /* non printable */
410 *wc
= 0xFFFD; /* L'\uFFFD' (replacement char) */
413 if (cells
+ next_cells
> width
)
424 rpl_wcswidth (const wchar_t *s
, size_t n
)
428 while (n
-- > 0 && *s
!= L
'\0')
430 int nwidth
= wcwidth (*s
++);
431 if (nwidth
== -1) /* non printable */
433 if (ret
> (INT_MAX
- nwidth
)) /* overflow */
440 #endif /* HAVE_WIDECHAR */
442 /* Truncate multi-byte string to @width and returns number of
443 * bytes of the new string @str, and in @width returns number
447 mbs_truncate(char *str
, size_t *width
)
449 ssize_t bytes
= strlen(str
);
451 ssize_t sz
= mbstowcs(NULL
, str
, 0);
454 if (sz
== (ssize_t
) -1)
457 wcs
= calloc(1, (sz
+ 1) * sizeof(wchar_t));
461 if (!mbstowcs(wcs
, str
, sz
))
463 *width
= wc_truncate(wcs
, *width
);
464 bytes
= wcstombs(str
, wcs
, bytes
);
468 if (bytes
>= 0 && *width
< (size_t) bytes
)
476 /* Write N_SPACES space characters to DEST while ensuring
477 nothing is written beyond DEST_END. A terminating NUL
478 is always added to DEST.
479 A pointer to the terminating NUL is returned. */
482 mbs_align_pad (char *dest
, const char* dest_end
, size_t n_spaces
, int padchar
)
484 for (/* nothing */; n_spaces
&& (dest
< dest_end
); n_spaces
--)
491 mbsalign (const char *src
, char *dest
, size_t dest_size
,
492 size_t *width
, mbs_align_t align
, int flags
)
494 return mbsalign_with_padding(src
, dest
, dest_size
, width
, align
, flags
, ' ');
497 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
498 characters; write the result into the DEST_SIZE-byte buffer, DEST.
499 ALIGNMENT specifies whether to left- or right-justify or to center.
500 If SRC requires more than *WIDTH columns, truncate it to fit.
501 When centering, the number of trailing spaces may be one less than the
502 number of leading spaces. The FLAGS parameter is unused at present.
503 Return the length in bytes required for the final result, not counting
504 the trailing NUL. A return value of DEST_SIZE or larger means there
505 wasn't enough space. DEST will be NUL terminated in any case.
506 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
507 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
508 Update *WIDTH to indicate how many columns were used before padding. */
511 mbsalign_with_padding (const char *src
, char *dest
, size_t dest_size
,
512 size_t *width
, mbs_align_t align
,
516 int flags
__attribute__((__unused__
)),
521 size_t src_size
= strlen (src
) + 1;
523 wchar_t *str_wc
= NULL
;
524 const char *str_to_print
= src
;
525 size_t n_cols
= src_size
- 1;
526 size_t n_used_bytes
= n_cols
; /* Not including NUL */
527 size_t n_spaces
= 0, space_left
;
530 bool conversion
= false;
531 bool wc_enabled
= false;
533 /* In multi-byte locales convert to wide characters
534 to allow easy truncation. Also determine number
535 of screen columns used. */
538 size_t src_chars
= mbstowcs (NULL
, src
, 0);
539 if (src_chars
== (size_t) -1)
541 if (flags
& MBA_UNIBYTE_FALLBACK
)
542 goto mbsalign_unibyte
;
544 goto mbsalign_cleanup
;
546 src_chars
+= 1; /* make space for NUL */
547 str_wc
= malloc (src_chars
* sizeof (wchar_t));
550 if (flags
& MBA_UNIBYTE_FALLBACK
)
551 goto mbsalign_unibyte
;
553 goto mbsalign_cleanup
;
555 if (mbstowcs (str_wc
, src
, src_chars
) != 0)
557 str_wc
[src_chars
- 1] = L
'\0';
559 conversion
= wc_ensure_printable (str_wc
);
560 n_cols
= rpl_wcswidth (str_wc
, src_chars
);
564 /* If we transformed or need to truncate the source string
565 then create a modified copy of it. */
566 if (wc_enabled
&& (conversion
|| (n_cols
> *width
)))
570 /* May have increased the size by converting
571 \t to \uFFFD for example. */
572 src_size
= wcstombs(NULL
, str_wc
, 0) + 1;
574 newstr
= malloc (src_size
);
577 if (flags
& MBA_UNIBYTE_FALLBACK
)
578 goto mbsalign_unibyte
;
580 goto mbsalign_cleanup
;
582 str_to_print
= newstr
;
583 n_cols
= wc_truncate (str_wc
, *width
);
584 n_used_bytes
= wcstombs (newstr
, str_wc
, src_size
);
590 if (n_cols
> *width
) /* Unibyte truncation required. */
593 n_used_bytes
= n_cols
;
596 if (*width
> n_cols
) /* Padding required. */
597 n_spaces
= *width
- n_cols
;
599 /* indicate to caller how many cells needed (not including padding). */
602 /* indicate to caller how many bytes needed (not including NUL). */
603 ret
= n_used_bytes
+ (n_spaces
* 1);
605 /* Write as much NUL terminated output to DEST as possible. */
608 char *dest_end
= dest
+ dest_size
- 1;
614 case MBS_ALIGN_CENTER
:
615 start_spaces
= n_spaces
/ 2 + n_spaces
% 2;
616 end_spaces
= n_spaces
/ 2;
620 end_spaces
= n_spaces
;
622 case MBS_ALIGN_RIGHT
:
623 start_spaces
= n_spaces
;
630 dest
= mbs_align_pad (dest
, dest_end
, start_spaces
, padchar
);
631 space_left
= dest_end
- dest
;
632 dest
= mempcpy (dest
, str_to_print
, min (n_used_bytes
, space_left
));
633 mbs_align_pad (dest
, dest_end
, end_spaces
, padchar
);