]>
git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
2 * SPDX-License-Identifier: LGPL-2.1-or-later
4 * Align/Truncate a string in a given screen width
5 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
7 * This program is free software: you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by the
9 * Free Software Foundation, either version 2.1 of the License, or (at your
10 * option) any later version.
12 * Written by Pádraig Brady.
27 * Counts number of cells in multibyte string. All control and
28 * non-printable chars are ignored.
30 * Returns: number of cells.
32 size_t mbs_nwidth(const char *buf
, size_t bufsz
)
34 const char *p
= buf
, *last
= buf
;
39 memset(&st
, 0, sizeof(st
));
42 last
= p
+ (bufsz
- 1);
44 while (p
&& *p
&& p
<= last
) {
45 if (iscntrl((unsigned char) *p
)) {
48 /* try detect "\e[x;ym" and skip on success */
49 if (*p
&& *p
== '[') {
51 while (*e
&& e
< last
&& *e
!= 'm')
60 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
64 if (len
> 0 && iswprint(wc
)) {
68 } else if (len
== (size_t) -1 || len
== (size_t) -2)
72 if (isprint((unsigned char) *p
))
81 size_t mbs_width(const char *s
)
85 return mbs_nwidth(s
, strlen(s
));
89 * Counts number of cells in multibyte string. For all control and
90 * non-printable chars the resulting width is enlarged to hold the \x?? hex
91 * sequence. See mbs_safe_encode().
93 * Returns: number of cells, @sz returns number of bytes.
95 size_t mbs_safe_nwidth(const char *buf
, size_t bufsz
, size_t *sz
)
97 const char *p
= buf
, *last
= buf
;
98 size_t width
= 0, bytes
= 0;
102 memset(&st
, 0, sizeof(st
));
104 if (p
&& *p
&& bufsz
)
105 last
= p
+ (bufsz
- 1);
107 while (p
&& *p
&& p
<= last
) {
108 if ((p
< last
&& *p
== '\\' && *(p
+ 1) == 'x')
109 || iscntrl((unsigned char) *p
)) {
110 width
+= 4, bytes
+= 4; /* *p encoded to \x?? */
116 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
121 if (len
== (size_t) -1 || len
== (size_t) -2) {
123 if (isprint((unsigned char) *p
))
124 width
+= 1, bytes
+= 1;
126 width
+= 4, bytes
+= 4;
128 } else if (!iswprint(wc
)) {
129 width
+= len
* 4; /* hex encode whole sequence */
132 width
+= wcwidth(wc
); /* number of cells */
133 bytes
+= len
; /* number of bytes */
138 else if (!isprint((unsigned char) *p
)) {
139 width
+= 4, bytes
+= 4; /* *p encoded to \x?? */
153 size_t mbs_safe_width(const char *s
)
157 return mbs_safe_nwidth(s
, strlen(s
), NULL
);
161 * Copy @s to @buf and replace control and non-printable chars with
162 * \x?? hex sequence. The @width returns number of cells. The @safechars
165 * The @buf has to be big enough to hold mbs_safe_encode_size(strlen(s)) bytes.
168 char *mbs_safe_encode_to_buffer(const char *s
, size_t *width
, char *buf
, const char *safechars
)
172 size_t sz
= s
? strlen(s
) : 0;
176 memset(&st
, 0, sizeof(st
));
185 if (safechars
&& strchr(safechars
, *p
)) {
190 if ((*p
== '\\' && *(p
+ 1) == 'x')
191 || iscntrl((unsigned char) *p
)) {
192 sprintf(r
, "\\x%02x", (unsigned char) *p
);
200 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
203 break; /* end of string */
205 if (len
== (size_t) -1 || len
== (size_t) -2) {
208 * Not valid multibyte sequence -- maybe it's
209 * printable char according to the current locales.
211 if (!isprint((unsigned char) *p
)) {
212 sprintf(r
, "\\x%02x", (unsigned char) *p
);
219 } else if (!iswprint(wc
)) {
221 for (i
= 0; i
< len
; i
++) {
222 sprintf(r
, "\\x%02x", (unsigned char) p
[i
]);
229 *width
+= wcwidth(wc
);
234 else if (!isprint((unsigned char) *p
)) {
235 sprintf(r
, "\\x%02x", (unsigned char) *p
);
251 * Copy @s to @buf and replace broken sequences with \x?? hex sequences. The
252 * @width returns number of cells.
254 * The @buf has to be big enough to hold mbs_safe_encode_size(strlen(s)) bytes.
257 char *mbs_invalid_encode_to_buffer(const char *s
, size_t *width
, char *buf
)
261 size_t sz
= s
? strlen(s
) : 0;
265 memset(&st
, 0, sizeof(st
));
276 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
282 break; /* end of string */
284 if (len
== (size_t) -1 || len
== (size_t) -2) {
287 * Not valid multibyte sequence -- maybe it's
288 * printable char according to the current locales.
290 if (!isprint((unsigned char) *p
)) {
291 sprintf(r
, "\\x%02x", (unsigned char) *p
);
298 } else if (*p
== '\\' && *(p
+ 1) == 'x') {
299 sprintf(r
, "\\x%02x", (unsigned char) *p
);
303 r
= mempcpy(r
, p
, len
);
304 *width
+= wcwidth(wc
);
/*
 * Returns the buffer size (in bytes) that callers allocate for the encoded
 * form of an input of @bytes bytes: four output bytes per input byte (one
 * "\x??" sequence each) plus one byte for the terminating NUL.
 *
 * If that computation would overflow size_t, (size_t) -1 is returned instead
 * of a wrapped-around small value, so an allocation sized by the result fails
 * rather than silently under-allocating the output buffer.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t max = (size_t) -1;

	/* bytes * 4 + 1 fits in size_t only if bytes <= (max - 1) / 4 */
	if (bytes > (max - 1) / 4)
		return max;

	return (bytes * 4) + 1;
}
319 * Returns allocated string where all control and non-printable chars are
320 * replaced with \x?? hex sequence.
322 char *mbs_safe_encode(const char *s
, size_t *width
)
324 size_t sz
= s
? strlen(s
) : 0;
325 char *buf
, *ret
= NULL
;
329 buf
= malloc(mbs_safe_encode_size(sz
));
331 ret
= mbs_safe_encode_to_buffer(s
, width
, buf
, NULL
);
338 * Returns allocated string where all broken widechars are
339 * replaced with \x?? hex sequences.
341 char *mbs_invalid_encode(const char *s
, size_t *width
)
343 size_t sz
= s
? strlen(s
) : 0;
344 char *buf
, *ret
= NULL
;
348 buf
= malloc(mbs_safe_encode_size(sz
));
350 ret
= mbs_invalid_encode_to_buffer(s
, width
, buf
);
359 wc_ensure_printable (wchar_t *wchars
)
361 bool replaced
= false;
362 wchar_t *wc
= wchars
;
365 if (!iswprint ((wint_t) *wc
))
367 *wc
= 0xFFFD; /* L'\uFFFD' (replacement char) */
375 /* Truncate wchar string to width cells.
376 * Returns number of cells used. */
379 wc_truncate (wchar_t *wc
, size_t width
)
386 next_cells
= wcwidth (*wc
);
387 if (next_cells
== -1) /* non printable */
389 *wc
= 0xFFFD; /* L'\uFFFD' (replacement char) */
392 if (cells
+ next_cells
> width
)
403 rpl_wcswidth (const wchar_t *s
, size_t n
)
407 while (n
-- > 0 && *s
!= L
'\0')
409 int nwidth
= wcwidth (*s
++);
410 if (nwidth
== -1) /* non printable */
412 if (ret
> (INT_MAX
- nwidth
)) /* overflow */
419 #endif /* HAVE_WIDECHAR */
421 /* Truncate multi-byte string to @width and returns number of
422 * bytes of the new string @str, and in @width returns number
426 mbs_truncate(char *str
, size_t *width
)
428 ssize_t bytes
= strlen(str
);
430 ssize_t sz
= mbstowcs(NULL
, str
, 0);
433 if (sz
== (ssize_t
) -1)
436 wcs
= calloc(1, (sz
+ 1) * sizeof(wchar_t));
440 if (!mbstowcs(wcs
, str
, sz
))
442 *width
= wc_truncate(wcs
, *width
);
443 bytes
= wcstombs(str
, wcs
, bytes
);
447 if (bytes
>= 0 && *width
< (size_t) bytes
)
455 /* Write N_SPACES space characters to DEST while ensuring
456 nothing is written beyond DEST_END. A terminating NUL
457 is always added to DEST.
458 A pointer to the terminating NUL is returned. */
461 mbs_align_pad (char *dest
, const char* dest_end
, size_t n_spaces
, int padchar
)
463 for (/* nothing */; n_spaces
&& (dest
< dest_end
); n_spaces
--)
470 mbsalign (const char *src
, char *dest
, size_t dest_size
,
471 size_t *width
, mbs_align_t align
, int flags
)
473 return mbsalign_with_padding(src
, dest
, dest_size
, width
, align
, flags
, ' ');
476 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
477 characters; write the result into the DEST_SIZE-byte buffer, DEST.
478 ALIGN specifies whether to left- or right-justify or to center.
479 If SRC requires more than *WIDTH columns, truncate it to fit.
480 When centering, the number of trailing spaces may be one less than the
481 number of leading spaces. FLAGS may include MBA_UNIBYTE_FALLBACK.
482 Return the length in bytes required for the final result, not counting
483 the trailing NUL. A return value of DEST_SIZE or larger means there
484 wasn't enough space. DEST will be NUL terminated in any case.
485 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
486 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
487 Update *WIDTH to indicate how many columns were used before padding. */
490 mbsalign_with_padding (const char *src
, char *dest
, size_t dest_size
,
491 size_t *width
, mbs_align_t align
,
495 int flags
__attribute__((__unused__
)),
500 size_t src_size
= strlen (src
) + 1;
502 wchar_t *str_wc
= NULL
;
503 const char *str_to_print
= src
;
504 size_t n_cols
= src_size
- 1;
505 size_t n_used_bytes
= n_cols
; /* Not including NUL */
506 size_t n_spaces
= 0, space_left
;
509 bool conversion
= false;
510 bool wc_enabled
= false;
512 /* In multi-byte locales convert to wide characters
513 to allow easy truncation. Also determine number
514 of screen columns used. */
517 size_t src_chars
= mbstowcs (NULL
, src
, 0);
518 if (src_chars
== (size_t) -1)
520 if (flags
& MBA_UNIBYTE_FALLBACK
)
521 goto mbsalign_unibyte
;
523 goto mbsalign_cleanup
;
525 src_chars
+= 1; /* make space for NUL */
526 str_wc
= malloc (src_chars
* sizeof (wchar_t));
529 if (flags
& MBA_UNIBYTE_FALLBACK
)
530 goto mbsalign_unibyte
;
532 goto mbsalign_cleanup
;
534 if (mbstowcs (str_wc
, src
, src_chars
) != 0)
536 str_wc
[src_chars
- 1] = L
'\0';
538 conversion
= wc_ensure_printable (str_wc
);
539 n_cols
= rpl_wcswidth (str_wc
, src_chars
);
543 /* If we transformed or need to truncate the source string
544 then create a modified copy of it. */
545 if (wc_enabled
&& (conversion
|| (n_cols
> *width
)))
549 /* May have increased the size by converting
550 \t to \uFFFD for example. */
551 src_size
= wcstombs(NULL
, str_wc
, 0) + 1;
553 newstr
= malloc (src_size
);
556 if (flags
& MBA_UNIBYTE_FALLBACK
)
557 goto mbsalign_unibyte
;
559 goto mbsalign_cleanup
;
561 str_to_print
= newstr
;
562 n_cols
= wc_truncate (str_wc
, *width
);
563 n_used_bytes
= wcstombs (newstr
, str_wc
, src_size
);
569 if (n_cols
> *width
) /* Unibyte truncation required. */
572 n_used_bytes
= n_cols
;
575 if (*width
> n_cols
) /* Padding required. */
576 n_spaces
= *width
- n_cols
;
578 /* indicate to caller how many cells needed (not including padding). */
581 /* indicate to caller how many bytes needed (not including NUL). */
582 ret
= n_used_bytes
+ (n_spaces
* 1);
584 /* Write as much NUL terminated output to DEST as possible. */
587 char *dest_end
= dest
+ dest_size
- 1;
593 case MBS_ALIGN_CENTER
:
594 start_spaces
= n_spaces
/ 2 + n_spaces
% 2;
595 end_spaces
= n_spaces
/ 2;
599 end_spaces
= n_spaces
;
601 case MBS_ALIGN_RIGHT
:
602 start_spaces
= n_spaces
;
609 dest
= mbs_align_pad (dest
, dest_end
, start_spaces
, padchar
);
610 space_left
= dest_end
- dest
;
611 dest
= mempcpy (dest
, str_to_print
, min (n_used_bytes
, space_left
));
612 mbs_align_pad (dest
, dest_end
, end_spaces
, padchar
);