]>
git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
1 /* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Pádraig Brady. */
33 /* Replace non printable chars.
34 Note \t and \n etc. are non printable.
35 Return 1 if replacement made, 0 otherwise. */
38 * Counts the number of cells in a multibyte string. For every control and
39 * non-printable char the resulting width is enlarged to hold the \x?? hex
40 * sequence. See mbs_safe_encode().
42 * Returns: the number of cells; @sz returns the number of bytes.
44 size_t mbs_safe_nwidth(const char *buf
, size_t bufsz
, size_t *sz
)
46 const char *p
= buf
, *last
= buf
;
47 size_t width
= 0, bytes
= 0;
51 memset(&st
, 0, sizeof(st
));
54 last
= p
+ (bufsz
- 1);
56 while (p
&& *p
&& p
<= last
) {
57 if ((p
< last
&& *p
== '\\' && *(p
+ 1) == 'x')
58 || iscntrl((unsigned char) *p
)) {
59 width
+= 4, bytes
+= 4; /* *p encoded to \x?? */
65 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
70 if (len
== (size_t) -1 || len
== (size_t) -2) {
72 if (isprint((unsigned char) *p
))
73 width
+= 1, bytes
+= 1;
75 width
+= 4, bytes
+= 4;
77 } else if (!iswprint(wc
)) {
78 width
+= len
* 4; /* hex encode whole sequence */
81 width
+= wcwidth(wc
); /* number of cells */
82 bytes
+= len
; /* number of bytes */
87 else if (!isprint((unsigned char) *p
)) {
88 width
+= 4, bytes
+= 4; /* *p encoded to \x?? */
102 size_t mbs_safe_width(const char *s
)
106 return mbs_safe_nwidth(s
, strlen(s
), NULL
);
110 * Copy @s to @buf and replace control and non-printable chars with
111 * \x?? hex sequence. The @width returns number of cells. The @safechars
114 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
117 char *mbs_safe_encode_to_buffer(const char *s
, size_t *width
, char *buf
, const char *safechars
)
121 size_t sz
= s
? strlen(s
) : 0;
125 memset(&st
, 0, sizeof(st
));
134 if (safechars
&& strchr(safechars
, *p
)) {
139 if ((*p
== '\\' && *(p
+ 1) == 'x')
140 || iscntrl((unsigned char) *p
)) {
141 sprintf(r
, "\\x%02x", (unsigned char) *p
);
149 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
152 break; /* end of string */
154 if (len
== (size_t) -1 || len
== (size_t) -2) {
157 * Not valid multibyte sequence -- maybe it's
158 * printable char according to the current locales.
160 if (!isprint((unsigned char) *p
)) {
161 sprintf(r
, "\\x%02x", (unsigned char) *p
);
168 } else if (!iswprint(wc
)) {
170 for (i
= 0; i
< len
; i
++) {
171 sprintf(r
, "\\x%02x", (unsigned char) p
[i
]);
178 *width
+= wcwidth(wc
);
183 else if (!isprint((unsigned char) *p
)) {
184 sprintf(r
, "\\x%02x", (unsigned char) *p
);
200 * Copy @s to @buf and replace broken sequences with \x?? hex sequences. The
201 * @width returns number of cells. The @safechars are not encoded.
203 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
206 char *mbs_invalid_encode_to_buffer(const char *s
, size_t *width
, char *buf
)
210 size_t sz
= s
? strlen(s
) : 0;
214 memset(&st
, 0, sizeof(st
));
225 size_t len
= mbrtowc(&wc
, p
, MB_CUR_MAX
, &st
);
231 break; /* end of string */
233 if (len
== (size_t) -1 || len
== (size_t) -2) {
236 * Not valid multibyte sequence -- maybe it's
237 * printable char according to the current locales.
239 if (!isprint((unsigned char) *p
)) {
240 sprintf(r
, "\\x%02x", (unsigned char) *p
);
247 } else if (*p
== '\\' && *(p
+ 1) == 'x') {
248 sprintf(r
, "\\x%02x", (unsigned char) *p
);
254 *width
+= wcwidth(wc
);
263 size_t mbs_safe_encode_size(size_t bytes
)
265 return (bytes
* 4) + 1;
269 * Returns allocated string where all control and non-printable chars are
270 * replaced with \x?? hex sequence.
272 char *mbs_safe_encode(const char *s
, size_t *width
)
274 size_t sz
= s
? strlen(s
) : 0;
275 char *buf
, *ret
= NULL
;
279 buf
= malloc(mbs_safe_encode_size(sz
));
281 ret
= mbs_safe_encode_to_buffer(s
, width
, buf
, NULL
);
288 * Returns an allocated string where all broken wide chars are
289 * replaced with a \x?? hex sequence.
291 char *mbs_invalid_encode(const char *s
, size_t *width
)
293 size_t sz
= s
? strlen(s
) : 0;
294 char *buf
, *ret
= NULL
;
298 buf
= malloc(mbs_safe_encode_size(sz
));
300 ret
= mbs_invalid_encode_to_buffer(s
, width
, buf
);
309 wc_ensure_printable (wchar_t *wchars
)
311 bool replaced
= false;
312 wchar_t *wc
= wchars
;
315 if (!iswprint ((wint_t) *wc
))
317 *wc
= 0xFFFD; /* L'\uFFFD' (replacement char) */
325 /* Truncate wchar string to width cells.
326 * Returns number of cells used. */
329 wc_truncate (wchar_t *wc
, size_t width
)
336 next_cells
= wcwidth (*wc
);
337 if (next_cells
== -1) /* non printable */
339 *wc
= 0xFFFD; /* L'\uFFFD' (replacement char) */
342 if (cells
+ next_cells
> width
)
352 /* FIXME: move this function to gnulib as it's missing on:
353 OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS */
356 rpl_wcswidth (const wchar_t *s
, size_t n
)
360 while (n
-- > 0 && *s
!= L
'\0')
362 int nwidth
= wcwidth (*s
++);
363 if (nwidth
== -1) /* non printable */
365 if (ret
> (INT_MAX
- nwidth
)) /* overflow */
372 #endif /* HAVE_WIDECHAR */
374 /* Truncate multi-byte string to @width and returns number of
375 * bytes of the new string @str, and in @width returns number
379 mbs_truncate(char *str
, size_t *width
)
381 ssize_t bytes
= strlen(str
);
383 ssize_t sz
= mbstowcs(NULL
, str
, 0);
386 if (sz
== (ssize_t
) -1)
389 wcs
= calloc(1, (sz
+ 1) * sizeof(wchar_t));
393 if (!mbstowcs(wcs
, str
, sz
))
395 *width
= wc_truncate(wcs
, *width
);
396 bytes
= wcstombs(str
, wcs
, bytes
);
400 if (bytes
>= 0 && *width
< (size_t) bytes
)
408 /* Write N_SPACES space characters to DEST while ensuring
409 nothing is written beyond DEST_END. A terminating NUL
410 is always added to DEST.
411 A pointer to the terminating NUL is returned. */
414 mbs_align_pad (char *dest
, const char* dest_end
, size_t n_spaces
, int padchar
)
416 /* FIXME: Should we pad with "figure space" (\u2007)
417 if non ascii data present? */
418 for (/* nothing */; n_spaces
&& (dest
< dest_end
); n_spaces
--)
425 mbsalign (const char *src
, char *dest
, size_t dest_size
,
426 size_t *width
, mbs_align_t align
, int flags
)
428 return mbsalign_with_padding(src
, dest
, dest_size
, width
, align
, flags
, ' ');
431 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
432 characters; write the result into the DEST_SIZE-byte buffer, DEST.
433 ALIGN specifies whether to left- or right-justify or to center.
434 If SRC requires more than *WIDTH columns, truncate it to fit.
435 When centering, the number of trailing spaces may be one less than the
436 number of leading spaces. Of FLAGS, only MBA_UNIBYTE_FALLBACK is consulted.
437 Return the length in bytes required for the final result, not counting
438 the trailing NUL. A return value of DEST_SIZE or larger means there
439 wasn't enough space. DEST will be NUL terminated in any case.
440 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
441 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
442 Update *WIDTH to indicate how many columns were used before padding. */
445 mbsalign_with_padding (const char *src
, char *dest
, size_t dest_size
,
446 size_t *width
, mbs_align_t align
,
450 int flags
__attribute__((__unused__
)),
455 size_t src_size
= strlen (src
) + 1;
457 wchar_t *str_wc
= NULL
;
458 const char *str_to_print
= src
;
459 size_t n_cols
= src_size
- 1;
460 size_t n_used_bytes
= n_cols
; /* Not including NUL */
461 size_t n_spaces
= 0, space_left
;
464 bool conversion
= false;
465 bool wc_enabled
= false;
467 /* In multi-byte locales convert to wide characters
468 to allow easy truncation. Also determine number
469 of screen columns used. */
472 size_t src_chars
= mbstowcs (NULL
, src
, 0);
473 if (src_chars
== (size_t) -1)
475 if (flags
& MBA_UNIBYTE_FALLBACK
)
476 goto mbsalign_unibyte
;
478 goto mbsalign_cleanup
;
480 src_chars
+= 1; /* make space for NUL */
481 str_wc
= malloc (src_chars
* sizeof (wchar_t));
484 if (flags
& MBA_UNIBYTE_FALLBACK
)
485 goto mbsalign_unibyte
;
487 goto mbsalign_cleanup
;
489 if (mbstowcs (str_wc
, src
, src_chars
) != 0)
491 str_wc
[src_chars
- 1] = L
'\0';
493 conversion
= wc_ensure_printable (str_wc
);
494 n_cols
= rpl_wcswidth (str_wc
, src_chars
);
498 /* If we transformed or need to truncate the source string
499 then create a modified copy of it. */
500 if (wc_enabled
&& (conversion
|| (n_cols
> *width
)))
504 /* May have increased the size by converting
505 \t to \uFFFD for example. */
506 src_size
= wcstombs(NULL
, str_wc
, 0) + 1;
508 newstr
= malloc (src_size
);
511 if (flags
& MBA_UNIBYTE_FALLBACK
)
512 goto mbsalign_unibyte
;
514 goto mbsalign_cleanup
;
516 str_to_print
= newstr
;
517 n_cols
= wc_truncate (str_wc
, *width
);
518 n_used_bytes
= wcstombs (newstr
, str_wc
, src_size
);
524 if (n_cols
> *width
) /* Unibyte truncation required. */
527 n_used_bytes
= n_cols
;
530 if (*width
> n_cols
) /* Padding required. */
531 n_spaces
= *width
- n_cols
;
533 /* indicate to caller how many cells needed (not including padding). */
536 /* indicate to caller how many bytes needed (not including NUL). */
537 ret
= n_used_bytes
+ (n_spaces
* 1);
539 /* Write as much NUL terminated output to DEST as possible. */
542 char *dest_end
= dest
+ dest_size
- 1;
548 case MBS_ALIGN_CENTER
:
549 start_spaces
= n_spaces
/ 2 + n_spaces
% 2;
550 end_spaces
= n_spaces
/ 2;
554 end_spaces
= n_spaces
;
556 case MBS_ALIGN_RIGHT
:
557 start_spaces
= n_spaces
;
564 dest
= mbs_align_pad (dest
, dest_end
, start_spaces
, padchar
);
565 space_left
= dest_end
- dest
;
566 dest
= mempcpy (dest
, str_to_print
, min (n_used_bytes
, space_left
));
567 mbs_align_pad (dest
, dest_end
, end_spaces
, padchar
);