]> git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
Merge branch 'minor-improvement' of https://github.com/calestyo/util-linux
[thirdparty/util-linux.git] / lib / mbsalign.c
1 /*
2 * SPDX-License-Identifier: LGPL-2.1-or-later
3 *
4 * Align/Truncate a string in a given screen width
5 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by the
9 * Free Software Foundation, either version 2.1 of the License, or (at your
10 * option) any later version.
11 *
12 * Written by Pádraig Brady.
13 */
14 #include <stdlib.h>
15 #include <string.h>
16 #include <stdio.h>
17 #include <stdbool.h>
18 #include <limits.h>
19 #include <ctype.h>
20
21 #include "c.h"
22 #include "mbsalign.h"
23 #include "strutils.h"
24 #include "widechar.h"
25
/*
 * Counts the number of screen cells used by the first @bufsz bytes of the
 * multibyte string @buf. Counting also stops at the first NUL byte.
 *
 * Control characters use no cells. A control character followed by '[' is
 * taken as the start of a terminal sequence of the form "\e[x;ym"; when the
 * closing 'm' is found inside the buffer the whole sequence is skipped.
 * Non-printable characters and invalid multibyte sequences use no cells
 * either.
 *
 * Returns: number of cells, 0 for a NULL or zero-sized buffer.
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
	const char *p = buf, *last;
	size_t width = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* a zero-sized buffer has no cells; do not even look at buf[0] */
	if (!buf || !bufsz)
		return 0;

	last = buf + (bufsz - 1);

	/* bound check first, so that nothing behind @last is ever read */
	while (p <= last && *p) {
		if (iscntrl((unsigned char) *p)) {
			p++;

			/* try detect "\e[x;ym" and skip on success */
			if (p <= last && *p == '[') {
				const char *e = p;
				while (*e && e < last && *e != 'm')
					e++;
				if (*e == 'm')
					p = e + 1;
			}
			continue;
		}
#ifdef HAVE_WIDECHAR
		{
			wchar_t wc;
			size_t left = (size_t) (last - p) + 1;
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or truncated sequence: @wc has not
				 * been written, so it must not be inspected.
				 * Skip one byte and restart from a clean
				 * conversion state.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
			} else if (iswprint(wc)) {
				int x = wcwidth(wc);
				if (x > 0)
					width += x;
			}
			p += len;
		}
#else
		if (isprint((unsigned char) *p))
			width++;
		p++;
#endif
	}

	return width;
}
80
/*
 * Counts the cells of the whole NUL-terminated string @s by handing it to
 * mbs_nwidth() together with its strlen().
 *
 * Returns: number of cells, 0 for a NULL or empty string.
 */
size_t mbs_width(const char *s)
{
	return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
87
/*
 * Counts the number of cells the first @bufsz bytes of @buf need once the
 * string is encoded by mbs_safe_encode(). Counting also stops at the first
 * NUL byte.
 *
 * Every control or non-printable byte, and the backslash of a literal "\x",
 * is counted as the four-cell sequence \x??. Printable characters are
 * counted with their real width.
 *
 * Returns: number of cells; if @sz is not NULL it returns the number of
 * bytes of the encoded string. Both are 0 for a NULL or zero-sized buffer.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf, *last;
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* a zero-sized buffer has no cells; do not even look at buf[0] */
	if (!buf || !bufsz) {
		if (sz)
			*sz = 0;
		return 0;
	}

	last = buf + (bufsz - 1);

	/* bound check first, so that nothing behind @last is ever read */
	while (p <= last && *p) {
		if ((p < last && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t left = (size_t) (last - p) + 1;
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* broken sequence: restart with a clean state */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int x = wcwidth(wc);	/* number of cells */

				/* a negative width must not wrap the counter */
				if (x > 0)
					width += x;
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
152
/*
 * Counts the cells the whole NUL-terminated string @s needs in its
 * safe-encoded form; see mbs_safe_nwidth().
 *
 * Returns: number of cells, 0 for a NULL or empty string.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
159
/*
 * Copies @s to @buf and replaces control and non-printable chars, and the
 * backslash of a literal "\x", with a \x?? hex sequence.
 *
 * Bytes listed in @safechars (may be NULL) are copied as they are and are
 * not added to the cell count.
 *
 * If @width is not NULL it returns the number of cells of the result; it is
 * set to 0 when nothing is encoded.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (width)
		*width = 0;
	if (!s || !*s || !buf)
		return NULL;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			/* 4 chars + NUL; the NUL is overwritten later */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* broken sequence: restart with a clean state */
				memset(&st, 0, sizeof(st));
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					snprintf(r, 5, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					snprintf(r, 5, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int x = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* a negative width must not wrap the counter */
				if (x > 0)
					cells += x;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
249
/*
 * Copies @s to @buf and replaces broken multibyte sequences, and the
 * backslash of a literal "\x", with a \x?? hex sequence. All other
 * characters, including control chars, are copied as they are.
 *
 * If @width is not NULL it returns the number of cells of the result; it is
 * set to 0 when nothing is encoded. Characters without a positive width add
 * no cells.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (width)
		*width = 0;
	if (!s || !*s || !buf)
		return NULL;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
#ifdef HAVE_WIDECHAR
			/* broken sequence: restart with a clean state */
			memset(&st, 0, sizeof(st));
#endif
			len = 1;
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				snprintf(r, 5, "\\x%02x", (unsigned char) *p);
				r += 4;
				cells += 4;
			} else {
				cells++;
				*r++ = *p;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			/* hide the backslash so the output decodes unambiguously */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		} else {
			memcpy(r, p, len);
			r += len;
#ifdef HAVE_WIDECHAR
			{
				/*
				 * wcwidth() is -1 for non-printable chars (they
				 * are not filtered here); never subtract cells.
				 */
				int x = wcwidth(wc);
				if (x > 0)
					cells += x;
			}
#else
			cells++;
#endif
		}
		p += len;
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
312
/*
 * Size of the buffer to allocate for encoding @bytes input bytes: four
 * output bytes for every input byte plus one for the terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t per_byte = 4;	/* length of one \x?? sequence */

	return bytes * per_byte + 1;
}
320
/*
 * Counts the size of the original string in bytes: every complete \x??
 * sequence (backslash, 'x' and two hex digits) is counted as one byte,
 * every other byte as itself.
 *
 * Returns: number of bytes, 0 for a NULL or empty string.
 */
size_t mbs_safe_decode_size(const char *p)
{
	size_t bytes = 0;

	if (!p)
		return 0;

	while (*p) {
		/*
		 * The && chain stops at the first mismatch, so the terminating
		 * NUL is never stepped over. The casts keep bytes >= 0x80 in
		 * the range the <ctype.h> functions are defined for.
		 */
		if (p[0] == '\\' && p[1] == 'x' &&
		    isxdigit((unsigned char) p[2]) &&
		    isxdigit((unsigned char) p[3]))
			p += 4;
		else
			p++;
		bytes++;
	}
	return bytes;
}
338
/*
 * Allocates a buffer of mbs_safe_encode_size(strlen(s)) bytes and fills it
 * with mbs_safe_encode_to_buffer(), without any safechars. The @width is
 * passed on to that function.
 *
 * Returns: the allocated string (to be released with free()), or NULL if @s
 * is NULL or empty, the allocation fails, or the encoding returns NULL.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	size_t len;
	char *mem, *encoded = NULL;

	if (!s)
		return NULL;
	len = strlen(s);
	if (len == 0)
		return NULL;

	mem = malloc(mbs_safe_encode_size(len));
	if (mem)
		encoded = mbs_safe_encode_to_buffer(s, width, mem, NULL);

	/* nothing encoded: drop the buffer (free(NULL) is fine) */
	if (!encoded)
		free(mem);
	return encoded;
}
357
/*
 * Allocates a buffer of mbs_safe_encode_size(strlen(s)) bytes and fills it
 * with mbs_invalid_encode_to_buffer(). The @width is passed on to that
 * function.
 *
 * Returns: the allocated string (to be released with free()), or NULL if @s
 * is NULL or empty, the allocation fails, or the encoding returns NULL.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	size_t len;
	char *mem, *encoded = NULL;

	if (!s)
		return NULL;
	len = strlen(s);
	if (len == 0)
		return NULL;

	mem = malloc(mbs_safe_encode_size(len));
	if (mem)
		encoded = mbs_invalid_encode_to_buffer(s, width, mem);

	/* nothing encoded: drop the buffer (free(NULL) is fine) */
	if (!encoded)
		free(mem);
	return encoded;
}
376
377 #ifdef HAVE_WIDECHAR
378
/* Overwrite every character of the NUL-terminated wide string WCHARS
   that iswprint() rejects with U+FFFD (replacement character).
   Returns true if at least one character was overwritten.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool any_replaced = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      any_replaced = true;
    }

  return any_replaced;
}
395
/* Cut the NUL-terminated wide string WC in place so that it needs at
   most WIDTH cells.  A character for which wcwidth() returns -1 is
   overwritten with U+FFFD and counted as one cell.
   Returns the number of cells used by the shortened string.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc != L'\0'; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }

      /* stop in front of the first character that does not fit */
      if (used + w > width)
        break;

      used += w;
    }

  *wc = L'\0';
  return used;
}
422
/* Sum the wcwidth() of at most N characters of the wide string S,
   stopping at its terminating NUL.
   Returns the number of cells, or -1 if a character has no width
   (wcwidth() returned -1) or the sum would not fit into an int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;

  for (; n > 0 && *s != L'\0'; n--, s++)
    {
      int w = wcwidth (*s);

      if (w == -1) /* non printable */
        return -1;
      if (total > INT_MAX - w) /* overflow */
        return -1;

      total += w;
    }

  return total;
}
440 #endif /* HAVE_WIDECHAR */
441
/* Truncate the multi-byte string @str in place so that it needs at most
 * *@width cells.
 *
 * Returns the number of bytes of the new string @str, and in @width the
 * number of cells it uses. Without wide-character support one byte is one
 * cell.
 *
 * If @str cannot be converted to wide characters, or memory cannot be
 * allocated, the string and @width are left as they are and the original
 * length is returned. If the conversion back fails, (size_t) -1 is returned
 * and @str is not terminated again.
 *
 * Returns 0 if @str or @width is NULL.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	size_t bytes;

	if (!str || !width)
		return 0;

	bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	{
		size_t sz = mbstowcs(NULL, str, 0);
		wchar_t *wcs = NULL;

		if (sz == (size_t) -1)
			goto done;

		/* calloc() checks the multiplication and NUL-terminates */
		wcs = calloc(sz + 1, sizeof(wchar_t));
		if (!wcs)
			goto done;

		if (mbstowcs(wcs, str, sz) == (size_t) -1)
			goto done;

		/* an empty string gets here too and reports zero cells */
		*width = wc_truncate(wcs, *width);

		/* never write more than the original string occupied */
		bytes = wcstombs(str, wcs, bytes);
done:
		free(wcs);
	}
#else
	if (*width < bytes)
		bytes = *width;
	/* report the cells really used, also when nothing was cut */
	*width = bytes;
#endif
	if (bytes != (size_t) -1)
		str[bytes] = '\0';
	return bytes;
}
475
/* Fill DEST with up to N_SPACES copies of PADCHAR, stopping early
   when DEST_END is reached so that nothing but the terminating NUL
   is written at DEST_END.  The output is always NUL terminated.
   Returns a pointer to that terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  while (n_spaces != 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }

  *dest = '\0';
  return dest;
}
489
490 size_t
491 mbsalign (const char *src, char *dest, size_t dest_size,
492 size_t *width, mbs_align_t align, int flags)
493 {
494 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
495 }
496
497 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
498 characters; write the result into the DEST_SIZE-byte buffer, DEST.
499 ALIGNMENT specifies whether to left- or right-justify or to center.
500 If SRC requires more than *WIDTH columns, truncate it to fit.
501 When centering, the number of trailing spaces may be one less than the
502 number of leading spaces. The FLAGS parameter is unused at present.
503 Return the length in bytes required for the final result, not counting
504 the trailing NUL. A return value of DEST_SIZE or larger means there
505 wasn't enough space. DEST will be NUL terminated in any case.
506 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
507 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
508 Update *WIDTH to indicate how many columns were used before padding. */
509
510 size_t
511 mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
512 size_t *width, mbs_align_t align,
513 #ifdef HAVE_WIDECHAR
514 int flags,
515 #else
516 int flags __attribute__((__unused__)),
517 #endif
518 int padchar)
519 {
520 size_t ret = -1;
521 size_t src_size = strlen (src) + 1;
522 char *newstr = NULL;
523 wchar_t *str_wc = NULL;
524 const char *str_to_print = src;
525 size_t n_cols = src_size - 1;
526 size_t n_used_bytes = n_cols; /* Not including NUL */
527 size_t n_spaces = 0, space_left;
528
529 #ifdef HAVE_WIDECHAR
530 bool conversion = false;
531 bool wc_enabled = false;
532
533 /* In multi-byte locales convert to wide characters
534 to allow easy truncation. Also determine number
535 of screen columns used. */
536 if (MB_CUR_MAX > 1)
537 {
538 size_t src_chars = mbstowcs (NULL, src, 0);
539 if (src_chars == (size_t) -1)
540 {
541 if (flags & MBA_UNIBYTE_FALLBACK)
542 goto mbsalign_unibyte;
543 else
544 goto mbsalign_cleanup;
545 }
546 src_chars += 1; /* make space for NUL */
547 str_wc = malloc (src_chars * sizeof (wchar_t));
548 if (str_wc == NULL)
549 {
550 if (flags & MBA_UNIBYTE_FALLBACK)
551 goto mbsalign_unibyte;
552 else
553 goto mbsalign_cleanup;
554 }
555 if (mbstowcs (str_wc, src, src_chars) != 0)
556 {
557 str_wc[src_chars - 1] = L'\0';
558 wc_enabled = true;
559 conversion = wc_ensure_printable (str_wc);
560 n_cols = rpl_wcswidth (str_wc, src_chars);
561 }
562 }
563
564 /* If we transformed or need to truncate the source string
565 then create a modified copy of it. */
566 if (wc_enabled && (conversion || (n_cols > *width)))
567 {
568 if (conversion)
569 {
570 /* May have increased the size by converting
571 \t to \uFFFD for example. */
572 src_size = wcstombs(NULL, str_wc, 0) + 1;
573 }
574 newstr = malloc (src_size);
575 if (newstr == NULL)
576 {
577 if (flags & MBA_UNIBYTE_FALLBACK)
578 goto mbsalign_unibyte;
579 else
580 goto mbsalign_cleanup;
581 }
582 str_to_print = newstr;
583 n_cols = wc_truncate (str_wc, *width);
584 n_used_bytes = wcstombs (newstr, str_wc, src_size);
585 }
586
587 mbsalign_unibyte:
588 #endif
589
590 if (n_cols > *width) /* Unibyte truncation required. */
591 {
592 n_cols = *width;
593 n_used_bytes = n_cols;
594 }
595
596 if (*width > n_cols) /* Padding required. */
597 n_spaces = *width - n_cols;
598
599 /* indicate to caller how many cells needed (not including padding). */
600 *width = n_cols;
601
602 /* indicate to caller how many bytes needed (not including NUL). */
603 ret = n_used_bytes + (n_spaces * 1);
604
605 /* Write as much NUL terminated output to DEST as possible. */
606 if (dest_size != 0)
607 {
608 char *dest_end = dest + dest_size - 1;
609 size_t start_spaces;
610 size_t end_spaces;
611
612 switch (align)
613 {
614 case MBS_ALIGN_CENTER:
615 start_spaces = n_spaces / 2 + n_spaces % 2;
616 end_spaces = n_spaces / 2;
617 break;
618 case MBS_ALIGN_LEFT:
619 start_spaces = 0;
620 end_spaces = n_spaces;
621 break;
622 case MBS_ALIGN_RIGHT:
623 start_spaces = n_spaces;
624 end_spaces = 0;
625 break;
626 default:
627 abort();
628 }
629
630 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
631 space_left = dest_end - dest;
632 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
633 mbs_align_pad (dest, dest_end, end_spaces, padchar);
634 }
635 #ifdef HAVE_WIDECHAR
636 mbsalign_cleanup:
637 #endif
638 free (str_wc);
639 free (newstr);
640
641 return ret;
642 }