]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
Merge branch 'lsfd--multiplexed-flag-of-XMODE-column' of https://github.com/masatake...
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
79feaa60
KZ
1/*
2 * SPDX-License-Identifier: LGPL-2.1-or-later
3 *
4 * Align/Truncate a string in a given screen width
5 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by the
9 * Free Software Foundation, either version 2.1 of the License, or (at your
10 * option) any later version.
11 *
12 * Written by Pádraig Brady.
13 */
104b92f8
PB
14#include <stdlib.h>
15#include <string.h>
16#include <stdio.h>
17#include <stdbool.h>
18#include <limits.h>
1b1f66e4 19#include <ctype.h>
104b92f8
PB
20
21#include "c.h"
22#include "mbsalign.h"
ab753d8f 23#include "strutils.h"
104b92f8
PB
24#include "widechar.h"
25
0957fdca
KZ
/*
 * Counts number of cells in multibyte string. All control and
 * non-printable chars are ignored, and so are "\e[...m" terminal
 * sequences (only when introduced by the ESC character).
 *
 * @buf:   string to measure, may be NULL
 * @bufsz: maximal number of bytes of @buf to inspect; the scan also stops
 *         at the first NUL byte. Zero means "nothing to count".
 *
 * Nothing behind @buf + @bufsz is read.
 *
 * Returns: number of cells.
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
	const char *p = buf, *end;
	size_t width = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!buf || !bufsz)
		return 0;

	end = buf + bufsz;	/* one past the last usable byte */

	while (p < end && *p) {
		if (iscntrl((unsigned char) *p)) {
			int is_esc = (*p == '\033');

			p++;

			/* try detect "\e[x;ym" and skip on success */
			if (is_esc && p < end && *p == '[') {
				const char *e = p;

				while (e < end && *e && *e != 'm')
					e++;
				if (e < end && *e == 'm')
					p = e + 1;
			}
			continue;
		}
#ifdef HAVE_WIDECHAR
		{
			wchar_t wc;
			/* never let mbrtowc() look behind the buffer */
			size_t len = mbrtowc(&wc, p, (size_t) (end - p), &st);

			if (len == 0)
				break;
			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence; @wc has not
				 * been stored, so it must not be inspected.
				 * Skip one byte and restart from the initial
				 * conversion state.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
			} else if (iswprint((wint_t) wc)) {
				int x = wcwidth(wc);

				if (x > 0)
					width += x;
			}
			p += len;
		}
#else
		if (isprint((unsigned char) *p))
			width++;
		p++;
#endif
	}

	return width;
}
80
/*
 * Number of cells of the whole NUL-terminated string @s, see mbs_nwidth().
 * NULL or empty string gives 0.
 */
size_t mbs_width(const char *s)
{
	return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
104b92f8 87
1b1f66e4
KZ
/*
 * Counts number of cells in multibyte string. For all control and
 * non-printable chars is the result width enlarged to store \x?? hex
 * sequence. See mbs_safe_encode().
 *
 * @buf:   string to measure, may be NULL
 * @bufsz: maximal number of bytes of @buf to inspect; the scan also stops
 *         at the first NUL byte. Zero means "nothing to count".
 * @sz:    optional, returns number of bytes of the encoded form
 *
 * Nothing behind @buf + @bufsz is read.
 *
 * Returns: number of cells, @sz returns number of bytes.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf, *end = buf;
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (buf && bufsz)
		end = buf + bufsz;	/* one past the last usable byte */

	while (p && p < end && *p) {
		/* a literal "\x" in the input is escaped too (the backslash
		 * only), the 'x' has to be inside the buffer to count */
		if ((end - p > 1 && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			/* never let mbrtowc() look behind the buffer */
			size_t len = mbrtowc(&wc, p, (size_t) (end - p), &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* broken sequence: handle one byte and restart
				 * from the initial conversion state */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint((wint_t) wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int x = wcwidth(wc);	/* number of cells */

				/* a negative result must not wrap the counter */
				if (x > 0)
					width += x;
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
152
0c33fcbf
KZ
/*
 * Number of cells of the whole NUL-terminated string @s after \x?? encoding,
 * see mbs_safe_nwidth(). NULL or empty string gives 0.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
159
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. The @width returns number of cells. The @safechars
 * are not encoded.
 *
 * @s:         NUL-terminated input string
 * @width:     optional, returns number of cells of the result; it is set
 *             to zero when the function returns NULL
 * @buf:       output buffer
 * @safechars: optional set of bytes copied verbatim; such bytes are not
 *             added to @width
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* always give the caller a defined value */
	if (width)
		*width = 0;
	if (!s || !*s || !buf)
		return NULL;

	r = buf;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		/* a literal "\x" in the input has its backslash encoded too */
		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* restart from the initial conversion state */
				memset(&st, 0, sizeof(st));
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint((wint_t) wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int x = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* a negative result must not wrap the counter */
				if (x > 0)
					cells += x;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
249
365ed9f6
KZ
/*
 * Copy @s to @buf and replace broken sequences to \x?? hex sequence. The
 * backslash of a literal "\x" in the input is encoded too. All other
 * (valid) characters, printable or not, are copied unchanged.
 *
 * @s:     NUL-terminated input string
 * @width: optional, returns number of cells of the result; it is set to
 *         zero when the function returns NULL. Copied characters without
 *         a positive wcwidth() (e.g. control chars) do not change it.
 * @buf:   output buffer
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* always give the caller a defined value */
	if (width)
		*width = 0;
	if (!s || !*s || !buf)
		return NULL;

	r = buf;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc = 0;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
			len = 1;
#ifdef HAVE_WIDECHAR
			/* restart from the initial conversion state */
			memset(&st, 0, sizeof(st));
#endif
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				sprintf(r, "\\x%02x", (unsigned char) *p);
				r += 4;
				cells += 4;
			} else {
				cells++;
				*r++ = *p;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		} else {
			memcpy(r, p, len);
			r += len;
#ifdef HAVE_WIDECHAR
			{
				/*
				 * wcwidth() is negative for valid but
				 * non-printable chars (e.g. TAB); adding that
				 * to an unsigned counter would wrap it.
				 */
				int x = wcwidth(wc);

				if (x > 0)
					cells += x;
			}
#else
			/* NOTE(review): one cell per byte without widechar
			 * support; presumably what the project's wcwidth()
			 * fallback gives -- confirm. */
			cells++;
#endif
		}
		p += len;
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
312
c426f70f
KZ
/*
 * Returns size of the buffer necessary to encode @bytes bytes of input in
 * the worst case: every byte replaced by a four-char \x?? sequence, plus
 * the terminating NUL.
 *
 * If the size is not representable the function returns (size_t) -1, so
 * an allocation of that size fails rather than being silently too small.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	if (bytes > ((size_t) -1 - 1) / 4)
		return (size_t) -1;
	return (bytes * 4) + 1;
}
317
/*
 * Allocating variant of mbs_safe_encode_to_buffer() (without safechars):
 * the result is a new string where all control and non-printable chars are
 * replaced with \x?? hex sequence.
 *
 * Returns: the new string (to be released by free()), or NULL if @s is NULL
 * or empty, on allocation failure, or when the encoding function fails.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	size_t len;
	char *out, *res;

	if (s == NULL)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	out = malloc(mbs_safe_encode_size(len));
	if (out == NULL)
		return NULL;

	res = mbs_safe_encode_to_buffer(s, width, out, NULL);
	if (res == NULL)
		free(out);	/* nothing encoded, don't leak the buffer */
	return res;
}
336
365ed9f6
KZ
/*
 * Allocating variant of mbs_invalid_encode_to_buffer(): the result is a new
 * string where all broken multibyte sequences are replaced with \x?? hex
 * sequence.
 *
 * Returns: the new string (to be released by free()), or NULL if @s is NULL
 * or empty, on allocation failure, or when the encoding function fails.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	size_t len;
	char *out, *res;

	if (s == NULL)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	out = malloc(mbs_safe_encode_size(len));
	if (out == NULL)
		return NULL;

	res = mbs_invalid_encode_to_buffer(s, width, out);
	if (res == NULL)
		free(out);	/* nothing encoded, don't leak the buffer */
	return res;
}
355
4a423fb9
KZ
356#ifdef HAVE_WIDECHAR
357
104b92f8
PB
/* Replace every non-printable character of the NUL-terminated wide
   string WCHARS (in place) by U+FFFD, the replacement character.
   Return true if at least one character has been replaced.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }

  return changed;
}
374
/* Truncate the wide string WC (in place) so that it occupies at most
   WIDTH cells.  A character for which wcwidth() returns -1 is replaced
   by U+FFFD and counted as one cell.
   Returns number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;
  wchar_t *cur;

  for (cur = wc; *cur != L'\0'; cur++)
    {
      int w = wcwidth (*cur);

      if (w == -1) /* non printable */
        {
          *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }

      /* stop in front of the first character which does not fit */
      if (used + w > width)
        break;

      used += w;
    }

  *cur = L'\0';
  return used;
}
401
104b92f8
PB
/* Sum wcwidth() of at most N leading characters of the wide string S
   (stopping at the terminating NUL).
   Returns the number of cells, or -1 if a character is not printable
   or the sum does not fit into int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      if (w == -1)                /* non printable */
        return -1;
      if (total > INT_MAX - w)    /* overflow */
        return -1;

      total += w;
    }

  return total;
}
4a423fb9 419#endif /* HAVE_WIDECHAR */
104b92f8 420
5f94ca33
KZ
/* Truncate multi-byte string to @width and returns number of
 * bytes of the new string @str, and in @width returns number
 * of cells.
 *
 * @str:   NUL-terminated string, modified in place
 * @width: in: maximal number of cells; out: number of cells used
 *
 * With wide-char support: if @str cannot be converted to wide chars (or
 * memory cannot be allocated) then @str and @width are left unchanged and
 * strlen(@str) is returned. If the truncated string cannot be converted
 * back, (size_t) -1 is returned.
 *
 * Without wide-char support one byte is one cell.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	size_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	size_t sz = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (sz == (size_t) -1)
		goto done;

	/* calloc() checks the multiplication and gives us the final NUL */
	wcs = calloc(sz + 1, sizeof(*wcs));
	if (!wcs)
		goto done;

	/* an empty string is converted too, so @width ends up as zero */
	if (mbstowcs(wcs, str, sz) == (size_t) -1)
		goto done;
	*width = wc_truncate(wcs, *width);
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	if (*width < bytes)
		bytes = *width;
	else
		*width = bytes;		/* shorter than requested */
#endif
	/* (size_t) -1 is the wcstombs() error, nothing to terminate then */
	if (bytes != (size_t) -1)
		str[bytes] = '\0';
	return bytes;
}
454
104b92f8
PB
/* Fill DEST with at most N_SPACES copies of PADCHAR, never writing
   a padding character at or beyond DEST_END.  The output is always
   NUL terminated (DEST_END itself may receive the NUL).
   Returns a pointer to that terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  while (n_spaces > 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }

  *dest = '\0';
  return dest;
}
468
57867795
KZ
469size_t
470mbsalign (const char *src, char *dest, size_t dest_size,
471 size_t *width, mbs_align_t align, int flags)
472{
473 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
474}
475
104b92f8
PB
476/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
477 characters; write the result into the DEST_SIZE-byte buffer, DEST.
478 ALIGNMENT specifies whether to left- or right-justify or to center.
479 If SRC requires more than *WIDTH columns, truncate it to fit.
480 When centering, the number of trailing spaces may be one less than the
481 number of leading spaces. The FLAGS parameter is unused at present.
482 Return the length in bytes required for the final result, not counting
483 the trailing NUL. A return value of DEST_SIZE or larger means there
484 wasn't enough space. DEST will be NUL terminated in any case.
485 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
486 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
487 Update *WIDTH to indicate how many columns were used before padding. */
488
489size_t
57867795 490mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
c191740c
KZ
491 size_t *width, mbs_align_t align,
492#ifdef HAVE_WIDECHAR
493 int flags,
494#else
495 int flags __attribute__((__unused__)),
496#endif
57867795 497 int padchar)
104b92f8
PB
498{
499 size_t ret = -1;
500 size_t src_size = strlen (src) + 1;
501 char *newstr = NULL;
502 wchar_t *str_wc = NULL;
503 const char *str_to_print = src;
504 size_t n_cols = src_size - 1;
505 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 506 size_t n_spaces = 0, space_left;
6426f926
KZ
507
508#ifdef HAVE_WIDECHAR
104b92f8
PB
509 bool conversion = false;
510 bool wc_enabled = false;
511
104b92f8
PB
512 /* In multi-byte locales convert to wide characters
513 to allow easy truncation. Also determine number
514 of screen columns used. */
515 if (MB_CUR_MAX > 1)
516 {
517 size_t src_chars = mbstowcs (NULL, src, 0);
518 if (src_chars == (size_t) -1)
519 {
520 if (flags & MBA_UNIBYTE_FALLBACK)
521 goto mbsalign_unibyte;
522 else
523 goto mbsalign_cleanup;
524 }
525 src_chars += 1; /* make space for NUL */
526 str_wc = malloc (src_chars * sizeof (wchar_t));
527 if (str_wc == NULL)
528 {
529 if (flags & MBA_UNIBYTE_FALLBACK)
530 goto mbsalign_unibyte;
531 else
532 goto mbsalign_cleanup;
533 }
534 if (mbstowcs (str_wc, src, src_chars) != 0)
535 {
536 str_wc[src_chars - 1] = L'\0';
537 wc_enabled = true;
538 conversion = wc_ensure_printable (str_wc);
539 n_cols = rpl_wcswidth (str_wc, src_chars);
540 }
541 }
542
543 /* If we transformed or need to truncate the source string
544 then create a modified copy of it. */
545 if (wc_enabled && (conversion || (n_cols > *width)))
546 {
547 if (conversion)
548 {
549 /* May have increased the size by converting
550 \t to \uFFFD for example. */
551 src_size = wcstombs(NULL, str_wc, 0) + 1;
552 }
553 newstr = malloc (src_size);
554 if (newstr == NULL)
555 {
556 if (flags & MBA_UNIBYTE_FALLBACK)
557 goto mbsalign_unibyte;
558 else
559 goto mbsalign_cleanup;
560 }
561 str_to_print = newstr;
562 n_cols = wc_truncate (str_wc, *width);
563 n_used_bytes = wcstombs (newstr, str_wc, src_size);
564 }
104b92f8
PB
565
566mbsalign_unibyte:
6426f926 567#endif
104b92f8
PB
568
569 if (n_cols > *width) /* Unibyte truncation required. */
570 {
571 n_cols = *width;
572 n_used_bytes = n_cols;
573 }
574
575 if (*width > n_cols) /* Padding required. */
576 n_spaces = *width - n_cols;
577
578 /* indicate to caller how many cells needed (not including padding). */
579 *width = n_cols;
580
581 /* indicate to caller how many bytes needed (not including NUL). */
582 ret = n_used_bytes + (n_spaces * 1);
583
584 /* Write as much NUL terminated output to DEST as possible. */
585 if (dest_size != 0)
586 {
587 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
588 size_t start_spaces;
589 size_t end_spaces;
104b92f8
PB
590
591 switch (align)
592 {
593 case MBS_ALIGN_CENTER:
594 start_spaces = n_spaces / 2 + n_spaces % 2;
595 end_spaces = n_spaces / 2;
596 break;
597 case MBS_ALIGN_LEFT:
598 start_spaces = 0;
599 end_spaces = n_spaces;
600 break;
601 case MBS_ALIGN_RIGHT:
602 start_spaces = n_spaces;
603 end_spaces = 0;
604 break;
ce7b11a9
SK
605 default:
606 abort();
104b92f8
PB
607 }
608
57867795 609 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
3acc206d 610 space_left = dest_end - dest;
104b92f8 611 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
57867795 612 mbs_align_pad (dest, dest_end, end_spaces, padchar);
104b92f8 613 }
6426f926 614#ifdef HAVE_WIDECHAR
104b92f8 615mbsalign_cleanup:
6426f926 616#endif
104b92f8
PB
617 free (str_wc);
618 free (newstr);
619
620 return ret;
621}