]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
Revert "lib/pager: Apply pager-specific fixes only when needed"
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
79feaa60
KZ
1/*
2 * SPDX-License-Identifier: LGPL-2.1-or-later
3 *
4 * Align/Truncate a string in a given screen width
5 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by the
9 * Free Software Foundation, either version 2.1 of the License, or (at your
10 * option) any later version.
11 *
12 * Written by Pádraig Brady.
13 */
104b92f8
PB
14#include <stdlib.h>
15#include <string.h>
16#include <stdio.h>
17#include <stdbool.h>
18#include <limits.h>
1b1f66e4 19#include <ctype.h>
104b92f8
PB
20
21#include "c.h"
22#include "mbsalign.h"
ab753d8f 23#include "strutils.h"
104b92f8
PB
24#include "widechar.h"
25
0957fdca
KZ
/*
 * Counts the number of screen cells used by the first @bufsz bytes of the
 * multibyte string @buf. Control and non-printable characters take no cells.
 * A control character followed by "[...m" (for example the "\e[x;ym" color
 * sequence) is skipped as a whole.
 *
 * Scanning stops at the first NUL byte or after @bufsz bytes, whichever comes
 * first; nothing at or behind @buf + @bufsz is read.
 *
 * Returns: number of cells (0 for NULL @buf or zero @bufsz).
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
	const char *p = buf;
	const char *end;
	size_t width = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!buf || !bufsz)
		return 0;

	end = buf + bufsz;		/* one past the last usable byte */

	while (p < end && *p) {
		if (iscntrl((unsigned char) *p)) {
			p++;

			/* try detect "\e[x;ym" and skip on success */
			if (p < end && *p == '[') {
				const char *e = p;

				while (e < end && *e && *e != 'm')
					e++;
				if (e < end && *e == 'm')
					p = e + 1;
			}
			continue;
		}
#ifdef HAVE_WIDECHAR
		{
			wchar_t wc = 0;
			size_t left = (size_t) (end - p);
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX, &st);

			if (len == 0)
				break;

			/*
			 * Errors have to be tested first: len is unsigned, so
			 * (size_t) -1 and -2 are "greater than zero" and @wc
			 * is not set by a failed conversion.
			 */
			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* the state is undefined after an error */
				memset(&st, 0, sizeof(st));
			} else if (iswprint((wint_t) wc)) {
				int x = wcwidth(wc);

				if (x > 0)
					width += x;
			}
			p += len;
		}
#else
		if (isprint((unsigned char) *p))
			width++;
		p++;
#endif
	}

	return width;
}
80
/*
 * Width in cells of the whole NUL-terminated string @s, as counted by
 * mbs_nwidth(). NULL and empty strings have width 0.
 */
size_t mbs_width(const char *s)
{
	return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
104b92f8 87
1b1f66e4
KZ
/*
 * Counts the number of cells in the first @bufsz bytes of the multibyte
 * string @buf. For every control and non-printable char (and for the
 * backslash of a literal "\x") the result is enlarged by 4 to leave room
 * for the \x?? hex sequence. See mbs_safe_encode().
 *
 * Scanning stops at the first NUL byte or after @bufsz bytes, whichever comes
 * first; nothing at or behind @buf + @bufsz is read.
 *
 * Returns: number of cells; if @sz is not NULL it returns the number of
 * bytes of the encoded string.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf;
	const char *end = buf;
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (buf && bufsz)
		end = buf + bufsz;	/* one past the last usable byte */

	while (p && p < end && *p) {
		if ((p + 1 < end && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc = 0;
			size_t left = (size_t) (end - p);
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* the state is undefined after an error */
				memset(&st, 0, sizeof(st));
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint((wint_t) wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int x = wcwidth(wc);	/* number of cells */

				/* never add a negative value to size_t */
				if (x > 0)
					width += x;
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
152
0c33fcbf
KZ
/*
 * Width in cells of the whole NUL-terminated string @s after \x?? encoding,
 * as counted by mbs_safe_nwidth(). NULL and empty strings have width 0.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
159
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. The backslash of a literal "\x" in @s is encoded too.
 * Chars listed in @safechars are copied as they are and do not
 * contribute to the width.
 *
 * The @width (optional, may be NULL) returns the number of cells.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (@width is
 * not modified in this case).
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!s || !*s || !buf)
		return NULL;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc = 0;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* the state is undefined after an error */
				memset(&st, 0, sizeof(st));
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint((wint_t) wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int x = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* never add a negative value to size_t */
				if (x > 0)
					cells += x;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
249
365ed9f6
KZ
/*
 * Copy @s to @buf and replace broken multibyte sequences with \x?? hex
 * sequence. The backslash of a literal "\x" in @s is encoded too (to \x5c),
 * everything else is copied as it is.
 *
 * The @width (optional, may be NULL) returns the number of cells. Chars
 * without a positive cell width (for example control chars) do not change
 * the width in the widechar build.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (@width is
 * not modified in this case).
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!s || !*s || !buf)
		return NULL;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc = 0;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
			len = 1;
#ifdef HAVE_WIDECHAR
			/* the state is undefined after an error */
			memset(&st, 0, sizeof(st));
#endif
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				sprintf(r, "\\x%02x", (unsigned char) *p);
				r += 4;
				cells += 4;
			} else {
				cells++;
				*r++ = *p;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		} else {
			memcpy(r, p, len);
			r += len;
#ifdef HAVE_WIDECHAR
			{
				/*
				 * wcwidth() is -1 for non-printable chars;
				 * adding it to size_t would wrap around.
				 */
				int x = wcwidth(wc);

				if (x > 0)
					cells += x;
			}
#else
			cells++;
#endif
		}
		p += len;
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
312
6b724741
KZ
/*
 * Size of the buffer needed to encode a string of @bytes bytes: four
 * output bytes for every input byte, plus one more byte.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t per_byte = 4;

	return per_byte * bytes + 1;
}
320
6b724741
KZ
/*
 * Count size of the original string in bytes: every "\x" followed by two
 * hex digits counts as one byte, any other byte counts as itself.
 *
 * Returns: number of bytes, 0 for NULL or empty @p.
 */
size_t mbs_safe_decode_size(const char *p)
{
	size_t bytes = 0;

	while (p && *p) {
		/*
		 * The <ctype.h> functions are defined only for EOF and
		 * values representable as unsigned char.
		 */
		if (*p == '\\' && *(p + 1) == 'x' &&
		    isxdigit((unsigned char) *(p + 2)) &&
		    isxdigit((unsigned char) *(p + 3)))
			p += 4;
		else
			p++;
		bytes++;
	}
	return bytes;
}
338
c426f70f
KZ
/*
 * Returns newly allocated string where all control and non-printable chars
 * of @s are replaced with \x?? hex sequence (see
 * mbs_safe_encode_to_buffer()), or NULL if @s is NULL or empty, or on
 * allocation or encoding failure.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *buf, *ret;
	size_t len;

	if (!s)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	buf = malloc(mbs_safe_encode_size(len));
	if (!buf)
		return NULL;

	ret = mbs_safe_encode_to_buffer(s, width, buf, NULL);
	if (ret)
		return ret;

	free(buf);
	return NULL;
}
357
365ed9f6
KZ
/*
 * Returns newly allocated string where all broken multibyte sequences of
 * @s are replaced with \x?? hex sequence (see
 * mbs_invalid_encode_to_buffer()), or NULL if @s is NULL or empty, or on
 * allocation or encoding failure.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	char *buf, *ret;
	size_t len;

	if (!s)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	buf = malloc(mbs_safe_encode_size(len));
	if (!buf)
		return NULL;

	ret = mbs_invalid_encode_to_buffer(s, width, buf);
	if (ret)
		return ret;

	free(buf);
	return NULL;
}
376
4a423fb9
KZ
377#ifdef HAVE_WIDECHAR
378
104b92f8
PB
/* Replace every non-printable char in the NUL-terminated @wchars by U+FFFD
 * (replacement char). Returns true if at least one char has been replaced. */
static bool
wc_ensure_printable (wchar_t *wchars)
{
	bool changed = false;
	wchar_t *cur;

	for (cur = wchars; *cur != L'\0'; cur++) {
		if (iswprint ((wint_t) *cur))
			continue;
		*cur = 0xFFFD;	/* L'\uFFFD' (replacement char) */
		changed = true;
	}
	return changed;
}
395
/* Cut the NUL-terminated wchar string @wc so that it does not need more
 * than @width cells. Chars for which wcwidth() returns -1 are replaced by
 * U+FFFD and counted as one cell.
 * Returns number of cells used. */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
	size_t used = 0;
	wchar_t *cur;

	for (cur = wc; *cur != L'\0'; cur++) {
		int w = wcwidth (*cur);

		if (w == -1) {		/* non printable */
			*cur = 0xFFFD;	/* L'\uFFFD' (replacement char) */
			w = 1;
		}
		if (used + w > width)
			break;
		used += w;
	}
	*cur = L'\0';
	return used;
}
422
104b92f8
PB
/* Sum of wcwidth() of at most @n leading chars of the NUL-terminated @s.
 * Returns -1 if any char is non-printable or if the sum would not fit
 * into int. */
static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
	int total = 0;
	size_t i;

	for (i = 0; i < n && s[i] != L'\0'; i++) {
		int w = wcwidth (s[i]);

		if (w == -1)			/* non printable */
			return -1;
		if (total > (INT_MAX - w))	/* overflow */
			return -1;
		total += w;
	}

	return total;
}
4a423fb9 440#endif /* HAVE_WIDECHAR */
104b92f8 441
5f94ca33
KZ
/* Truncate the multi-byte string @str in place to at most @width cells.
 * Returns the number of bytes of the new @str. In the widechar build
 * @width is set to the number of cells used when @str has been converted;
 * in the non-widechar build @width is not modified and is used as a
 * limit in bytes.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	wchar_t *wcs = NULL;
	ssize_t sz = mbstowcs(NULL, str, 0);

	/* @str is left untouched if it cannot be converted */
	if (sz != (ssize_t) -1) {
		wcs = calloc(1, (sz + 1) * sizeof(wchar_t));
		if (wcs && mbstowcs(wcs, str, sz)) {
			*width = wc_truncate(wcs, *width);
			bytes = wcstombs(str, wcs, bytes);
		}
	}
	free(wcs);
#else
	if (bytes >= 0 && (size_t) bytes > *width)
		bytes = *width;
#endif
	if (bytes >= 0)
		str[bytes] = '\0';
	return bytes;
}
475
104b92f8
PB
/* Store up to N_SPACES copies of PADCHAR at DEST, never writing a
   pad character at or behind DEST_END. The output is always terminated
   by NUL (which may be stored at DEST_END itself).
   Returns a pointer to the terminating NUL. */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
	while (n_spaces > 0 && dest < dest_end) {
		*dest++ = padchar;
		n_spaces--;
	}
	*dest = '\0';
	return dest;
}
489
57867795
KZ
490size_t
491mbsalign (const char *src, char *dest, size_t dest_size,
492 size_t *width, mbs_align_t align, int flags)
493{
494 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
495}
496
104b92f8
PB
497/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
498 characters; write the result into the DEST_SIZE-byte buffer, DEST.
499 ALIGNMENT specifies whether to left- or right-justify or to center.
500 If SRC requires more than *WIDTH columns, truncate it to fit.
501 When centering, the number of trailing spaces may be one less than the
502 number of leading spaces. The FLAGS parameter is unused at present.
503 Return the length in bytes required for the final result, not counting
504 the trailing NUL. A return value of DEST_SIZE or larger means there
505 wasn't enough space. DEST will be NUL terminated in any case.
506 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
507 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
508 Update *WIDTH to indicate how many columns were used before padding. */
509
510size_t
57867795 511mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
c191740c
KZ
512 size_t *width, mbs_align_t align,
513#ifdef HAVE_WIDECHAR
514 int flags,
515#else
516 int flags __attribute__((__unused__)),
517#endif
57867795 518 int padchar)
104b92f8
PB
519{
520 size_t ret = -1;
521 size_t src_size = strlen (src) + 1;
522 char *newstr = NULL;
523 wchar_t *str_wc = NULL;
524 const char *str_to_print = src;
525 size_t n_cols = src_size - 1;
526 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 527 size_t n_spaces = 0, space_left;
6426f926
KZ
528
529#ifdef HAVE_WIDECHAR
104b92f8
PB
530 bool conversion = false;
531 bool wc_enabled = false;
532
104b92f8
PB
533 /* In multi-byte locales convert to wide characters
534 to allow easy truncation. Also determine number
535 of screen columns used. */
536 if (MB_CUR_MAX > 1)
537 {
538 size_t src_chars = mbstowcs (NULL, src, 0);
539 if (src_chars == (size_t) -1)
540 {
541 if (flags & MBA_UNIBYTE_FALLBACK)
542 goto mbsalign_unibyte;
543 else
544 goto mbsalign_cleanup;
545 }
546 src_chars += 1; /* make space for NUL */
547 str_wc = malloc (src_chars * sizeof (wchar_t));
548 if (str_wc == NULL)
549 {
550 if (flags & MBA_UNIBYTE_FALLBACK)
551 goto mbsalign_unibyte;
552 else
553 goto mbsalign_cleanup;
554 }
555 if (mbstowcs (str_wc, src, src_chars) != 0)
556 {
557 str_wc[src_chars - 1] = L'\0';
558 wc_enabled = true;
559 conversion = wc_ensure_printable (str_wc);
560 n_cols = rpl_wcswidth (str_wc, src_chars);
561 }
562 }
563
564 /* If we transformed or need to truncate the source string
565 then create a modified copy of it. */
566 if (wc_enabled && (conversion || (n_cols > *width)))
567 {
568 if (conversion)
569 {
570 /* May have increased the size by converting
571 \t to \uFFFD for example. */
572 src_size = wcstombs(NULL, str_wc, 0) + 1;
573 }
574 newstr = malloc (src_size);
575 if (newstr == NULL)
576 {
577 if (flags & MBA_UNIBYTE_FALLBACK)
578 goto mbsalign_unibyte;
579 else
580 goto mbsalign_cleanup;
581 }
582 str_to_print = newstr;
583 n_cols = wc_truncate (str_wc, *width);
584 n_used_bytes = wcstombs (newstr, str_wc, src_size);
585 }
104b92f8
PB
586
587mbsalign_unibyte:
6426f926 588#endif
104b92f8
PB
589
590 if (n_cols > *width) /* Unibyte truncation required. */
591 {
592 n_cols = *width;
593 n_used_bytes = n_cols;
594 }
595
596 if (*width > n_cols) /* Padding required. */
597 n_spaces = *width - n_cols;
598
599 /* indicate to caller how many cells needed (not including padding). */
600 *width = n_cols;
601
602 /* indicate to caller how many bytes needed (not including NUL). */
603 ret = n_used_bytes + (n_spaces * 1);
604
605 /* Write as much NUL terminated output to DEST as possible. */
606 if (dest_size != 0)
607 {
608 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
609 size_t start_spaces;
610 size_t end_spaces;
104b92f8
PB
611
612 switch (align)
613 {
614 case MBS_ALIGN_CENTER:
615 start_spaces = n_spaces / 2 + n_spaces % 2;
616 end_spaces = n_spaces / 2;
617 break;
618 case MBS_ALIGN_LEFT:
619 start_spaces = 0;
620 end_spaces = n_spaces;
621 break;
622 case MBS_ALIGN_RIGHT:
623 start_spaces = n_spaces;
624 end_spaces = 0;
625 break;
ce7b11a9
SK
626 default:
627 abort();
104b92f8
PB
628 }
629
57867795 630 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
3acc206d 631 space_left = dest_end - dest;
104b92f8 632 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
57867795 633 mbs_align_pad (dest, dest_end, end_spaces, padchar);
104b92f8 634 }
6426f926 635#ifdef HAVE_WIDECHAR
104b92f8 636mbsalign_cleanup:
6426f926 637#endif
104b92f8
PB
638 free (str_wc);
639 free (newstr);
640
641 return ret;
642}