]> git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
Merge branch 'lsclocks/relative-time' of https://github.com/t-8ch/util-linux
[thirdparty/util-linux.git] / lib / mbsalign.c
1 /*
2 * SPDX-License-Identifier: LGPL-2.1-or-later
3 *
4 * Align/Truncate a string in a given screen width
5 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by the
9 * Free Software Foundation, either version 2.1 of the License, or (at your
10 * option) any later version.
11 *
12 * Written by Pádraig Brady.
13 */
14 #include <stdlib.h>
15 #include <string.h>
16 #include <stdio.h>
17 #include <stdbool.h>
18 #include <limits.h>
19 #include <ctype.h>
20
21 #include "c.h"
22 #include "mbsalign.h"
23 #include "strutils.h"
24 #include "widechar.h"
25
/*
 * Counts number of cells in multibyte string. All control and
 * non-printable chars are ignored. A control char followed by "[...m"
 * (e.g. the "\e[x;ym" colour sequence) is skipped as a whole.
 *
 * @buf:   string to measure; scanning also stops at the first NUL byte
 * @bufsz: maximal number of bytes of @buf to examine
 *
 * Returns: number of cells; 0 for NULL, empty or zero-sized input.
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
	const char *p = buf, *last;
	size_t width = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* nothing to measure (a zero @bufsz must not count the first char) */
	if (!buf || !*buf || !bufsz)
		return 0;

	last = buf + (bufsz - 1);	/* last byte we are allowed to examine */

	while (*p && p <= last) {
		if (iscntrl((unsigned char) *p)) {
			p++;

			/* try detect "\e[x;ym" and skip on success */
			if (p <= last && *p == '[') {
				const char *e = p;

				while (*e && e < last && *e != 'm')
					e++;
				if (*e == 'm')
					p = e + 1;
			}
			continue;
		}
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

		if (len == 0)
			break;

		if (len == (size_t) -1 || len == (size_t) -2) {
			/*
			 * Invalid or incomplete sequence: @wc has not been
			 * set, so it must not be inspected. Skip one byte
			 * and start again with a clean conversion state.
			 */
			memset(&st, 0, sizeof(st));
			len = 1;
		} else if (iswprint(wc)) {
			int x = wcwidth(wc);

			if (x > 0)
				width += x;
		}
		p += len;
#else
		if (isprint((unsigned char) *p))
			width++;
		p++;
#endif
	}

	return width;
}
80
/*
 * Width (in cells) of the whole NUL-terminated string @s as counted by
 * mbs_nwidth(). NULL and empty strings have zero width.
 */
size_t mbs_width(const char *s)
{
	return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
87
/*
 * Counts number of cells in multibyte string. For all control and
 * non-printable chars is the result width enlarged to store \x?? hex
 * sequence. A backslash that starts a literal "\x" is counted as encoded
 * too. See mbs_safe_encode().
 *
 * @buf:   string to measure; scanning also stops at the first NUL byte
 * @bufsz: maximal number of bytes of @buf to examine
 * @sz:    optional; returns number of bytes the encoded string needs
 *         (without the terminating NUL)
 *
 * Returns: number of cells, @sz returns number of bytes. Both are zero
 * for NULL, empty or zero-sized input.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf, *last;
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* nothing to measure (a zero @bufsz must not count the first char) */
	if (!buf || !*buf || !bufsz)
		goto done;

	last = buf + (bufsz - 1);	/* last byte we are allowed to examine */

	while (*p && p <= last) {
		if ((p < last && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* broken sequence: count one byte and restart
				 * with a clean conversion state */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int x = wcwidth(wc);	/* number of cells */

				/* a negative result must not wrap the counter */
				if (x > 0)
					width += x;
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

done:
	if (sz)
		*sz = bytes;
	return width;
}
152
/*
 * Width (in cells) of the whole NUL-terminated string @s as counted by
 * mbs_safe_nwidth(). NULL and empty strings have zero width.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
159
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. A backslash that starts a literal "\x" is encoded
 * too. The @width returns number of cells. The @safechars are copied
 * as-is (never encoded) and do not contribute to @width.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (in which
 * case @width is left untouched).
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r = buf;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	*width = 0;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			/* 4 chars + NUL; the NUL is overwritten by the next write */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			*width += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* restart with a clean conversion state */
				memset(&st, 0, sizeof(st));
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					snprintf(r, 5, "\\x%02x", (unsigned char) *p);
					r += 4;
					*width += 4;
				} else {
					(*width)++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					snprintf(r, 5, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					*width += 4;
				}
			} else {
				int cells = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* a negative result must not wrap the counter */
				if (cells > 0)
					*width += cells;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			*width += 4;
		} else {
			*r++ = *p++;
			(*width)++;
		}
#endif
	}

	*r = '\0';
	return buf;
}
249
/*
 * Copy @s to @buf and replace broken multibyte sequences with \x?? hex
 * sequence. A backslash that starts a literal "\x" is encoded too; all
 * other valid characters (including control chars) are copied as-is.
 * The @width returns number of cells.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (in which
 * case @width is left untouched).
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r = buf;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	*width = 0;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
#ifdef HAVE_WIDECHAR
			/* restart with a clean conversion state */
			memset(&st, 0, sizeof(st));
#endif
			len = 1;
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				snprintf(r, 5, "\\x%02x", (unsigned char) *p);
				r += 4;
				*width += 4;
			} else {
				(*width)++;
				*r++ = *p;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			/* encode the backslash so the output stays unambiguous */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			*width += 4;
		} else {
			memcpy(r, p, len);
			r += len;
#ifdef HAVE_WIDECHAR
			{
				/*
				 * wcwidth() is negative for non-printable chars
				 * (which are copied verbatim here); adding it
				 * would wrap the unsigned counter.
				 */
				int cells = wcwidth(wc);

				if (cells > 0)
					*width += cells;
			}
#else
			/* without wide-char support every byte counts as one cell */
			(*width)++;
#endif
		}
		p += len;
	}

	*r = '\0';
	return buf;
}
312
/*
 * Buffer size for an encoded copy of a string that is @bytes long:
 * four output bytes per input byte plus one byte for the terminator.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t per_byte = 4;

	return 1 + per_byte * bytes;
}
317
/*
 * Returns allocated string where all control and non-printable chars are
 * replaced with \x?? hex sequence; @width returns number of cells.
 *
 * Returns NULL for NULL or empty @s, or when the allocation or the
 * encoding fails. The result is malloc()ed.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *out, *encoded;
	size_t len;

	if (!s)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	out = malloc(mbs_safe_encode_size(len));
	if (!out)
		return NULL;

	encoded = mbs_safe_encode_to_buffer(s, width, out, NULL);
	if (!encoded)
		free(out);	/* encoder refused the input, don't leak */

	return encoded;
}
336
/*
 * Returns allocated string where all broken widechars chars are
 * replaced with \x?? hex sequence; @width returns number of cells.
 *
 * Returns NULL for NULL or empty @s, or when the allocation or the
 * encoding fails. The result is malloc()ed.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	const size_t len = s ? strlen(s) : 0;
	char *out;

	if (len == 0)
		return NULL;

	out = malloc(mbs_safe_encode_size(len));
	if (out && !mbs_invalid_encode_to_buffer(s, width, out)) {
		/* encoder refused the input, don't leak */
		free(out);
		out = NULL;
	}

	return out;
}
355
356 #ifdef HAVE_WIDECHAR
357
/* Replace every non-printable wide char in @wchars (in place) with
 * U+FFFD. Returns true if at least one char has been replaced. */

static bool
wc_ensure_printable (wchar_t *wchars)
{
	bool changed = false;

	for (wchar_t *cur = wchars; *cur != L'\0'; cur++) {
		if (iswprint((wint_t) *cur))
			continue;

		*cur = 0xFFFD;	/* L'\uFFFD' (replacement char) */
		changed = true;
	}

	return changed;
}
374
/* Cut the wide string @wc (in place) so that it fits into @width cells.
 * Chars for which wcwidth() reports -1 are replaced with U+FFFD and
 * counted as one cell.
 * Returns number of cells used. */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
	size_t used = 0;

	for (; *wc != L'\0'; wc++) {
		int w = wcwidth(*wc);

		if (w == -1) {		/* non printable */
			*wc = 0xFFFD;	/* L'\uFFFD' (replacement char) */
			w = 1;
		}
		if (used + w > width)
			break;		/* this char does not fit anymore */

		used += w;
	}

	*wc = L'\0';
	return used;
}
401
/* Sum of wcwidth() over at most @n leading chars of @s (stops at the
 * terminator). Returns -1 if a char is not printable or if the sum
 * would not fit into an int. */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
	int total = 0;

	for (; n > 0 && *s != L'\0'; n--, s++) {
		int w = wcwidth(*s);

		if (w == -1)			/* non printable */
			return -1;
		if (total > INT_MAX - w)	/* overflow */
			return -1;

		total += w;
	}

	return total;
}
419 #endif /* HAVE_WIDECHAR */
420
/* Truncate multi-byte string to @width and returns number of
 * bytes of the new string @str, and in @width returns number
 * of cells.
 *
 * @str is modified in place and has to be a NUL-terminated string.
 *
 * With wide-char support the string is converted to wide chars, cut by
 * wc_truncate() and converted back. If @str is not a valid multibyte
 * string, or if memory cannot be allocated, @str and @width are left
 * untouched and the original length is returned. If the conversion back
 * fails, (size_t) -1 is returned.
 *
 * Without wide-char support one byte is one cell.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	size_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	size_t nchars = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (nchars == (size_t) -1)
		goto done;		/* invalid multibyte string */

	if (nchars == 0) {
		*width = 0;		/* empty string uses no cells */
		goto done;
	}

	/* calloc(n, size) checks the multiplication for overflow */
	wcs = calloc(nchars + 1, sizeof(wchar_t));
	if (!wcs)
		goto done;

	if (!mbstowcs(wcs, str, nchars))
		goto done;
	*width = wc_truncate(wcs, *width);
	/* never write more than the original string occupied */
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	if (*width < bytes)
		bytes = *width;
	/* one byte is one cell; report what is really used even if the
	 * string is shorter than the requested width */
	*width = bytes;
#endif
	if (bytes != (size_t) -1)
		str[bytes] = '\0';
	return bytes;
}
454
/* Fill DEST with up to N_SPACES copies of PADCHAR, but never write
   a padding character at or beyond DEST_END. The output is always
   NUL terminated (the NUL may be stored at DEST_END itself).
   Returns a pointer to that terminating NUL. */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
	while (n_spaces > 0 && dest < dest_end) {
		*dest++ = padchar;
		n_spaces--;
	}

	*dest = '\0';
	return dest;
}
468
469 size_t
470 mbsalign (const char *src, char *dest, size_t dest_size,
471 size_t *width, mbs_align_t align, int flags)
472 {
473 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
474 }
475
476 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
477 characters; write the result into the DEST_SIZE-byte buffer, DEST.
478 ALIGNMENT specifies whether to left- or right-justify or to center.
479 If SRC requires more than *WIDTH columns, truncate it to fit.
480 When centering, the number of trailing spaces may be one less than the
481 number of leading spaces. The FLAGS parameter is unused at present.
482 Return the length in bytes required for the final result, not counting
483 the trailing NUL. A return value of DEST_SIZE or larger means there
484 wasn't enough space. DEST will be NUL terminated in any case.
485 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
486 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
487 Update *WIDTH to indicate how many columns were used before padding. */
488
489 size_t
490 mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
491 size_t *width, mbs_align_t align,
492 #ifdef HAVE_WIDECHAR
493 int flags,
494 #else
495 int flags __attribute__((__unused__)),
496 #endif
497 int padchar)
498 {
499 size_t ret = -1;
500 size_t src_size = strlen (src) + 1;
501 char *newstr = NULL;
502 wchar_t *str_wc = NULL;
503 const char *str_to_print = src;
504 size_t n_cols = src_size - 1;
505 size_t n_used_bytes = n_cols; /* Not including NUL */
506 size_t n_spaces = 0, space_left;
507
508 #ifdef HAVE_WIDECHAR
509 bool conversion = false;
510 bool wc_enabled = false;
511
512 /* In multi-byte locales convert to wide characters
513 to allow easy truncation. Also determine number
514 of screen columns used. */
515 if (MB_CUR_MAX > 1)
516 {
517 size_t src_chars = mbstowcs (NULL, src, 0);
518 if (src_chars == (size_t) -1)
519 {
520 if (flags & MBA_UNIBYTE_FALLBACK)
521 goto mbsalign_unibyte;
522 else
523 goto mbsalign_cleanup;
524 }
525 src_chars += 1; /* make space for NUL */
526 str_wc = malloc (src_chars * sizeof (wchar_t));
527 if (str_wc == NULL)
528 {
529 if (flags & MBA_UNIBYTE_FALLBACK)
530 goto mbsalign_unibyte;
531 else
532 goto mbsalign_cleanup;
533 }
534 if (mbstowcs (str_wc, src, src_chars) != 0)
535 {
536 str_wc[src_chars - 1] = L'\0';
537 wc_enabled = true;
538 conversion = wc_ensure_printable (str_wc);
539 n_cols = rpl_wcswidth (str_wc, src_chars);
540 }
541 }
542
543 /* If we transformed or need to truncate the source string
544 then create a modified copy of it. */
545 if (wc_enabled && (conversion || (n_cols > *width)))
546 {
547 if (conversion)
548 {
549 /* May have increased the size by converting
550 \t to \uFFFD for example. */
551 src_size = wcstombs(NULL, str_wc, 0) + 1;
552 }
553 newstr = malloc (src_size);
554 if (newstr == NULL)
555 {
556 if (flags & MBA_UNIBYTE_FALLBACK)
557 goto mbsalign_unibyte;
558 else
559 goto mbsalign_cleanup;
560 }
561 str_to_print = newstr;
562 n_cols = wc_truncate (str_wc, *width);
563 n_used_bytes = wcstombs (newstr, str_wc, src_size);
564 }
565
566 mbsalign_unibyte:
567 #endif
568
569 if (n_cols > *width) /* Unibyte truncation required. */
570 {
571 n_cols = *width;
572 n_used_bytes = n_cols;
573 }
574
575 if (*width > n_cols) /* Padding required. */
576 n_spaces = *width - n_cols;
577
578 /* indicate to caller how many cells needed (not including padding). */
579 *width = n_cols;
580
581 /* indicate to caller how many bytes needed (not including NUL). */
582 ret = n_used_bytes + (n_spaces * 1);
583
584 /* Write as much NUL terminated output to DEST as possible. */
585 if (dest_size != 0)
586 {
587 char *dest_end = dest + dest_size - 1;
588 size_t start_spaces;
589 size_t end_spaces;
590
591 switch (align)
592 {
593 case MBS_ALIGN_CENTER:
594 start_spaces = n_spaces / 2 + n_spaces % 2;
595 end_spaces = n_spaces / 2;
596 break;
597 case MBS_ALIGN_LEFT:
598 start_spaces = 0;
599 end_spaces = n_spaces;
600 break;
601 case MBS_ALIGN_RIGHT:
602 start_spaces = n_spaces;
603 end_spaces = 0;
604 break;
605 default:
606 abort();
607 }
608
609 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
610 space_left = dest_end - dest;
611 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
612 mbs_align_pad (dest, dest_end, end_spaces, padchar);
613 }
614 #ifdef HAVE_WIDECHAR
615 mbsalign_cleanup:
616 #endif
617 free (str_wc);
618 free (newstr);
619
620 return ret;
621 }