]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
lib/mbsalign: add function to calculate width
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
104b92f8
PB
1/* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
36c7f785
PB
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
104b92f8
PB
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17/* Written by Pádraig Brady. */
18
104b92f8
PB
19#include <stdlib.h>
20#include <string.h>
21#include <stdio.h>
22#include <stdbool.h>
23#include <limits.h>
1b1f66e4 24#include <ctype.h>
104b92f8
PB
25
26#include "c.h"
27#include "mbsalign.h"
ab753d8f 28#include "strutils.h"
104b92f8
PB
29#include "widechar.h"
30
0957fdca
KZ
/*
 * Counts the number of cells used by the multibyte string @buf. At most
 * @bufsz bytes are examined; counting also stops at the first NUL byte.
 *
 * Control and non-printable chars are ignored (they add no cells), and a
 * terminal colour sequence of the form "\e[x;ym" is skipped as a whole.
 *
 * Returns: number of cells, 0 if @buf is NULL or @bufsz is zero.
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
	const char *p = buf, *end;
	size_t width = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!buf || !bufsz)
		return 0;

	end = buf + bufsz;	/* one past the last byte we are allowed to read */

	/* the bound is tested first, so @buf need not be NUL-terminated */
	while (p < end && *p) {
		if (iscntrl((unsigned char) *p)) {
			int is_esc = (*p == '\033');

			p++;

			/* try detect "\e[x;ym" and skip on success; only ESC
			 * may introduce the sequence */
			if (is_esc && p < end && *p == '[') {
				const char *e = p;

				while (e < end && *e && *e != 'm')
					e++;
				if (e < end && *e == 'm')
					p = e + 1;
			}
			continue;
		}
#ifdef HAVE_WIDECHAR
		{
			wchar_t wc;
			size_t left = (size_t) (end - p);
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* Invalid or incomplete sequence: @wc was not
				 * written and the conversion state is unusable,
				 * so reset it and step over a single byte. */
				memset(&st, 0, sizeof(st));
				len = 1;
			} else if (iswprint(wc)) {
				int x = wcwidth(wc);

				if (x > 0)
					width += x;
			}
			p += len;
		}
#else
		if (isprint((unsigned char) *p))
			width++;
		p++;
#endif
	}

	return width;
}
85
/*
 * Counts number of cells in the NUL-terminated multibyte string @s,
 * see mbs_nwidth(). NULL or empty string gives 0.
 */
size_t mbs_width(const char *s)
{
	return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
104b92f8 92
1b1f66e4
KZ
/*
 * Counts number of cells in multibyte string. For all control and
 * non-printable chars is the result width enlarged to store \x?? hex
 * sequence. See mbs_safe_encode().
 *
 * At most @bufsz bytes of @buf are examined; counting also stops at the
 * first NUL byte.
 *
 * Returns: number of cells, @sz (optional, may be NULL) returns number of
 * bytes. Both are 0 if @buf is NULL or @bufsz is zero.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf, *end;
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!buf || !bufsz) {
		if (sz)
			*sz = 0;
		return 0;
	}

	end = buf + bufsz;	/* one past the last byte we are allowed to read */

	/* the bound is tested first, so @buf need not be NUL-terminated */
	while (p < end && *p) {
		if ((p + 1 < end && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t left = (size_t) (end - p);
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* broken sequence; the conversion state is
				 * unusable now, restart from a clean one */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int x = wcwidth(wc);	/* number of cells */

				if (x > 0)		/* never add a negative width */
					width += x;
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
157
0c33fcbf
KZ
/*
 * Counts number of cells needed for the safely encoded form of the
 * NUL-terminated string @s, see mbs_safe_nwidth(). NULL or empty string
 * gives 0.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
164
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. The @width (optional, may be NULL) returns number of
 * cells. The @safechars are not encoded; they are copied as-is and do not
 * add to @width.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (@width is
 * left untouched in that case).
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			/* "\xNN" + NUL is exactly 5 bytes */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* the conversion state is unusable after an
				 * error, restart from a clean one */
				memset(&st, 0, sizeof(st));
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					snprintf(r, 5, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					snprintf(r, 5, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int x = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				if (x > 0)	/* never add a negative width */
					cells += x;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
254
365ed9f6
KZ
/*
 * Copy @s to @buf and replace broken sequences to \x?? hex sequence. A
 * backslash that is followed by 'x' is hex encoded too. The @width
 * (optional, may be NULL) returns number of cells.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (@width is
 * left untouched in that case).
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc = 0;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
#ifdef HAVE_WIDECHAR
			/* the conversion state is unusable after an error,
			 * restart from a clean one */
			memset(&st, 0, sizeof(st));
#endif
			len = 1;
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				/* "\xNN" + NUL is exactly 5 bytes */
				snprintf(r, 5, "\\x%02x", (unsigned char) *p);
				r += 4;
				cells += 4;
			} else {
				cells++;
				*r++ = *p;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		} else {
#ifdef HAVE_WIDECHAR
			/* wcwidth() is negative for valid but non-printable
			 * chars (e.g. control chars); those add no cells */
			int x = wcwidth(wc);
#else
			/* NOTE(review): assumes the non-widechar fallback of
			 * wcwidth() in widechar.h also counts one cell per
			 * byte -- confirm */
			int x = 1;
#endif
			memcpy(r, p, len);
			r += len;
			if (x > 0)
				cells += x;
		}
		p += len;
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
318
c426f70f
KZ
/*
 * Returns the buffer size needed to safely encode @bytes input bytes:
 * every byte may grow to a 4-byte \x?? sequence, plus the terminating NUL.
 *
 * If the size does not fit into size_t the maximal size_t value is
 * returned, so that an allocation of that size fails rather than
 * silently being too small.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t max = (size_t) -1;

	if (bytes > (max - 1) / 4)
		return max;
	return (bytes * 4) + 1;
}
323
/*
 * Allocates and returns a copy of @s where all control and non-printable
 * chars are replaced with \x?? hex sequence; @width is passed on to
 * mbs_safe_encode_to_buffer().
 *
 * Returns: newly allocated string (to be freed by the caller), or NULL if
 * @s is NULL or empty, or on allocation failure.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *out, *res;

	if (!s || !*s)
		return NULL;

	out = malloc(mbs_safe_encode_size(strlen(s)));
	if (!out)
		return NULL;

	res = mbs_safe_encode_to_buffer(s, width, out, NULL);
	if (!res)
		free(out);
	return res;
}
342
365ed9f6
KZ
/*
 * Allocates and returns a copy of @s where all broken multibyte sequences
 * are replaced with \x?? hex sequence; @width is passed on to
 * mbs_invalid_encode_to_buffer().
 *
 * Returns: newly allocated string (to be freed by the caller), or NULL if
 * @s is NULL or empty, or on allocation failure.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	char *out, *res;

	if (!s || !*s)
		return NULL;

	out = malloc(mbs_safe_encode_size(strlen(s)));
	if (!out)
		return NULL;

	res = mbs_invalid_encode_to_buffer(s, width, out);
	if (!res)
		free(out);
	return res;
}
361
4a423fb9
KZ
362#ifdef HAVE_WIDECHAR
363
104b92f8
PB
/* Overwrite every non-printable wide character in WCHARS with the
   replacement character U+FFFD, in place.
   Return true if at least one character was replaced.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool replaced = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      replaced = true;
    }

  return replaced;
}
380
/* Cut the wide string WC, in place, so that it needs at most WIDTH cells.
   A character for which wcwidth() reports -1 is replaced by U+FFFD and
   counted as one cell.
   Returns number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t cells = 0;

  for (; *wc; wc++)
    {
      int next_cells = wcwidth (*wc);

      if (next_cells == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          next_cells = 1;
        }

      /* Stop in front of the first character that would not fit.  */
      if (cells + next_cells > width)
        break;

      cells += next_cells;
    }

  *wc = L'\0';
  return cells;
}
407
104b92f8
PB
/* Sum the cell widths of at most N leading wide characters of S,
   stopping early at the terminating NUL.
   Return -1 if a character is not printable (wcwidth() gives -1) or if
   the sum would not fit into an int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;

  for (; n > 0 && *s != L'\0'; n--, s++)
    {
      int w = wcwidth (*s);

      if (w == -1) /* non printable */
        return -1;
      if (total > INT_MAX - w) /* overflow */
        return -1;

      total += w;
    }

  return total;
}
4a423fb9 425#endif /* HAVE_WIDECHAR */
104b92f8 426
5f94ca33
KZ
/* Truncate multi-byte string @str, in place, to at most @width cells.
 *
 * Returns number of bytes of the new string @str, and in @width returns
 * number of cells the new string uses.
 *
 * With widechar support: if @str cannot be converted to wide chars (or the
 * temporary buffer cannot be allocated) the string and @width are left
 * untouched and the original length is returned; if the conversion back
 * fails, (size_t) -1 is returned.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	size_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	size_t nchars = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (nchars == (size_t) -1)
		goto done;

	/* zeroed, so the wide string is always terminated */
	wcs = calloc(nchars + 1, sizeof(*wcs));
	if (!wcs)
		goto done;

	if (!mbstowcs(wcs, str, nchars))
		goto done;
	*width = wc_truncate(wcs, *width);
	/* never write more than the original string occupied */
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	/* one byte is one cell */
	if (*width < bytes)
		bytes = *width;
	*width = bytes;
#endif
	if (bytes != (size_t) -1)
		str[bytes] = '\0';
	return bytes;
}
460
104b92f8
PB
/* Store up to N_SPACES copies of PADCHAR at DEST, never writing a pad
   character at or beyond DEST_END.  The output is always NUL terminated
   (the NUL may be stored at DEST_END itself).
   Return a pointer to the terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  while (n_spaces > 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }

  *dest = '\0';
  return dest;
}
474
57867795
KZ
475size_t
476mbsalign (const char *src, char *dest, size_t dest_size,
477 size_t *width, mbs_align_t align, int flags)
478{
479 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
480}
481
104b92f8
PB
482/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
483 characters; write the result into the DEST_SIZE-byte buffer, DEST.
484 ALIGNMENT specifies whether to left- or right-justify or to center.
485 If SRC requires more than *WIDTH columns, truncate it to fit.
486 When centering, the number of trailing spaces may be one less than the
487 number of leading spaces. The FLAGS parameter is unused at present.
488 Return the length in bytes required for the final result, not counting
489 the trailing NUL. A return value of DEST_SIZE or larger means there
490 wasn't enough space. DEST will be NUL terminated in any case.
491 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
492 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
493 Update *WIDTH to indicate how many columns were used before padding. */
494
495size_t
57867795 496mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
c191740c
KZ
497 size_t *width, mbs_align_t align,
498#ifdef HAVE_WIDECHAR
499 int flags,
500#else
501 int flags __attribute__((__unused__)),
502#endif
57867795 503 int padchar)
104b92f8
PB
504{
505 size_t ret = -1;
506 size_t src_size = strlen (src) + 1;
507 char *newstr = NULL;
508 wchar_t *str_wc = NULL;
509 const char *str_to_print = src;
510 size_t n_cols = src_size - 1;
511 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 512 size_t n_spaces = 0, space_left;
6426f926
KZ
513
514#ifdef HAVE_WIDECHAR
104b92f8
PB
515 bool conversion = false;
516 bool wc_enabled = false;
517
104b92f8
PB
518 /* In multi-byte locales convert to wide characters
519 to allow easy truncation. Also determine number
520 of screen columns used. */
521 if (MB_CUR_MAX > 1)
522 {
523 size_t src_chars = mbstowcs (NULL, src, 0);
524 if (src_chars == (size_t) -1)
525 {
526 if (flags & MBA_UNIBYTE_FALLBACK)
527 goto mbsalign_unibyte;
528 else
529 goto mbsalign_cleanup;
530 }
531 src_chars += 1; /* make space for NUL */
532 str_wc = malloc (src_chars * sizeof (wchar_t));
533 if (str_wc == NULL)
534 {
535 if (flags & MBA_UNIBYTE_FALLBACK)
536 goto mbsalign_unibyte;
537 else
538 goto mbsalign_cleanup;
539 }
540 if (mbstowcs (str_wc, src, src_chars) != 0)
541 {
542 str_wc[src_chars - 1] = L'\0';
543 wc_enabled = true;
544 conversion = wc_ensure_printable (str_wc);
545 n_cols = rpl_wcswidth (str_wc, src_chars);
546 }
547 }
548
549 /* If we transformed or need to truncate the source string
550 then create a modified copy of it. */
551 if (wc_enabled && (conversion || (n_cols > *width)))
552 {
553 if (conversion)
554 {
555 /* May have increased the size by converting
556 \t to \uFFFD for example. */
557 src_size = wcstombs(NULL, str_wc, 0) + 1;
558 }
559 newstr = malloc (src_size);
560 if (newstr == NULL)
561 {
562 if (flags & MBA_UNIBYTE_FALLBACK)
563 goto mbsalign_unibyte;
564 else
565 goto mbsalign_cleanup;
566 }
567 str_to_print = newstr;
568 n_cols = wc_truncate (str_wc, *width);
569 n_used_bytes = wcstombs (newstr, str_wc, src_size);
570 }
104b92f8
PB
571
572mbsalign_unibyte:
6426f926 573#endif
104b92f8
PB
574
575 if (n_cols > *width) /* Unibyte truncation required. */
576 {
577 n_cols = *width;
578 n_used_bytes = n_cols;
579 }
580
581 if (*width > n_cols) /* Padding required. */
582 n_spaces = *width - n_cols;
583
584 /* indicate to caller how many cells needed (not including padding). */
585 *width = n_cols;
586
587 /* indicate to caller how many bytes needed (not including NUL). */
588 ret = n_used_bytes + (n_spaces * 1);
589
590 /* Write as much NUL terminated output to DEST as possible. */
591 if (dest_size != 0)
592 {
593 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
594 size_t start_spaces;
595 size_t end_spaces;
104b92f8
PB
596
597 switch (align)
598 {
599 case MBS_ALIGN_CENTER:
600 start_spaces = n_spaces / 2 + n_spaces % 2;
601 end_spaces = n_spaces / 2;
602 break;
603 case MBS_ALIGN_LEFT:
604 start_spaces = 0;
605 end_spaces = n_spaces;
606 break;
607 case MBS_ALIGN_RIGHT:
608 start_spaces = n_spaces;
609 end_spaces = 0;
610 break;
ce7b11a9
SK
611 default:
612 abort();
104b92f8
PB
613 }
614
57867795 615 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
3acc206d 616 space_left = dest_end - dest;
104b92f8 617 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
57867795 618 mbs_align_pad (dest, dest_end, end_spaces, padchar);
104b92f8 619 }
6426f926 620#ifdef HAVE_WIDECHAR
104b92f8 621mbsalign_cleanup:
6426f926 622#endif
104b92f8
PB
623 free (str_wc);
624 free (newstr);
625
626 return ret;
627}