]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
libmount: fix compiler warning [-Wsometimes-uninitialized]
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
104b92f8
PB
1/* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
36c7f785
PB
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
104b92f8
PB
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17/* Written by Pádraig Brady. */
18
19#include <config.h>
20
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <stdbool.h>
25#include <limits.h>
1b1f66e4 26#include <ctype.h>
104b92f8
PB
27
28#include "c.h"
29#include "mbsalign.h"
ab753d8f 30#include "strutils.h"
104b92f8
PB
31#include "widechar.h"
32
104b92f8
PB
33/* Replace non printable chars.
34 Note \t and \n etc. are non printable.
35 Return 1 if replacement made, 0 otherwise. */
36
1b1f66e4
KZ
/*
 * Counts number of cells in multibyte string. For all control and
 * non-printable chars is the result width enlarged to store \x?? hex
 * sequence. See mbs_safe_encode().
 *
 * At most @bufsz bytes of @buf are examined; scanning also stops at the
 * first NUL byte. A NULL @buf or a zero @bufsz yields zero cells and
 * zero bytes.
 *
 * Returns: number of cells, @sz (if not NULL) returns number of bytes
 * the encoded string would occupy.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf, *last;
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* nothing to measure; do not touch even the first byte */
	if (!p || !bufsz) {
		if (sz)
			*sz = 0;
		return 0;
	}
	last = p + (bufsz - 1);

	/* check the boundary first, *p must not be read behind @last */
	while (p <= last && *p) {
		if ((p < last && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			/* never let mbrtowc() look behind the @bufsz limit */
			size_t left = (size_t) (last - p) + 1;
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* state is unusable after a failure, start
				 * from the initial state on the next byte */
				memset(&st, 0, sizeof(st));
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				width += wcwidth(wc);	/* number of cells */
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
101
0c33fcbf
KZ
/*
 * Returns number of cells of the whole NUL-terminated string @s as
 * counted by mbs_safe_nwidth(); NULL or empty string has zero width.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
108
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. A backslash that starts a literal "\x" in @s is
 * hex-encoded too. The @width returns number of cells.
 *
 * Bytes listed in @safechars are copied verbatim; they are not encoded
 * and they do not contribute to @width.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty, or @buf is NULL. In the
 * NULL case neither @buf nor @width is written.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;
	*width = 0;

	while (p && *p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			*width += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* state is unusable after a failure, start
				 * from the initial state on the next byte */
				memset(&st, 0, sizeof(st));
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					*width += 4;
				} else {
					(*width)++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;
				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					*width += 4;
				}
			} else {
				memcpy(r, p, len);
				r += len;
				*width += wcwidth(wc);
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			*width += 4;
		} else {
			*r++ = *p++;
			(*width)++;
		}
#endif
	}

	*r = '\0';
	return buf;
}
198
365ed9f6
KZ
/*
 * Copy @s to @buf and replace broken multibyte sequences with \x?? hex
 * sequence. A backslash that starts a literal "\x" in @s is hex-encoded
 * too. Valid characters (including control chars) are copied verbatim.
 * The @width returns number of cells; characters without a positive
 * cell width add nothing to it.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty, or @buf is NULL. In the
 * NULL case neither @buf nor @width is written.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;
	*width = 0;

	while (p && *p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
			len = 1;
#ifdef HAVE_WIDECHAR
			/* state is unusable after a failure, start from
			 * the initial state on the next byte */
			memset(&st, 0, sizeof(st));
#endif
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				sprintf(r, "\\x%02x", (unsigned char) *p);
				r += 4;
				*width += 4;
			} else {
				(*width)++;
				*r++ = *p;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			*width += 4;
		} else {
			memcpy(r, p, len);
			r += len;
#ifdef HAVE_WIDECHAR
			{
				/* wcwidth() is -1 for non-printable chars;
				 * never add that to the unsigned counter */
				int cells = wcwidth(wc);

				if (cells > 0)
					*width += cells;
			}
#else
			(*width)++;	/* one byte, one cell */
#endif
		}
		p += len;
	}

	*r = '\0';
	return buf;
}
262
c426f70f
KZ
/*
 * Returns size of the buffer necessary to encode @bytes bytes in the
 * worst case, when every byte is replaced by a four-byte \x?? sequence,
 * plus one byte for the terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t hexseq_len = 4;	/* "\x??" */

	return 1 + hexseq_len * bytes;
}
267
/*
 * Returns allocated string where all control and non-printable chars are
 * replaced with \x?? hex sequence; @width returns number of cells.
 *
 * Returns NULL if @s is NULL or empty, or if the allocation or the
 * encoding fails. The caller has to free() the result.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *mem, *encoded;

	if (!s || !*s)
		return NULL;

	mem = malloc(mbs_safe_encode_size(strlen(s)));
	if (!mem)
		return NULL;

	encoded = mbs_safe_encode_to_buffer(s, width, mem, NULL);
	if (!encoded)
		free(mem);
	return encoded;
}
286
365ed9f6
KZ
/*
 * Returns allocated string where all broken multibyte sequences are
 * replaced with \x?? hex sequence; @width returns number of cells.
 *
 * Returns NULL if @s is NULL or empty, or if the allocation or the
 * encoding fails. The caller has to free() the result.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	char *mem, *encoded;

	if (!s || !*s)
		return NULL;

	mem = malloc(mbs_safe_encode_size(strlen(s)));
	if (!mem)
		return NULL;

	encoded = mbs_invalid_encode_to_buffer(s, width, mem);
	if (!encoded)
		free(mem);
	return encoded;
}
305
4a423fb9
KZ
306#ifdef HAVE_WIDECHAR
307
104b92f8
PB
/* Replace, in place, every non printable wide character of the
   NUL terminated string WCHARS with U+FFFD (replacement char).
   Return true if at least one replacement was made, false otherwise.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }
  return changed;
}
324
/* Truncate the wide string WC, in place, so that it occupies at most
   WIDTH cells.  Characters for which wcwidth reports -1 are replaced
   with U+FFFD and counted as one cell.
   Returns number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc != L'\0'; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }
      if (used + w > width)
        break; /* this character would not fit */

      used += w;
    }
  *wc = L'\0';
  return used;
}
351
/* FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */

/* Sum the cell widths of at most N leading wide characters of S,
   stopping at the terminating NUL.  Return -1 if a character is
   non printable or if the sum would not fit into an int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      if (w == -1) /* non printable */
        return -1;
      if (total > INT_MAX - w) /* overflow */
        return -1;
      total += w;
    }

  return total;
}
4a423fb9 372#endif /* HAVE_WIDECHAR */
104b92f8 373
5f94ca33
KZ
/* Truncate multi-byte string @str (in place) to @width cells and
 * returns number of bytes of the new string @str, and in @width
 * returns number of cells really used by the new string.
 *
 * If @str cannot be converted to wide characters (or memory is not
 * available) the string and @width are left unmodified and the original
 * length in bytes is returned.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	ssize_t sz = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (sz == (ssize_t) -1)
		goto done;

	/* zeroed, so an empty @str gives an empty wide string */
	wcs = calloc(sz + 1, sizeof(wchar_t));
	if (!wcs)
		goto done;

	if (mbstowcs(wcs, str, sz) == (size_t) -1)
		goto done;
	*width = wc_truncate(wcs, *width);
	/* never write more than the original string occupied */
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	/* one byte is one cell */
	if (*width < (size_t) bytes)
		bytes = *width;
	else
		*width = bytes;		/* shorter than requested */
#endif
	if (bytes >= 0)
		str[bytes] = '\0';
	return bytes;
}
407
104b92f8
PB
/* Fill DEST with up to N_SPACES copies of PADCHAR, never writing a
   padding byte at or beyond DEST_END.  A terminating NUL is always
   stored right after the padding.
   A pointer to the terminating NUL is returned.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  /* FIXME: Should we pad with "figure space" (\u2007)
     if non ascii data present?  */
  size_t room = dest < dest_end ? (size_t) (dest_end - dest) : 0;
  size_t count = n_spaces < room ? n_spaces : room;

  memset (dest, padchar, count);
  dest += count;
  *dest = '\0';
  return dest;
}
423
57867795
KZ
424size_t
425mbsalign (const char *src, char *dest, size_t dest_size,
426 size_t *width, mbs_align_t align, int flags)
427{
428 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
429}
430
104b92f8
PB
431/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
432 characters; write the result into the DEST_SIZE-byte buffer, DEST.
433 ALIGNMENT specifies whether to left- or right-justify or to center.
434 If SRC requires more than *WIDTH columns, truncate it to fit.
435 When centering, the number of trailing spaces may be one less than the
436 number of leading spaces. The FLAGS parameter is unused at present.
437 Return the length in bytes required for the final result, not counting
438 the trailing NUL. A return value of DEST_SIZE or larger means there
439 wasn't enough space. DEST will be NUL terminated in any case.
440 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
441 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
442 Update *WIDTH to indicate how many columns were used before padding. */
443
444size_t
57867795 445mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
c191740c
KZ
446 size_t *width, mbs_align_t align,
447#ifdef HAVE_WIDECHAR
448 int flags,
449#else
450 int flags __attribute__((__unused__)),
451#endif
57867795 452 int padchar)
104b92f8
PB
453{
454 size_t ret = -1;
455 size_t src_size = strlen (src) + 1;
456 char *newstr = NULL;
457 wchar_t *str_wc = NULL;
458 const char *str_to_print = src;
459 size_t n_cols = src_size - 1;
460 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 461 size_t n_spaces = 0, space_left;
6426f926
KZ
462
463#ifdef HAVE_WIDECHAR
104b92f8
PB
464 bool conversion = false;
465 bool wc_enabled = false;
466
104b92f8
PB
467 /* In multi-byte locales convert to wide characters
468 to allow easy truncation. Also determine number
469 of screen columns used. */
470 if (MB_CUR_MAX > 1)
471 {
472 size_t src_chars = mbstowcs (NULL, src, 0);
473 if (src_chars == (size_t) -1)
474 {
475 if (flags & MBA_UNIBYTE_FALLBACK)
476 goto mbsalign_unibyte;
477 else
478 goto mbsalign_cleanup;
479 }
480 src_chars += 1; /* make space for NUL */
481 str_wc = malloc (src_chars * sizeof (wchar_t));
482 if (str_wc == NULL)
483 {
484 if (flags & MBA_UNIBYTE_FALLBACK)
485 goto mbsalign_unibyte;
486 else
487 goto mbsalign_cleanup;
488 }
489 if (mbstowcs (str_wc, src, src_chars) != 0)
490 {
491 str_wc[src_chars - 1] = L'\0';
492 wc_enabled = true;
493 conversion = wc_ensure_printable (str_wc);
494 n_cols = rpl_wcswidth (str_wc, src_chars);
495 }
496 }
497
498 /* If we transformed or need to truncate the source string
499 then create a modified copy of it. */
500 if (wc_enabled && (conversion || (n_cols > *width)))
501 {
502 if (conversion)
503 {
504 /* May have increased the size by converting
505 \t to \uFFFD for example. */
506 src_size = wcstombs(NULL, str_wc, 0) + 1;
507 }
508 newstr = malloc (src_size);
509 if (newstr == NULL)
510 {
511 if (flags & MBA_UNIBYTE_FALLBACK)
512 goto mbsalign_unibyte;
513 else
514 goto mbsalign_cleanup;
515 }
516 str_to_print = newstr;
517 n_cols = wc_truncate (str_wc, *width);
518 n_used_bytes = wcstombs (newstr, str_wc, src_size);
519 }
104b92f8
PB
520
521mbsalign_unibyte:
6426f926 522#endif
104b92f8
PB
523
524 if (n_cols > *width) /* Unibyte truncation required. */
525 {
526 n_cols = *width;
527 n_used_bytes = n_cols;
528 }
529
530 if (*width > n_cols) /* Padding required. */
531 n_spaces = *width - n_cols;
532
533 /* indicate to caller how many cells needed (not including padding). */
534 *width = n_cols;
535
536 /* indicate to caller how many bytes needed (not including NUL). */
537 ret = n_used_bytes + (n_spaces * 1);
538
539 /* Write as much NUL terminated output to DEST as possible. */
540 if (dest_size != 0)
541 {
542 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
543 size_t start_spaces;
544 size_t end_spaces;
104b92f8
PB
545
546 switch (align)
547 {
548 case MBS_ALIGN_CENTER:
549 start_spaces = n_spaces / 2 + n_spaces % 2;
550 end_spaces = n_spaces / 2;
551 break;
552 case MBS_ALIGN_LEFT:
553 start_spaces = 0;
554 end_spaces = n_spaces;
555 break;
556 case MBS_ALIGN_RIGHT:
557 start_spaces = n_spaces;
558 end_spaces = 0;
559 break;
ce7b11a9
SK
560 default:
561 abort();
104b92f8
PB
562 }
563
57867795 564 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
3acc206d 565 space_left = dest_end - dest;
104b92f8 566 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
57867795 567 mbs_align_pad (dest, dest_end, end_spaces, padchar);
104b92f8 568 }
6426f926 569#ifdef HAVE_WIDECHAR
104b92f8 570mbsalign_cleanup:
6426f926 571#endif
104b92f8
PB
572 free (str_wc);
573 free (newstr);
574
575 return ret;
576}