]> git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
whereis: use xstrncpy()
[thirdparty/util-linux.git] / lib / mbsalign.c
1 /* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 /* Written by Pádraig Brady. */
18
19 #include <config.h>
20
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdbool.h>
25 #include <limits.h>
26 #include <ctype.h>
27
28 #include "c.h"
29 #include "mbsalign.h"
30 #include "strutils.h"
31 #include "widechar.h"
32
/* Note on wc_ensure_printable() (defined further below): it replaces
   non-printable wide characters with U+FFFD.
   Note \t and \n etc. are non-printable.
   It returns true if a replacement was made, false otherwise. */
36
/*
 * Counts the number of screen cells the multibyte string @buf occupies
 * after safe encoding (see mbs_safe_encode()): control characters,
 * non-printable characters and a backslash that starts a literal "\x" are
 * each counted as a four-cell \x?? hex sequence.
 *
 * At most @bufsz bytes are examined and no byte beyond @bufsz is read;
 * scanning also stops at the first NUL byte. A NULL @buf or a zero @bufsz
 * gives zero cells and zero bytes.
 *
 * Returns: number of cells; if @sz is not NULL it receives the number of
 * bytes of the encoded string (without the terminating NUL).
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf;
	size_t left = buf ? bufsz : 0;	/* bytes that may still be read */
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* check the bound first, *p must not be read once @left is zero */
	while (left && *p) {
		size_t len = 1;		/* input bytes consumed by this step */

		if ((left > 1 && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			/* never let mbrtowc() look behind the end of @buf */
			size_t n = left < (size_t) MB_CUR_MAX ?
					left : (size_t) MB_CUR_MAX;

			len = mbrtowc(&wc, p, n, &st);
			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * The state is undefined after an error and
				 * holds a partial character after -2; start
				 * from scratch for the next byte.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int cells = wcwidth(wc);

				/* don't let a negative result wrap the counter */
				if (cells > 0)
					width += cells;	/* number of cells */
				bytes += len;		/* number of bytes */
			}
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
		} else {
			width++, bytes++;
		}
#endif
		p += len;
		left -= len;
	}

	if (sz)
		*sz = bytes;
	return width;
}
101
/*
 * Returns the number of cells needed for the whole NUL-terminated string @s
 * as counted by mbs_safe_nwidth(); zero for a NULL or empty string.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
108
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequences. A backslash followed by 'x' is hex-encoded too.
 *
 * Characters listed in @safechars (may be NULL) are copied verbatim and do
 * not contribute to the width.
 *
 * @width (may be NULL) returns the number of cells of the result; it is
 * left untouched when the function returns NULL.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!s || !*s || !buf)
		return NULL;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			/* 4 chars + NUL, the NUL is overwritten by the next step */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * The conversion state is not usable after a
				 * failed or incomplete conversion.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					snprintf(r, 5, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					*r++ = *p;
					cells++;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					snprintf(r, 5, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int w = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* don't let a negative result wrap the counter */
				if (w > 0)
					cells += w;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
198
/*
 * Copy @s to @buf and replace broken multibyte sequences with \x?? hex
 * sequences. A backslash followed by 'x' is hex-encoded too; everything
 * else (including control characters) is copied verbatim.
 *
 * @width (may be NULL) returns the number of cells of the result; it is
 * left untouched when the function returns NULL.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!s || !*s || !buf)
		return NULL;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
#ifdef HAVE_WIDECHAR
			/*
			 * The conversion state is not usable after a failed
			 * or incomplete conversion.
			 */
			memset(&st, 0, sizeof(st));
#endif
			len = 1;
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				snprintf(r, 5, "\\x%02x", (unsigned char) *p);
				r += 4;
				cells += 4;
			} else {
				*r++ = *p;
				cells++;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			/* keep a literal "\x" distinguishable from our encoding */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		} else {
			memcpy(r, p, len);
			r += len;
#ifdef HAVE_WIDECHAR
			{
				int w = wcwidth(wc);

				/* wcwidth() is negative for non-printable chars */
				if (w > 0)
					cells += w;
			}
#else
			cells++;	/* one byte, one cell */
#endif
		}
		p += len;
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
262
/*
 * Returns the buffer size needed to encode @bytes input bytes: four output
 * bytes (\x??) for every input byte plus one for the terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t hexlen = 4;	/* strlen("\\x??") */

	return hexlen * bytes + 1;
}
267
/*
 * Returns a malloc()ed copy of @s where all control and non-printable chars
 * are replaced with \x?? hex sequences (see mbs_safe_encode_to_buffer()).
 *
 * Returns NULL if @s is NULL or empty, if the allocation fails or if the
 * encoding fails.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *out, *res;

	if (!s || !*s)
		return NULL;

	out = malloc(mbs_safe_encode_size(strlen(s)));
	if (!out)
		return NULL;

	res = mbs_safe_encode_to_buffer(s, width, out, NULL);
	if (!res)
		free(out);
	return res;
}
286
/*
 * Returns a malloc()ed copy of @s where all broken multibyte sequences are
 * replaced with \x?? hex sequences (see mbs_invalid_encode_to_buffer()).
 *
 * Returns NULL if @s is NULL or empty, if the allocation fails or if the
 * encoding fails.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	char *out, *res;

	if (!s || !*s)
		return NULL;

	out = malloc(mbs_safe_encode_size(strlen(s)));
	if (!out)
		return NULL;

	res = mbs_invalid_encode_to_buffer(s, width, out);
	if (!res)
		free(out);
	return res;
}
305
306 #ifdef HAVE_WIDECHAR
307
/* Overwrite every non-printable wide character of the NUL-terminated
 * string WCHARS with U+FFFD (replacement character).
 * Returns true if at least one character was replaced. */
static bool wc_ensure_printable(wchar_t *wchars)
{
	bool changed = false;
	wchar_t *cur;

	for (cur = wchars; *cur != L'\0'; cur++) {
		if (iswprint((wint_t) *cur))
			continue;
		*cur = 0xFFFD;		/* L'\uFFFD' (replacement char) */
		changed = true;
	}
	return changed;
}
324
/* Cut the wide string WC so that it fits into WIDTH cells; a character
 * for which wcwidth() reports -1 is replaced with U+FFFD and counted as
 * one cell.
 * Returns the number of cells used by the truncated string. */
static size_t wc_truncate(wchar_t *wc, size_t width)
{
	size_t used = 0;

	for (; *wc != L'\0'; wc++) {
		int w = wcwidth(*wc);

		if (w == -1) {		/* non printable */
			*wc = 0xFFFD;	/* L'\uFFFD' (replacement char) */
			w = 1;
		}
		if (used + w > width)
			break;		/* this character does not fit */
		used += w;
	}
	*wc = L'\0';
	return used;
}
351
/* FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS */

/* Sum the cell widths of at most N wide characters of S, stopping at the
 * terminating NUL.
 * Returns -1 if a character is non-printable (wcwidth() == -1) or if the
 * sum would not fit into an int. */
static int rpl_wcswidth(const wchar_t *s, size_t n)
{
	int total = 0;
	size_t i;

	for (i = 0; i < n && s[i] != L'\0'; i++) {
		int w = wcwidth(s[i]);

		if (w == -1)			/* non printable */
			return -1;
		if (total > INT_MAX - w)	/* overflow */
			return -1;
		total += w;
	}

	return total;
}
372 #endif /* HAVE_WIDECHAR */
373
/*
 * Truncate the multi-byte string @str in place so that it needs at most
 * @width cells.
 *
 * Returns the number of bytes of the new string @str, and in @width returns
 * the number of cells it uses (also when nothing had to be cut).
 *
 * With wide-character support: if @str cannot be converted to wide
 * characters or memory is not available, @str and @width are left unchanged
 * and the original length is returned; if the conversion back to multibyte
 * fails, (size_t) -1 is returned.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	ssize_t sz = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (sz == (ssize_t) -1)
		goto done;

	/* zeroed, so the wide string is always NUL terminated */
	wcs = calloc(sz + 1, sizeof(wchar_t));
	if (!wcs)
		goto done;

	if (!mbstowcs(wcs, str, sz)) {
		*width = 0;		/* empty string uses no cells */
		goto done;
	}
	*width = wc_truncate(wcs, *width);
	/* the result is never longer than the original string */
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	/* one byte is one cell */
	if (bytes >= 0 && *width < (size_t) bytes)
		bytes = *width;
	else if (bytes >= 0)
		*width = bytes;
#endif
	if (bytes >= 0)
		str[bytes] = '\0';
	return bytes;
}
407
/* Fill DEST with up to N_SPACES copies of PADCHAR, but never write a
   pad character at or beyond DEST_END. The output is always NUL
   terminated (the NUL may be stored at DEST_END itself).
   Returns a pointer to the terminating NUL. */

static char *
mbs_align_pad(char *dest, const char *dest_end, size_t n_spaces, int padchar)
{
	/* FIXME: Should we pad with "figure space" (\u2007)
	   if non ascii data present? */
	while (n_spaces > 0 && dest < dest_end) {
		*dest++ = padchar;
		n_spaces--;
	}
	*dest = '\0';
	return dest;
}
423
424 size_t
425 mbsalign (const char *src, char *dest, size_t dest_size,
426 size_t *width, mbs_align_t align, int flags)
427 {
428 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
429 }
430
431 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
432 characters; write the result into the DEST_SIZE-byte buffer, DEST.
433 ALIGNMENT specifies whether to left- or right-justify or to center.
434 If SRC requires more than *WIDTH columns, truncate it to fit.
435 When centering, the number of trailing spaces may be one less than the
436 number of leading spaces. The FLAGS parameter is unused at present.
437 Return the length in bytes required for the final result, not counting
438 the trailing NUL. A return value of DEST_SIZE or larger means there
439 wasn't enough space. DEST will be NUL terminated in any case.
440 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
441 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
442 Update *WIDTH to indicate how many columns were used before padding. */
443
444 size_t
445 mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
446 size_t *width, mbs_align_t align,
447 #ifdef HAVE_WIDECHAR
448 int flags,
449 #else
450 int flags __attribute__((__unused__)),
451 #endif
452 int padchar)
453 {
454 size_t ret = -1;
455 size_t src_size = strlen (src) + 1;
456 char *newstr = NULL;
457 wchar_t *str_wc = NULL;
458 const char *str_to_print = src;
459 size_t n_cols = src_size - 1;
460 size_t n_used_bytes = n_cols; /* Not including NUL */
461 size_t n_spaces = 0, space_left;
462
463 #ifdef HAVE_WIDECHAR
464 bool conversion = false;
465 bool wc_enabled = false;
466
467 /* In multi-byte locales convert to wide characters
468 to allow easy truncation. Also determine number
469 of screen columns used. */
470 if (MB_CUR_MAX > 1)
471 {
472 size_t src_chars = mbstowcs (NULL, src, 0);
473 if (src_chars == (size_t) -1)
474 {
475 if (flags & MBA_UNIBYTE_FALLBACK)
476 goto mbsalign_unibyte;
477 else
478 goto mbsalign_cleanup;
479 }
480 src_chars += 1; /* make space for NUL */
481 str_wc = malloc (src_chars * sizeof (wchar_t));
482 if (str_wc == NULL)
483 {
484 if (flags & MBA_UNIBYTE_FALLBACK)
485 goto mbsalign_unibyte;
486 else
487 goto mbsalign_cleanup;
488 }
489 if (mbstowcs (str_wc, src, src_chars) != 0)
490 {
491 str_wc[src_chars - 1] = L'\0';
492 wc_enabled = true;
493 conversion = wc_ensure_printable (str_wc);
494 n_cols = rpl_wcswidth (str_wc, src_chars);
495 }
496 }
497
498 /* If we transformed or need to truncate the source string
499 then create a modified copy of it. */
500 if (wc_enabled && (conversion || (n_cols > *width)))
501 {
502 if (conversion)
503 {
504 /* May have increased the size by converting
505 \t to \uFFFD for example. */
506 src_size = wcstombs(NULL, str_wc, 0) + 1;
507 }
508 newstr = malloc (src_size);
509 if (newstr == NULL)
510 {
511 if (flags & MBA_UNIBYTE_FALLBACK)
512 goto mbsalign_unibyte;
513 else
514 goto mbsalign_cleanup;
515 }
516 str_to_print = newstr;
517 n_cols = wc_truncate (str_wc, *width);
518 n_used_bytes = wcstombs (newstr, str_wc, src_size);
519 }
520
521 mbsalign_unibyte:
522 #endif
523
524 if (n_cols > *width) /* Unibyte truncation required. */
525 {
526 n_cols = *width;
527 n_used_bytes = n_cols;
528 }
529
530 if (*width > n_cols) /* Padding required. */
531 n_spaces = *width - n_cols;
532
533 /* indicate to caller how many cells needed (not including padding). */
534 *width = n_cols;
535
536 /* indicate to caller how many bytes needed (not including NUL). */
537 ret = n_used_bytes + (n_spaces * 1);
538
539 /* Write as much NUL terminated output to DEST as possible. */
540 if (dest_size != 0)
541 {
542 char *dest_end = dest + dest_size - 1;
543 size_t start_spaces;
544 size_t end_spaces;
545
546 switch (align)
547 {
548 case MBS_ALIGN_CENTER:
549 start_spaces = n_spaces / 2 + n_spaces % 2;
550 end_spaces = n_spaces / 2;
551 break;
552 case MBS_ALIGN_LEFT:
553 start_spaces = 0;
554 end_spaces = n_spaces;
555 break;
556 case MBS_ALIGN_RIGHT:
557 start_spaces = n_spaces;
558 end_spaces = 0;
559 break;
560 default:
561 abort();
562 }
563
564 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
565 space_left = dest_end - dest;
566 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
567 mbs_align_pad (dest, dest_end, end_spaces, padchar);
568 }
569 #ifdef HAVE_WIDECHAR
570 mbsalign_cleanup:
571 #endif
572 free (str_wc);
573 free (newstr);
574
575 return ret;
576 }