]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
lib/randutils: add xsrand() and rand_get_number()
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
104b92f8
PB
1/* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
36c7f785
PB
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
104b92f8
PB
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17/* Written by Pádraig Brady. */
18
19#include <config.h>
20
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <stdbool.h>
25#include <limits.h>
1b1f66e4 26#include <ctype.h>
104b92f8
PB
27
28#include "c.h"
29#include "mbsalign.h"
ab753d8f 30#include "strutils.h"
104b92f8
PB
31#include "widechar.h"
32
104b92f8
PB
33/* Replace non printable chars.
34 Note \t and \n etc. are non printable.
35 Return 1 if replacement made, 0 otherwise. */
36
1b1f66e4
KZ
/*
 * Counts the number of screen cells needed to print the multibyte string
 * @buf after "safe" encoding, i.e. with every control and non-printable
 * char replaced by a four-character \x?? hex sequence. See
 * mbs_safe_encode().
 *
 * At most @bufsz bytes of @buf are examined; scanning also stops at the
 * first NUL byte. A NULL @buf or a zero @bufsz yields zero.
 *
 * Returns: number of cells; if @sz is not NULL it is set to the number of
 * bytes the encoded string occupies (not counting the terminating NUL).
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf;
	size_t left = bufsz;		/* bytes of @buf not yet examined */
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif

	/* check the bound before dereferencing, @buf need not be terminated */
	while (p && left && *p) {
		size_t len = 1;		/* input bytes consumed in this step */

		if (iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			/* never let mbrtowc() look beyond @bufsz */
			size_t max = left < MB_CUR_MAX ? left : MB_CUR_MAX;

			len = mbrtowc(&wc, p, max, &st);
			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence: handle it as
				 * a single byte and restart from the initial
				 * conversion state.
				 */
				len = 1;
				memset(&st, 0, sizeof(st));
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int cells = wcwidth(wc);

				if (cells > 0)		/* -1 must not wrap the counter */
					width += cells;	/* number of cells */
				bytes += len;		/* number of bytes */
			}
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
		} else {
			width++, bytes++;
		}
#endif
		p += len;
		left -= len;
	}

	if (sz)
		*sz = bytes;
	return width;
}
100
0c33fcbf
KZ
/*
 * Returns the number of cells needed to print the whole NUL-terminated
 * string @s after safe encoding (see mbs_safe_nwidth()); zero for a NULL
 * or empty string.
 */
size_t mbs_safe_width(const char *s)
{
	if (!s || !*s)
		return 0;
	return mbs_safe_nwidth(s, strlen(s), NULL);
}
107
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. If @width is not NULL it returns the number of cells
 * of the result.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (in which
 * case neither @buf nor @width is touched).
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;

	while (*p) {
		size_t len = 1;		/* input bytes consumed in this step */

		if (iscntrl((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;

			len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 * Restart from the initial conversion state.
				 */
				len = 1;
				memset(&st, 0, sizeof(st));
				if (!isprint((unsigned char) *p)) {
					snprintf(r, 5, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					snprintf(r, 5, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int n = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				if (n > 0)	/* -1 must not wrap the counter */
					cells += n;
			}
		}
#else
		else if (!isprint((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
		} else {
			*r++ = *p;
			cells++;
		}
#endif
		p += len;
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
190
/*
 * Returns the buffer size needed to safely encode a string of @bytes
 * bytes: every input byte may expand to a four-character \x?? sequence,
 * plus one byte for the terminating NUL.
 *
 * If the computation would overflow, (size_t) -1 is returned so that a
 * subsequent allocation fails rather than ending up too small.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	if (bytes > ((size_t) -1 - 1) / 4)
		return (size_t) -1;
	return (bytes * 4) + 1;
}
195
/*
 * Returns allocated string where all control and non-printable chars are
 * replaced with \x?? hex sequence. The @width returns number of cells.
 *
 * Returns NULL if @s is NULL or empty, or if the allocation fails. The
 * caller is responsible for free()ing the result.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	size_t sz = s ? strlen(s) : 0;
	char *buf, *ret;

	if (!sz)
		return NULL;
	buf = malloc(mbs_safe_encode_size(sz));
	if (!buf)
		return NULL;

	ret = mbs_safe_encode_to_buffer(s, width, buf);
	if (!ret)
		free(buf);	/* don't leak the buffer if encoding is refused */
	return ret;
}
213
4a423fb9
KZ
214#ifdef HAVE_WIDECHAR
215
104b92f8
PB
/* Replace non printable chars in the NUL terminated wide string WCHARS
   with U+FFFD (replacement char), in place.
   Note \t and \n etc. are non printable.
   Return true if a replacement was made, false otherwise.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool replaced = false;
  wchar_t *wc;

  for (wc = wchars; *wc; wc++)
    {
      if (!iswprint ((wint_t) *wc))
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          replaced = true;
        }
    }
  return replaced;
}
232
/* Truncate the NUL terminated wide string WC, in place, so that it
   occupies at most WIDTH screen cells.  Characters for which wcwidth()
   reports -1 are replaced by U+FFFD and counted as one cell.
   Returns number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t cells = 0;

  for (; *wc; wc++)
    {
      int next_cells = wcwidth (*wc);

      if (next_cells == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          next_cells = 1;
        }
      if (cells + next_cells > width)
        break;                /* this char would not fit any more */

      cells += next_cells;
    }
  *wc = L'\0';
  return cells;
}
259
/* Return the number of screen cells needed for at most N wide chars of
   the string S, or -1 if a non printable char is found or the total
   would overflow an int.

   FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int ret = 0;

  for (; n > 0 && *s != L'\0'; n--, s++)
    {
      int nwidth = wcwidth (*s);

      if (nwidth == -1)             /* non printable */
        return -1;
      if (ret > (INT_MAX - nwidth)) /* overflow */
        return -1;
      ret += nwidth;
    }

  return ret;
}
4a423fb9 280#endif /* HAVE_WIDECHAR */
104b92f8 281
5f94ca33
KZ
/* Truncate the multi-byte string @str, in place, to at most @width
 * cells. Returns the number of bytes of the new string @str, and in
 * @width returns the number of cells it occupies.
 *
 * Without wide-char support one byte is assumed to be one cell.
 *
 * With wide-char support, if @str cannot be converted to wide chars (or
 * memory is exhausted) the string and @width are left untouched and the
 * original length is returned; if the truncated string cannot be
 * converted back, (size_t) -1 is returned.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	size_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	size_t nchars = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (nchars == (size_t) -1)
		goto done;

	/* zeroed and one element longer, so the result is always terminated */
	wcs = calloc(nchars + 1, sizeof(*wcs));
	if (!wcs)
		goto done;

	if (mbstowcs(wcs, str, nchars) == (size_t) -1)
		goto done;
	*width = wc_truncate(wcs, *width);
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	if (*width < bytes)
		bytes = *width;
	else
		*width = bytes;		/* shorter than the limit: report real size */
#endif
	if (bytes != (size_t) -1)
		str[bytes] = '\0';
	return bytes;
}
315
104b92f8
PB
/* Write N_SPACES copies of PADCHAR to DEST while ensuring
   nothing is written beyond DEST_END.  A terminating NUL
   is always added to DEST (DEST_END itself must be writable).
   A pointer to the terminating NUL is returned.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  /* FIXME: Should we pad with "figure space" (\u2007)
     if non ascii data present?  */
  while (n_spaces > 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }
  *dest = '\0';
  return dest;
}
331
57867795
KZ
332size_t
333mbsalign (const char *src, char *dest, size_t dest_size,
334 size_t *width, mbs_align_t align, int flags)
335{
336 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
337}
338
104b92f8
PB
339/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
340 characters; write the result into the DEST_SIZE-byte buffer, DEST.
341 ALIGNMENT specifies whether to left- or right-justify or to center.
342 If SRC requires more than *WIDTH columns, truncate it to fit.
343 When centering, the number of trailing spaces may be one less than the
344 number of leading spaces. The FLAGS parameter is unused at present.
345 Return the length in bytes required for the final result, not counting
346 the trailing NUL. A return value of DEST_SIZE or larger means there
347 wasn't enough space. DEST will be NUL terminated in any case.
348 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
349 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
350 Update *WIDTH to indicate how many columns were used before padding. */
351
352size_t
57867795 353mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
c191740c
KZ
354 size_t *width, mbs_align_t align,
355#ifdef HAVE_WIDECHAR
356 int flags,
357#else
358 int flags __attribute__((__unused__)),
359#endif
57867795 360 int padchar)
104b92f8
PB
361{
362 size_t ret = -1;
363 size_t src_size = strlen (src) + 1;
364 char *newstr = NULL;
365 wchar_t *str_wc = NULL;
366 const char *str_to_print = src;
367 size_t n_cols = src_size - 1;
368 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 369 size_t n_spaces = 0, space_left;
6426f926
KZ
370
371#ifdef HAVE_WIDECHAR
104b92f8
PB
372 bool conversion = false;
373 bool wc_enabled = false;
374
104b92f8
PB
375 /* In multi-byte locales convert to wide characters
376 to allow easy truncation. Also determine number
377 of screen columns used. */
378 if (MB_CUR_MAX > 1)
379 {
380 size_t src_chars = mbstowcs (NULL, src, 0);
381 if (src_chars == (size_t) -1)
382 {
383 if (flags & MBA_UNIBYTE_FALLBACK)
384 goto mbsalign_unibyte;
385 else
386 goto mbsalign_cleanup;
387 }
388 src_chars += 1; /* make space for NUL */
389 str_wc = malloc (src_chars * sizeof (wchar_t));
390 if (str_wc == NULL)
391 {
392 if (flags & MBA_UNIBYTE_FALLBACK)
393 goto mbsalign_unibyte;
394 else
395 goto mbsalign_cleanup;
396 }
397 if (mbstowcs (str_wc, src, src_chars) != 0)
398 {
399 str_wc[src_chars - 1] = L'\0';
400 wc_enabled = true;
401 conversion = wc_ensure_printable (str_wc);
402 n_cols = rpl_wcswidth (str_wc, src_chars);
403 }
404 }
405
406 /* If we transformed or need to truncate the source string
407 then create a modified copy of it. */
408 if (wc_enabled && (conversion || (n_cols > *width)))
409 {
410 if (conversion)
411 {
412 /* May have increased the size by converting
413 \t to \uFFFD for example. */
414 src_size = wcstombs(NULL, str_wc, 0) + 1;
415 }
416 newstr = malloc (src_size);
417 if (newstr == NULL)
418 {
419 if (flags & MBA_UNIBYTE_FALLBACK)
420 goto mbsalign_unibyte;
421 else
422 goto mbsalign_cleanup;
423 }
424 str_to_print = newstr;
425 n_cols = wc_truncate (str_wc, *width);
426 n_used_bytes = wcstombs (newstr, str_wc, src_size);
427 }
104b92f8
PB
428
429mbsalign_unibyte:
6426f926 430#endif
104b92f8
PB
431
432 if (n_cols > *width) /* Unibyte truncation required. */
433 {
434 n_cols = *width;
435 n_used_bytes = n_cols;
436 }
437
438 if (*width > n_cols) /* Padding required. */
439 n_spaces = *width - n_cols;
440
441 /* indicate to caller how many cells needed (not including padding). */
442 *width = n_cols;
443
444 /* indicate to caller how many bytes needed (not including NUL). */
445 ret = n_used_bytes + (n_spaces * 1);
446
447 /* Write as much NUL terminated output to DEST as possible. */
448 if (dest_size != 0)
449 {
450 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
451 size_t start_spaces;
452 size_t end_spaces;
104b92f8
PB
453
454 switch (align)
455 {
456 case MBS_ALIGN_CENTER:
457 start_spaces = n_spaces / 2 + n_spaces % 2;
458 end_spaces = n_spaces / 2;
459 break;
460 case MBS_ALIGN_LEFT:
461 start_spaces = 0;
462 end_spaces = n_spaces;
463 break;
464 case MBS_ALIGN_RIGHT:
465 start_spaces = n_spaces;
466 end_spaces = 0;
467 break;
ce7b11a9
SK
468 default:
469 abort();
104b92f8
PB
470 }
471
57867795 472 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
3acc206d 473 space_left = dest_end - dest;
104b92f8 474 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
57867795 475 mbs_align_pad (dest, dest_end, end_spaces, padchar);
104b92f8 476 }
6426f926 477#ifdef HAVE_WIDECHAR
104b92f8 478mbsalign_cleanup:
6426f926 479#endif
104b92f8
PB
480 free (str_wc);
481 free (newstr);
482
483 return ret;
484}