]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
lib/colors: fix test compilation
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
104b92f8
PB
1/* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
36c7f785
PB
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
104b92f8
PB
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17/* Written by Pádraig Brady. */
18
19#include <config.h>
20
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <stdbool.h>
25#include <limits.h>
1b1f66e4 26#include <ctype.h>
104b92f8
PB
27
28#include "c.h"
29#include "mbsalign.h"
ab753d8f 30#include "strutils.h"
104b92f8
PB
31#include "widechar.h"
32
104b92f8
PB
33/* Replace non printable chars.
34 Note \t and \n etc. are non printable.
35 Return 1 if replacement made, 0 otherwise. */
36
1b1f66e4
KZ
/*
 * Counts the number of screen cells needed to print the multibyte string
 * @buf once it has been escaped the way mbs_safe_encode() does it: every
 * control or non-printable byte is counted as a four-character "\x??"
 * sequence.
 *
 * At most @bufsz bytes of @buf are examined and scanning also stops at the
 * first NUL byte.  The buffer does not have to be NUL-terminated, and a
 * multibyte character that does not fit completely into the first @bufsz
 * bytes is counted byte by byte as escaped data rather than being read past
 * the limit.
 *
 * Returns: number of cells; if @sz is not NULL it receives the number of
 * bytes the escaped string would occupy (without the terminating NUL).
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf;
	size_t left = buf ? bufsz : 0;	/* bytes we are still allowed to read */
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif

	/* The limit is tested before *p so nothing beyond @bufsz is touched. */
	while (left && *p) {
		if (iscntrl((unsigned char) *p)) {
			/* *p encoded to \x?? */
			width += 4;
			bytes += 4;
			p++;
			left--;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t max = left < MB_CUR_MAX ? left : MB_CUR_MAX;
			size_t len = mbrtowc(&wc, p, max, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence; count one
				 * byte and restart from a clean conversion
				 * state (it is unspecified after an error).
				 */
				len = 1;
				memset(&st, 0, sizeof(st));
				if (isprint((unsigned char) *p)) {
					width += 1;
					bytes += 1;
				} else {
					width += 4;
					bytes += 4;
				}
			} else if (!iswprint(wc)) {
				/* hex encode whole sequence */
				width += len * 4;
				bytes += len * 4;
			} else {
				int cells = wcwidth(wc);

				/* never let a negative result wrap @width */
				if (cells > 0)
					width += (size_t) cells;
				bytes += len;
			}
			p += len;
			left -= len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			/* *p encoded to \x?? */
			width += 4;
			bytes += 4;
			p++;
			left--;
		} else {
			width++;
			bytes++;
			p++;
			left--;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
100
0c33fcbf
KZ
/*
 * Width in cells of the whole NUL-terminated string @s after escaping, as
 * computed by mbs_safe_nwidth().  A NULL or empty string has width 0.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
107
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence.  Bytes listed in @safechars (may be NULL) are copied
 * verbatim and do not contribute to the reported width.
 *
 * @width (may be NULL) receives the number of cells of the encoded string.
 * It is written only on success.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if (iscntrl((unsigned char) *p)) {
			/* "\xNN" is four characters plus the NUL */
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/* conversion state is unspecified after an error */
				memset(&st, 0, sizeof(st));
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					snprintf(r, 5, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence in turn */
				for (i = 0; i < len; i++) {
					snprintf(r, 5, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int w = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* never let a negative result wrap the width */
				if (w > 0)
					cells += (size_t) w;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			snprintf(r, 5, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
196
/*
 * Returns the buffer size needed to safely encode @bytes input bytes: each
 * byte may expand to a four-character \x?? sequence, plus one byte for the
 * terminating NUL.  If that size cannot be represented, the maximum size_t
 * value is returned so that an allocation of the result fails instead of
 * being silently too small.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t max = (size_t) -1;

	if (bytes > (max - 1) / 4)
		return max;
	return (bytes * 4) + 1;
}
201
/*
 * Returns a malloc()ed copy of @s in which all control and non-printable
 * chars are replaced with \x?? hex sequence; @width is passed through to
 * mbs_safe_encode_to_buffer().
 *
 * Returns NULL when @s is NULL or empty, when the allocation fails, or when
 * the encoding itself returns NULL.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	size_t len;
	char *out;

	if (!s)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	out = malloc(mbs_safe_encode_size(len));
	if (!out)
		return NULL;

	if (!mbs_safe_encode_to_buffer(s, width, out, NULL)) {
		free(out);
		return NULL;
	}
	return out;
}
220
4a423fb9
KZ
221#ifdef HAVE_WIDECHAR
222
104b92f8
PB
/* Overwrite every wide character of WCHARS that iswprint() rejects with
   U+FFFD (the replacement character).
   Return true if at least one character was replaced, false otherwise.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }

  return changed;
}
239
/* Cut the wide string WC so that it occupies at most WIDTH cells.
   Characters for which wcwidth() reports -1 are replaced by U+FFFD and
   counted as one cell.  The string is NUL-terminated at the cut position.
   Returns the number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc != L'\0'; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }

      /* Stop before the first character that would not fit.  */
      if (used + w > width)
        break;

      used += w;
    }

  *wc = L'\0';
  return used;
}
266
/* FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */

/* Sum the wcwidth() of at most N leading wide characters of S, stopping
   early at a NUL.  Returns -1 if a character is non printable or if the
   sum would exceed INT_MAX.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;

  for (; n > 0 && *s != L'\0'; n--, s++)
    {
      int w = wcwidth (*s);

      if (w == -1) /* non printable */
        return -1;
      if (total > (INT_MAX - w)) /* overflow */
        return -1;

      total += w;
    }

  return total;
}
4a423fb9 287#endif /* HAVE_WIDECHAR */
104b92f8 288
5f94ca33
KZ
/* Truncate multi-byte string @str in place so that it occupies at most
 * *@width cells.
 *
 * Returns the number of bytes of the new string, and in @width returns the
 * number of cells it really uses (this may be less than requested when the
 * string is shorter or when a wide character would not fit).
 *
 * With wide character support: if @str is not a valid multi-byte string or
 * memory cannot be allocated, the string and @width are left unchanged and
 * the original length is returned.  If the conversion back to multi-byte
 * fails, (size_t) -1 is returned.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	size_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	size_t sz = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (sz == (size_t) -1)
		goto done;

	/* zero-filled, so the wide string is always terminated */
	wcs = calloc(sz + 1, sizeof(*wcs));
	if (!wcs)
		goto done;

	/*
	 * Only a real conversion error aborts; an empty string converts to
	 * zero characters and must still report zero cells in @width.
	 */
	if (mbstowcs(wcs, str, sz) == (size_t) -1)
		goto done;
	*width = wc_truncate(wcs, *width);
	/* never write more than the original length into @str */
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	/* one byte is one cell */
	if (*width < bytes)
		bytes = *width;
	else
		*width = bytes;
#endif
	if (bytes != (size_t) -1)
		str[bytes] = '\0';
	return bytes;
}
322
104b92f8
PB
/* Fill DEST with up to N_SPACES copies of PADCHAR, stopping early once
   DEST_END is reached so that nothing is written beyond it.  The output
   is always NUL terminated (the NUL may land on DEST_END itself).
   Returns a pointer to that terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  /* FIXME: Should we pad with "figure space" (\u2007)
     if non ascii data present?  */
  while (n_spaces > 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }

  *dest = '\0';
  return dest;
}
338
57867795
KZ
339size_t
340mbsalign (const char *src, char *dest, size_t dest_size,
341 size_t *width, mbs_align_t align, int flags)
342{
343 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
344}
345
104b92f8
PB
346/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
347 characters; write the result into the DEST_SIZE-byte buffer, DEST.
348 ALIGNMENT specifies whether to left- or right-justify or to center.
349 If SRC requires more than *WIDTH columns, truncate it to fit.
350 When centering, the number of trailing spaces may be one less than the
351 number of leading spaces. The FLAGS parameter is unused at present.
352 Return the length in bytes required for the final result, not counting
353 the trailing NUL. A return value of DEST_SIZE or larger means there
354 wasn't enough space. DEST will be NUL terminated in any case.
355 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
356 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
357 Update *WIDTH to indicate how many columns were used before padding. */
358
359size_t
57867795 360mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
c191740c
KZ
361 size_t *width, mbs_align_t align,
362#ifdef HAVE_WIDECHAR
363 int flags,
364#else
365 int flags __attribute__((__unused__)),
366#endif
57867795 367 int padchar)
104b92f8
PB
368{
369 size_t ret = -1;
370 size_t src_size = strlen (src) + 1;
371 char *newstr = NULL;
372 wchar_t *str_wc = NULL;
373 const char *str_to_print = src;
374 size_t n_cols = src_size - 1;
375 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 376 size_t n_spaces = 0, space_left;
6426f926
KZ
377
378#ifdef HAVE_WIDECHAR
104b92f8
PB
379 bool conversion = false;
380 bool wc_enabled = false;
381
104b92f8
PB
382 /* In multi-byte locales convert to wide characters
383 to allow easy truncation. Also determine number
384 of screen columns used. */
385 if (MB_CUR_MAX > 1)
386 {
387 size_t src_chars = mbstowcs (NULL, src, 0);
388 if (src_chars == (size_t) -1)
389 {
390 if (flags & MBA_UNIBYTE_FALLBACK)
391 goto mbsalign_unibyte;
392 else
393 goto mbsalign_cleanup;
394 }
395 src_chars += 1; /* make space for NUL */
396 str_wc = malloc (src_chars * sizeof (wchar_t));
397 if (str_wc == NULL)
398 {
399 if (flags & MBA_UNIBYTE_FALLBACK)
400 goto mbsalign_unibyte;
401 else
402 goto mbsalign_cleanup;
403 }
404 if (mbstowcs (str_wc, src, src_chars) != 0)
405 {
406 str_wc[src_chars - 1] = L'\0';
407 wc_enabled = true;
408 conversion = wc_ensure_printable (str_wc);
409 n_cols = rpl_wcswidth (str_wc, src_chars);
410 }
411 }
412
413 /* If we transformed or need to truncate the source string
414 then create a modified copy of it. */
415 if (wc_enabled && (conversion || (n_cols > *width)))
416 {
417 if (conversion)
418 {
419 /* May have increased the size by converting
420 \t to \uFFFD for example. */
421 src_size = wcstombs(NULL, str_wc, 0) + 1;
422 }
423 newstr = malloc (src_size);
424 if (newstr == NULL)
425 {
426 if (flags & MBA_UNIBYTE_FALLBACK)
427 goto mbsalign_unibyte;
428 else
429 goto mbsalign_cleanup;
430 }
431 str_to_print = newstr;
432 n_cols = wc_truncate (str_wc, *width);
433 n_used_bytes = wcstombs (newstr, str_wc, src_size);
434 }
104b92f8
PB
435
436mbsalign_unibyte:
6426f926 437#endif
104b92f8
PB
438
439 if (n_cols > *width) /* Unibyte truncation required. */
440 {
441 n_cols = *width;
442 n_used_bytes = n_cols;
443 }
444
445 if (*width > n_cols) /* Padding required. */
446 n_spaces = *width - n_cols;
447
448 /* indicate to caller how many cells needed (not including padding). */
449 *width = n_cols;
450
451 /* indicate to caller how many bytes needed (not including NUL). */
452 ret = n_used_bytes + (n_spaces * 1);
453
454 /* Write as much NUL terminated output to DEST as possible. */
455 if (dest_size != 0)
456 {
457 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
458 size_t start_spaces;
459 size_t end_spaces;
104b92f8
PB
460
461 switch (align)
462 {
463 case MBS_ALIGN_CENTER:
464 start_spaces = n_spaces / 2 + n_spaces % 2;
465 end_spaces = n_spaces / 2;
466 break;
467 case MBS_ALIGN_LEFT:
468 start_spaces = 0;
469 end_spaces = n_spaces;
470 break;
471 case MBS_ALIGN_RIGHT:
472 start_spaces = n_spaces;
473 end_spaces = 0;
474 break;
ce7b11a9
SK
475 default:
476 abort();
104b92f8
PB
477 }
478
57867795 479 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
3acc206d 480 space_left = dest_end - dest;
104b92f8 481 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
57867795 482 mbs_align_pad (dest, dest_end, end_spaces, padchar);
104b92f8 483 }
6426f926 484#ifdef HAVE_WIDECHAR
104b92f8 485mbsalign_cleanup:
6426f926 486#endif
104b92f8
PB
487 free (str_wc);
488 free (newstr);
489
490 return ret;
491}