]> git.ipfire.org Git - thirdparty/util-linux.git/blob - lib/mbsalign.c
lib/signames: fix redefinition of 'sys_signame' on OSX
[thirdparty/util-linux.git] / lib / mbsalign.c
1 /* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 /* Written by Pádraig Brady. */
18
19 #include <config.h>
20
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdbool.h>
25 #include <limits.h>
26 #include <ctype.h>
27
28 #include "c.h"
29 #include "mbsalign.h"
30 #include "strutils.h"
31 #include "widechar.h"
32
/* The wc_ensure_printable() helper further below replaces non-printable
   wide chars.  Note that \t, \n etc. are non-printable.
   It returns true if a replacement was made, false otherwise. */
36
/*
 * Counts the number of cells needed for the "safe" form of the multibyte
 * string @buf. At most @bufsz bytes are examined; counting also stops at
 * the first NUL byte. Every control or non-printable byte is counted as
 * four cells, because it is stored as a \x?? hex sequence.
 * See mbs_safe_encode().
 *
 * A NULL @buf or a zero @bufsz yields zero cells and zero bytes.
 *
 * Returns: number of cells; if @sz is not NULL, *@sz is set to the number
 * of bytes counted for the encoded form (without the terminating NUL).
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	size_t pos = 0;			/* offset of the next byte to examine */
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/*
	 * An index is used rather than a "last byte" pointer, so that
	 * bufsz == 0 examines nothing and no pointer outside of the
	 * string is ever computed.
	 */
	while (buf && pos < bufsz && buf[pos]) {
		const char *p = buf + pos;

		if (iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			pos++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence; the
				 * conversion state is not reliable any more,
				 * restart from the initial state and treat
				 * the byte as a single char.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int cells = wcwidth(wc);

				/* never add a negative width to size_t */
				if (cells > 0)
					width += cells;	/* number of cells */
				bytes += len;		/* number of bytes */
			}
			pos += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			pos++;
		} else {
			width++, bytes++;
			pos++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
100
/*
 * Returns the number of cells mbs_safe_nwidth() reports for the whole
 * NUL-terminated string @s; zero for a NULL or empty string.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
107
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequences. Bytes listed in @safechars (may be NULL) are copied
 * verbatim and are not added to the width.
 *
 * If @width is not NULL, it returns the number of cells of the result.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (in which
 * case @width is left untouched).
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
	const char *p = s;
	char *r;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf)
		return NULL;

	r = buf;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			*r++ = *p++;
			continue;
		}

		if (iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 * The conversion state is not reliable after
				 * the error, restart from the initial state.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int w = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* never add a negative width to size_t */
				if (w > 0)
					cells += w;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';
	if (width)
		*width = cells;
	return buf;
}
196
/*
 * Returns the buffer size to use for encoding a string of @bytes bytes:
 * four bytes (one \x?? sequence) per input byte plus one for the
 * terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t per_byte = 4;	/* strlen("\\x??") */

	return per_byte * bytes + 1;
}
201
/*
 * Returns a newly allocated copy of @s where all control and non-printable
 * chars are replaced with \x?? hex sequences; @width is passed on to
 * mbs_safe_encode_to_buffer().
 *
 * Returns NULL if @s is NULL or empty, or if the allocation or the
 * encoding fails.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *buf, *ret;

	if (!s || !*s)
		return NULL;

	buf = malloc(mbs_safe_encode_size(strlen(s)));
	if (!buf)
		return NULL;

	ret = mbs_safe_encode_to_buffer(s, width, buf, NULL);
	if (!ret)
		free(buf);	/* encoder refused the input */
	return ret;
}
220
221 #ifdef HAVE_WIDECHAR
222
/* Overwrite every wide char of WCHARS that iswprint() rejects with
   U+FFFD (replacement char).
   Return true if at least one char was replaced, false otherwise.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }

  return changed;
}
239
/* Cut the wide string WC (in place) so that it fits into WIDTH cells.
   A char for which wcwidth() returns -1 is replaced by U+FFFD and
   counted as one cell.
   Returns the number of cells used by the truncated string.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc != L'\0'; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }

      /* stop in front of the first char that does not fit */
      if (used + w > width)
        break;

      used += w;
    }

  *wc = L'\0';
  return used;
}
266
/* FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */

/* Sum up wcwidth() of at most N wide chars of S, stopping at the
   terminating L'\0'.  Returns -1 if wcwidth() reports -1 for a char
   or if the sum would exceed INT_MAX.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      if (w == -1                 /* non printable */
          || total > INT_MAX - w) /* overflow */
        return -1;

      total += w;
    }

  return total;
}
287 #endif /* HAVE_WIDECHAR */
288
/*
 * Truncates the multi-byte string @str (in place) to at most *@width cells.
 *
 * Returns the number of bytes of the new string @str, and in @width returns
 * the number of cells the new string uses. Without wide-char support
 * every byte is counted as one cell.
 *
 * If @str cannot be converted to wide chars (or the temporary buffer cannot
 * be allocated), the string and @width are left unmodified and the original
 * length in bytes is returned.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	ssize_t sz = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (sz == (ssize_t) -1)
		goto done;

	/* calloc(n, size) checks the multiplication for overflow */
	wcs = calloc(sz + 1, sizeof(wchar_t));
	if (!wcs)
		goto done;

	if (!mbstowcs(wcs, str, sz)) {
		*width = 0;		/* empty string uses no cells */
		goto done;
	}
	*width = wc_truncate(wcs, *width);
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	if (bytes >= 0 && *width < (size_t) bytes)
		bytes = *width;
	else if (bytes >= 0)
		*width = bytes;		/* shorter than requested; report real size */
#endif
	if (bytes >= 0)
		str[bytes] = '\0';
	return bytes;
}
322
/* Store up to N_SPACES copies of PADCHAR at DEST, never writing a
   padding char at or beyond DEST_END.  The output is always NUL
   terminated, and a pointer to that NUL is returned.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  /* FIXME: Should we pad with "figure space" (\u2007)
     if non ascii data present?  */
  while (n_spaces != 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }

  *dest = '\0';
  return dest;
}
338
339 size_t
340 mbsalign (const char *src, char *dest, size_t dest_size,
341 size_t *width, mbs_align_t align, int flags)
342 {
343 return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
344 }
345
346 /* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
347 characters; write the result into the DEST_SIZE-byte buffer, DEST.
348 ALIGNMENT specifies whether to left- or right-justify or to center.
349 If SRC requires more than *WIDTH columns, truncate it to fit.
350 When centering, the number of trailing spaces may be one less than the
351 number of leading spaces. The FLAGS parameter is unused at present.
352 Return the length in bytes required for the final result, not counting
353 the trailing NUL. A return value of DEST_SIZE or larger means there
354 wasn't enough space. DEST will be NUL terminated in any case.
355 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
356 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
357 Update *WIDTH to indicate how many columns were used before padding. */
358
359 size_t
360 mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
361 size_t *width, mbs_align_t align,
362 #ifdef HAVE_WIDECHAR
363 int flags,
364 #else
365 int flags __attribute__((__unused__)),
366 #endif
367 int padchar)
368 {
369 size_t ret = -1;
370 size_t src_size = strlen (src) + 1;
371 char *newstr = NULL;
372 wchar_t *str_wc = NULL;
373 const char *str_to_print = src;
374 size_t n_cols = src_size - 1;
375 size_t n_used_bytes = n_cols; /* Not including NUL */
376 size_t n_spaces = 0, space_left;
377
378 #ifdef HAVE_WIDECHAR
379 bool conversion = false;
380 bool wc_enabled = false;
381
382 /* In multi-byte locales convert to wide characters
383 to allow easy truncation. Also determine number
384 of screen columns used. */
385 if (MB_CUR_MAX > 1)
386 {
387 size_t src_chars = mbstowcs (NULL, src, 0);
388 if (src_chars == (size_t) -1)
389 {
390 if (flags & MBA_UNIBYTE_FALLBACK)
391 goto mbsalign_unibyte;
392 else
393 goto mbsalign_cleanup;
394 }
395 src_chars += 1; /* make space for NUL */
396 str_wc = malloc (src_chars * sizeof (wchar_t));
397 if (str_wc == NULL)
398 {
399 if (flags & MBA_UNIBYTE_FALLBACK)
400 goto mbsalign_unibyte;
401 else
402 goto mbsalign_cleanup;
403 }
404 if (mbstowcs (str_wc, src, src_chars) != 0)
405 {
406 str_wc[src_chars - 1] = L'\0';
407 wc_enabled = true;
408 conversion = wc_ensure_printable (str_wc);
409 n_cols = rpl_wcswidth (str_wc, src_chars);
410 }
411 }
412
413 /* If we transformed or need to truncate the source string
414 then create a modified copy of it. */
415 if (wc_enabled && (conversion || (n_cols > *width)))
416 {
417 if (conversion)
418 {
419 /* May have increased the size by converting
420 \t to \uFFFD for example. */
421 src_size = wcstombs(NULL, str_wc, 0) + 1;
422 }
423 newstr = malloc (src_size);
424 if (newstr == NULL)
425 {
426 if (flags & MBA_UNIBYTE_FALLBACK)
427 goto mbsalign_unibyte;
428 else
429 goto mbsalign_cleanup;
430 }
431 str_to_print = newstr;
432 n_cols = wc_truncate (str_wc, *width);
433 n_used_bytes = wcstombs (newstr, str_wc, src_size);
434 }
435
436 mbsalign_unibyte:
437 #endif
438
439 if (n_cols > *width) /* Unibyte truncation required. */
440 {
441 n_cols = *width;
442 n_used_bytes = n_cols;
443 }
444
445 if (*width > n_cols) /* Padding required. */
446 n_spaces = *width - n_cols;
447
448 /* indicate to caller how many cells needed (not including padding). */
449 *width = n_cols;
450
451 /* indicate to caller how many bytes needed (not including NUL). */
452 ret = n_used_bytes + (n_spaces * 1);
453
454 /* Write as much NUL terminated output to DEST as possible. */
455 if (dest_size != 0)
456 {
457 char *dest_end = dest + dest_size - 1;
458 size_t start_spaces;
459 size_t end_spaces;
460
461 switch (align)
462 {
463 case MBS_ALIGN_CENTER:
464 start_spaces = n_spaces / 2 + n_spaces % 2;
465 end_spaces = n_spaces / 2;
466 break;
467 case MBS_ALIGN_LEFT:
468 start_spaces = 0;
469 end_spaces = n_spaces;
470 break;
471 case MBS_ALIGN_RIGHT:
472 start_spaces = n_spaces;
473 end_spaces = 0;
474 break;
475 default:
476 abort();
477 }
478
479 dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
480 space_left = dest_end - dest;
481 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
482 mbs_align_pad (dest, dest_end, end_spaces, padchar);
483 }
484 #ifdef HAVE_WIDECHAR
485 mbsalign_cleanup:
486 #endif
487 free (str_wc);
488 free (newstr);
489
490 return ret;
491 }