]> git.ipfire.org Git - thirdparty/util-linux.git/blame - lib/mbsalign.c
libblkid: use internally uint64_t for offsets and sizes
[thirdparty/util-linux.git] / lib / mbsalign.c
CommitLineData
104b92f8
PB
1/* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
36c7f785
PB
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
104b92f8
PB
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17/* Written by Pádraig Brady. */
18
19#include <config.h>
20
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <stdbool.h>
25#include <limits.h>
1b1f66e4 26#include <ctype.h>
104b92f8
PB
27
28#include "c.h"
29#include "mbsalign.h"
30#include "widechar.h"
31
104b92f8
PB
32#ifdef HAVE_WIDECHAR
33/* Replace non printable chars.
34 Note \t and \n etc. are non printable.
35 Return 1 if replacement made, 0 otherwise. */
36
1b1f66e4
KZ
/*
 * Counts the number of screen cells needed to print a multibyte string in
 * its "safe" form, where every control or non-printable character is
 * replaced by a four-character \x?? hex sequence. See mbs_safe_encode().
 *
 * @buf:   string to measure; may be NULL (the result is then 0)
 * @bufsz: maximal number of bytes of @buf to examine; scanning also stops
 *         at the first NUL byte
 * @sz:    optional; receives the number of bytes of the encoded form
 *         (not counting the terminating NUL)
 *
 * Returns: number of cells.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf, *end;
	size_t width = 0, bytes = 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif

	/* One past the last byte we may look at; an empty window (bufsz == 0)
	 * must not examine anything. */
	end = (p && bufsz) ? p + bufsz : p;

	/* The bound is checked before *p is read. */
	while (p && p < end && *p) {
		if (iscntrl((unsigned char) *p)) {
			/* *p encoded to \x?? */
			width += 4;
			bytes += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t left = (size_t) (end - p);
			/* never let mbrtowc() read behind the window */
			size_t max = left < MB_CUR_MAX ? left : MB_CUR_MAX;
			size_t len = mbrtowc(&wc, p, max, &st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* Invalid or incomplete sequence: the state
				 * is unusable now, start again from the
				 * initial state and handle one raw byte. */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p)) {
					width += 1;
					bytes += 1;
				} else {
					width += 4;
					bytes += 4;
				}
			} else if (!iswprint(wc)) {
				/* hex encode whole sequence */
				width += len * 4;
				bytes += len * 4;
			} else {
				int cells = wcwidth(wc);

				/* a negative result must not wrap the sum */
				if (cells > 0)
					width += cells;	/* number of cells */
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			/* *p encoded to \x?? */
			width += 4;
			bytes += 4;
			p++;
		} else {
			width++;
			bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
99
0c33fcbf
KZ
/*
 * Width (in cells) of the whole NUL-terminated string @s in its safe
 * (hex-escaped) form. NULL and empty strings have width 0.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
106
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequences.
 *
 * @s:     NUL-terminated source string
 * @width: optional; on success receives the number of cells of the result
 * @buf:   destination; it has to be big enough to store
 *         mbs_safe_encode_size(strlen(s)) bytes
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL (in which
 * case @width is left untouched).
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;
#endif

	if (!sz || !buf)
		return NULL;

#ifdef HAVE_WIDECHAR
	memset(&st, 0, sizeof(st));
#endif
	r = buf;

	while (*p) {
		if (iscntrl((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* The conversion state is unusable after an
				 * error; restart from the initial state. */
				memset(&st, 0, sizeof(st));
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int w = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* a negative result must not wrap the sum */
				if (w > 0)
					cells += w;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';

	if (width)
		*width = cells;
	return buf;
}
189
/*
 * Size of the buffer needed to safely encode a string of @bytes bytes:
 * in the worst case every byte becomes a four-character \x?? sequence,
 * plus one byte for the terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t worst_case = 4 * bytes;

	return worst_case + 1;
}
194
/*
 * Returns a newly allocated copy of @s where all control and non-printable
 * chars are replaced with \x?? hex sequences; @width is passed through to
 * mbs_safe_encode_to_buffer(). Returns NULL for a NULL or empty @s and
 * when the allocation fails.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *out;

	if (!s || !*s)
		return NULL;

	out = malloc(mbs_safe_encode_size(strlen(s)));

	return out ? mbs_safe_encode_to_buffer(s, width, out) : NULL;
}
212
104b92f8
PB
/* Overwrite every wide character of WCHARS for which iswprint() fails
   with U+FFFD (the replacement character).
   Return true if at least one character was replaced.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  size_t i;

  for (i = 0; wchars[i] != L'\0'; i++)
    {
      if (iswprint ((wint_t) wchars[i]))
        continue;

      wchars[i] = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }

  return changed;
}
229
/* Cut the wide string WC so that it occupies at most WIDTH cells.
   Characters for which wcwidth() reports -1 are replaced with U+FFFD
   and counted as one cell.
   Returns the number of cells used by the truncated string.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;
  wchar_t *cur;

  for (cur = wc; *cur != L'\0'; cur++)
    {
      int w = wcwidth (*cur);

      if (w == -1) /* non printable */
        {
          *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }

      if (used + w > width)
        break; /* this character no longer fits */

      used += w;
    }

  *cur = L'\0';
  return used;
}
255
/* FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS */

/* Sum of the wcwidth() of at most N leading wide characters of S,
   stopping at the terminating NUL.
   Returns -1 if a character is non printable or the sum does not fit
   into an int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      if (w == -1) /* non printable */
        return -1;
      if (total > INT_MAX - w) /* overflow */
        return -1;

      total += w;
    }

  return total;
}
276#endif
277
5f94ca33
KZ
/* Truncate multi-byte string @str in place so that it occupies at most
 * *@width cells.
 *
 * Returns the number of bytes of the new string @str, and in @width
 * returns the number of cells it uses. With wide-character support, a
 * failed conversion back to multibyte makes the function return
 * (size_t) -1. NULL @str or @width yields 0.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t bytes;
#ifdef HAVE_WIDECHAR
	size_t nchars, converted;
	wchar_t *wcs = NULL;
#endif

	if (!str || !width)
		return 0;

	bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	nchars = mbstowcs(NULL, str, 0);
	if (nchars == (size_t) -1)
		goto done;

	wcs = malloc((nchars + 1) * sizeof(wchar_t));
	if (!wcs)
		goto done;

	/* Convert with room for the terminator: wc_truncate() relies on a
	 * NUL-terminated wide string. */
	converted = mbstowcs(wcs, str, nchars + 1);
	if (converted == 0 || converted == (size_t) -1)
		goto done;
	wcs[nchars] = L'\0';

	*width = wc_truncate(wcs, *width);
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	/* Without wide chars one byte is one cell. */
	if (*width < (size_t) bytes)
		bytes = *width;
	else
		*width = bytes;
#endif
	if (bytes >= 0)
		str[bytes] = '\0';
	return bytes;
}
311
104b92f8
PB
/* Append up to N_SPACES blanks at DEST, stopping early rather than
   writing at or beyond DEST_END.  The output is always NUL terminated
   (the NUL may land on DEST_END itself).
   Returns a pointer to that terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces)
{
  /* FIXME: Should we pad with "figure space" (\u2007)
     if non ascii data present? */
  while (n_spaces != 0 && dest < dest_end)
    {
      *dest++ = ' ';
      n_spaces--;
    }

  *dest = '\0';
  return dest;
}
327
328/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
329 characters; write the result into the DEST_SIZE-byte buffer, DEST.
330 ALIGNMENT specifies whether to left- or right-justify or to center.
331 If SRC requires more than *WIDTH columns, truncate it to fit.
332 When centering, the number of trailing spaces may be one less than the
333 number of leading spaces. The FLAGS parameter is unused at present.
334 Return the length in bytes required for the final result, not counting
335 the trailing NUL. A return value of DEST_SIZE or larger means there
336 wasn't enough space. DEST will be NUL terminated in any case.
337 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
338 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
339 Update *WIDTH to indicate how many columns were used before padding. */
340
341size_t
342mbsalign (const char *src, char *dest, size_t dest_size,
343 size_t *width, mbs_align_t align, int flags)
344{
345 size_t ret = -1;
346 size_t src_size = strlen (src) + 1;
347 char *newstr = NULL;
348 wchar_t *str_wc = NULL;
349 const char *str_to_print = src;
350 size_t n_cols = src_size - 1;
351 size_t n_used_bytes = n_cols; /* Not including NUL */
3acc206d 352 size_t n_spaces = 0, space_left;
104b92f8
PB
353 bool conversion = false;
354 bool wc_enabled = false;
355
356#ifdef HAVE_WIDECHAR
357 /* In multi-byte locales convert to wide characters
358 to allow easy truncation. Also determine number
359 of screen columns used. */
360 if (MB_CUR_MAX > 1)
361 {
362 size_t src_chars = mbstowcs (NULL, src, 0);
363 if (src_chars == (size_t) -1)
364 {
365 if (flags & MBA_UNIBYTE_FALLBACK)
366 goto mbsalign_unibyte;
367 else
368 goto mbsalign_cleanup;
369 }
370 src_chars += 1; /* make space for NUL */
371 str_wc = malloc (src_chars * sizeof (wchar_t));
372 if (str_wc == NULL)
373 {
374 if (flags & MBA_UNIBYTE_FALLBACK)
375 goto mbsalign_unibyte;
376 else
377 goto mbsalign_cleanup;
378 }
379 if (mbstowcs (str_wc, src, src_chars) != 0)
380 {
381 str_wc[src_chars - 1] = L'\0';
382 wc_enabled = true;
383 conversion = wc_ensure_printable (str_wc);
384 n_cols = rpl_wcswidth (str_wc, src_chars);
385 }
386 }
387
388 /* If we transformed or need to truncate the source string
389 then create a modified copy of it. */
390 if (wc_enabled && (conversion || (n_cols > *width)))
391 {
392 if (conversion)
393 {
394 /* May have increased the size by converting
395 \t to \uFFFD for example. */
396 src_size = wcstombs(NULL, str_wc, 0) + 1;
397 }
398 newstr = malloc (src_size);
399 if (newstr == NULL)
400 {
401 if (flags & MBA_UNIBYTE_FALLBACK)
402 goto mbsalign_unibyte;
403 else
404 goto mbsalign_cleanup;
405 }
406 str_to_print = newstr;
407 n_cols = wc_truncate (str_wc, *width);
408 n_used_bytes = wcstombs (newstr, str_wc, src_size);
409 }
410#endif
411
412mbsalign_unibyte:
413
414 if (n_cols > *width) /* Unibyte truncation required. */
415 {
416 n_cols = *width;
417 n_used_bytes = n_cols;
418 }
419
420 if (*width > n_cols) /* Padding required. */
421 n_spaces = *width - n_cols;
422
423 /* indicate to caller how many cells needed (not including padding). */
424 *width = n_cols;
425
426 /* indicate to caller how many bytes needed (not including NUL). */
427 ret = n_used_bytes + (n_spaces * 1);
428
429 /* Write as much NUL terminated output to DEST as possible. */
430 if (dest_size != 0)
431 {
432 char *dest_end = dest + dest_size - 1;
f7ff0414
SK
433 size_t start_spaces;
434 size_t end_spaces;
104b92f8
PB
435
436 switch (align)
437 {
438 case MBS_ALIGN_CENTER:
439 start_spaces = n_spaces / 2 + n_spaces % 2;
440 end_spaces = n_spaces / 2;
441 break;
442 case MBS_ALIGN_LEFT:
443 start_spaces = 0;
444 end_spaces = n_spaces;
445 break;
446 case MBS_ALIGN_RIGHT:
447 start_spaces = n_spaces;
448 end_spaces = 0;
449 break;
ce7b11a9
SK
450 default:
451 abort();
104b92f8
PB
452 }
453
454 dest = mbs_align_pad (dest, dest_end, start_spaces);
3acc206d 455 space_left = dest_end - dest;
104b92f8
PB
456 dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
457 mbs_align_pad (dest, dest_end, end_spaces);
458 }
459
460mbsalign_cleanup:
461
462 free (str_wc);
463 free (newstr);
464
465 return ret;
466}