]>
Commit | Line | Data |
---|---|---|
a1eaacb1 | 1 | '\" t |
fea681da | 2 | .\" Copyright (c) Bruno Haible <haible@clisp.cons.org> |
6d1a5754 | 3 | .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com> |
fea681da | 4 | .\" |
e4a74ca8 | 5 | .\" SPDX-License-Identifier: GPL-2.0-or-later |
fea681da MK |
6 | .\" |
7 | .\" References consulted: | |
8 | .\" GNU glibc-2 source code and manual | |
9 | .\" Dinkumware C library reference http://www.dinkumware.com/ | |
008f1ecc | 10 | .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html |
fea681da MK |
11 | .\" ISO/IEC 9899:1999 |
12 | .\" | |
4c1c5274 | 13 | .TH mbstowcs 3 (date) "Linux man-pages (unreleased)" |
fea681da | 14 | .SH NAME |
d0f17b57 | 15 | mbstowcs \- convert a multibyte string to a wide-character string |
d3ae3435 AC |
16 | .SH LIBRARY |
17 | Standard C library | |
8fc3b2cf | 18 | .RI ( libc ", " \-lc ) |
fea681da MK |
19 | .SH SYNOPSIS |
20 | .nf | |
21 | .B #include <stdlib.h> | |
68e4db0a | 22 | .PP |
1eed67e7 AC |
23 | .BI "size_t mbstowcs(wchar_t " dest "[restrict ." n "], \ |
24 | const char *restrict " src , | |
0634d31b | 25 | .BI " size_t " n ); |
fea681da MK |
26 | .fi |
27 | .SH DESCRIPTION | |
40aa0db0 MK |
28 | If |
29 | .I dest | |
b437fdd9 | 30 | is not NULL, |
60a90ecd MK |
31 | the |
32 | .BR mbstowcs () | |
33 | function converts the | |
40aa0db0 MK |
34 | multibyte string |
35 | .I src | |
36 | to a wide-character string starting at | |
37 | .IR dest . | |
38 | At most | |
39 | .I n | |
40 | wide characters are written to | |
41 | .IR dest . | |
3942b6bc MK |
42 | The sequence of characters in the string |
43 | .I src | |
44 | shall begin in the initial shift state. | |
c13182ef | 45 | The conversion can stop for three reasons: |
cdede5cd | 46 | .IP \[bu] 3 |
bce5f0ae | 47 | An invalid multibyte sequence has been encountered. |
96f2c6a7 | 48 | In this case, |
009df872 | 49 | .I (size_t)\ \-1 |
7d2cb9d5 | 50 | is returned. |
cdede5cd | 51 | .IP \[bu] |
40aa0db0 | 52 | .I n |
b957f81f | 53 | non-L\[aq]\e0\[aq] wide characters have been stored at |
40aa0db0 | 54 | .IR dest . |
96f2c6a7 | 55 | In this case, the number of wide characters written to |
40aa0db0 MK |
56 | .I dest |
57 | is returned, but the | |
fea681da | 58 | shift state at this point is lost. |
cdede5cd | 59 | .IP \[bu] |
bce5f0ae | 60 | The multibyte string has been completely converted, including the |
b957f81f | 61 | terminating null character (\[aq]\e0\[aq]). |
96f2c6a7 | 62 | In this case, the number of wide characters written to |
40aa0db0 MK |
63 | .IR dest , |
64 | excluding the terminating null wide character, is returned. | |
fea681da | 65 | .PP |
40aa0db0 MK |
66 | The programmer must ensure that there is room for at least |
67 | .I n | |
68 | wide | |
69 | characters at | |
70 | .IR dest . | |
fea681da | 71 | .PP |
40aa0db0 | 72 | If |
1ae6b2c7 | 73 | .I dest |
40aa0db0 MK |
74 | is NULL, |
75 | .I n | |
76 | is ignored, and the conversion proceeds as | |
fea681da MK |
77 | above, except that the converted wide characters are not written out to memory, |
78 | and that no length limit exists. | |
79 | .PP | |
40aa0db0 MK |
80 | In order to avoid the second case above, the programmer should make sure |
81 | .I n | |
82 | is | |
05766b02 | 83 | greater than or equal to |
40aa0db0 | 84 | .IR "mbstowcs(NULL,src,0)+1" . |
47297adb | 85 | .SH RETURN VALUE |
60a90ecd MK |
86 | The |
87 | .BR mbstowcs () | |
88 | function returns the number of wide characters that make | |
d0f17b57 | 89 | up the converted part of the wide-character string, not including the |
c13182ef MK |
90 | terminating null wide character. |
91 | If an invalid multibyte sequence was | |
7d2cb9d5 | 92 | encountered, |
009df872 | 93 | .I (size_t)\ \-1 |
7d2cb9d5 | 94 | is returned. |
cba6ce5e MS |
95 | .SH ATTRIBUTES |
96 | For an explanation of the terms used in this section, see | |
97 | .BR attributes (7). | |
98 | .TS | |
99 | allbox; | |
c466875e | 100 | lbx lb lb |
cba6ce5e MS |
101 | l l l. |
102 | Interface Attribute Value | |
103 | T{ | |
9e54434e BR |
104 | .na |
105 | .nh | |
cba6ce5e MS |
106 | .BR mbstowcs () |
107 | T} Thread safety MT-Safe | |
108 | .TE | |
c466875e | 109 | .sp 1 |
4131356c AC |
110 | .SH VERSIONS |
111 | The function | |
112 | .BR mbsrtowcs (3) | |
113 | provides a better interface to the same | |
114 | functionality. | |
3113c7f3 | 115 | .SH STANDARDS |
4131356c AC |
116 | C11, POSIX.1-2008. |
117 | .SH HISTORY | |
118 | POSIX.1-2001, C99. | |
fea681da | 119 | .SH NOTES |
d9bfdb9c | 120 | The behavior of |
60a90ecd | 121 | .BR mbstowcs () |
1274071a MK |
122 | depends on the |
123 | .B LC_CTYPE | |
124 | category of the | |
fea681da | 125 | current locale. |
a14af333 | 126 | .SH EXAMPLES |
6d1a5754 MK |
127 | The program below illustrates the use of |
128 | .BR mbstowcs (), | |
509a10ef | 129 | as well as some of the wide-character classification functions. |
6d1a5754 | 130 | An example run is the following: |
e646a1ba | 131 | .PP |
6d1a5754 | 132 | .in +4n |
e646a1ba | 133 | .EX |
6d1a5754 MK |
134 | $ ./t_mbstowcs de_DE.UTF\-8 Grüße! |
135 | Length of source string (excluding terminator): | |
136 | 8 bytes | |
137 | 6 multibyte characters | |
fe5dba13 | 138 | \& |
6d1a5754 | 139 | Wide character string is: Grüße! (6 characters) |
89851a00 MK |
140 | G alpha upper |
141 | r alpha lower | |
142 | ü alpha lower | |
143 | ß alpha lower | |
144 | e alpha lower | |
6d1a5754 | 145 | ! !alpha |
b8302363 | 146 | .EE |
6d1a5754 MK |
147 | .in |
148 | .SS Program source | |
c7885256 | 149 | \& |
b0b6ab4e | 150 | .\" SRC BEGIN (mbstowcs.c) |
e7d0bb47 | 151 | .EX |
6d1a5754 | 152 | #include <locale.h> |
6d1a5754 | 153 | #include <stdio.h> |
6d1a5754 | 154 | #include <stdlib.h> |
ad3868f0 AC |
155 | #include <string.h> |
156 | #include <wchar.h> | |
157 | #include <wctype.h> | |
fe5dba13 | 158 | \& |
6d1a5754 MK |
159 | int |
160 | main(int argc, char *argv[]) | |
161 | { | |
162 | size_t mbslen; /* Number of multibyte characters in source */ | |
163 | wchar_t *wcs; /* Pointer to converted wide character string */ | |
fe5dba13 | 164 | \& |
6d1a5754 | 165 | if (argc < 3) { |
d1a71985 | 166 | fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]); |
6d1a5754 MK |
167 | exit(EXIT_FAILURE); |
168 | } | |
fe5dba13 | 169 | \& |
c6beb8a1 | 170 | /* Apply the specified locale. */ |
fe5dba13 | 171 | \& |
6d1a5754 MK |
172 | if (setlocale(LC_ALL, argv[1]) == NULL) { |
173 | perror("setlocale"); | |
174 | exit(EXIT_FAILURE); | |
175 | } | |
fe5dba13 | 176 | \& |
6d1a5754 | 177 | /* Calculate the length required to hold argv[2] converted to |
c6beb8a1 | 178 | a wide character string. */ |
fe5dba13 | 179 | \& |
6d1a5754 | 180 | mbslen = mbstowcs(NULL, argv[2], 0); |
35b07818 | 181 | if (mbslen == (size_t) \-1) { |
6d1a5754 MK |
182 | perror("mbstowcs"); |
183 | exit(EXIT_FAILURE); | |
184 | } | |
fe5dba13 | 185 | \& |
c6beb8a1 | 186 | /* Describe the source string to the user. */ |
fe5dba13 | 187 | \& |
d1a71985 MK |
188 | printf("Length of source string (excluding terminator):\en"); |
189 | printf(" %zu bytes\en", strlen(argv[2])); | |
190 | printf(" %zu multibyte characters\en\en", mbslen); | |
fe5dba13 | 191 | \& |
6d1a5754 | 192 | /* Allocate wide character string of the desired size. Add 1 |
b957f81f | 193 | to allow for terminating null wide character (L\[aq]\e0\[aq]). */ |
fe5dba13 | 194 | \& |
a7751aaf | 195 | wcs = calloc(mbslen + 1, sizeof(*wcs)); |
6d1a5754 MK |
196 | if (wcs == NULL) { |
197 | perror("calloc"); | |
198 | exit(EXIT_FAILURE); | |
199 | } | |
fe5dba13 | 200 | \& |
89851a00 | 201 | /* Convert the multibyte character string in argv[2] to a |
c6beb8a1 | 202 | wide character string. */ |
fe5dba13 | 203 | \& |
35b07818 | 204 | if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) { |
6d1a5754 MK |
205 | perror("mbstowcs"); |
206 | exit(EXIT_FAILURE); | |
207 | } | |
fe5dba13 | 208 | \& |
d1a71985 | 209 | printf("Wide character string is: %ls (%zu characters)\en", |
d917c31d | 210 | wcs, mbslen); |
fe5dba13 | 211 | \& |
6d1a5754 | 212 | /* Now do some inspection of the classes of the characters in |
c6beb8a1 | 213 | the wide character string. */ |
fe5dba13 | 214 | \& |
88893a77 | 215 | for (wchar_t *wp = wcs; *wp != 0; wp++) { |
6d1a5754 | 216 | printf(" %lc ", (wint_t) *wp); |
fe5dba13 | 217 | \& |
6d1a5754 MK |
218 | if (!iswalpha(*wp)) |
219 | printf("!"); | |
220 | printf("alpha "); | |
fe5dba13 | 221 | \& |
6d1a5754 MK |
222 | if (iswalpha(*wp)) { |
223 | if (iswupper(*wp)) | |
224 | printf("upper "); | |
fe5dba13 | 225 | \& |
6d1a5754 MK |
226 | if (iswlower(*wp)) |
227 | printf("lower "); | |
228 | } | |
fe5dba13 | 229 | \& |
b957f81f | 230 | putchar(\[aq]\en\[aq]); |
6d1a5754 | 231 | } |
fe5dba13 | 232 | \& |
6d1a5754 MK |
233 | exit(EXIT_SUCCESS); |
234 | } | |
e7d0bb47 | 235 | .EE |
b0b6ab4e | 236 | .\" SRC END |
47297adb | 237 | .SH SEE ALSO |
6d1a5754 | 238 | .BR mblen (3), |
e8df1b4c | 239 | .BR mbsrtowcs (3), |
6d1a5754 | 240 | .BR mbtowc (3), |
68d31600 MK |
241 | .BR wcstombs (3), |
242 | .BR wctomb (3) |