]>
Commit | Line | Data |
---|---|---|
a1eaacb1 | 1 | '\" t |
fea681da | 2 | .\" Copyright (c) Bruno Haible <haible@clisp.cons.org> |
6d1a5754 | 3 | .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com> |
fea681da | 4 | .\" |
e4a74ca8 | 5 | .\" SPDX-License-Identifier: GPL-2.0-or-later |
fea681da MK |
6 | .\" |
7 | .\" References consulted: | |
8 | .\" GNU glibc-2 source code and manual | |
9 | .\" Dinkumware C library reference http://www.dinkumware.com/ | |
008f1ecc | 10 | .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html |
fea681da MK |
11 | .\" ISO/IEC 9899:1999 |
12 | .\" | |
4c1c5274 | 13 | .TH mbstowcs 3 (date) "Linux man-pages (unreleased)" |
fea681da | 14 | .SH NAME |
d0f17b57 | 15 | mbstowcs \- convert a multibyte string to a wide-character string |
d3ae3435 AC |
16 | .SH LIBRARY |
17 | Standard C library | |
8fc3b2cf | 18 | .RI ( libc ", " \-lc ) |
fea681da MK |
19 | .SH SYNOPSIS |
20 | .nf | |
21 | .B #include <stdlib.h> | |
c6d039a3 | 22 | .P |
1eed67e7 AC |
23 | .BI "size_t mbstowcs(wchar_t " dest "[restrict ." n "], \ |
24 | const char *restrict " src , | |
0634d31b | 25 | .BI " size_t " n ); |
fea681da MK |
26 | .fi |
27 | .SH DESCRIPTION | |
40aa0db0 MK |
28 | If |
29 | .I dest | |
b437fdd9 | 30 | is not NULL, |
60a90ecd MK |
31 | the |
32 | .BR mbstowcs () | |
33 | function converts the | |
40aa0db0 MK |
34 | multibyte string |
35 | .I src | |
36 | to a wide-character string starting at | |
37 | .IR dest . | |
38 | At most | |
39 | .I n | |
40 | wide characters are written to | |
41 | .IR dest . | |
3942b6bc MK |
42 | The sequence of characters in the string |
43 | .I src | |
44 | shall begin in the initial shift state. | |
c13182ef | 45 | The conversion can stop for three reasons: |
cdede5cd | 46 | .IP \[bu] 3 |
bce5f0ae | 47 | An invalid multibyte sequence has been encountered. |
96f2c6a7 | 48 | In this case, |
009df872 | 49 | .I (size_t)\ \-1 |
7d2cb9d5 | 50 | is returned. |
cdede5cd | 51 | .IP \[bu] |
40aa0db0 | 52 | .I n |
b957f81f | 53 | non-L\[aq]\e0\[aq] wide characters have been stored at |
40aa0db0 | 54 | .IR dest . |
96f2c6a7 | 55 | In this case, the number of wide characters written to |
40aa0db0 MK |
56 | .I dest |
57 | is returned, but the | |
fea681da | 58 | shift state at this point is lost. |
cdede5cd | 59 | .IP \[bu] |
bce5f0ae | 60 | The multibyte string has been completely converted, including the |
b957f81f | 61 | terminating null character (\[aq]\e0\[aq]). |
96f2c6a7 | 62 | In this case, the number of wide characters written to |
40aa0db0 MK |
63 | .IR dest , |
64 | excluding the terminating null wide character, is returned. | |
c6d039a3 | 65 | .P |
40aa0db0 MK |
66 | The programmer must ensure that there is room for at least |
67 | .I n | |
68 | wide | |
69 | characters at | |
70 | .IR dest . | |
c6d039a3 | 71 | .P |
40aa0db0 | 72 | If |
1ae6b2c7 | 73 | .I dest |
40aa0db0 MK |
74 | is NULL, |
75 | .I n | |
76 | is ignored, and the conversion proceeds as | |
fea681da MK |
77 | above, except that the converted wide characters are not written out to memory, |
78 | and that no length limit exists. | |
c6d039a3 | 79 | .P |
40aa0db0 MK |
80 | In order to avoid the second of the cases listed above, the programmer should make sure |
81 | .I n | |
82 | is | |
05766b02 | 83 | greater than or equal to |
40aa0db0 | 84 | .IR "mbstowcs(NULL,src,0)+1" . |
47297adb | 85 | .SH RETURN VALUE |
60a90ecd MK |
86 | The |
87 | .BR mbstowcs () | |
88 | function returns the number of wide characters that make | |
d0f17b57 | 89 | up the converted part of the wide-character string, not including the |
c13182ef MK |
90 | terminating null wide character. |
91 | If an invalid multibyte sequence was | |
7d2cb9d5 | 92 | encountered, |
009df872 | 93 | .I (size_t)\ \-1 |
7d2cb9d5 | 94 | is returned. |
cba6ce5e MS |
95 | .SH ATTRIBUTES |
96 | For an explanation of the terms used in this section, see | |
97 | .BR attributes (7). | |
98 | .TS | |
99 | allbox; | |
c466875e | 100 | lbx lb lb |
cba6ce5e MS |
101 | l l l. |
102 | Interface Attribute Value | |
103 | T{ | |
9e54434e BR |
104 | .na |
105 | .nh | |
cba6ce5e MS |
106 | .BR mbstowcs () |
107 | T} Thread safety MT-Safe | |
108 | .TE | |
4131356c AC |
109 | .SH VERSIONS |
110 | The function | |
111 | .BR mbsrtowcs (3) | |
112 | provides a better interface to the same | |
113 | functionality. | |
3113c7f3 | 114 | .SH STANDARDS |
4131356c AC |
115 | C11, POSIX.1-2008. |
116 | .SH HISTORY | |
117 | POSIX.1-2001, C99. | |
fea681da | 118 | .SH NOTES |
d9bfdb9c | 119 | The behavior of |
60a90ecd | 120 | .BR mbstowcs () |
1274071a MK |
121 | depends on the |
122 | .B LC_CTYPE | |
123 | category of the | |
fea681da | 124 | current locale. |
a14af333 | 125 | .SH EXAMPLES |
6d1a5754 MK |
126 | The program below illustrates the use of |
127 | .BR mbstowcs (), | |
509a10ef | 128 | as well as some of the wide-character classification functions. |
6d1a5754 | 129 | An example run is the following: |
c6d039a3 | 130 | .P |
6d1a5754 | 131 | .in +4n |
e646a1ba | 132 | .EX |
6d1a5754 MK |
133 | $ ./t_mbstowcs de_DE.UTF\-8 Grüße! |
134 | Length of source string (excluding terminator): | |
135 | 8 bytes | |
136 | 6 multibyte characters | |
fe5dba13 | 137 | \& |
6d1a5754 | 138 | Wide character string is: Grüße! (6 characters) |
89851a00 MK |
139 | G alpha upper |
140 | r alpha lower | |
141 | ü alpha lower | |
142 | ß alpha lower | |
143 | e alpha lower | |
6d1a5754 | 144 | ! !alpha |
b8302363 | 145 | .EE |
6d1a5754 MK |
146 | .in |
147 | .SS Program source | |
c7885256 | 148 | \& |
b0b6ab4e | 149 | .\" SRC BEGIN (mbstowcs.c) |
e7d0bb47 | 150 | .EX |
6d1a5754 | 151 | #include <locale.h> |
6d1a5754 | 152 | #include <stdio.h> |
6d1a5754 | 153 | #include <stdlib.h> |
ad3868f0 AC |
154 | #include <string.h> |
155 | #include <wchar.h> | |
156 | #include <wctype.h> | |
fe5dba13 | 157 | \& |
6d1a5754 MK |
158 | int |
159 | main(int argc, char *argv[]) | |
160 | { | |
161 | size_t mbslen; /* Number of multibyte characters in source */ | |
162 | wchar_t *wcs; /* Pointer to converted wide character string */ | |
fe5dba13 | 163 | \& |
6d1a5754 | 164 | if (argc < 3) { |
d1a71985 | 165 | fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]); |
6d1a5754 MK |
166 | exit(EXIT_FAILURE); |
167 | } | |
fe5dba13 | 168 | \& |
c6beb8a1 | 169 | /* Apply the specified locale. */ |
fe5dba13 | 170 | \& |
6d1a5754 MK |
171 | if (setlocale(LC_ALL, argv[1]) == NULL) { |
172 | perror("setlocale"); | |
173 | exit(EXIT_FAILURE); | |
174 | } | |
fe5dba13 | 175 | \& |
6d1a5754 | 176 | /* Calculate the length required to hold argv[2] converted to |
c6beb8a1 | 177 | a wide character string. */ |
fe5dba13 | 178 | \& |
6d1a5754 | 179 | mbslen = mbstowcs(NULL, argv[2], 0); |
35b07818 | 180 | if (mbslen == (size_t) \-1) { |
6d1a5754 MK |
181 | perror("mbstowcs"); |
182 | exit(EXIT_FAILURE); | |
183 | } | |
fe5dba13 | 184 | \& |
c6beb8a1 | 185 | /* Describe the source string to the user. */ |
fe5dba13 | 186 | \& |
d1a71985 MK |
187 | printf("Length of source string (excluding terminator):\en"); |
188 | printf(" %zu bytes\en", strlen(argv[2])); | |
189 | printf(" %zu multibyte characters\en\en", mbslen); | |
fe5dba13 | 190 | \& |
6d1a5754 | 191 | /* Allocate wide character string of the desired size. Add 1 |
b957f81f | 192 | to allow for terminating null wide character (L\[aq]\e0\[aq]). */ |
fe5dba13 | 193 | \& |
a7751aaf | 194 | wcs = calloc(mbslen + 1, sizeof(*wcs)); |
6d1a5754 MK |
195 | if (wcs == NULL) { |
196 | perror("calloc"); | |
197 | exit(EXIT_FAILURE); | |
198 | } | |
fe5dba13 | 199 | \& |
89851a00 | 200 | /* Convert the multibyte character string in argv[2] to a |
c6beb8a1 | 201 | wide character string. */ |
fe5dba13 | 202 | \& |
35b07818 | 203 | if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) { |
6d1a5754 MK |
204 | perror("mbstowcs"); |
205 | exit(EXIT_FAILURE); | |
206 | } | |
fe5dba13 | 207 | \& |
d1a71985 | 208 | printf("Wide character string is: %ls (%zu characters)\en", |
d917c31d | 209 | wcs, mbslen); |
fe5dba13 | 210 | \& |
6d1a5754 | 211 | /* Now do some inspection of the classes of the characters in |
c6beb8a1 | 212 | the wide character string. */ |
fe5dba13 | 213 | \& |
88893a77 | 214 | for (wchar_t *wp = wcs; *wp != 0; wp++) { |
6d1a5754 | 215 | printf(" %lc ", (wint_t) *wp); |
fe5dba13 | 216 | \& |
6d1a5754 MK |
217 | if (!iswalpha(*wp)) |
218 | printf("!"); | |
219 | printf("alpha "); | |
fe5dba13 | 220 | \& |
6d1a5754 MK |
221 | if (iswalpha(*wp)) { |
222 | if (iswupper(*wp)) | |
223 | printf("upper "); | |
fe5dba13 | 224 | \& |
6d1a5754 MK |
225 | if (iswlower(*wp)) |
226 | printf("lower "); | |
227 | } | |
fe5dba13 | 228 | \& |
b957f81f | 229 | putchar(\[aq]\en\[aq]); |
6d1a5754 | 230 | } |
fe5dba13 | 231 | \& |
6d1a5754 MK |
232 | exit(EXIT_SUCCESS); |
233 | } | |
e7d0bb47 | 234 | .EE |
b0b6ab4e | 235 | .\" SRC END |
47297adb | 236 | .SH SEE ALSO |
6d1a5754 | 237 | .BR mblen (3), |
e8df1b4c | 238 | .BR mbsrtowcs (3), |
6d1a5754 | 239 | .BR mbtowc (3), |
68d31600 MK |
240 | .BR wcstombs (3), |
241 | .BR wctomb (3) |