]>
Commit | Line | Data |
---|---|---|
6d1a5754 | 1 | '\" t -*- coding: UTF-8 -*- |
fea681da | 2 | .\" Copyright (c) Bruno Haible <haible@clisp.cons.org> |
6d1a5754 | 3 | .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com> |
fea681da | 4 | .\" |
89e3ffe9 | 5 | .\" %%%LICENSE_START(GPLv2+_DOC_ONEPARA) |
fea681da MK |
6 | .\" This is free documentation; you can redistribute it and/or |
7 | .\" modify it under the terms of the GNU General Public License as | |
8 | .\" published by the Free Software Foundation; either version 2 of | |
9 | .\" the License, or (at your option) any later version. | |
fe382ebf | 10 | .\" %%%LICENSE_END |
fea681da MK |
11 | .\" |
12 | .\" References consulted: | |
13 | .\" GNU glibc-2 source code and manual | |
14 | .\" Dinkumware C library reference http://www.dinkumware.com/ | |
008f1ecc | 15 | .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html |
fea681da MK |
16 | .\" ISO/IEC 9899:1999 |
17 | .\" | |
460495ca | 18 | .TH MBSTOWCS 3 2015-08-08 "GNU" "Linux Programmer's Manual" |
fea681da | 19 | .SH NAME |
d0f17b57 | 20 | mbstowcs \- convert a multibyte string to a wide-character string |
fea681da MK |
21 | .SH SYNOPSIS |
22 | .nf | |
23 | .B #include <stdlib.h> | |
24 | .sp | |
25 | .BI "size_t mbstowcs(wchar_t *" dest ", const char *" src ", size_t " n ); | |
26 | .fi | |
27 | .SH DESCRIPTION | |
40aa0db0 MK |
28 | If |
29 | .I dest | |
b437fdd9 | 30 | is not NULL, |
60a90ecd MK |
31 | the |
32 | .BR mbstowcs () | |
33 | function converts the | |
40aa0db0 MK |
34 | multibyte string |
35 | .I src | |
36 | to a wide-character string starting at | |
37 | .IR dest . | |
38 | At most | |
39 | .I n | |
40 | wide characters are written to | |
41 | .IR dest . | |
c13182ef MK |
42 | The conversion starts |
43 | in the initial state. | |
44 | The conversion can stop for three reasons: | |
bce5f0ae MK |
45 | .IP 1. 3 |
46 | An invalid multibyte sequence has been encountered. | |
96f2c6a7 | 47 | In this case, |
009df872 | 48 | .I (size_t)\ \-1 |
7d2cb9d5 | 49 | is returned. |
bce5f0ae | 50 | .IP 2. |
40aa0db0 MK |
51 | .I n |
52 | non-L\(aq\\0\(aq wide characters have been stored at | |
53 | .IR dest . | |
96f2c6a7 | 54 | In this case, the number of wide characters written to |
40aa0db0 MK |
55 | .I dest |
56 | is returned, but the | |
fea681da | 57 | shift state at this point is lost. |
bce5f0ae MK |
58 | .IP 3. |
59 | The multibyte string has been completely converted, including the | |
e9c23bc6 | 60 | terminating null wide character (\(aq\\0\(aq). |
96f2c6a7 | 61 | In this case, the number of wide characters written to |
40aa0db0 MK |
62 | .IR dest , |
63 | excluding the terminating null wide character, is returned. | |
fea681da | 64 | .PP |
40aa0db0 MK |
65 | The programmer must ensure that there is room for at least |
66 | .I n | |
67 | wide | |
68 | characters at | |
69 | .IR dest . | |
fea681da | 70 | .PP |
40aa0db0 | 71 | If |
51700fd7 | 72 | .IR dest |
40aa0db0 MK |
73 | is NULL, |
74 | .I n | |
75 | is ignored, and the conversion proceeds as | |
fea681da MK |
76 | above, except that the converted wide characters are not written out to memory, |
77 | and that no length limit exists. | |
78 | .PP | |
40aa0db0 MK |
79 | In order to avoid case 2 above, the programmer should make sure
80 | .I n | |
81 | is | |
05766b02 | 82 | greater than or equal to |
40aa0db0 | 83 | .IR "mbstowcs(NULL,src,0)+1" . |
47297adb | 84 | .SH RETURN VALUE |
60a90ecd MK |
85 | The |
86 | .BR mbstowcs () | |
87 | function returns the number of wide characters that make | |
d0f17b57 | 88 | up the converted part of the wide-character string, not including the |
c13182ef MK |
89 | terminating null wide character. |
90 | If an invalid multibyte sequence was | |
7d2cb9d5 | 91 | encountered, |
009df872 | 92 | .I (size_t)\ \-1 |
7d2cb9d5 | 93 | is returned. |
cba6ce5e MS |
94 | .SH ATTRIBUTES |
95 | For an explanation of the terms used in this section, see | |
96 | .BR attributes (7). | |
97 | .TS | |
98 | allbox; | |
99 | lb lb lb | |
100 | l l l. | |
101 | Interface Attribute Value | |
102 | T{ | |
103 | .BR mbstowcs () | |
104 | T} Thread safety MT-Safe | |
105 | .TE | |
47297adb | 106 | .SH CONFORMING TO |
10401713 | 107 | POSIX.1-2001, POSIX.1-2008, C99. |
fea681da | 108 | .SH NOTES |
d9bfdb9c | 109 | The behavior of |
60a90ecd | 110 | .BR mbstowcs () |
1274071a MK |
111 | depends on the |
112 | .B LC_CTYPE | |
113 | category of the | |
fea681da MK |
114 | current locale. |
115 | .PP | |
60a90ecd MK |
116 | The function |
117 | .BR mbsrtowcs (3) | |
118 | provides a better interface to the same | |
fea681da | 119 | functionality. |
6d1a5754 MK |
120 | .SH EXAMPLE |
121 | The program below illustrates the use of | |
122 | .BR mbstowcs (), | |
509a10ef | 123 | as well as some of the wide-character classification functions.
6d1a5754 MK |
124 | An example run is the following: |
125 | .in +4n | |
126 | .nf | |
127 | ||
128 | $ ./t_mbstowcs de_DE.UTF\-8 Grüße! | |
129 | Length of source string (excluding terminator): | |
130 | 8 bytes | |
131 | 6 multibyte characters | |
132 | ||
133 | Wide character string is: Grüße! (6 characters) | |
89851a00 MK |
134 | G alpha upper |
135 | r alpha lower | |
136 | ü alpha lower | |
137 | ß alpha lower | |
138 | e alpha lower | |
6d1a5754 MK |
139 | ! !alpha |
140 | .fi | |
141 | .in | |
142 | .SS Program source | |
143 | .nf | |
144 | #include <locale.h> | |
145 | #include <wchar.h> | |
146 | #include <stdio.h> | |
147 | #include <string.h> | |
148 | #include <stdlib.h> | |
149 | ||
150 | int | |
151 | main(int argc, char *argv[]) | |
152 | { | |
153 | size_t mbslen; /* Number of multibyte characters in source */ | |
154 | wchar_t *wcs; /* Pointer to converted wide character string */ | |
155 | wchar_t *wp; | |
156 | ||
157 | if (argc < 3) { | |
158 | fprintf(stderr, "Usage: %s <locale> <string>\\n", argv[0]); | |
159 | exit(EXIT_FAILURE); | |
160 | } | |
161 | ||
162 | /* Apply the specified locale */ | |
163 | ||
164 | if (setlocale(LC_ALL, argv[1]) == NULL) { | |
165 | perror("setlocale"); | |
166 | exit(EXIT_FAILURE); | |
167 | } | |
168 | ||
169 | /* Calculate the length required to hold argv[2] converted to | |
170 | a wide character string */ | |
89851a00 | 171 | |
6d1a5754 | 172 | mbslen = mbstowcs(NULL, argv[2], 0); |
35b07818 | 173 | if (mbslen == (size_t) \-1) { |
6d1a5754 MK |
174 | perror("mbstowcs"); |
175 | exit(EXIT_FAILURE); | |
176 | } | |
177 | ||
178 | /* Describe the source string to the user */ | |
179 | ||
180 | printf("Length of source string (excluding terminator):\\n"); | |
a810c579 MK |
181 | printf(" %zu bytes\\n", strlen(argv[2])); |
182 | printf(" %zu multibyte characters\\n\\n", mbslen); | |
6d1a5754 MK |
183 | |
184 | /* Allocate wide character string of the desired size. Add 1 | |
185 | to allow for terminating null wide character (L\(aq\\0\(aq). */ | |
186 | ||
187 | wcs = calloc(mbslen + 1, sizeof(wchar_t)); | |
188 | if (wcs == NULL) { | |
189 | perror("calloc"); | |
190 | exit(EXIT_FAILURE); | |
191 | } | |
89851a00 MK |
192 | |
193 | /* Convert the multibyte character string in argv[2] to a | |
6d1a5754 MK |
194 | wide character string */ |
195 | ||
35b07818 | 196 | if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) { |
6d1a5754 MK |
197 | perror("mbstowcs"); |
198 | exit(EXIT_FAILURE); | |
199 | } | |
200 | ||
a810c579 MK |
201 | printf("Wide character string is: %ls (%zu characters)\\n", |
202 | wcs, mbslen); | |
6d1a5754 MK |
203 | |
204 | /* Now do some inspection of the classes of the characters in | |
205 | the wide character string */ | |
89851a00 | 206 | |
6d1a5754 MK |
207 | for (wp = wcs; *wp != 0; wp++) { |
208 | printf(" %lc ", (wint_t) *wp); | |
209 | ||
210 | if (!iswalpha(*wp)) | |
211 | printf("!"); | |
212 | printf("alpha "); | |
213 | ||
214 | if (iswalpha(*wp)) { | |
215 | if (iswupper(*wp)) | |
216 | printf("upper "); | |
217 | ||
218 | if (iswlower(*wp)) | |
219 | printf("lower "); | |
220 | } | |
221 | ||
222 | putchar(\(aq\\n\(aq); | |
223 | } | |
224 | ||
225 | exit(EXIT_SUCCESS); | |
226 | } | |
227 | .fi | |
47297adb | 228 | .SH SEE ALSO |
6d1a5754 | 229 | .BR mblen (3), |
e8df1b4c | 230 | .BR mbsrtowcs (3), |
6d1a5754 MK |
231 | .BR mbtowc (3), |
232 | .BR wctomb (3), | |
6fdbc779 | 233 | .BR wcstombs (3) |