]>
Commit | Line | Data |
---|---|---|
6d1a5754 | 1 | '\" t -*- coding: UTF-8 -*- |
fea681da | 2 | .\" Copyright (c) Bruno Haible <haible@clisp.cons.org> |
6d1a5754 | 3 | .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com> |
fea681da | 4 | .\" |
89e3ffe9 | 5 | .\" %%%LICENSE_START(GPLv2+_DOC_ONEPARA) |
fea681da MK |
6 | .\" This is free documentation; you can redistribute it and/or |
7 | .\" modify it under the terms of the GNU General Public License as | |
8 | .\" published by the Free Software Foundation; either version 2 of | |
9 | .\" the License, or (at your option) any later version. | |
fe382ebf | 10 | .\" %%%LICENSE_END |
fea681da MK |
11 | .\" |
12 | .\" References consulted: | |
13 | .\" GNU glibc-2 source code and manual | |
14 | .\" Dinkumware C library reference http://www.dinkumware.com/ | |
008f1ecc | 15 | .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html |
fea681da MK |
16 | .\" ISO/IEC 9899:1999 |
17 | .\" | |
4b8c67d9 | 18 | .TH MBSTOWCS 3 2017-09-15 "GNU" "Linux Programmer's Manual" |
fea681da | 19 | .SH NAME |
d0f17b57 | 20 | mbstowcs \- convert a multibyte string to a wide-character string |
fea681da MK |
21 | .SH SYNOPSIS |
22 | .nf | |
23 | .B #include <stdlib.h> | |
68e4db0a | 24 | .PP |
fea681da MK |
25 | .BI "size_t mbstowcs(wchar_t *" dest ", const char *" src ", size_t " n ); |
26 | .fi | |
27 | .SH DESCRIPTION | |
40aa0db0 MK |
28 | If |
29 | .I dest | |
b437fdd9 | 30 | is not NULL, |
60a90ecd MK |
31 | the |
32 | .BR mbstowcs () | |
33 | function converts the | |
40aa0db0 MK |
34 | multibyte string |
35 | .I src | |
36 | to a wide-character string starting at | |
37 | .IR dest . | |
38 | At most | |
39 | .I n | |
40 | wide characters are written to | |
41 | .IR dest . | |
3942b6bc MK |
42 | The sequence of characters in the string |
43 | .I src | |
44 | shall begin in the initial shift state. | |
c13182ef | 45 | The conversion can stop for three reasons: |
bce5f0ae MK |
46 | .IP 1. 3 |
47 | An invalid multibyte sequence has been encountered. | |
96f2c6a7 | 48 | In this case, |
009df872 | 49 | .I (size_t)\ \-1 |
7d2cb9d5 | 50 | is returned. |
bce5f0ae | 51 | .IP 2. |
40aa0db0 MK |
52 | .I n |
53 | non-L\(aq\\0\(aq wide characters have been stored at | |
54 | .IR dest . | |
96f2c6a7 | 55 | In this case, the number of wide characters written to |
40aa0db0 MK |
56 | .I dest |
57 | is returned, but the | |
fea681da | 58 | shift state at this point is lost. |
bce5f0ae MK |
59 | .IP 3. |
60 | The multibyte string has been completely converted, including the | |
bece0315 | 61 | terminating null character (\(aq\\0\(aq). |
96f2c6a7 | 62 | In this case, the number of wide characters written to |
40aa0db0 MK |
63 | .IR dest , |
64 | excluding the terminating null wide character, is returned. | |
fea681da | 65 | .PP |
40aa0db0 MK |
66 | The programmer must ensure that there is room for at least |
67 | .I n | |
68 | wide | |
69 | characters at | |
70 | .IR dest . | |
fea681da | 71 | .PP |
40aa0db0 | 72 | If |
51700fd7 | 73 | .I dest
40aa0db0 MK |
74 | is NULL, |
75 | .I n | |
76 | is ignored, and the conversion proceeds as | |
fea681da MK |
77 | above, except that the converted wide characters are not written out to memory, |
78 | and that no length limit exists. | |
79 | .PP | |
40aa0db0 MK |
80 | In order to avoid case 2 above, the programmer should make sure that |
81 | .I n | |
82 | is | |
05766b02 | 83 | greater than or equal to |
40aa0db0 | 84 | .IR "mbstowcs(NULL,src,0)+1" . |
47297adb | 85 | .SH RETURN VALUE |
60a90ecd MK |
86 | The |
87 | .BR mbstowcs () | |
88 | function returns the number of wide characters that make | |
d0f17b57 | 89 | up the converted part of the wide-character string, not including the |
c13182ef MK |
90 | terminating null wide character. |
91 | If an invalid multibyte sequence was | |
7d2cb9d5 | 92 | encountered, |
009df872 | 93 | .I (size_t)\ \-1 |
7d2cb9d5 | 94 | is returned. |
cba6ce5e MS |
95 | .SH ATTRIBUTES |
96 | For an explanation of the terms used in this section, see | |
97 | .BR attributes (7). | |
98 | .TS | |
99 | allbox; | |
100 | lb lb lb | |
101 | l l l. | |
102 | Interface Attribute Value | |
103 | T{ | |
104 | .BR mbstowcs () | |
105 | T} Thread safety MT-Safe | |
106 | .TE | |
47297adb | 107 | .SH CONFORMING TO |
10401713 | 108 | POSIX.1-2001, POSIX.1-2008, C99. |
fea681da | 109 | .SH NOTES |
d9bfdb9c | 110 | The behavior of |
60a90ecd | 111 | .BR mbstowcs () |
1274071a MK |
112 | depends on the |
113 | .B LC_CTYPE | |
114 | category of the | |
fea681da MK |
115 | current locale. |
116 | .PP | |
60a90ecd MK |
117 | The function |
118 | .BR mbsrtowcs (3) | |
119 | provides a better interface to the same | |
fea681da | 120 | functionality. |
6d1a5754 MK |
121 | .SH EXAMPLE |
122 | The program below illustrates the use of | |
123 | .BR mbstowcs (), | |
509a10ef | 124 | as well as some of the wide-character classification functions. |
6d1a5754 | 125 | An example run is the following: |
e646a1ba | 126 | .PP |
6d1a5754 | 127 | .in +4n |
e646a1ba | 128 | .EX |
6d1a5754 MK |
129 | $ ./t_mbstowcs de_DE.UTF\-8 Grüße! |
130 | Length of source string (excluding terminator): | |
131 | 8 bytes | |
132 | 6 multibyte characters | |
133 | ||
134 | Wide character string is: Grüße! (6 characters) | |
89851a00 MK |
135 | G alpha upper |
136 | r alpha lower | |
137 | ü alpha lower | |
138 | ß alpha lower | |
139 | e alpha lower | |
6d1a5754 | 140 | ! !alpha |
b8302363 | 141 | .EE |
6d1a5754 MK |
142 | .in |
143 | .SS Program source | |
c7885256 | 144 | \& |
e7d0bb47 | 145 | .EX |
a57bca4d | 146 | #include <wctype.h> |
6d1a5754 MK |
147 | #include <locale.h> |
148 | #include <wchar.h> | |
149 | #include <stdio.h> | |
150 | #include <string.h> | |
151 | #include <stdlib.h> | |
152 | ||
153 | int | |
154 | main(int argc, char *argv[]) | |
155 | { | |
156 | size_t mbslen; /* Number of multibyte characters in source */ | |
157 | wchar_t *wcs; /* Pointer to converted wide character string */ | |
158 | wchar_t *wp; | |
159 | ||
160 | if (argc < 3) { | |
161 | fprintf(stderr, "Usage: %s <locale> <string>\\n", argv[0]); | |
162 | exit(EXIT_FAILURE); | |
163 | } | |
164 | ||
165 | /* Apply the specified locale */ | |
166 | ||
167 | if (setlocale(LC_ALL, argv[1]) == NULL) { | |
168 | perror("setlocale"); | |
169 | exit(EXIT_FAILURE); | |
170 | } | |
171 | ||
172 | /* Calculate the length required to hold argv[2] converted to | |
173 | a wide character string */ | |
89851a00 | 174 | |
6d1a5754 | 175 | mbslen = mbstowcs(NULL, argv[2], 0); |
35b07818 | 176 | if (mbslen == (size_t) \-1) { |
6d1a5754 MK |
177 | perror("mbstowcs"); |
178 | exit(EXIT_FAILURE); | |
179 | } | |
180 | ||
181 | /* Describe the source string to the user */ | |
182 | ||
183 | printf("Length of source string (excluding terminator):\\n"); | |
a810c579 MK |
184 | printf(" %zu bytes\\n", strlen(argv[2])); |
185 | printf(" %zu multibyte characters\\n\\n", mbslen); | |
6d1a5754 MK |
186 | |
187 | /* Allocate wide character string of the desired size. Add 1 | |
188 | to allow for terminating null wide character (L\(aq\\0\(aq). */ | |
189 | ||
190 | wcs = calloc(mbslen + 1, sizeof(wchar_t)); | |
191 | if (wcs == NULL) { | |
192 | perror("calloc"); | |
193 | exit(EXIT_FAILURE); | |
194 | } | |
89851a00 MK |
195 | |
196 | /* Convert the multibyte character string in argv[2] to a | |
6d1a5754 MK |
197 | wide character string */ |
198 | ||
35b07818 | 199 | if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) { |
6d1a5754 MK |
200 | perror("mbstowcs"); |
201 | exit(EXIT_FAILURE); | |
202 | } | |
203 | ||
a810c579 MK |
204 | printf("Wide character string is: %ls (%zu characters)\\n", |
205 | wcs, mbslen); | |
6d1a5754 MK |
206 | |
207 | /* Now do some inspection of the classes of the characters in | |
208 | the wide character string */ | |
89851a00 | 209 | |
6d1a5754 MK |
210 | for (wp = wcs; *wp != 0; wp++) { |
211 | printf(" %lc ", (wint_t) *wp); | |
212 | ||
213 | if (!iswalpha(*wp)) | |
214 | printf("!"); | |
215 | printf("alpha "); | |
216 | ||
217 | if (iswalpha(*wp)) { | |
218 | if (iswupper(*wp)) | |
219 | printf("upper "); | |
220 | ||
221 | if (iswlower(*wp)) | |
222 | printf("lower "); | |
223 | } | |
224 | ||
225 | putchar(\(aq\\n\(aq); | |
226 | } | |
227 | ||
228 | exit(EXIT_SUCCESS); | |
229 | } | |
e7d0bb47 | 230 | .EE |
47297adb | 231 | .SH SEE ALSO |
6d1a5754 | 232 | .BR mblen (3), |
e8df1b4c | 233 | .BR mbsrtowcs (3), |
6d1a5754 | 234 | .BR mbtowc (3), |
68d31600 MK |
235 | .BR wcstombs (3), |
236 | .BR wctomb (3) |