]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man3/mbstowcs.3
19651ea57ffb401d8861f7d134be72f1f4c3fdd0
[thirdparty/man-pages.git] / man3 / mbstowcs.3
1 '\" t
2 .\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
3 .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
4 .\"
5 .\" SPDX-License-Identifier: GPL-2.0-or-later
6 .\"
7 .\" References consulted:
8 .\" GNU glibc-2 source code and manual
9 .\" Dinkumware C library reference http://www.dinkumware.com/
10 .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
11 .\" ISO/IEC 9899:1999
12 .\"
13 .TH mbstowcs 3 (date) "Linux man-pages (unreleased)"
14 .SH NAME
15 mbstowcs \- convert a multibyte string to a wide-character string
16 .SH LIBRARY
17 Standard C library
18 .RI ( libc ", " \-lc )
19 .SH SYNOPSIS
20 .nf
21 .B #include <stdlib.h>
22 .PP
23 .BI "size_t mbstowcs(wchar_t " dest "[restrict ." n "], \
24 const char *restrict " src ,
25 .BI " size_t " n );
26 .fi
27 .SH DESCRIPTION
28 If
29 .I dest
30 is not NULL,
31 the
32 .BR mbstowcs ()
33 function converts the
34 multibyte string
35 .I src
36 to a wide-character string starting at
37 .IR dest .
38 At most
39 .I n
40 wide characters are written to
41 .IR dest .
42 The sequence of characters in the string
43 .I src
44 shall begin in the initial shift state.
45 The conversion can stop for three reasons:
46 .IP \[bu] 3
47 An invalid multibyte sequence has been encountered.
48 In this case,
49 .I (size_t)\ \-1
50 is returned.
51 .IP \[bu]
52 .I n
53 non-L\[aq]\e0\[aq] wide characters have been stored at
54 .IR dest .
55 In this case, the number of wide characters written to
56 .I dest
57 is returned, but the
58 shift state at this point is lost.
59 .IP \[bu]
60 The multibyte string has been completely converted, including the
61 terminating null character (\[aq]\e0\[aq]).
62 In this case, the number of wide characters written to
63 .IR dest ,
64 excluding the terminating null wide character, is returned.
65 .PP
66 The programmer must ensure that there is room for at least
67 .I n
68 wide
69 characters at
70 .IR dest .
71 .PP
72 If
73 .I dest
74 is NULL,
75 .I n
76 is ignored, and the conversion proceeds as
77 above, except that the converted wide characters are not written out to memory,
78 and that no length limit exists.
79 .PP
80 In order to avoid the second case above, the programmer should make sure
81 .I n
82 is
83 greater than or equal to
84 .IR "mbstowcs(NULL,src,0)+1" .
85 .SH RETURN VALUE
86 The
87 .BR mbstowcs ()
88 function returns the number of wide characters that make
89 up the converted part of the wide-character string, not including the
90 terminating null wide character.
91 If an invalid multibyte sequence was
92 encountered,
93 .I (size_t)\ \-1
94 is returned.
95 .SH ATTRIBUTES
96 For an explanation of the terms used in this section, see
97 .BR attributes (7).
98 .ad l
99 .nh
100 .TS
101 allbox;
102 lbx lb lb
103 l l l.
104 Interface Attribute Value
105 T{
106 .BR mbstowcs ()
107 T} Thread safety MT-Safe
108 .TE
109 .hy
110 .ad
111 .sp 1
112 .SH VERSIONS
113 The function
114 .BR mbsrtowcs (3)
115 provides a better interface to the same
116 functionality.
117 .SH STANDARDS
118 C11, POSIX.1-2008.
119 .SH HISTORY
120 POSIX.1-2001, C99.
121 .SH NOTES
122 The behavior of
123 .BR mbstowcs ()
124 depends on the
125 .B LC_CTYPE
126 category of the
127 current locale.
128 .SH EXAMPLES
129 The program below illustrates the use of
130 .BR mbstowcs (),
131 as well as some of the wide-character classification functions.
132 An example run is the following:
133 .PP
134 .in +4n
135 .EX
136 $ ./t_mbstowcs de_DE.UTF\-8 Grüße!
137 Length of source string (excluding terminator):
138 8 bytes
139 6 multibyte characters
140 \&
141 Wide character string is: Grüße! (6 characters)
142 G alpha upper
143 r alpha lower
144 ü alpha lower
145 ß alpha lower
146 e alpha lower
147 ! !alpha
148 .EE
149 .in
150 .SS Program source
151 \&
152 .\" SRC BEGIN (mbstowcs.c)
153 .EX
154 #include <locale.h>
155 #include <stdio.h>
156 #include <stdlib.h>
157 #include <string.h>
158 #include <wchar.h>
159 #include <wctype.h>
160 \&
161 int
162 main(int argc, char *argv[])
163 {
164 size_t mbslen; /* Number of multibyte characters in argv[2], excluding the terminating null */
165 wchar_t *wcs; /* Buffer obtained from calloc() that receives the converted wide character string */
166 \&
167 if (argc < 3) {
168 fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]);
169 exit(EXIT_FAILURE);
170 }
171 \&
172 /* Apply the locale named by argv[1]; the behavior of mbstowcs() depends on its LC_CTYPE category. */
173 \&
174 if (setlocale(LC_ALL, argv[1]) == NULL) {
175 perror("setlocale");
176 exit(EXIT_FAILURE);
177 }
178 \&
179 /* Calculate the length required to hold argv[2] converted to
180 a wide character string. With a NULL destination, nothing is written and the size argument is ignored. */
181 \&
182 mbslen = mbstowcs(NULL, argv[2], 0);
183 if (mbslen == (size_t) \-1) {
184 perror("mbstowcs");
185 exit(EXIT_FAILURE);
186 }
187 \&
188 /* Describe the source string to the user. */
189 \&
190 printf("Length of source string (excluding terminator):\en");
191 printf(" %zu bytes\en", strlen(argv[2]));
192 printf(" %zu multibyte characters\en\en", mbslen);
193 \&
194 /* Allocate wide character string of the desired size. Add 1
195 to allow for terminating null wide character (L\[aq]\e0\[aq]). */
196 \&
197 wcs = calloc(mbslen + 1, sizeof(*wcs));
198 if (wcs == NULL) {
199 perror("calloc");
200 exit(EXIT_FAILURE);
201 }
202 \&
203 /* Convert the multibyte character string in argv[2] to a
204 wide character string. A limit of mbslen + 1 leaves room for the terminating null wide character. */
205 \&
206 if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
207 perror("mbstowcs");
208 exit(EXIT_FAILURE);
209 }
210 \&
211 printf("Wide character string is: %ls (%zu characters)\en",
212 wcs, mbslen);
213 \&
214 /* Now do some inspection of the classes of the characters in
215 the wide character string, one output line per wide character. */
216 \&
217 for (wchar_t *wp = wcs; *wp != 0; wp++) {
218 printf(" %lc ", (wint_t) *wp);
219 \&
220 if (!iswalpha(*wp))
221 printf("!");
222 printf("alpha ");
223 \&
224 if (iswalpha(*wp)) {
225 if (iswupper(*wp))
226 printf("upper ");
227 \&
228 if (iswlower(*wp))
229 printf("lower ");
230 }
231 \&
232 putchar(\[aq]\en\[aq]);
233 }
234 \&
235 exit(EXIT_SUCCESS);
236 }
237 .EE
238 .\" SRC END
239 .SH SEE ALSO
240 .BR mblen (3),
241 .BR mbsrtowcs (3),
242 .BR mbtowc (3),
243 .BR wcstombs (3),
244 .BR wctomb (3)