]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man3/mbstowcs.3
Many pages: Use correct letter case in page titles (TH)
[thirdparty/man-pages.git] / man3 / mbstowcs.3
1 .\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
2 .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
3 .\"
4 .\" SPDX-License-Identifier: GPL-2.0-or-later
5 .\"
6 .\" References consulted:
7 .\" GNU glibc-2 source code and manual
8 .\" Dinkumware C library reference http://www.dinkumware.com/
9 .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
10 .\" ISO/IEC 9899:1999
11 .\"
12 .TH mbstowcs 3 (date) "Linux man-pages (unreleased)"
13 .SH NAME
14 mbstowcs \- convert a multibyte string to a wide-character string
15 .SH LIBRARY
16 Standard C library
17 .RI ( libc ", " \-lc )
18 .SH SYNOPSIS
19 .nf
20 .B #include <stdlib.h>
21 .PP
22 .BI "size_t mbstowcs(wchar_t *restrict " dest ", const char *restrict " src ,
23 .BI " size_t " n );
24 .fi
25 .SH DESCRIPTION
26 If
27 .I dest
28 is not NULL,
29 the
30 .BR mbstowcs ()
31 function converts the
32 multibyte string
33 .I src
34 to a wide-character string starting at
35 .IR dest .
36 At most
37 .I n
38 wide characters are written to
39 .IR dest .
40 The sequence of characters in the string
41 .I src
42 shall begin in the initial shift state.
43 The conversion can stop for three reasons:
44 .IP \(bu 3
45 An invalid multibyte sequence has been encountered.
46 In this case,
47 .I (size_t)\ \-1
48 is returned.
49 .IP \(bu
50 .I n
51 non-L\(aq\e0\(aq wide characters have been stored at
52 .IR dest .
53 In this case, the number of wide characters written to
54 .I dest
55 is returned, but the
56 shift state at this point is lost.
57 .IP \(bu
58 The multibyte string has been completely converted, including the
59 terminating null character (\(aq\e0\(aq).
60 In this case, the number of wide characters written to
61 .IR dest ,
62 excluding the terminating null wide character, is returned.
63 .PP
64 The programmer must ensure that there is room for at least
65 .I n
66 wide
67 characters at
68 .IR dest .
69 .PP
70 If
71 .I dest
72 is NULL,
73 .I n
74 is ignored, and the conversion proceeds as
75 above, except that the converted wide characters are not written out to memory,
76 and that no length limit exists.
77 .PP
78 In order to avoid the second case above (where the result is not null-terminated), the programmer should make sure
79 .I n
80 is
81 greater than or equal to
82 .IR "mbstowcs(NULL,src,0)+1" .
83 .SH RETURN VALUE
84 The
85 .BR mbstowcs ()
86 function returns the number of wide characters that make
87 up the converted part of the wide-character string, not including the
88 terminating null wide character.
89 If an invalid multibyte sequence was
90 encountered,
91 .I (size_t)\ \-1
92 is returned.
93 .SH ATTRIBUTES
94 For an explanation of the terms used in this section, see
95 .BR attributes (7).
96 .ad l
97 .nh
98 .TS
99 allbox;
100 lbx lb lb
101 l l l.
102 Interface Attribute Value
103 T{
104 .BR mbstowcs ()
105 T} Thread safety MT-Safe
106 .TE
107 .hy
108 .ad
109 .sp 1
110 .SH STANDARDS
111 POSIX.1-2001, POSIX.1-2008, C99.
112 .SH NOTES
113 The behavior of
114 .BR mbstowcs ()
115 depends on the
116 .B LC_CTYPE
117 category of the
118 current locale.
119 .PP
120 The function
121 .BR mbsrtowcs (3)
122 provides a better interface to the same
123 functionality.
124 .SH EXAMPLES
125 The program below illustrates the use of
126 .BR mbstowcs (),
127 as well as some of the wide-character classification functions.
128 An example run is the following:
129 .PP
130 .in +4n
131 .EX
132 $ ./t_mbstowcs de_DE.UTF\-8 Grüße!
133 Length of source string (excluding terminator):
134 8 bytes
135 6 multibyte characters
136
137 Wide character string is: Grüße! (6 characters)
138 G alpha upper
139 r alpha lower
140 ü alpha lower
141 ß alpha lower
142 e alpha lower
143 ! !alpha
144 .EE
145 .in
146 .SS Program source
147 \&
148 .\" SRC BEGIN (mbstowcs.c)
149 .EX
150 #include <locale.h>
151 #include <stdio.h>
152 #include <stdlib.h>
153 #include <string.h>
154 #include <wchar.h>
155 #include <wctype.h>
156
157 int
158 main(int argc, char *argv[]) /* argv[1]: locale name; argv[2]: multibyte string to convert and classify */
159 {
160 size_t mbslen; /* Number of multibyte characters in source */
161 wchar_t *wcs; /* Pointer to converted wide character string */
162
163 if (argc < 3) {
164 fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]);
165 exit(EXIT_FAILURE);
166 }
167
168 /* Apply the specified locale; the conversion below depends on its LC_CTYPE category. */
169
170 if (setlocale(LC_ALL, argv[1]) == NULL) {
171 perror("setlocale"); /* NOTE(review): errno may not be set by a failed setlocale(), so this message can be uninformative; confirm */
172 exit(EXIT_FAILURE);
173 }
174
175 /* Calculate the length required to hold argv[2] converted to
176 a wide character string. With a NULL destination nothing is stored and the count argument is ignored. */
177
178 mbslen = mbstowcs(NULL, argv[2], 0);
179 if (mbslen == (size_t) \-1) { /* invalid multibyte sequence in argv[2] */
180 perror("mbstowcs");
181 exit(EXIT_FAILURE);
182 }
183
184 /* Describe the source string to the user: byte count from strlen(), character count from mbstowcs(). */
185
186 printf("Length of source string (excluding terminator):\en");
187 printf(" %zu bytes\en", strlen(argv[2]));
188 printf(" %zu multibyte characters\en\en", mbslen);
189
190 /* Allocate wide character string of the desired size. Add 1
191 to allow for terminating null wide character (L\(aq\e0\(aq). */
192
193 wcs = calloc(mbslen + 1, sizeof(*wcs));
194 if (wcs == NULL) {
195 perror("calloc");
196 exit(EXIT_FAILURE);
197 }
198
199 /* Convert the multibyte character string in argv[2] to a
200 wide character string. The limit of mbslen + 1 leaves room for the terminating null wide character to be stored as well. */
201
202 if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
203 perror("mbstowcs");
204 exit(EXIT_FAILURE);
205 }
206
207 printf("Wide character string is: %ls (%zu characters)\en",
208 wcs, mbslen);
209
210 /* Now do some inspection of the classes of the characters in
211 the wide character string. One output line is printed per character, stopping at the terminating null. */
212
213 for (wchar_t *wp = wcs; *wp != 0; wp++) {
214 printf(" %lc ", (wint_t) *wp); /* the lc conversion takes a wint_t argument, hence the cast */
215
216 if (!iswalpha(*wp)) /* non-alphabetic characters are reported as "!alpha" */
217 printf("!");
218 printf("alpha ");
219
220 if (iswalpha(*wp)) { /* case is reported only for alphabetic characters */
221 if (iswupper(*wp))
222 printf("upper ");
223
224 if (iswlower(*wp))
225 printf("lower ");
226 }
227
228 putchar(\(aq\en\(aq);
229 }
230
231 exit(EXIT_SUCCESS); /* wcs is not freed explicitly; the process exits here */
232 }
233 .EE
234 .\" SRC END
235 .SH SEE ALSO
236 .BR mblen (3),
237 .BR mbsrtowcs (3),
238 .BR mbtowc (3),
239 .BR wcstombs (3),
240 .BR wctomb (3)