]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man3/mbstowcs.3
f92ab681c56f105cc3536b1f8d48111723fe5d6a
[thirdparty/man-pages.git] / man3 / mbstowcs.3
1 '\" t -*- coding: UTF-8 -*-
2 .\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
3 .\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
4 .\"
5 .\" %%%LICENSE_START(GPLv2+_DOC_ONEPARA)
6 .\" This is free documentation; you can redistribute it and/or
7 .\" modify it under the terms of the GNU General Public License as
8 .\" published by the Free Software Foundation; either version 2 of
9 .\" the License, or (at your option) any later version.
10 .\" %%%LICENSE_END
11 .\"
12 .\" References consulted:
13 .\" GNU glibc-2 source code and manual
14 .\" Dinkumware C library reference http://www.dinkumware.com/
15 .\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
16 .\" ISO/IEC 9899:1999
17 .\"
18 .TH MBSTOWCS 3 2020-06-09 "GNU" "Linux Programmer's Manual"
19 .SH NAME
20 mbstowcs \- convert a multibyte string to a wide-character string
21 .SH SYNOPSIS
22 .nf
23 .B #include <stdlib.h>
24 .PP
25 .BI "size_t mbstowcs(wchar_t *" dest ", const char *" src ", size_t " n );
26 .fi
27 .SH DESCRIPTION
28 If
29 .I dest
30 is not NULL,
31 the
32 .BR mbstowcs ()
33 function converts the
34 multibyte string
35 .I src
36 to a wide-character string starting at
37 .IR dest .
38 At most
39 .I n
40 wide characters are written to
41 .IR dest .
42 The sequence of characters in the string
43 .I src
44 shall begin in the initial shift state.
45 The conversion can stop for three reasons:
46 .IP 1. 3
47 An invalid multibyte sequence has been encountered.
48 In this case,
49 .I (size_t)\ \-1
50 is returned.
51 .IP 2.
52 .I n
53 non-L\(aq\e0\(aq wide characters have been stored at
54 .IR dest .
55 In this case, the number of wide characters written to
56 .I dest
57 is returned, but the
58 shift state at this point is lost.
59 .IP 3.
60 The multibyte string has been completely converted, including the
61 terminating null character (\(aq\e0\(aq).
62 In this case, the number of wide characters written to
63 .IR dest ,
64 excluding the terminating null wide character, is returned.
65 .PP
66 The programmer must ensure that there is room for at least
67 .I n
68 wide
69 characters at
70 .IR dest .
71 .PP
72 If
73 .I dest
74 is NULL,
75 .I n
76 is ignored, and the conversion proceeds as
77 above, except that the converted wide characters are not written out to memory,
78 and that no length limit exists.
79 .PP
80 To avoid case 2 above, the programmer should make sure that
81 .I n
82 is
83 greater than or equal to
84 .IR "mbstowcs(NULL,src,0)+1" .
85 .SH RETURN VALUE
86 The
87 .BR mbstowcs ()
88 function returns the number of wide characters that make
89 up the converted part of the wide-character string, not including the
90 terminating null wide character.
91 If an invalid multibyte sequence was
92 encountered,
93 .I (size_t)\ \-1
94 is returned.
95 .SH ATTRIBUTES
96 For an explanation of the terms used in this section, see
97 .BR attributes (7).
98 .TS
99 allbox;
100 lb lb lb
101 l l l.
102 Interface Attribute Value
103 T{
104 .BR mbstowcs ()
105 T} Thread safety MT-Safe
106 .TE
107 .SH CONFORMING TO
108 POSIX.1-2001, POSIX.1-2008, C99.
109 .SH NOTES
110 The behavior of
111 .BR mbstowcs ()
112 depends on the
113 .B LC_CTYPE
114 category of the
115 current locale.
116 .PP
117 The function
118 .BR mbsrtowcs (3)
119 provides a better interface to the same
120 functionality.
121 .SH EXAMPLES
122 The program below illustrates the use of
123 .BR mbstowcs (),
124 as well as some of the wide-character classification functions.
125 An example run is the following:
126 .PP
127 .in +4n
128 .EX
129 $ ./t_mbstowcs de_DE.UTF\-8 Grüße!
130 Length of source string (excluding terminator):
131 8 bytes
132 6 multibyte characters
133
134 Wide character string is: Grüße! (6 characters)
135 G alpha upper
136 r alpha lower
137 ü alpha lower
138 ß alpha lower
139 e alpha lower
140 ! !alpha
141 .EE
142 .in
143 .SS Program source
144 \&
145 .EX
146 #include <wctype.h>
147 #include <locale.h>
148 #include <wchar.h>
149 #include <stdio.h>
150 #include <string.h>
151 #include <stdlib.h>
152
153 int
154 main(int argc, char *argv[])
155 { /* Demo: convert argv[2] under locale argv[1]; classify each character */
156 size_t mbslen; /* Number of multibyte characters in source */
157 wchar_t *wcs; /* Pointer to converted wide character string */
158 wchar_t *wp; /* Cursor for walking the wide character string */
159
160 if (argc < 3) {
161 fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]);
162 exit(EXIT_FAILURE);
163 }
164
165 /* Apply the specified locale (conversion depends on LC_CTYPE) */
166
167 if (setlocale(LC_ALL, argv[1]) == NULL) {
168 perror("setlocale");
169 exit(EXIT_FAILURE);
170 }
171
172 /* Calculate the length required to hold argv[2] converted to
173 a wide character string; with a NULL dest nothing is stored */
174
175 mbslen = mbstowcs(NULL, argv[2], 0);
176 if (mbslen == (size_t) \-1) { /* Invalid multibyte sequence */
177 perror("mbstowcs");
178 exit(EXIT_FAILURE);
179 }
180
181 /* Describe the source string to the user */
182
183 printf("Length of source string (excluding terminator):\en");
184 printf(" %zu bytes\en", strlen(argv[2]));
185 printf(" %zu multibyte characters\en\en", mbslen);
186
187 /* Allocate wide character string of the desired size. Add 1
188 to allow for terminating null wide character (L\(aq\e0\(aq). */
189
190 wcs = calloc(mbslen + 1, sizeof(wchar_t));
191 if (wcs == NULL) {
192 perror("calloc");
193 exit(EXIT_FAILURE);
194 }
195
196 /* Convert the multibyte character string in argv[2] to a
197 wide character string; mbslen + 1 leaves room for the null */
198
199 if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
200 perror("mbstowcs");
201 exit(EXIT_FAILURE);
202 }
203
204 printf("Wide character string is: %ls (%zu characters)\en",
205 wcs, mbslen);
206
207 /* Now do some inspection of the classes of the characters in
208 the wide character string */
209
210 for (wp = wcs; *wp != 0; wp++) { /* Stop at terminating null */
211 printf(" %lc ", (wint_t) *wp);
212
213 if (!iswalpha(*wp)) /* Nonalphabetic: print as !alpha */
214 printf("!");
215 printf("alpha ");
216
217 if (iswalpha(*wp)) { /* Report case only for alphabetic characters */
218 if (iswupper(*wp))
219 printf("upper ");
220
221 if (iswlower(*wp))
222 printf("lower ");
223 }
224
225 putchar(\(aq\en\(aq);
226 }
227
228 exit(EXIT_SUCCESS);
229 }
230 .EE
231 .SH SEE ALSO
232 .BR mblen (3),
233 .BR mbsrtowcs (3),
234 .BR mbtowc (3),
235 .BR wcstombs (3),
236 .BR wctomb (3)