]>
Commit | Line | Data |
---|---|---|
29d25b54 CR |
1 | /* casemod.c -- functions to change case of strings */ |
2 | ||
012bac39 | 3 | /* Copyright (C) 2008,2009 Free Software Foundation, Inc. |
29d25b54 CR |
4 | |
5 | This file is part of GNU Bash, the Bourne Again SHell. | |
6 | ||
2e4498b3 CR |
7 | Bash is free software: you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation, either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | Bash is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with Bash. If not, see <http://www.gnu.org/licenses/>. | |
19 | */ | |
29d25b54 CR |
20 | |
21 | #if defined (HAVE_CONFIG_H) | |
22 | # include <config.h> | |
23 | #endif | |
24 | ||
25 | #if defined (HAVE_UNISTD_H) | |
26 | # include <unistd.h> | |
27 | #endif /* HAVE_UNISTD_H */ | |
28 | ||
29 | #include <stdc.h> | |
30 | ||
31 | #include <bashansi.h> | |
32 | #include <bashintl.h> | |
33 | #include <bashtypes.h> | |
34 | ||
35 | #include <stdio.h> | |
36 | #include <ctype.h> | |
37 | #include <xmalloc.h> | |
38 | ||
49cf7828 | 39 | #include <shmbchar.h> |
29d25b54 CR |
40 | #include <shmbutil.h> |
41 | #include <chartypes.h> | |
1442f67c | 42 | #include <typemax.h> |
29d25b54 CR |
43 | |
44 | #include <glob/strmatch.h> | |
45 | ||
/* Wide-character case conversions that only touch characters of the
   opposite case; anything else is returned unchanged. */
#define _to_wupper(wc)	(iswlower (wc) ? towupper (wc) : (wc))
#define _to_wlower(wc)	(iswupper (wc) ? towlower (wc) : (wc))

#if !defined (HANDLE_MULTIBYTE)
/* Single-byte build: emulate the wide-character interfaces used by
   sh_modcase() with their <ctype.h> counterparts.  cval() yields a plain
   `char', which may be negative where char is signed, so the value is
   converted to unsigned char before it reaches a raw <ctype.h> function;
   passing a negative value other than EOF to those functions is undefined. */
# define cval(s, i)	((s)[(i)])
# define iswalnum(c)	(isalnum ((unsigned char)(c)))
# define TOGGLE(x)	(ISUPPER (x) ? tolower ((unsigned char)(x)) : (TOUPPER (x)))
#else
/* Multibyte build: flip the case of the wide character X. */
# define TOGGLE(x)	(iswupper (x) ? towlower (x) : (_to_wupper(x)))
#endif
56 | ||
57 | /* Operation codes accepted in the FLAGS argument of sh_modcase(). These must agree with the defines in externs.h */ | |
e141c35a CR |
58 | #define CASE_NOOP 0x0000 /* leave the character unchanged */ |
59 | #define CASE_LOWER 0x0001 /* convert to lowercase */ | |
60 | #define CASE_UPPER 0x0002 /* convert to uppercase */ | |
61 | #define CASE_CAPITALIZE 0x0004 /* uppercase the first character, lowercase the rest */ | |
62 | #define CASE_UNCAP 0x0008 /* lowercase the first character, uppercase the rest */ | |
63 | #define CASE_TOGGLE 0x0010 /* toggle the case of the first character of each word only */ | |
64 | #define CASE_TOGGLEALL 0x0020 /* toggle the case of every character */ | |
65 | #define CASE_UPFIRST 0x0040 /* uppercase the first character, leave the rest unchanged */ | |
66 | #define CASE_LOWFIRST 0x0080 /* lowercase the first character, leave the rest unchanged */ | |
67 | ||
68 | #define CASE_USEWORDS 0x1000 /* modifier bit, masked off before the operation is examined: apply the "first character" operations to each alphanumeric word in the passed string instead of to the string as a whole */ | |
29d25b54 CR |
69 | |
70 | extern char *substring __P((char *, int, int)); /* defined outside this file; sh_modcase() uses it to copy out a single character for pattern matching and frees the result */ | |
71 | ||
1442f67c CR |
72 | #ifndef UCHAR_MAX /* fallback for when no included header supplied UCHAR_MAX */ |
73 | # define UCHAR_MAX TYPE_MAXIMUM(unsigned char) | |
74 | #endif | |
75 | ||
29d25b54 CR |
#if defined (HANDLE_MULTIBYTE)
/* Return the character that begins at byte offset I of the null-terminated
   string S as a wide character.  The multibyte decoder is consulted only
   when the locale is multibyte, the byte at S[I] is not a basic character,
   and at least one more byte follows it; in every other case, and whenever
   decoding fails or produces a null wide character, the byte S[I] itself is
   returned converted to wchar_t. */
static wchar_t
cval (s, i)
     char *s;
     int i;
{
  mbstate_t ps;
  wchar_t decoded;
  size_t nbytes;
  int len;

  if (MB_CUR_MAX != 1 && is_basic (s[i]) == 0)
    {
      len = strlen (s);
      if (i < (len - 1))
	{
	  /* Decode from a fresh shift state each time. */
	  memset (&ps, 0, sizeof (mbstate_t));
	  nbytes = mbrtowc (&decoded, s + i, len - i, &ps);
	  if (MB_INVALIDCH (nbytes) == 0 && MB_NULLWCH (nbytes) == 0)
	    return decoded;
	}
    }

  return ((wchar_t)s[i]);
}
#endif
99 | ||
100 | /* Modify the case of characters in STRING matching PAT based on the value of | |
101 | FLAGS. If PAT is null, modify the case of each character */ | |
102 | char * | |
103 | sh_modcase (string, pat, flags) | |
104 | const char *string; | |
105 | char *pat; | |
106 | int flags; | |
107 | { | |
108 | int start, next, end; | |
e141c35a | 109 | int inword, c, nc, nop, match, usewords; |
29d25b54 CR |
110 | char *ret, *s; |
111 | wchar_t wc; | |
112 | #if defined (HANDLE_MULTIBYTE) | |
113 | wchar_t nwc; | |
114 | char mb[MB_LEN_MAX+1]; | |
115 | int mlen; | |
bf6bd355 | 116 | size_t m; |
29d25b54 CR |
117 | mbstate_t state; |
118 | #endif | |
119 | ||
5f8cde23 CR |
120 | if (string == 0 || *string == 0) |
121 | { | |
122 | ret = (char *)xmalloc (1); | |
123 | ret[0] = '\0'; | |
124 | return ret; | |
125 | } | |
126 | ||
29d25b54 CR |
127 | #if defined (HANDLE_MULTIBYTE) |
128 | memset (&state, 0, sizeof (mbstate_t)); | |
129 | #endif | |
130 | ||
131 | start = 0; | |
132 | end = strlen (string); | |
133 | ||
134 | ret = (char *)xmalloc (end + 1); | |
135 | strcpy (ret, string); | |
136 | ||
e141c35a CR |
137 | /* See if we are supposed to split on alphanumerics and operate on each word */ |
138 | usewords = (flags & CASE_USEWORDS); | |
139 | flags &= ~CASE_USEWORDS; | |
140 | ||
29d25b54 CR |
141 | inword = 0; |
142 | while (start < end) | |
143 | { | |
144 | wc = cval (ret, start); | |
145 | ||
146 | if (iswalnum (wc) == 0) | |
147 | { | |
148 | inword = 0; | |
1442f67c | 149 | #if 0 |
29d25b54 CR |
150 | ADVANCE_CHAR (ret, end, start); |
151 | continue; | |
1442f67c | 152 | #endif |
29d25b54 CR |
153 | } |
154 | ||
155 | if (pat) | |
156 | { | |
157 | next = start; | |
158 | ADVANCE_CHAR (ret, end, next); | |
159 | s = substring (ret, start, next); | |
160 | match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH; | |
161 | free (s); | |
162 | if (match == 0) | |
163 | { | |
164 | start = next; | |
165 | inword = 1; | |
166 | continue; | |
167 | } | |
168 | } | |
169 | ||
e141c35a CR |
170 | /* XXX - for now, the toggling operators work on the individual |
171 | words in the string, breaking on alphanumerics. Should I | |
172 | leave the capitalization operators to do that also? */ | |
29d25b54 CR |
173 | if (flags == CASE_CAPITALIZE) |
174 | { | |
e141c35a CR |
175 | if (usewords) |
176 | nop = inword ? CASE_LOWER : CASE_UPPER; | |
177 | else | |
178 | nop = (start > 0) ? CASE_LOWER : CASE_UPPER; | |
29d25b54 CR |
179 | inword = 1; |
180 | } | |
181 | else if (flags == CASE_UNCAP) | |
182 | { | |
e141c35a CR |
183 | if (usewords) |
184 | nop = inword ? CASE_UPPER : CASE_LOWER; | |
185 | else | |
186 | nop = (start > 0) ? CASE_UPPER : CASE_LOWER; | |
29d25b54 CR |
187 | inword = 1; |
188 | } | |
e141c35a CR |
189 | else if (flags == CASE_UPFIRST) |
190 | { | |
191 | if (usewords) | |
192 | nop = inword ? CASE_NOOP : CASE_UPPER; | |
193 | else | |
194 | nop = (start > 0) ? CASE_NOOP : CASE_UPPER; | |
195 | inword = 1; | |
196 | } | |
197 | else if (flags == CASE_LOWFIRST) | |
198 | { | |
199 | if (usewords) | |
200 | nop = inword ? CASE_NOOP : CASE_LOWER; | |
201 | else | |
202 | nop = (start > 0) ? CASE_NOOP : CASE_LOWER; | |
203 | inword = 1; | |
204 | } | |
29d25b54 CR |
205 | else if (flags == CASE_TOGGLE) |
206 | { | |
207 | nop = inword ? CASE_NOOP : CASE_TOGGLE; | |
208 | inword = 1; | |
209 | } | |
210 | else | |
211 | nop = flags; | |
212 | ||
1442f67c CR |
213 | /* Need to check UCHAR_MAX since wc may have already been converted to a |
214 | wide character by cval() */ | |
215 | if (MB_CUR_MAX == 1 || (wc <= UCHAR_MAX && is_basic ((int)wc))) | |
29d25b54 | 216 | { |
1442f67c | 217 | singlebyte: |
29d25b54 CR |
218 | switch (nop) |
219 | { | |
dc9f44b3 | 220 | default: |
29d25b54 CR |
221 | case CASE_NOOP: nc = wc; break; |
222 | case CASE_UPPER: nc = TOUPPER (wc); break; | |
223 | case CASE_LOWER: nc = TOLOWER (wc); break; | |
224 | case CASE_TOGGLEALL: | |
225 | case CASE_TOGGLE: nc = TOGGLE (wc); break; | |
226 | } | |
227 | ret[start] = nc; | |
228 | } | |
229 | #if defined (HANDLE_MULTIBYTE) | |
230 | else | |
231 | { | |
bf6bd355 CR |
232 | m = mbrtowc (&wc, string + start, end - start, &state); |
233 | if (MB_INVALIDCH (m)) | |
1442f67c CR |
234 | { |
235 | wc = (unsigned char)string[start]; | |
236 | goto singlebyte; | |
237 | } | |
bf6bd355 CR |
238 | else if (MB_NULLWCH (m)) |
239 | wc = L'\0'; | |
29d25b54 CR |
240 | switch (nop) |
241 | { | |
dc9f44b3 | 242 | default: |
29d25b54 | 243 | case CASE_NOOP: nwc = wc; break; |
49cf7828 CR |
244 | case CASE_UPPER: nwc = _to_wupper (wc); break; |
245 | case CASE_LOWER: nwc = _to_wlower (wc); break; | |
29d25b54 CR |
246 | case CASE_TOGGLEALL: |
247 | case CASE_TOGGLE: nwc = TOGGLE (wc); break; | |
248 | } | |
249 | if (nwc != wc) /* just skip unchanged characters */ | |
250 | { | |
251 | mlen = wcrtomb (mb, nwc, &state); | |
252 | if (mlen > 0) | |
253 | mb[mlen] = '\0'; | |
254 | /* Assume the same width */ | |
255 | strncpy (ret + start, mb, mlen); | |
256 | } | |
257 | } | |
258 | #endif | |
259 | ||
260 | /* This assumes that the upper and lower case versions are the same width. */ | |
261 | ADVANCE_CHAR (ret, end, start); | |
262 | } | |
263 | ||
264 | return ret; | |
265 | } |