]> git.ipfire.org Git - thirdparty/bash.git/blob - lib/sh/casemod.c
bash-4.3-alpha cleanup
[thirdparty/bash.git] / lib / sh / casemod.c
1 /* casemod.c -- functions to change case of strings */
2
3 /* Copyright (C) 2008,2009 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #if defined (HAVE_CONFIG_H)
22 # include <config.h>
23 #endif
24
25 #if defined (HAVE_UNISTD_H)
26 # include <unistd.h>
27 #endif /* HAVE_UNISTD_H */
28
29 #include <stdc.h>
30
31 #include <bashansi.h>
32 #include <bashintl.h>
33 #include <bashtypes.h>
34
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <xmalloc.h>
38
39 #include <shmbchar.h>
40 #include <shmbutil.h>
41 #include <chartypes.h>
42
43 #include <glob/strmatch.h>
44
45 #define _to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc))
46 #define _to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc))
47
48 #if !defined (HANDLE_MULTIBYTE)
49 # define cval(s, i) ((s)[(i)])
50 # define iswalnum(c) (isalnum(c))
51 # define TOGGLE(x) (ISUPPER (x) ? tolower (x) : (TOUPPER (x)))
52 #else
53 # define TOGGLE(x) (iswupper (x) ? towlower (x) : (_to_wupper(x)))
54 #endif
55
56 /* These must agree with the defines in externs.h */
57 #define CASE_NOOP 0x0000
58 #define CASE_LOWER 0x0001
59 #define CASE_UPPER 0x0002
60 #define CASE_CAPITALIZE 0x0004
61 #define CASE_UNCAP 0x0008
62 #define CASE_TOGGLE 0x0010
63 #define CASE_TOGGLEALL 0x0020
64 #define CASE_UPFIRST 0x0040
65 #define CASE_LOWFIRST 0x0080
66
67 #define CASE_USEWORDS 0x1000 /* modify behavior to act on words in passed string */
68
69 extern char *substring __P((char *, int, int));
70
71 #if defined (HANDLE_MULTIBYTE)
/* Return, as a wide character, the character of S that begins at byte
   offset I.  The raw byte S[I] (cast to wchar_t) is returned instead when
   no multibyte decoding is needed or possible: the locale is single-byte,
   the byte is a basic character, I is at or beyond the last byte of S, or
   mbrtowc reports an invalid, incomplete, or null character. */
static wchar_t
cval (s, i)
     char *s;
     int i;
{
  mbstate_t ps;
  wchar_t wide;
  size_t nbytes;
  int len;

  /* Only attempt a conversion for non-basic bytes in a multibyte locale. */
  if (MB_CUR_MAX != 1 && is_basic (s[i]) == 0)
    {
      len = strlen (s);
      /* A multibyte character needs at least one byte after S[I]. */
      if (i < (len - 1))
        {
          /* Decode from a fresh (initial) conversion state. */
          memset (&ps, 0, sizeof (mbstate_t));
          nbytes = mbrtowc (&wide, s + i, len - i, &ps);
          if (MB_INVALIDCH (nbytes) == 0 && MB_NULLWCH (nbytes) == 0)
            return wide;
        }
    }

  /* Fallback: hand back the single byte unchanged. */
  return ((wchar_t)s[i]);
}
93 #endif
94
95 /* Modify the case of characters in STRING matching PAT based on the value of
96 FLAGS. If PAT is null, modify the case of each character */
97 char *
98 sh_modcase (string, pat, flags)
99 const char *string;
100 char *pat;
101 int flags;
102 {
103 int start, next, end;
104 int inword, c, nc, nop, match, usewords;
105 char *ret, *s;
106 wchar_t wc;
107 #if defined (HANDLE_MULTIBYTE)
108 wchar_t nwc;
109 char mb[MB_LEN_MAX+1];
110 int mlen;
111 size_t m;
112 mbstate_t state;
113 #endif
114
115 if (string == 0 || *string == 0)
116 {
117 ret = (char *)xmalloc (1);
118 ret[0] = '\0';
119 return ret;
120 }
121
122 #if defined (HANDLE_MULTIBYTE)
123 memset (&state, 0, sizeof (mbstate_t));
124 #endif
125
126 start = 0;
127 end = strlen (string);
128
129 ret = (char *)xmalloc (end + 1);
130 strcpy (ret, string);
131
132 /* See if we are supposed to split on alphanumerics and operate on each word */
133 usewords = (flags & CASE_USEWORDS);
134 flags &= ~CASE_USEWORDS;
135
136 inword = 0;
137 while (start < end)
138 {
139 wc = cval (ret, start);
140
141 if (iswalnum (wc) == 0)
142 {
143 inword = 0;
144 ADVANCE_CHAR (ret, end, start);
145 continue;
146 }
147
148 if (pat)
149 {
150 next = start;
151 ADVANCE_CHAR (ret, end, next);
152 s = substring (ret, start, next);
153 match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH;
154 free (s);
155 if (match == 0)
156 {
157 start = next;
158 inword = 1;
159 continue;
160 }
161 }
162
163 /* XXX - for now, the toggling operators work on the individual
164 words in the string, breaking on alphanumerics. Should I
165 leave the capitalization operators to do that also? */
166 if (flags == CASE_CAPITALIZE)
167 {
168 if (usewords)
169 nop = inword ? CASE_LOWER : CASE_UPPER;
170 else
171 nop = (start > 0) ? CASE_LOWER : CASE_UPPER;
172 inword = 1;
173 }
174 else if (flags == CASE_UNCAP)
175 {
176 if (usewords)
177 nop = inword ? CASE_UPPER : CASE_LOWER;
178 else
179 nop = (start > 0) ? CASE_UPPER : CASE_LOWER;
180 inword = 1;
181 }
182 else if (flags == CASE_UPFIRST)
183 {
184 if (usewords)
185 nop = inword ? CASE_NOOP : CASE_UPPER;
186 else
187 nop = (start > 0) ? CASE_NOOP : CASE_UPPER;
188 inword = 1;
189 }
190 else if (flags == CASE_LOWFIRST)
191 {
192 if (usewords)
193 nop = inword ? CASE_NOOP : CASE_LOWER;
194 else
195 nop = (start > 0) ? CASE_NOOP : CASE_LOWER;
196 inword = 1;
197 }
198 else if (flags == CASE_TOGGLE)
199 {
200 nop = inword ? CASE_NOOP : CASE_TOGGLE;
201 inword = 1;
202 }
203 else
204 nop = flags;
205
206 if (MB_CUR_MAX == 1 || is_basic ((int)wc))
207 {
208 switch (nop)
209 {
210 default:
211 case CASE_NOOP: nc = wc; break;
212 case CASE_UPPER: nc = TOUPPER (wc); break;
213 case CASE_LOWER: nc = TOLOWER (wc); break;
214 case CASE_TOGGLEALL:
215 case CASE_TOGGLE: nc = TOGGLE (wc); break;
216 }
217 ret[start] = nc;
218 }
219 #if defined (HANDLE_MULTIBYTE)
220 else
221 {
222 m = mbrtowc (&wc, string + start, end - start, &state);
223 if (MB_INVALIDCH (m))
224 wc = (wchar_t)string[start];
225 else if (MB_NULLWCH (m))
226 wc = L'\0';
227 switch (nop)
228 {
229 default:
230 case CASE_NOOP: nwc = wc; break;
231 case CASE_UPPER: nwc = _to_wupper (wc); break;
232 case CASE_LOWER: nwc = _to_wlower (wc); break;
233 case CASE_TOGGLEALL:
234 case CASE_TOGGLE: nwc = TOGGLE (wc); break;
235 }
236 if (nwc != wc) /* just skip unchanged characters */
237 {
238 mlen = wcrtomb (mb, nwc, &state);
239 if (mlen > 0)
240 mb[mlen] = '\0';
241 /* Assume the same width */
242 strncpy (ret + start, mb, mlen);
243 }
244 }
245 #endif
246
247 /* This assumes that the upper and lower case versions are the same width. */
248 ADVANCE_CHAR (ret, end, start);
249 }
250
251 return ret;
252 }