]> git.ipfire.org Git - thirdparty/bash.git/blame - lib/sh/casemod.c
bash-4.3-beta overlay
[thirdparty/bash.git] / lib / sh / casemod.c
CommitLineData
29d25b54
CR
1/* casemod.c -- functions to change case of strings */
2
012bac39 3/* Copyright (C) 2008,2009 Free Software Foundation, Inc.
29d25b54
CR
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
2e4498b3
CR
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
29d25b54
CR
20
21#if defined (HAVE_CONFIG_H)
22# include <config.h>
23#endif
24
25#if defined (HAVE_UNISTD_H)
26# include <unistd.h>
27#endif /* HAVE_UNISTD_H */
28
29#include <stdc.h>
30
31#include <bashansi.h>
32#include <bashintl.h>
33#include <bashtypes.h>
34
35#include <stdio.h>
36#include <ctype.h>
37#include <xmalloc.h>
38
49cf7828 39#include <shmbchar.h>
29d25b54
CR
40#include <shmbutil.h>
41#include <chartypes.h>
1442f67c 42#include <typemax.h>
29d25b54
CR
43
44#include <glob/strmatch.h>
45
/* Wide-character case conversions that leave WC alone unless the current
   locale classifies it as being of the opposite case. */
#define _to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc))
#define _to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc))

#if !defined (HANDLE_MULTIBYTE)
/* Without multibyte support a character is exactly one byte, so the
   wide-character helpers used by sh_modcase are mapped onto their
   single-byte counterparts. */
#  define cval(s, i) ((s)[(i)])
/* The argument originates from a plain `char', which may be negative for
   bytes >= 0x80; <ctype.h> functions are only defined for values
   representable as unsigned char (or EOF), so convert before the call. */
#  define iswalnum(c) (isalnum ((unsigned char)(c)))
#  define TOGGLE(x) (ISUPPER (x) ? tolower (x) : (TOUPPER (x)))
#else
/* Flip the case of X: upper becomes lower, lower becomes upper, anything
   else is returned unchanged. */
#  define TOGGLE(x) (iswupper (x) ? towlower (x) : (_to_wupper(x)))
#endif
56
/* Operation codes accepted in the FLAGS argument of sh_modcase.
   These must agree with the defines in externs.h */
#define CASE_NOOP 0x0000 /* leave the character unchanged */
#define CASE_LOWER 0x0001 /* convert to lower case */
#define CASE_UPPER 0x0002 /* convert to upper case */
#define CASE_CAPITALIZE 0x0004 /* first character upper case, the rest lower case */
#define CASE_UNCAP 0x0008 /* first character lower case, the rest upper case */
#define CASE_TOGGLE 0x0010 /* toggle the case of the first character of each word */
#define CASE_TOGGLEALL 0x0020 /* toggle the case of every character */
#define CASE_UPFIRST 0x0040 /* first character upper case, the rest unchanged */
#define CASE_LOWFIRST 0x0080 /* first character lower case, the rest unchanged */

/* Modifier bit; sh_modcase strips it from FLAGS before comparing against the
   operation codes above. */
#define CASE_USEWORDS 0x1000 /* modify behavior to act on words in passed string */
29d25b54
CR
69
/* Defined outside this file.  sh_modcase calls it with a string and a pair of
   byte offsets to obtain a separately allocated copy of one character, which
   it matches against the pattern and then frees. */
extern char *substring __P((char *, int, int));

/* Supply UCHAR_MAX if none of the headers included above has defined it. */
#ifndef UCHAR_MAX
# define UCHAR_MAX TYPE_MAXIMUM(unsigned char)
#endif
75
29d25b54
CR
#if defined (HANDLE_MULTIBYTE)
/* Return, as a wide character, the character of S that begins at byte
   offset I.  When the locale is single-byte, the byte is a basic character,
   the byte is the last one in S, or the bytes at that offset cannot be
   converted, the single byte S[I] is returned instead. */
static wchar_t
cval (s, i)
     char *s;
     int i;
{
  mbstate_t ps;
  wchar_t wide;
  size_t nbytes;
  int len;

  /* Nothing to decode for single-byte locales or basic characters. */
  if (MB_CUR_MAX == 1 || is_basic (s[i]))
    return ((wchar_t)s[i]);

  len = strlen (s);
  if (i < len - 1)
    {
      /* Decode from a fresh shift state so the result depends only on
         the bytes starting at S + I. */
      memset (&ps, 0, sizeof (ps));
      nbytes = mbrtowc (&wide, s + i, len - i, &ps);
      if (MB_INVALIDCH (nbytes) == 0 && MB_NULLWCH (nbytes) == 0)
        return wide;
    }

  /* Last byte of the string, or an invalid, incomplete or null sequence. */
  return ((wchar_t)s[i]);
}
#endif
99
100/* Modify the case of characters in STRING matching PAT based on the value of
101 FLAGS. If PAT is null, modify the case of each character */
102char *
103sh_modcase (string, pat, flags)
104 const char *string;
105 char *pat;
106 int flags;
107{
108 int start, next, end;
e141c35a 109 int inword, c, nc, nop, match, usewords;
29d25b54
CR
110 char *ret, *s;
111 wchar_t wc;
112#if defined (HANDLE_MULTIBYTE)
113 wchar_t nwc;
114 char mb[MB_LEN_MAX+1];
115 int mlen;
bf6bd355 116 size_t m;
29d25b54
CR
117 mbstate_t state;
118#endif
119
5f8cde23
CR
120 if (string == 0 || *string == 0)
121 {
122 ret = (char *)xmalloc (1);
123 ret[0] = '\0';
124 return ret;
125 }
126
29d25b54
CR
127#if defined (HANDLE_MULTIBYTE)
128 memset (&state, 0, sizeof (mbstate_t));
129#endif
130
131 start = 0;
132 end = strlen (string);
133
134 ret = (char *)xmalloc (end + 1);
135 strcpy (ret, string);
136
e141c35a
CR
137 /* See if we are supposed to split on alphanumerics and operate on each word */
138 usewords = (flags & CASE_USEWORDS);
139 flags &= ~CASE_USEWORDS;
140
29d25b54
CR
141 inword = 0;
142 while (start < end)
143 {
144 wc = cval (ret, start);
145
146 if (iswalnum (wc) == 0)
147 {
148 inword = 0;
1442f67c 149#if 0
29d25b54
CR
150 ADVANCE_CHAR (ret, end, start);
151 continue;
1442f67c 152#endif
29d25b54
CR
153 }
154
155 if (pat)
156 {
157 next = start;
158 ADVANCE_CHAR (ret, end, next);
159 s = substring (ret, start, next);
160 match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH;
161 free (s);
162 if (match == 0)
163 {
164 start = next;
165 inword = 1;
166 continue;
167 }
168 }
169
e141c35a
CR
170 /* XXX - for now, the toggling operators work on the individual
171 words in the string, breaking on alphanumerics. Should I
172 leave the capitalization operators to do that also? */
29d25b54
CR
173 if (flags == CASE_CAPITALIZE)
174 {
e141c35a
CR
175 if (usewords)
176 nop = inword ? CASE_LOWER : CASE_UPPER;
177 else
178 nop = (start > 0) ? CASE_LOWER : CASE_UPPER;
29d25b54
CR
179 inword = 1;
180 }
181 else if (flags == CASE_UNCAP)
182 {
e141c35a
CR
183 if (usewords)
184 nop = inword ? CASE_UPPER : CASE_LOWER;
185 else
186 nop = (start > 0) ? CASE_UPPER : CASE_LOWER;
29d25b54
CR
187 inword = 1;
188 }
e141c35a
CR
189 else if (flags == CASE_UPFIRST)
190 {
191 if (usewords)
192 nop = inword ? CASE_NOOP : CASE_UPPER;
193 else
194 nop = (start > 0) ? CASE_NOOP : CASE_UPPER;
195 inword = 1;
196 }
197 else if (flags == CASE_LOWFIRST)
198 {
199 if (usewords)
200 nop = inword ? CASE_NOOP : CASE_LOWER;
201 else
202 nop = (start > 0) ? CASE_NOOP : CASE_LOWER;
203 inword = 1;
204 }
29d25b54
CR
205 else if (flags == CASE_TOGGLE)
206 {
207 nop = inword ? CASE_NOOP : CASE_TOGGLE;
208 inword = 1;
209 }
210 else
211 nop = flags;
212
1442f67c
CR
213 /* Need to check UCHAR_MAX since wc may have already been converted to a
214 wide character by cval() */
215 if (MB_CUR_MAX == 1 || (wc <= UCHAR_MAX && is_basic ((int)wc)))
29d25b54 216 {
1442f67c 217singlebyte:
29d25b54
CR
218 switch (nop)
219 {
dc9f44b3 220 default:
29d25b54
CR
221 case CASE_NOOP: nc = wc; break;
222 case CASE_UPPER: nc = TOUPPER (wc); break;
223 case CASE_LOWER: nc = TOLOWER (wc); break;
224 case CASE_TOGGLEALL:
225 case CASE_TOGGLE: nc = TOGGLE (wc); break;
226 }
227 ret[start] = nc;
228 }
229#if defined (HANDLE_MULTIBYTE)
230 else
231 {
bf6bd355
CR
232 m = mbrtowc (&wc, string + start, end - start, &state);
233 if (MB_INVALIDCH (m))
1442f67c
CR
234 {
235 wc = (unsigned char)string[start];
236 goto singlebyte;
237 }
bf6bd355
CR
238 else if (MB_NULLWCH (m))
239 wc = L'\0';
29d25b54
CR
240 switch (nop)
241 {
dc9f44b3 242 default:
29d25b54 243 case CASE_NOOP: nwc = wc; break;
49cf7828
CR
244 case CASE_UPPER: nwc = _to_wupper (wc); break;
245 case CASE_LOWER: nwc = _to_wlower (wc); break;
29d25b54
CR
246 case CASE_TOGGLEALL:
247 case CASE_TOGGLE: nwc = TOGGLE (wc); break;
248 }
249 if (nwc != wc) /* just skip unchanged characters */
250 {
251 mlen = wcrtomb (mb, nwc, &state);
252 if (mlen > 0)
253 mb[mlen] = '\0';
254 /* Assume the same width */
255 strncpy (ret + start, mb, mlen);
256 }
257 }
258#endif
259
260 /* This assumes that the upper and lower case versions are the same width. */
261 ADVANCE_CHAR (ret, end, start);
262 }
263
264 return ret;
265}