]> git.ipfire.org Git - thirdparty/bash.git/blob - examples/loadables/dsv.c
bash-5.2 distribution sources and documentation
[thirdparty/bash.git] / examples / loadables / dsv.c
1 /* dsv - process a line of delimiter-separated data and populate an indexed
2 array with the fields */
3
4 /*
5 Copyright (C) 2022 Free Software Foundation, Inc.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 /* See Makefile for compilation details. */
22
23 #include <config.h>
24
25 #if defined (HAVE_UNISTD_H)
26 # include <unistd.h>
27 #endif
28 #include "bashansi.h"
29 #include <stdio.h>
30
31 #include "loadables.h"
32
33 #define DSV_ARRAY_DEFAULT "DSV"
34
35 #define NQUOTE 0
36 #define DQUOTE 1
37 #define SQUOTE 2
38
39 #define F_SHELLQUOTE 0x01
40 #define F_GREEDY 0x02
41 #define F_PRESERVE 0x04
42
43 /* Split LINE into delimiter-separated fields, storing each field into a
44 separate element of array variable DSV, starting at index 0. The format
45 of LINE is delimiter-separated values. By default, this splits lines of
46 CSV data as described in RFC 4180. If *DSTRING is any other value than
47 ',', this uses that character as a field delimiter. Pass F_SHELLQUOTE in
48 FLAGS to understand shell-like double-quoting and backslash-escaping in
49 double quotes instead of the "" CSV behavior, and shell-like single quotes.
50 Pass F_GREEDY in FLAGS to consume multiple leading and trailing instances
51 of *DSTRING and consecutive instances of *DSTRING in LINE without creating
52 null fields. If you want to preserve the quote characters in the generated
53 fields, pass F_PRESERVE; by default, this removes them. */
54 static int
55 dsvsplit (dsv, line, dstring, flags)
56 SHELL_VAR *dsv;
57 char *line, *dstring;
58 int flags;
59 {
60 arrayind_t ind;
61 char *field, *prev, *buf, *xbuf;
62 int delim, qstate;
63 int b, rval;
64
65 xbuf = 0;
66 ind = 0;
67 field = prev = line;
68
69 /* If we want a greedy split, consume leading instances of *DSTRING */
70 if (flags & F_GREEDY)
71 {
72 while (*prev == *dstring)
73 prev++;
74 field = prev;
75 }
76
77 do
78 {
79 if (*prev == '"')
80 {
81 if (xbuf == 0)
82 xbuf = xmalloc (strlen (prev) + 1);
83 buf = xbuf;
84 b = 0;
85 if (flags & F_PRESERVE)
86 buf[b++] = *prev;
87 qstate = DQUOTE;
88 for (field = ++prev; *field; field++)
89 {
90 if (qstate == DQUOTE && *field == '"' && field[1] == '"' && (flags & F_SHELLQUOTE) == 0)
91 buf[b++] = *field++; /* skip double quote */
92 else if (qstate == DQUOTE && (flags & F_SHELLQUOTE) && *field == '\\' && strchr (slashify_in_quotes, field[1]) != 0)
93 buf[b++] = *++field; /* backslash quoted double quote */
94 else if (qstate == DQUOTE && *field == '"')
95 {
96 qstate = NQUOTE;
97 if (flags & F_PRESERVE)
98 buf[b++] = *field;
99 }
100 else if (qstate == NQUOTE && *field == *dstring)
101 break;
102 else
103 /* This copies any text between a closing double quote and the
104 delimiter. If you want to change that, make sure to do the
105 copy only if qstate == DQUOTE. */
106 buf[b++] = *field;
107 }
108 buf[b] = '\0';
109 }
110 else if ((flags & F_SHELLQUOTE) && *prev == '\'')
111 {
112 if (xbuf == 0)
113 xbuf = xmalloc (strlen (prev) + 1);
114 buf = xbuf;
115 b = 0;
116 if (flags & F_PRESERVE)
117 buf[b++] = *prev;
118 qstate = SQUOTE;
119 for (field = ++prev; *field; field++)
120 {
121 if (qstate == SQUOTE && *field == '\'')
122 {
123 qstate = NQUOTE;
124 if (flags & F_PRESERVE)
125 buf[b++] = *field;
126 }
127 else if (qstate == NQUOTE && *field == *dstring)
128 break;
129 else
130 /* This copies any text between a closing single quote and the
131 delimiter. If you want to change that, make sure to do the
132 copy only if qstate == SQUOTE. */
133 buf[b++] = *field;
134 }
135 buf[b] = '\0';
136 }
137 else
138 {
139 buf = prev;
140 field = prev + strcspn (prev, dstring);
141 }
142
143 delim = *field;
144 *field = '\0';
145
146 if ((flags & F_GREEDY) == 0 || buf[0])
147 {
148 bind_array_element (dsv, ind, buf, 0);
149 ind++;
150 }
151
152 *field = delim;
153
154 if (delim == *dstring)
155 prev = field + 1;
156 }
157 while (delim == *dstring);
158
159 if (xbuf)
160 free (xbuf);
161
162 return (rval = ind); /* number of fields */
163 }
164
165 int
166 dsv_builtin (list)
167 WORD_LIST *list;
168 {
169 int opt, rval, flags;
170 char *array_name, *dsvstring, *delims;
171 SHELL_VAR *v;
172
173 array_name = 0;
174 rval = EXECUTION_SUCCESS;
175
176 delims = ",";
177 flags = 0;
178
179 reset_internal_getopt ();
180 while ((opt = internal_getopt (list, "a:d:Sgp")) != -1)
181 {
182 switch (opt)
183 {
184 case 'a':
185 array_name = list_optarg;
186 break;
187 case 'd':
188 delims = list_optarg;
189 break;
190 case 'S':
191 flags |= F_SHELLQUOTE;
192 break;
193 case 'g':
194 flags |= F_GREEDY;
195 break;
196 case 'p':
197 flags |= F_PRESERVE;
198 break;
199 CASE_HELPOPT;
200 default:
201 builtin_usage ();
202 return (EX_USAGE);
203 }
204 }
205 list = loptend;
206
207 if (array_name == 0)
208 array_name = DSV_ARRAY_DEFAULT;
209
210 if (legal_identifier (array_name) == 0)
211 {
212 sh_invalidid (array_name);
213 return (EXECUTION_FAILURE);
214 }
215
216 if (list == 0)
217 {
218 builtin_error ("dsv string argument required");
219 return (EX_USAGE);
220 }
221
222 v = find_or_make_array_variable (array_name, 1);
223 if (v == 0 || readonly_p (v) || noassign_p (v))
224 {
225 if (v && readonly_p (v))
226 err_readonly (array_name);
227 return (EXECUTION_FAILURE);
228 }
229 else if (array_p (v) == 0)
230 {
231 builtin_error ("%s: not an indexed array", array_name);
232 return (EXECUTION_FAILURE);
233 }
234 if (invisible_p (v))
235 VUNSETATTR (v, att_invisible);
236 array_flush (array_cell (v));
237
238 dsvstring = list->word->word;
239
240 if (dsvstring == 0 || *dsvstring == 0)
241 return (EXECUTION_SUCCESS);
242
243 opt = dsvsplit (v, dsvstring, delims, flags);
244 /* Maybe do something with OPT here, it's the number of fields */
245
246 return (rval);
247 }
248
/* Called when builtin is enabled and loaded from the shared object.  If this
   function returns 0, the load fails.  NAME is not used; there is nothing to
   set up, so this always returns 1. */
int
dsv_builtin_load (char *name)
{
  (void) name;
  return (1);
}
257
/* Called when builtin is disabled.  NAME is not used; there is no state to
   tear down. */
void
dsv_builtin_unload (char *name)
{
  (void) name;
}
264
/* Long help text for the builtin, one string per output line, terminated by
   a null pointer.  The first line is the one-line summary. */
char *dsv_doc[] = {
	"Read delimiter-separated fields from STRING.",
	"",
	"Parse STRING, a line of delimiter-separated values, into individual",
	"fields, and store them into the indexed array ARRAYNAME starting at",
	"index 0. The parsing understands and skips over double-quoted strings. ",
	"If ARRAYNAME is not supplied, \"DSV\" is the default array name.",
	"If the delimiter is a comma, the default, this parses comma-",
	"separated values as specified in RFC 4180.",
	"",
	"The -d option specifies the delimiter. The delimiter is the first",
	"character of the DELIMS argument. Specifying a DELIMS argument that",
	"contains more than one character is not supported and will produce",
	"unexpected results. The -S option enables shell-like quoting: double-",
	"quoted strings can contain backslashes preceding special characters,",
	"and the backslash will be removed; and single-quoted strings are",
	"processed as the shell would process them. The -g option enables a",
	"greedy split: sequences of the delimiter are skipped at the beginning",
	"and end of STRING, and consecutive instances of the delimiter in STRING",
	"do not generate empty fields. If the -p option is supplied, dsv leaves",
	"quote characters as part of the generated field; otherwise they are",
	"removed.",
	"",
	"The return value is 0 unless an invalid option is supplied, STRING is",
	"missing, or ARRAYNAME is invalid, readonly, or not an indexed array.",
	(char *)NULL
};
292
293 struct builtin dsv_struct = {
294 "dsv", /* builtin name */
295 dsv_builtin, /* function implementing the builtin */
296 BUILTIN_ENABLED, /* initial flags for builtin */
297 dsv_doc, /* array of long documentation strings. */
298 "dsv [-a ARRAYNAME] [-d DELIMS] [-Sgp] string", /* usage synopsis; becomes short_doc */
299 0 /* reserved for internal use */
300 };