]>
git.ipfire.org Git - thirdparty/bash.git/blob - examples/loadables/dsv.c
1 /* dsv - process a line of delimiter-separated data and populate an indexed
2 array with the fields */
5 Copyright (C) 2022 Free Software Foundation, Inc.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
21 /* See Makefile for compilation details. */
25 #if defined (HAVE_UNISTD_H)
31 #include "loadables.h"
33 #define DSV_ARRAY_DEFAULT "DSV"
39 #define F_SHELLQUOTE 0x01
41 #define F_PRESERVE 0x04
43 /* Split LINE into delimiter-separated fields, storing each field into a
44 separate element of array variable DSV, starting at index 0. The format
45 of LINE is delimiter-separated values. By default, this splits lines of
46 CSV data as described in RFC 4180. If *DSTRING is any other value than
47 ',', this uses that character as a field delimiter. Pass F_SHELLQUOTE in
48 FLAGS to understand shell-like double-quoting and backslash-escaping in
49 double quotes instead of the "" CSV behavior, and shell-like single quotes.
50 Pass F_GREEDY in FLAGS to consume multiple leading and trailing instances
51 of *DSTRING and consecutive instances of *DSTRING in LINE without creating
52 null fields. If you want to preserve the quote characters in the generated
53 fields, pass F_PRESERVE; by default, this removes them. */
55 dsvsplit (dsv
, line
, dstring
, flags
)
61 char *field
, *prev
, *buf
, *xbuf
;
69 /* If we want a greedy split, consume leading instances of *DSTRING */
72 while (*prev
== *dstring
)
82 xbuf
= xmalloc (strlen (prev
) + 1);
85 if (flags
& F_PRESERVE
)
88 for (field
= ++prev
; *field
; field
++)
90 if (qstate
== DQUOTE
&& *field
== '"' && field
[1] == '"' && (flags
& F_SHELLQUOTE
) == 0)
91 buf
[b
++] = *field
++; /* skip double quote */
92 else if (qstate
== DQUOTE
&& (flags
& F_SHELLQUOTE
) && *field
== '\\' && strchr (slashify_in_quotes
, field
[1]) != 0)
93 buf
[b
++] = *++field
; /* backslash quoted double quote */
94 else if (qstate
== DQUOTE
&& *field
== '"')
97 if (flags
& F_PRESERVE
)
100 else if (qstate
== NQUOTE
&& *field
== *dstring
)
103 /* This copies any text between a closing double quote and the
104 delimiter. If you want to change that, make sure to do the
105 copy only if qstate == DQUOTE. */
110 else if ((flags
& F_SHELLQUOTE
) && *prev
== '\'')
113 xbuf
= xmalloc (strlen (prev
) + 1);
116 if (flags
& F_PRESERVE
)
119 for (field
= ++prev
; *field
; field
++)
121 if (qstate
== SQUOTE
&& *field
== '\'')
124 if (flags
& F_PRESERVE
)
127 else if (qstate
== NQUOTE
&& *field
== *dstring
)
130 /* This copies any text between a closing single quote and the
131 delimiter. If you want to change that, make sure to do the
132 copy only if qstate == SQUOTE. */
140 field
= prev
+ strcspn (prev
, dstring
);
146 if ((flags
& F_GREEDY
) == 0 || buf
[0])
148 bind_array_element (dsv
, ind
, buf
, 0);
154 if (delim
== *dstring
)
157 while (delim
== *dstring
);
162 return (rval
= ind
); /* number of fields */
169 int opt
, rval
, flags
;
170 char *array_name
, *dsvstring
, *delims
;
174 rval
= EXECUTION_SUCCESS
;
179 reset_internal_getopt ();
180 while ((opt
= internal_getopt (list
, "a:d:Sgp")) != -1)
185 array_name
= list_optarg
;
188 delims
= list_optarg
;
191 flags
|= F_SHELLQUOTE
;
208 array_name
= DSV_ARRAY_DEFAULT
;
210 if (legal_identifier (array_name
) == 0)
212 sh_invalidid (array_name
);
213 return (EXECUTION_FAILURE
);
218 builtin_error ("dsv string argument required");
222 v
= find_or_make_array_variable (array_name
, 1);
223 if (v
== 0 || readonly_p (v
) || noassign_p (v
))
225 if (v
&& readonly_p (v
))
226 err_readonly (array_name
);
227 return (EXECUTION_FAILURE
);
229 else if (array_p (v
) == 0)
231 builtin_error ("%s: not an indexed array", array_name
);
232 return (EXECUTION_FAILURE
);
235 VUNSETATTR (v
, att_invisible
);
236 array_flush (array_cell (v
));
238 dsvstring
= list
->word
->word
;
240 if (dsvstring
== 0 || *dsvstring
== 0)
241 return (EXECUTION_SUCCESS
);
243 opt
= dsvsplit (v
, dsvstring
, delims
, flags
);
244 /* Maybe do something with OPT here, it's the number of fields */
249 /* Called when builtin is enabled and loaded from the shared object. If this
250 function returns 0, the load fails. */
252 dsv_builtin_load (name
)
258 /* Called when builtin is disabled. */
260 dsv_builtin_unload (name
)
266 "Read delimiter-separated fields from STRING.",
268 "Parse STRING, a line of delimiter-separated values, into individual",
269 "fields, and store them into the indexed array ARRAYNAME starting at",
270 "index 0. The parsing understands and skips over double-quoted strings. ",
271 "If ARRAYNAME is not supplied, \"DSV\" is the default array name.",
272 "If the delimiter is a comma, the default, this parses comma-",
273 "separated values as specified in RFC 4180.",
275 "The -d option specifies the delimiter. The delimiter is the first",
276 "character of the DELIMS argument. Specifying a DELIMS argument that",
277 "contains more than one character is not supported and will produce",
278 "unexpected results. The -S option enables shell-like quoting: double-",
279 "quoted strings can contain backslashes preceding special characters,",
280 "and the backslash will be removed; and single-quoted strings are",
281 "processed as the shell would process them. The -g option enables a",
282 "greedy split: sequences of the delimiter are skipped at the beginning",
283 "and end of STRING, and consecutive instances of the delimiter in STRING",
284 "do not generate empty fields. If the -p option is supplied, dsv leaves",
285 "quote characters as part of the generated field; otherwise they are",
288 "The return value is 0 unless an invalid option is supplied or the ARRAYNAME",
289 "argument is invalid or readonly.",
293 struct builtin dsv_struct
= {
294 "dsv", /* builtin name */
295 dsv_builtin
, /* function implementing the builtin */
296 BUILTIN_ENABLED
, /* initial flags for builtin */
297 dsv_doc
, /* array of long documentation strings. */
298 "dsv [-a ARRAYNAME] [-d DELIMS] [-Sgp] string", /* usage synopsis; becomes short_doc */
299 0 /* reserved for internal use */