From: Chet Ramey Date: Fri, 17 Apr 2020 19:43:10 +0000 (-0400) Subject: commit bash-20200413 snapshot X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=96a2ec128b24f328a86645ef2ceaa5d4a96515ed;p=thirdparty%2Fbash.git commit bash-20200413 snapshot --- diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog index ba2fdbe1d..9e668be5c 100644 --- a/CWRU/CWRU.chlog +++ b/CWRU/CWRU.chlog @@ -7976,3 +7976,36 @@ lib/readline/display.c - rl_redisplay: if the line consumes more than a screen's worth of lines, mark the lines that are off the top of the display as having a `normal' face + + 4/14 + ---- +variables.c + - LINENO: no longer has the integer attribute, since it does not take + arithmetic expressions on assignment (use $((expr)) instead) + +lib/readline/mbutil.c + - _rl_find_prev_utf8char: new function, uses the structure of UTF-8 + characters to find the start of the previous one no matter where we + start: at the beginning of the next character, at the end of a + multibyte character, or in the middle of a multibyte character. + EXPERIMENTAL + - _rl_find_prev_mbchar_internal: for now, call _rl_find_prev_utf8char + if we are in a UTF-8 locale. 
EXPERIMENTAL + + 4/15 + ---- +lib/readline/mbutil.c + - _rl_utf8_mblen: changes to handle invalid multibyte sequences better, + even when N means that the sequence would be too short anyway + +examples/loadables/csv.c + - csv: new loadable builtin to parse a line of csv data and store it + into an array supplied as an argument + + 4/16 + ---- +examples/loadables/cut.c + - cut: new loadable builtin + - lcut: new builtin that does what cut does but on a string supplied + as an argument (only one string for now) + diff --git a/MANIFEST b/MANIFEST index aa25ab58f..653c1a1b0 100644 --- a/MANIFEST +++ b/MANIFEST @@ -723,7 +723,8 @@ examples/loadables/truefalse.c f examples/loadables/fdflags.c f examples/loadables/finfo.c f examples/loadables/cat.c f -#examples/loadables/cut.c f +examples/loadables/csv.c f +examples/loadables/cut.c f examples/loadables/logname.c f examples/loadables/basename.c f examples/loadables/dirname.c f diff --git a/examples/loadables/Makefile.in b/examples/loadables/Makefile.in index 9f84509fa..e044d18dd 100644 --- a/examples/loadables/Makefile.in +++ b/examples/loadables/Makefile.in @@ -103,7 +103,7 @@ INC = -I. -I.. 
-I$(topdir) -I$(topdir)/lib -I$(topdir)/builtins -I${srcdir} \ ALLPROG = print truefalse sleep finfo logname basename dirname fdflags \ tty pathchk tee head mkdir rmdir mkfifo mktemp printenv id whoami \ uname sync push ln unlink realpath strftime mypid setpgid seq rm \ - accept + accept csv cut OTHERPROG = necho hello cat pushd stat all: $(SHOBJ_STATUS) @@ -215,6 +215,12 @@ unlink: unlink.o realpath: realpath.o $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ realpath.o $(SHOBJ_LIBS) +csv: csv.o + $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ csv.o $(SHOBJ_LIBS) + +cut: cut.o + $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ cut.o $(SHOBJ_LIBS) + strftime: strftime.o $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ strftime.o $(SHOBJ_LIBS) @@ -296,6 +302,8 @@ rmdir.o: rmdir.c necho.o: necho.c hello.o: hello.c cat.o: cat.c +csv.o: csv.c +cut.o: cut.c printenv.o: printenv.c id.o: id.c whoami.o: whoami.c diff --git a/examples/loadables/csv.c b/examples/loadables/csv.c new file mode 100644 index 000000000..11228f1ab --- /dev/null +++ b/examples/loadables/csv.c @@ -0,0 +1,206 @@ +/* csv - process a line of csv data and populate an indexed array with the + fields */ + +/* + Copyright (C) 2020 Free Software Foundation, Inc. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* See Makefile for compilation details. 
*/ + +#include <config.h> + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif +#include "bashansi.h" +#include <stdio.h> + +#include "loadables.h" + +#define CSV_ARRAY_DEFAULT "CSV" + +#define NQUOTE 0 +#define DQUOTE 1 + +/* Split LINE into comma-separated fields, storing each field into a separate + element of array variable CSV, starting at index 0. The format of LINE is + as described in RFC 4180. */ +static int +csvsplit (csv, line) + SHELL_VAR *csv; + char *line; +{ + arrayind_t ind; + char *field, *prev, *buf, *xbuf; + int delim, qstate; + int b, rval; + + xbuf = 0; + ind = 0; + field = prev = line; + + do + { + if (*prev == '"') + { + if (xbuf == 0) + xbuf = xmalloc (strlen (prev) + 1); + buf = xbuf; + b = 0; + qstate = DQUOTE; + for (field = ++prev; *field; field++) + { + if (qstate == DQUOTE && *field == '"' && field[1] == '"') + buf[b++] = *field++; /* skip double quote */ + else if (qstate == DQUOTE && *field == '"') + qstate = NQUOTE; + else if (qstate == NQUOTE && *field == ',') + break; + else + /* This copies any text between a closing double quote and the + delimiter. If you want to change that, make sure to do the + copy only if qstate == DQUOTE. 
*/ + buf[b++] = *field; + } + buf[b] = '\0'; + } + else + { + buf = prev; + field = prev + strcspn (prev, ","); + } + + delim = *field; + *field = '\0'; + + bind_array_element (csv, ind, buf, 0); + ind++; + + *field = delim; + + if (delim == ',') + prev = field + 1; + } + while (delim == ','); + + if (xbuf) + free (xbuf); + + return (rval = ind); /* number of fields */ +} + +int +csv_builtin (list) + WORD_LIST *list; +{ + int opt, rval; + char *array_name, *csvstring; + SHELL_VAR *v; + + array_name = 0; + rval = EXECUTION_SUCCESS; + + reset_internal_getopt (); + while ((opt = internal_getopt (list, "a:")) != -1) + { + switch (opt) + { + case 'a': + array_name = list_optarg; + break; + CASE_HELPOPT; + default: + builtin_usage (); + return (EX_USAGE); + } + } + list = loptend; + + if (array_name == 0) + array_name = CSV_ARRAY_DEFAULT; + + if (legal_identifier (array_name) == 0) + { + sh_invalidid (array_name); + return (EXECUTION_FAILURE); + } + + if (list == 0) + { + builtin_error ("csv string argument required"); + return (EX_USAGE); + } + + v = find_or_make_array_variable (array_name, 1); + if (v == 0 || readonly_p (v) || noassign_p (v)) + { + if (v && readonly_p (v)) + err_readonly (array_name); + return (EXECUTION_FAILURE); + } + else if (array_p (v) == 0) + { + builtin_error ("%s: not an indexed array", array_name); + return (EXECUTION_FAILURE); + } + if (invisible_p (v)) + VUNSETATTR (v, att_invisible); + array_flush (array_cell (v)); + + csvstring = list->word->word; + + if (csvstring == 0 || *csvstring == 0) + return (EXECUTION_SUCCESS); + + opt = csvsplit (v, csvstring); + /* Maybe do something with OPT here, it's the number of fields */ + + return (rval); +} + +/* Called when builtin is enabled and loaded from the shared object. If this + function returns 0, the load fails. */ +int +csv_builtin_load (name) + char *name; +{ + return (1); +} + +/* Called when builtin is disabled. 
*/ +void +csv_builtin_unload (name) + char *name; +{ +} + +char *csv_doc[] = { + "Read comma-separated fields from a string.", + "", + "Parse STRING, a line of comma-separated values, into individual fields,", + "and store them into the indexed array ARRAYNAME starting at index 0.", + "If ARRAYNAME is not supplied, \"CSV\" is the default array name.", + (char *)NULL +}; + +struct builtin csv_struct = { + "csv", /* builtin name */ + csv_builtin, /* function implementing the builtin */ + BUILTIN_ENABLED, /* initial flags for builtin */ + csv_doc, /* array of long documentation strings. */ + "csv [-a ARRAY] string", /* usage synopsis; becomes short_doc */ + 0 /* reserved for internal use */ +}; diff --git a/examples/loadables/cut.c b/examples/loadables/cut.c new file mode 100644 index 000000000..6129b1f97 --- /dev/null +++ b/examples/loadables/cut.c @@ -0,0 +1,641 @@ +/* lcut - extract specified fields from a line and assign them to an array or + print them to the standard output */ + +/* + Copyright (C) 2020 Free Software Foundation, Inc. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* See Makefile for compilation details. 
*/ +#include <config.h> + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif +#include "bashansi.h" +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> + +#include "loadables.h" +#include "shmbutil.h" + +#define CUT_ARRAY_DEFAULT "CUTFIELDS" + +#define NOPOS -2 /* sentinel for unset startpos/endpos */ + +#define BOL 0 +#define EOL INT_MAX +#define NORANGE -1 /* just a position, no range */ + +#define BFLAG (1 << 0) +#define CFLAG (1 << 1) +#define DFLAG (1 << 2) +#define FFLAG (1 << 3) +#define SFLAG (1 << 4) + +struct cutpos +{ + int startpos, endpos; /* zero-based, correction done in getlist() */ +}; + +struct cutop +{ + int flags; + int delim; + int npos; + struct cutpos *poslist; +}; + +static int +poscmp (a, b) + void *a, *b; +{ + struct cutpos *p1, *p2; + + p1 = (struct cutpos *)a; + p2 = (struct cutpos *)b; + return (p1->startpos - p2->startpos); +} + +static int +getlist (arg, opp) + char *arg; + struct cutpos **opp; +{ + char *ntok, *ltok, *larg; + int s, e; + intmax_t num; + struct cutpos *poslist; + int npos, nsize; + + poslist = 0; + nsize = npos = 0; + s = e = 0; + larg = arg; + while (ltok = strsep (&larg, ",")) + { + if (*ltok == 0) + continue; + + ntok = strsep (&ltok, "-"); + if (*ntok == 0) + s = BOL; + else + { + if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0) + { + builtin_error ("%s: invalid list value", ntok); + *opp = poslist; + return -1; + } + s = num; + s--; /* fields are 1-based */ + } + if (ltok == 0) + e = NORANGE; + else if (*ltok == 0) + e = EOL; + else + { + if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0) + { + builtin_error ("%s: invalid list value", ltok); + *opp = poslist; + return -1; + } + e = num; + e--; + if (e == s) + e = NORANGE; + } + + if (npos == nsize) + { + nsize += 4; + poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos)); + } + poslist[npos].startpos = s; + poslist[npos].endpos = e; + npos++; + } + if (npos == 0) + { + builtin_error ("missing list of positions"); + *opp = poslist; + return 
-1; + } + + qsort (poslist, npos, sizeof(poslist[0]), poscmp); + *opp = poslist; + + return npos; +} + +static int +cutbytes (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + arrayind_t ind; + char *buf, *bmap; + size_t llen; + int i, b, n, s, e; + + llen = strlen (line); + buf = xmalloc (llen + 1); + bmap = xmalloc (llen + 1); + memset (bmap, 0, llen); + + for (n = 0; n < ops->npos; n++) + { + s = ops->poslist[n].startpos; /* no translation needed yet */ + e = ops->poslist[n].endpos; + if (e == NORANGE) + e = s; + else if (e == EOL || e >= llen) + e = llen - 1; + /* even if a column is specified multiple times, it will only be printed + once */ + for (i = s; i <= e; i++) + bmap[i] = 1; + } + + b = 0; + for (i = 0; i < llen; i++) + if (bmap[i]) + buf[b++] = line[i]; + buf[b] = 0; + + if (v) + { + ind = 0; + bind_array_element (v, ind, buf, 0); + ind++; + } + else + printf ("%s\n", buf); + + free (buf); + free (bmap); + + return ind; +} + +static int +cutchars (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + arrayind_t ind; + char *buf, *bmap; + wchar_t *wbuf, *wb2; + size_t llen, wlen; + int i, b, n, s, e; + + if (MB_CUR_MAX == 1) + return (cutbytes (v, line, ops)); + if (locale_utf8locale && utf8_mbsmbchar (line) == 0) + return (cutbytes (v, line, ops)); + + llen = strlen (line); + wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t)); + + wlen = mbstowcs (wbuf, line, llen); + if (MB_INVALIDCH (wlen)) + { + free (wbuf); + return (cutbytes (v, line, ops)); + } + + bmap = xmalloc (llen + 1); + memset (bmap, 0, llen); + + for (n = 0; n < ops->npos; n++) + { + s = ops->poslist[n].startpos; /* no translation needed yet */ + e = ops->poslist[n].endpos; + if (e == NORANGE) + e = s; + else if (e == EOL || e >= wlen) + e = wlen - 1; + /* even if a column is specified multiple times, it will only be printed + once */ + for (i = s; i <= e; i++) + bmap[i] = 1; + } + + wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t)); + b 
= 0; + for (i = 0; i < wlen; i++) + if (bmap[i]) + wb2[b++] = wbuf[i]; + wb2[b] = 0; + + free (wbuf); + + buf = bmap; + n = wcstombs (buf, wb2, llen); + + if (v) + { + ind = 0; + bind_array_element (v, ind, buf, 0); + ind++; + } + else + printf ("%s\n", buf); + + free (buf); + free (wb2); + + return ind; +} + +/* The basic strategy is to cut the line into fields using strsep, populate + an array of fields from 0..nf, then select those fields using the same + bitmap approach as cut{bytes,chars} and assign them to the array variable + V or print them on stdout. This function obeys SFLAG. */ +static int +cutfields (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + arrayind_t ind; + char *buf, *bmap, *field, **fields, delim[2]; + size_t llen, fsize; + int i, b, n, s, e, nf; + + ind = 0; + + delim[0] = ops->delim; + delim[1] = '\0'; + + fields = 0; + nf = 0; + fsize = 0; + + field = buf = line; + do + { + field = strsep (&buf, delim); /* destructive */ + if (nf == fsize) + { + fsize += 8; + fields = xrealloc (fields, fsize * sizeof (char *)); + } + fields[nf] = field; + if (field) + nf++; + } + while (field); + + if (nf == 1) + { + free (fields); + if (ops->flags & SFLAG) + return ind; + if (v) + { + bind_array_element (v, ind, line, 0); + ind++; + } + else + printf ("%s\n", line); + return ind; + } + + bmap = xmalloc (nf + 1); + memset (bmap, 0, nf); + + for (n = 0; n < ops->npos; n++) + { + s = ops->poslist[n].startpos; /* no translation needed yet */ + e = ops->poslist[n].endpos; + if (e == NORANGE) + e = s; + else if (e == EOL || e >= nf) + e = nf - 1; + /* even if a column is specified multiple times, it will only be printed + once */ + for (i = s; i <= e; i++) + bmap[i] = 1; + } + + for (i = 1, b = 0; b < nf; b++) + { + if (bmap[b] == 0) + continue; + if (v) + { + bind_array_element (v, ind, fields[b], 0); + ind++; + } + else + { + if (i == 0) + putchar (ops->delim); + printf ("%s", fields[b]); + } + i = 0; + } + if (v == 0) + putchar ('\n'); + 
+ return nf; +} + +static int +cutline (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + int rval; + + if (ops->flags & BFLAG) + rval = cutbytes (v, line, ops); + else if (ops->flags & CFLAG) + rval = cutchars (v, line, ops); + else + rval = cutfields (v, line, ops); + + return (rval >= 0 ? EXECUTION_SUCCESS : EXECUTION_FAILURE); +} + +static int +cutfile (v, list, ops) + SHELL_VAR *v; + WORD_LIST *list; + struct cutop *ops; +{ + int fd, unbuffered_read; + char *line, *b; + size_t llen; + WORD_LIST *l; + ssize_t n; + + line = 0; + llen = 0; + + l = list; + do + { + /* for each file */ + if (l == 0 || (l->word->word[0] == '-' && l->word->word[1] == '\0')) + fd = 0; + else + fd = open (l->word->word, O_RDONLY); + if (fd < 0) + { + file_error (l->word->word); + return (EXECUTION_FAILURE); + } + +#ifndef __CYGWIN__ + unbuffered_read = (lseek (fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE); +#else + unbuffered_read = 1; +#endif + + while ((n = zgetline (fd, &line, &llen, '\n', unbuffered_read)) != -1) + cutline (v, line, ops); /* can modify line */ + if (fd > 0) + close (fd); + + if (l) + l = l->next; + } + while (l); + + free (line); + return EXECUTION_SUCCESS; +} + +#define OPTSET(x) ((cutflags & (x)) ? 
1 : 0) + +static int +cut_internal (which, list) + int which; /* not used yet */ + WORD_LIST *list; +{ + int opt, rval, cutflags, delim, npos; + char *array_name, *cutstring, *list_arg; + SHELL_VAR *v; + struct cutop op; + struct cutpos *poslist; + + v = 0; + rval = EXECUTION_SUCCESS; + + cutflags = 0; + array_name = 0; + list_arg = 0; + delim = '\t'; + + reset_internal_getopt (); + while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1) + { + switch (opt) + { + case 'a': + array_name = list_optarg; + break; + case 'b': + cutflags |= BFLAG; + list_arg = list_optarg; + break; + case 'c': + cutflags |= CFLAG; + list_arg = list_optarg; + break; + case 'd': + cutflags |= DFLAG; + delim = list_optarg[0]; + if (delim == 0 || list_optarg[1]) + { + builtin_error ("delimiter must be a single non-null character"); + return (EX_USAGE); + } + break; + case 'f': + cutflags |= FFLAG; + list_arg = list_optarg; + break; + case 'n': + break; + case 's': + cutflags |= SFLAG; + break; + CASE_HELPOPT; + default: + builtin_usage (); + return (EX_USAGE); + } + } + list = loptend; + + if (array_name && (legal_identifier (array_name) == 0)) + { + sh_invalidid (array_name); + return (EXECUTION_FAILURE); + } + + if (list == 0 && which == 0) + { + builtin_error ("string argument required"); + return (EX_USAGE); + } + + /* options are mutually exclusive and one is required */ + if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1) + { + builtin_usage (); + return (EX_USAGE); + } + + if ((npos = getlist (list_arg, &poslist)) < 0) + { + free (poslist); + return (EXECUTION_FAILURE); + } + + if (array_name) + { + v = find_or_make_array_variable (array_name, 1); + if (v == 0 || readonly_p (v) || noassign_p (v)) + { + if (v && readonly_p (v)) + err_readonly (array_name); + return (EXECUTION_FAILURE); + } + else if (array_p (v) == 0) + { + builtin_error ("%s: not an indexed array", array_name); + return (EXECUTION_FAILURE); + } + if (invisible_p (v)) + VUNSETATTR (v, att_invisible); + 
array_flush (array_cell (v)); + } + + op.flags = cutflags; + op.delim = delim; + op.npos = npos; + op.poslist = poslist; + + /* we implement cut as a builtin with a cutfile() function that opens each + filename in LIST as a filename (or `-' for stdin) and runs cutline on + every line in the file. */ + if (which == 0) + { + cutstring = list->word->word; + if (cutstring == 0 || *cutstring == 0) + { + free (poslist); + return (EXECUTION_SUCCESS); + } + rval = cutline (v, cutstring, &op); + } + else + rval = cutfile (v, list, &op); + + return (rval); +} + +int +lcut_builtin (list) + WORD_LIST *list; +{ + return (cut_internal (0, list)); +} + +int +cut_builtin (list) + WORD_LIST *list; +{ + return (cut_internal (1, list)); +} + +/* Called when builtin is enabled and loaded from the shared object. If this + function returns 0, the load fails. */ +int +lcut_builtin_load (name) + char *name; +{ + return (1); +} + +/* Called when builtin is disabled. */ +void +lcut_builtin_unload (name) + char *name; +{ +} + +char *lcut_doc[] = { + "Extract selected fields from a string.", + "", + "Select portions of LINE (as specified by LIST) and assign them to", + "elements of the indexed array ARRAY starting at index 0, or write", + "them to the standard output if -a is not specified.", + "", + "Items specified by LIST are either column positions or fields delimited", + "by a special character, and are described more completely in cut(1).", + "", + "Columns correspond to bytes (-b), characters (-c), or fields (-f). The", + "field delimiter is specified by -d (default TAB). Column numbering", + "starts at 1.", + (char *)NULL +}; + +struct builtin lcut_struct = { + "lcut", /* builtin name */ + lcut_builtin, /* function implementing the builtin */ + BUILTIN_ENABLED, /* initial flags for builtin */ + lcut_doc, /* array of long documentation strings. 
*/ + "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */ + 0 /* reserved for internal use */ +}; + +char *cut_doc[] = { + "Extract selected fields from each line of a file.", + "", + "Select portions of each line (as specified by LIST) from each FILE", + "and write them to the standard output. cut reads from the standard", + "input if no FILE arguments are specified or if a FILE argument is a", + "single hyphen.", + "", + "Items specified by LIST are either column positions or fields delimited", + "by a special character, and are described more completely in cut(1).", + "", + "Columns correspond to bytes (-b), characters (-c), or fields (-f). The", + "field delimiter is specified by -d (default TAB). Column numbering", + "starts at 1.", + (char *)NULL +}; + +struct builtin cut_struct = { + "cut", /* builtin name */ + cut_builtin, /* function implementing the builtin */ + BUILTIN_ENABLED, /* initial flags for builtin */ + cut_doc, /* array of long documentation strings. */ + "cut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] [file ...]", /* usage synopsis; becomes short_doc */ + 0 /* reserved for internal use */ +}; diff --git a/examples/loadables/lcut.c b/examples/loadables/lcut.c new file mode 100644 index 000000000..4dfc09142 --- /dev/null +++ b/examples/loadables/lcut.c @@ -0,0 +1,563 @@ +/* lcut - extract specified fields from a line and assign them to an array or + print them to the standard output */ + +/* + Copyright (C) 2020 Free Software Foundation, Inc. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* See Makefile for compilation details. */ + +#include <config.h> + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif +#include "bashansi.h" +#include <stdio.h> + +#include "loadables.h" +#include "shmbutil.h" + +#define CUT_ARRAY_DEFAULT "CUTFIELDS" + +#define NOPOS -2 /* sentinel for unset startpos/endpos */ + +#define BOL 0 +#define EOL INT_MAX +#define NORANGE -1 /* just a position, no range */ + +#define BFLAG (1 << 0) +#define CFLAG (1 << 1) +#define DFLAG (1 << 2) +#define FFLAG (1 << 3) +#define SFLAG (1 << 4) + +struct cutpos +{ + int startpos, endpos; /* zero-based, correction done in getlist() */ +}; + +struct cutop +{ + int flags; + int delim; + int npos; + struct cutpos *poslist; +}; + +static int +poscmp (a, b) + void *a, *b; +{ + struct cutpos *p1, *p2; + + p1 = (struct cutpos *)a; + p2 = (struct cutpos *)b; + return (p1->startpos - p2->startpos); +} + +static int +getlist (arg, opp) + char *arg; + struct cutpos **opp; +{ + char *ntok, *ltok, *larg; + int s, e; + intmax_t num; + struct cutpos *poslist; + int npos, nsize; + + poslist = 0; + nsize = npos = 0; + s = e = 0; + larg = arg; + while (ltok = strsep (&larg, ",")) + { + if (*ltok == 0) + continue; + + ntok = strsep (&ltok, "-"); + if (*ntok == 0) + s = BOL; + else + { + if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0) + { + builtin_error ("%s: invalid list value", ntok); + *opp = poslist; + return -1; + } + s = num; + s--; /* fields are 1-based */ + } + if (ltok == 0) + e = NORANGE; + else if (*ltok == 0) + e = EOL; + else + { + if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0) + { + builtin_error ("%s: invalid list value", ltok); + *opp = poslist; + return -1; + } + e = num; + e--; + if (e == s) + e = NORANGE; + } + + if (npos == nsize) + { + nsize += 4; + poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof 
(struct cutpos)); + } + poslist[npos].startpos = s; + poslist[npos].endpos = e; + npos++; + } + if (npos == 0) + { + builtin_error ("missing list of positions"); + *opp = poslist; + return -1; + } + + qsort (poslist, npos, sizeof(poslist[0]), poscmp); + *opp = poslist; + + return npos; +} + +static int +cutbytes (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + arrayind_t ind; + char *buf, *bmap; + size_t llen; + int i, b, n, s, e; + + llen = strlen (line); + buf = xmalloc (llen + 1); + bmap = xmalloc (llen + 1); + memset (bmap, 0, llen); + + for (n = 0; n < ops->npos; n++) + { + s = ops->poslist[n].startpos; /* no translation needed yet */ + e = ops->poslist[n].endpos; + if (e == NORANGE) + e = s; + else if (e == EOL || e >= llen) + e = llen - 1; + /* even if a column is specified multiple times, it will only be printed + once */ + for (i = s; i <= e; i++) + bmap[i] = 1; + } + + b = 0; + for (i = 0; i < llen; i++) + if (bmap[i]) + buf[b++] = line[i]; + buf[b] = 0; + + if (v) + { + ind = 0; + bind_array_element (v, ind, buf, 0); + ind++; + } + else + printf ("%s\n", buf); + + free (buf); + free (bmap); + + return ind; +} + +static int +cutchars (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + arrayind_t ind; + char *buf, *bmap; + wchar_t *wbuf, *wb2; + size_t llen, wlen; + int i, b, n, s, e; + + if (MB_CUR_MAX == 1) + return (cutbytes (v, line, ops)); + if (locale_utf8locale && utf8_mbsmbchar (line) == 0) + return (cutbytes (v, line, ops)); + + llen = strlen (line); + wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t)); + + wlen = mbstowcs (wbuf, line, llen); + if (MB_INVALIDCH (wlen)) + { + free (wbuf); + return (cutbytes (v, line, ops)); + } + + bmap = xmalloc (llen + 1); + memset (bmap, 0, llen); + + for (n = 0; n < ops->npos; n++) + { + s = ops->poslist[n].startpos; /* no translation needed yet */ + e = ops->poslist[n].endpos; + if (e == NORANGE) + e = s; + else if (e == EOL || e >= wlen) + e = wlen - 1; + /* 
even if a column is specified multiple times, it will only be printed + once */ + for (i = s; i <= e; i++) + bmap[i] = 1; + } + + wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t)); + b = 0; + for (i = 0; i < wlen; i++) + if (bmap[i]) + wb2[b++] = wbuf[i]; + wb2[b] = 0; + + free (wbuf); + + buf = bmap; + n = wcstombs (buf, wb2, llen); + + if (v) + { + ind = 0; + bind_array_element (v, ind, buf, 0); + ind++; + } + else + printf ("%s\n", buf); + + free (buf); + free (wb2); + + return ind; +} + +/* The basic strategy is to cut the line into fields using strsep, populate + an array of fields from 0..nf, then select those fields using the same + bitmap approach as cut{bytes,chars} and assign them to the array variable + V or print them on stdout. This function obeys SFLAG. */ +static int +cutfields (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + arrayind_t ind; + char *buf, *bmap, *field, **fields, delim[2]; + size_t llen, fsize; + int i, b, n, s, e, nf; + + ind = 0; + + delim[0] = ops->delim; + delim[1] = '\0'; + + fields = 0; + nf = 0; + fsize = 0; + + field = buf = line; + do + { + field = strsep (&buf, delim); /* destructive */ + if (nf == fsize) + { + fsize += 8; + fields = xrealloc (fields, fsize * sizeof (char *)); + } + fields[nf] = field; + if (field) + nf++; + } + while (field); + + if (nf == 1) + { + free (fields); + if (ops->flags & SFLAG) + return ind; + if (v) + { + bind_array_element (v, ind, line, 0); + ind++; + } + else + printf ("%s\n", line); + return ind; + } + + bmap = xmalloc (nf + 1); + memset (bmap, 0, nf); + + for (n = 0; n < ops->npos; n++) + { + s = ops->poslist[n].startpos; /* no translation needed yet */ + e = ops->poslist[n].endpos; + if (e == NORANGE) + e = s; + else if (e == EOL || e >= nf) + e = nf - 1; + /* even if a column is specified multiple times, it will only be printed + once */ + for (i = s; i <= e; i++) + bmap[i] = 1; + } + + for (i = 1, b = 0; b < nf; b++) + { + if (bmap[b] == 0) + continue; + if 
(v) + { + bind_array_element (v, ind, fields[b], 0); + ind++; + } + else + { + if (i == 0) + putchar (ops->delim); + printf ("%s", fields[b]); + } + i = 0; + } + if (v == 0) + putchar ('\n'); + + return nf; +} + +static int +cutline (v, line, ops) + SHELL_VAR *v; + char *line; + struct cutop *ops; +{ + int rval; + + if (ops->flags & BFLAG) + rval = cutbytes (v, line, ops); + else if (ops->flags & CFLAG) + rval = cutchars (v, line, ops); + else + rval = cutfields (v, line, ops); + + return (rval >= 0 ? EXECUTION_SUCCESS : EXECUTION_FAILURE); +} + +static int +cutfile (v, list, ops) + SHELL_VAR *v; + WORD_LIST *list; + struct cutop *ops; +{ +} + +#define OPTSET(x) ((cutflags & (x)) ? 1 : 0) + +static int +cut_internal (which, list) + int which; /* not used yet */ + WORD_LIST *list; +{ + int opt, rval, cutflags, delim, npos; + char *array_name, *cutstring, *list_arg; + SHELL_VAR *v; + struct cutop op; + struct cutpos *poslist; + + v = 0; + rval = EXECUTION_SUCCESS; + + cutflags = 0; + array_name = 0; + list_arg = 0; + delim = '\t'; + + reset_internal_getopt (); + while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1) + { + switch (opt) + { + case 'a': + array_name = list_optarg; + break; + case 'b': + cutflags |= BFLAG; + list_arg = list_optarg; + break; + case 'c': + cutflags |= CFLAG; + list_arg = list_optarg; + break; + case 'd': + cutflags |= DFLAG; + delim = list_optarg[0]; + if (delim == 0 || list_optarg[1]) + { + builtin_error ("delimiter must be a single non-null character"); + return (EX_USAGE); + } + break; + case 'f': + cutflags |= FFLAG; + list_arg = list_optarg; + break; + case 'n': + break; + case 's': + cutflags |= SFLAG; + break; + CASE_HELPOPT; + default: + builtin_usage (); + return (EX_USAGE); + } + } + list = loptend; + + if (array_name && (legal_identifier (array_name) == 0)) + { + sh_invalidid (array_name); + return (EXECUTION_FAILURE); + } + + if (list == 0) + { + builtin_error ("string argument required"); + return (EX_USAGE); + } + + /* 
options are mutually exclusive and one is required */ + if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1) + { + builtin_usage (); + return (EX_USAGE); + } + + if ((npos = getlist (list_arg, &poslist)) < 0) + { + free (poslist); + return (EXECUTION_FAILURE); + } + + if (array_name) + { + v = find_or_make_array_variable (array_name, 1); + if (v == 0 || readonly_p (v) || noassign_p (v)) + { + if (v && readonly_p (v)) + err_readonly (array_name); + return (EXECUTION_FAILURE); + } + else if (array_p (v) == 0) + { + builtin_error ("%s: not an indexed array", array_name); + return (EXECUTION_FAILURE); + } + if (invisible_p (v)) + VUNSETATTR (v, att_invisible); + array_flush (array_cell (v)); + } + + cutstring = list->word->word; + + if (cutstring == 0 || *cutstring == 0) + { + free (poslist); + return (EXECUTION_SUCCESS); + } + + op.flags = cutflags; + op.delim = delim; + op.npos = npos; + op.poslist = poslist; + + /* we can eventually implement cut as a builtin with a cutfile() function + that opens cutstring as a filename (or `-' for stdin) and runs cutline + on every line in the file. */ + if (which == 0) + rval = cutline (v, cutstring, &op); + + return (rval); +} + +int +lcut_builtin (list) + WORD_LIST *list; +{ + return (cut_internal (0, list)); +} + +/* Called when builtin is enabled and loaded from the shared object. If this + function returns 0, the load fails. */ +int +lcut_builtin_load (name) + char *name; +{ + return (1); +} + +/* Called when builtin is disabled. 
*/ +void +lcut_builtin_unload (name) + char *name; +{ +} + +char *lcut_doc[] = { + "Extract selected fields from a string.", + "", + "Select portions of LINE (as specified by LIST) and assign them to", + "elements of the indexed array ARRAY starting at index 0, or write", + "them to the standard output if -a is not specified.", + "", + "Items specified by LIST are either column positions or fields delimited", + "by a special character, and are described more completely in cut(1).", + "", + "Columns correspond to bytes (-b), characters (-c), or fields (-f). The", + "field delimiter is specified by -d (default TAB). Column numbering", + "starts at 1.", + (char *)NULL +}; + +struct builtin lcut_struct = { + "lcut", /* builtin name */ + lcut_builtin, /* function implementing the builtin */ + BUILTIN_ENABLED, /* initial flags for builtin */ + lcut_doc, /* array of long documentation strings. */ + "lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */ + 0 /* reserved for internal use */ +}; diff --git a/jobs.c b/jobs.c index 516239e36..95e68efce 100644 --- a/jobs.c +++ b/jobs.c @@ -2118,11 +2118,10 @@ list_all_jobs (format) anything else with it. ASYNC_P says what to do with the tty. If non-zero, then don't give it away. */ pid_t -make_child (command, async_p) +make_child (command, flags) char *command; - int async_p; { - int forksleep; + int async_p, forksleep; sigset_t set, oset, termset, chldset, oset_copy; pid_t pid; SigHandler *oterm; @@ -2147,6 +2146,7 @@ make_child (command, async_p) making_children (); + async_p = (flags & FORK_ASYNC); forksleep = 1; #if defined (BUFFERED_INPUT) diff --git a/jobs.h b/jobs.h index 945fe3da9..f89669a5e 100644 --- a/jobs.h +++ b/jobs.h @@ -196,6 +196,11 @@ struct procchain { #define ANY_PID (pid_t)-1 +/* flags for make_child () */ +#define FORK_SYNC 0 +#define FORK_ASYNC 1 +#define FORK_NOJOB 2 + /* System calls. 
*/ #if !defined (HAVE_UNISTD_H) extern pid_t fork (), getpid (), getpgrp (); diff --git a/lib/readline/mbutil.c b/lib/readline/mbutil.c index 17716357f..dc62b4cc2 100644 --- a/lib/readline/mbutil.c +++ b/lib/readline/mbutil.c @@ -1,6 +1,6 @@ /* mbutil.c -- readline multibyte character utility functions */ -/* Copyright (C) 2001-2017 Free Software Foundation, Inc. +/* Copyright (C) 2001-2020 Free Software Foundation, Inc. This file is part of the GNU Readline Library (Readline), a library for reading lines of text with interactive input and history editing. @@ -86,7 +86,7 @@ int _rl_utf8locale = 0; static int _rl_utf8_mblen (const char *s, size_t n) { - unsigned char c, c1; + unsigned char c, c1, c2, c3; if (s == 0) return (0); /* no shift states */ @@ -101,25 +101,46 @@ _rl_utf8_mblen (const char *s, size_t n) c1 = (unsigned char)s[1]; if (c < 0xe0) { - if (n >= 2 && (s[1] ^ 0x80) < 0x40) + if (n == 1) + return -2; + if (n >= 2 && (c1 ^ 0x80) < 0x40) return 2; } else if (c < 0xf0) { - if (n >= 3 - && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 + if (n == 1) + return -2; + if ((c1 ^ 0x80) < 0x40 && (c >= 0xe1 || c1 >= 0xa0) && (c != 0xed || c1 < 0xa0)) - return 3; + { + if (n == 2) + return -2; + c2 = (unsigned char)s[2]; + if ((c2 ^ 0x80) < 0x40) + return 3; + } } - else if (c < 0xf8) + else if (c <= 0xf4) /* lead byte 0xf4 must reach the c == 0xf4 && c1 < 0x90 test below */ { - if (n >= 4 - && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 - && (s[3] ^ 0x80) < 0x40 + if (n == 1) + return -2; + if (((c1 ^ 0x80) < 0x40) && (c >= 0xf1 || c1 >= 0x90) && (c < 0xf4 || (c == 0xf4 && c1 < 0x90))) - return 4; + { + if (n == 2) + return -2; + c2 = (unsigned char)s[2]; + if ((c2 ^ 0x80) < 0x40) + { + if (n == 3) + return -2; + c3 = (unsigned char)s[3]; + if ((c3 ^ 0x80) < 0x40) + return 4; + } + } } } /* invalid or incomplete multibyte character */ @@ -206,6 +227,66 @@ _rl_find_next_mbchar_internal (char *string, int seed, int count, int find_non_z return point; } +static inline int +_rl_test_nonzero (char *string, int ind, int len) +{ + size_t
tmp; + wchar_t wc; + mbstate_t ps; + + memset (&ps, 0, sizeof (mbstate_t)); + tmp = mbrtowc (&wc, string + ind, len - ind, &ps); + /* treat invalid multibyte sequences as non-zero-width */ + return (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp) || WCWIDTH (wc) > 0); +} + +/* experimental -- needs to handle zero-width characters better */ +static int +_rl_find_prev_utf8char (char *string, int seed, int find_non_zero) +{ + char *s; + unsigned char b; + int save, prev; + size_t len; + + if (find_non_zero) + len = RL_STRLEN (string); + + prev = seed - 1; + while (prev >= 0) + { + b = (unsigned char)string[prev]; + if (UTF8_SINGLEBYTE (b)) + return (prev); + + save = prev; + + /* Move back until we're not in the middle of a multibyte char */ + if (UTF8_MBCHAR (b)) + { + while (prev > 0 && (b = (unsigned char)string[--prev]) && UTF8_MBCHAR (b)) + ; + } + + if (UTF8_MBFIRSTCHAR (b)) + { + if (find_non_zero) + { + if (_rl_test_nonzero (string, prev, len)) + return (prev); + else /* valid but WCWIDTH (wc) == 0 */ + prev = prev - 1; + } + else + return (prev); + } + else + return (save); /* invalid utf-8 multibyte sequence */ + } + + return ((prev < 0) ? 0 : prev); +} + /*static*/ int _rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero) { @@ -214,6 +295,9 @@ _rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero) size_t tmp; wchar_t wc; + if (_rl_utf8locale) + return (_rl_find_prev_utf8char (string, seed, find_non_zero)); + memset(&ps, 0, sizeof(mbstate_t)); length = strlen(string); diff --git a/nojobs.c b/nojobs.c index a2b17be50..b7b64d171 100644 --- a/nojobs.c +++ b/nojobs.c @@ -490,18 +490,19 @@ siginterrupt (sig, flag) anything else with it. ASYNC_P says what to do with the tty. If non-zero, then don't give it away. */ pid_t -make_child (command, async_p) +make_child (command, flags) char *command; - int async_p; + int flags; { pid_t pid; - int forksleep; + int async_p, forksleep; sigset_t set, oset; /* Discard saved memory. 
*/ if (command) free (command); + async_p = (flags & FORK_ASYNC); start_pipeline (); #if defined (BUFFERED_INPUT) diff --git a/variables.c b/variables.c index 0f2a24c49..aacc12fa4 100644 --- a/variables.c +++ b/variables.c @@ -2076,7 +2076,7 @@ initialize_dynamic_variables () INIT_DYNAMIC_VAR ("SRANDOM", (char *)NULL, get_urandom, (sh_var_assign_func_t *)NULL); VSETATTR (v, att_integer); INIT_DYNAMIC_VAR ("LINENO", (char *)NULL, get_lineno, assign_lineno); - VSETATTR (v, att_integer|att_regenerate); + VSETATTR (v, att_regenerate); INIT_DYNAMIC_VAR ("BASHPID", (char *)NULL, get_bashpid, null_assign); VSETATTR (v, att_integer);