From be295e1b6f2efda87373bda67ee48a886051771d Mon Sep 17 00:00:00 2001 From: Rose <83477269+AtariDreams@users.noreply.github.com> Date: Tue, 8 Nov 2022 11:52:46 -0500 Subject: [PATCH] Update regex to the modified rxspencer repo This repo was linked in the original repo as a newer version, with the current version being the original version that abandoned development 8 years ago. Nothing has been changed outside of that source, which is from upstream. --- vcnet/regex.vcxproj | 6 +- vcnet/regex.vcxproj.filters | 2 +- vcnet/regex/Makefile | 25 +- vcnet/regex/README | 92 ++- vcnet/regex/WHATSNEW | 31 + vcnet/regex/cclass.h | 31 - vcnet/regex/cname.h | 102 --- vcnet/regex/debug.c | 38 +- vcnet/regex/engine.c | 233 ++++--- vcnet/regex/{regex.def => librxspencer.def} | 12 +- vcnet/regex/main.c | 200 +++--- vcnet/regex/regcomp.c | 678 ++++++++++++-------- vcnet/regex/regerror.c | 151 ++--- vcnet/regex/regex.h | 31 +- vcnet/regex/regex2.h | 30 +- vcnet/regex/regexec.c | 26 +- vcnet/regex/regfree.c | 6 +- vcnet/regex/{regex.3 => rxspencer.3} | 4 +- vcnet/regex/{regex.7 => rxspencer.7} | 2 +- vcnet/regex/split.c | 37 +- 20 files changed, 872 insertions(+), 865 deletions(-) delete mode 100644 vcnet/regex/cclass.h delete mode 100644 vcnet/regex/cname.h rename vcnet/regex/{regex.def => librxspencer.def} (55%) rename vcnet/regex/{regex.3 => rxspencer.3} (99%) rename vcnet/regex/{regex.7 => rxspencer.7} (99%) diff --git a/vcnet/regex.vcxproj b/vcnet/regex.vcxproj index b9c166086c..221a01be00 100644 --- a/vcnet/regex.vcxproj +++ b/vcnet/regex.vcxproj @@ -71,7 +71,7 @@ ProgramDatabase - regex/regex.def + regex/librxspencer.def true Windows MachineX64 @@ -93,7 +93,7 @@ ProgramDatabase - regex/regex.def + regex/librxspencer.def true Windows true @@ -109,7 +109,7 @@ - + diff --git a/vcnet/regex.vcxproj.filters b/vcnet/regex.vcxproj.filters index 562bdfb63f..a9060d9a3a 100644 --- a/vcnet/regex.vcxproj.filters +++ b/vcnet/regex.vcxproj.filters @@ -32,7 +32,7 @@ - + Source Files diff 
--git a/vcnet/regex/Makefile b/vcnet/regex/Makefile index 3882b37864..455eb27bc6 100644 --- a/vcnet/regex/Makefile +++ b/vcnet/regex/Makefile @@ -19,7 +19,7 @@ LIBS= # Internal stuff, should not need changing. OBJPRODN=regcomp.o regexec.o regerror.o regfree.o OBJS=$(OBJPRODN) split.o debug.o main.o -H=cclass.h cname.h regex2.h utils.h +H=regex2.h utils.h REGSRC=regcomp.c regerror.c regexec.c regfree.c ALLSRC=$(REGSRC) engine.c debug.c main.c split.c @@ -28,11 +28,6 @@ LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c JUNKLINT=possible pointer alignment|null effect -# arrangements to build forward-reference header files -.SUFFIXES: .ih .h -.c.ih: - sh ./mkh $(MKHFLAGS) -p $< >$@ - default: r lib: purge $(OBJPRODN) @@ -52,11 +47,11 @@ $(REGEXH): $(REGEXHSRC) mkh # dependencies $(OBJPRODN) debug.o: utils.h regex.h regex2.h -regcomp.o: cclass.h cname.h regcomp.ih -regexec.o: engine.c engine.ih -regerror.o: regerror.ih -debug.o: debug.ih -main.o: main.ih +regcomp.o: regcomp.h +regexec.o: engine.c engine.h +regerror.o: regerror.h +debug.o: debug.h +main.o: main.h # tester re: $(OBJS) @@ -92,7 +87,7 @@ fullprint: ti README WHATSNEW notes todo | list ti *.h | list list *.c - list regex.3 regex.7 + list rxspencer.3 rxspencer.7 print: ti README WHATSNEW notes todo | list @@ -103,9 +98,9 @@ print: mf.tmp: Makefile sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@ -DTRH=cclass.h cname.h regex2.h utils.h +DTRH=regex2.h utils.h PRE=COPYRIGHT README WHATSNEW -POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch] +POST=mkh rxspencer.3 rxspencer.7 tests $(DTRH) $(ALLSRC) fake/*.[ch] FILES=$(PRE) Makefile $(POST) DTR=$(PRE) Makefile=mf.tmp $(POST) dtr: $(FILES) mf.tmp @@ -123,7 +118,7 @@ tidy: rm -f junk* core core.* *.core dtr *.tmp lint clean: tidy - rm -f *.o *.s *.ih re libregex.a + rm -f *.o *.s re libregex.a # don't do this one unless you know what you're doing spotless: clean diff --git 
a/vcnet/regex/README b/vcnet/regex/README index e6ce373444..e09c014af9 100644 --- a/vcnet/regex/README +++ b/vcnet/regex/README @@ -1,32 +1,76 @@ -alpha3.8 release. -Tue Aug 10 15:51:48 EDT 1999 -henry@spsystems.net (formerly henry@zoo.toronto.edu) +This is a modified version of Henry Spencer's "BSD" regular expression +library. The original library can be found at +https://github.com/garyhouston/regex. A description of Spencer's +various libraries can be found at https://garyhouston.github.io/regex/ -See WHATSNEW for change listing. +The changes in this version are: -installation notes: --------- -Read the comments at the beginning of Makefile before running. +* The library name has been changed to rxspencer, the header installs + into a directory rxspencer, and the man pages named accordingly, to + allow easy installation as a system library on Linux without + conflicting with other regex libraries. +* A CMake build system, which can build either static or shared + libraries, maybe even on non-Unix systems. +* A few code modernizations and changes to improve portability, avoid + compiler warnings, and improve robustness. -Utils.h contains some things that just might have to be modified on -some systems, as well as a nested include (ugh) of <assert.h>. +I (Gary Houston) originally made this version with a build system +based on GNU Automake and Libtool, to allow building a shared library +for a project where the library needed to be dynamically loaded. The +CMake scripts were contributed by Stephen Just, modified from LuaDist +versions. -The "fake" directory contains quick-and-dirty fakes for some header -files and routines that old systems may not have. Note also that --DUSEBCOPY will make utils.h substitute bcopy() for memmove(). +Spencer's original license can be found in the COPYRIGHT file. The CMake +scripts are licensed under the MIT license. I release all of my own +changes to the public domain under the Creative Commons Zero license. 
-After that, "make r" will build regcomp.o, regexec.o, regfree.o, -and regerror.o (the actual routines), bundle them together into a test -program, and run regression tests on them. No output is good output. +Installation +============ -"make lib" builds just the .o files for the actual routines (when -you're happy with testing and have adjusted CFLAGS for production), -and puts them together into libregex.a. You can pick up either the -library or *.o ("make lib" makes sure there are no other .o files left -around to confuse things). +CMake needs to be installed. To build from a Linux command line, or +something compatible, within the source directory: -Main.c, debug.c, split.c are used for regression testing but are not part -of the RE routines themselves. +to configure for a static library: +cmake . +or +cmake -Drxshared=0 . -Regex.h goes in /usr/include. All other .h files are internal only. --------- +to configure for a shared library: +cmake -Drxshared=1 . + +to disable tests: +cmake -DBUILD_TESTING=OFF . + +to disable manuals/ documentation: +cmake -DINSTALL_DOCS=OFF . + +to build: +make +make install + +also required on Linux, after installing the shared library: +ldconfig + +to run the tests: +make test + +to run the tests, with output displayed: +ctest -V + +Usage +===== + +Include the following header in a C program: + +#include <rxspencer/regex.h> + +Link with the library using the -lrxspencer flag, as in: +gcc test.c -o test -lrxspencer + +The library is further described in the supplied man pages: rxspencer.3 +and rxspencer.7, which are formatted at +https://garyhouston.github.io/regex/regex3.html and +https://garyhouston.github.io/regex/regex7.html. + +-- +Gary Houston, ghouston@arglist.com diff --git a/vcnet/regex/WHATSNEW b/vcnet/regex/WHATSNEW index 12953433d3..ec7354f738 100644 --- a/vcnet/regex/WHATSNEW +++ b/vcnet/regex/WHATSNEW @@ -1,3 +1,34 @@ +New in 3.9.0 (Gary Houston): Conversion to cmake build system, +contributed by Stephen Just. Simplify version labelling. 
Simplify +build system by not generating headers. Adjust types and add casts to +avoid compiler warnings. Remove unused and undocumented REG_DUMP +flag. Make regerror more robust. + +New in alpha3.8.g7 (Gary Houston): Configuration scripts updated. Make sure +symlinks aren't created by libtool. 26 Nov 2015. + +New in alpha3.8.g6 (Gary Houston): Bug fix for integer overflow in regcomp +for excessively long pattern strings. CERT Vulnerability Note VU#695940. +Found by Guido Vranken. Configuration scripts updated. 25 Feb 2015. + +New in alpha3.8.g5 (Gary Houston): Configuration scripts updated. Fixed +"make check". Deleted unused "fake" C library headers. + +New in alpha3.8.g4 (Gary Houston): Makefile.am: account for $(DESTDIR) +in install-data-local, thanks to Aleksey Cheusov. Configuration scripts +updated. + +New in alpha3.8.g3 (Gary Houston): configuration scripts updated. + +New in alpha3.8.g2 (Gary Houston): configure.in, Makefile.am: updated +for newer versions of autoconf/automake/libtool, with miscellaneous +changes. mkh: don't set PATH, in case utilities are in non-standard +locations (i.e., cygwin). + +New in alpha3.8.g1 (Gary Houston): Use autoconf/automake/libtool to +build, test and install the library. Renamed the installed files to +avoid conflicts with other regex libraries. + New in alpha3.8: Bug fix for signed/unsigned mixup, found and fixed by the FreeBSD folks. 
diff --git a/vcnet/regex/cclass.h b/vcnet/regex/cclass.h deleted file mode 100644 index 0c293028e9..0000000000 --- a/vcnet/regex/cclass.h +++ /dev/null @@ -1,31 +0,0 @@ -/* character-class table */ -static struct cclass { - char *name; - char *chars; - char *multis; -} cclasses[] = { - "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", "", - "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - "", - "blank", " \t", "", - "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", "", - "digit", "0123456789", "", - "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "", - "lower", "abcdefghijklmnopqrstuvwxyz", - "", - "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - "", - "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "", - "space", "\t\n\v\f\r ", "", - "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "", - "xdigit", "0123456789ABCDEFabcdef", - "", - NULL, 0, "" -}; diff --git a/vcnet/regex/cname.h b/vcnet/regex/cname.h deleted file mode 100644 index 02e86e912e..0000000000 --- a/vcnet/regex/cname.h +++ /dev/null @@ -1,102 +0,0 @@ -/* character-name table */ -static struct cname { - char *name; - char code; -} cnames[] = { - "NUL", '\0', - "SOH", '\001', - "STX", '\002', - "ETX", '\003', - "EOT", '\004', - "ENQ", '\005', - "ACK", '\006', - "BEL", '\007', - "alert", '\007', - "BS", '\010', - "backspace", '\b', - "HT", '\011', - "tab", '\t', - "LF", '\012', - "newline", '\n', - "VT", '\013', - "vertical-tab", '\v', - "FF", '\014', - "form-feed", '\f', - "CR", '\015', - "carriage-return", '\r', - "SO", '\016', - "SI", '\017', - "DLE", '\020', - "DC1", '\021', - "DC2", '\022', - "DC3", '\023', - "DC4", '\024', - "NAK", '\025', - "SYN", '\026', - "ETB", '\027', - "CAN", '\030', - "EM", '\031', - "SUB", '\032', - "ESC", '\033', - "IS4", '\034', - "FS", '\034', - "IS3", 
'\035', - "GS", '\035', - "IS2", '\036', - "RS", '\036', - "IS1", '\037', - "US", '\037', - "space", ' ', - "exclamation-mark", '!', - "quotation-mark", '"', - "number-sign", '#', - "dollar-sign", '$', - "percent-sign", '%', - "ampersand", '&', - "apostrophe", '\'', - "left-parenthesis", '(', - "right-parenthesis", ')', - "asterisk", '*', - "plus-sign", '+', - "comma", ',', - "hyphen", '-', - "hyphen-minus", '-', - "period", '.', - "full-stop", '.', - "slash", '/', - "solidus", '/', - "zero", '0', - "one", '1', - "two", '2', - "three", '3', - "four", '4', - "five", '5', - "six", '6', - "seven", '7', - "eight", '8', - "nine", '9', - "colon", ':', - "semicolon", ';', - "less-than-sign", '<', - "equals-sign", '=', - "greater-than-sign", '>', - "question-mark", '?', - "commercial-at", '@', - "left-square-bracket", '[', - "backslash", '\\', - "reverse-solidus", '\\', - "right-square-bracket", ']', - "circumflex", '^', - "circumflex-accent", '^', - "underscore", '_', - "low-line", '_', - "grave-accent", '`', - "left-brace", '{', - "left-curly-bracket", '{', - "vertical-line", '|', - "right-brace", '}', - "right-curly-bracket", '}', - "tilde", '~', - "DEL", '\177', - NULL, 0, -}; diff --git a/vcnet/regex/debug.c b/vcnet/regex/debug.c index 99ce7da6dd..b9dcc64c75 100644 --- a/vcnet/regex/debug.c +++ b/vcnet/regex/debug.c @@ -4,25 +4,25 @@ #include #include #include -#include -#include "utils.h" +#include "regex.h" #include "regex2.h" -#include "debug.ih" + +static void s_print(struct re_guts *g, FILE *d); +static char *regchar(int ch); /* - regprint - print a regexp for debugging - == void regprint(regex_t *r, FILE *d); */ void regprint(r, d) regex_t *r; FILE *d; { - register struct re_guts *g = r->re_g; - register int i; - register int c; - register int last; + struct re_guts *g = r->re_g; + int i; + int c; + int last; int nincat[NC]; fprintf(d, "%ld states, %d categories", (long)g->nstates, @@ -85,21 +85,20 @@ FILE *d; /* - s_print - print the strip for debugging - == 
static void s_print(register struct re_guts *g, FILE *d); */ static void s_print(g, d) -register struct re_guts *g; +struct re_guts *g; FILE *d; { - register sop *s; - register cset *cs; - register int i; - register int done = 0; - register sop opnd; - register int col = 0; - register int last; - register sopno offset = 2; + sop *s; + cset *cs; + int i; + int done = 0; + sop opnd; + int col = 0; + int last; + sopno offset = 2; # define GAP() { if (offset % 5 == 0) { \ if (col > 40) { \ fprintf(d, "\n\t"); \ @@ -216,7 +215,7 @@ FILE *d; fprintf(d, ">"); break; default: - fprintf(d, "!%d(%d)!", OP(*s), opnd); + fprintf(d, "!%ld(%ld)!", OP(*s), opnd); break; } if (!done) @@ -226,7 +225,6 @@ FILE *d; /* - regchar - make a character printable - == static char *regchar(int ch); */ static char * /* -> representation */ regchar(ch) diff --git a/vcnet/regex/engine.c b/vcnet/regex/engine.c index 0b88dcf1ed..02667d36fc 100644 --- a/vcnet/regex/engine.c +++ b/vcnet/regex/engine.c @@ -45,8 +45,6 @@ struct match { states empty; /* empty set of states */ }; -#include "engine.ih" - #ifdef REDEBUG #define SP(t, s, c) print(m, t, s, c, stdout) #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) @@ -57,26 +55,52 @@ struct match { #define NOTE(s) /* nothing */ #endif +static char *dissect(struct match *m, char *start, + char *stop, sopno startst, sopno stopst); +static char *backref(struct match *m, char *start, + char *stop, sopno startst, sopno stopst, sopno lev); +static char *fast(struct match *m, char *start, + char *stop, sopno startst, sopno stopst); +static char *slow(struct match *m, char *start, + char *stop, sopno startst, sopno stopst); +static states step(struct re_guts *g, sopno start, sopno stop, + states bef, int ch, states aft); +#ifdef REDEBUG +static void print(struct match *m, char *caption, states st, + int ch, FILE *d); +static void at(struct match *m, char *title, char *start, char *stop, + sopno startst, sopno stopst); +static char *pchar(int ch); +#endif + 
+#define BOL (OUT+1) +#define EOL (BOL+1) +#define BOLEOL (BOL+2) +#define NOTHING (BOL+3) +#define BOW (BOL+4) +#define EOW (BOL+5) +#define CODEMAX (BOL+5) // highest code used +#define NONCHAR(c) ((c) > CHAR_MAX) +#define NNONCHAR (CODEMAX-CHAR_MAX) + /* - matcher - the actual matching engine - == static int matcher(register struct re_guts *g, char *string, \ - == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ matcher(g, string, nmatch, pmatch, eflags) -register struct re_guts *g; +struct re_guts *g; char *string; size_t nmatch; regmatch_t pmatch[]; int eflags; { - register char *endp; - register size_t i; + char *endp; + int i; struct match mv; - register struct match *m = &mv; - register char *dp; - const register sopno gf = g->firststate+1; /* +1 for OEND */ - const register sopno gl = g->laststate; + struct match *m = &mv; + char *dp; + const sopno gf = g->firststate+1; /* +1 for OEND */ + const sopno gl = g->laststate; char *start; char *stop; @@ -149,14 +173,14 @@ int eflags; STATETEARDOWN(m); return(REG_ESPACE); } - for (i = 1; i <= m->g->nsub; i++) + for (i = 1; (size_t) i <= m->g->nsub; i++) m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; if (!g->backrefs && !(m->eflags&REG_BACKR)) { NOTE("dissecting"); dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **)malloc((g->nplus+1) * + m->lastpos = (char **)malloc(((size_t)g->nplus+1) * sizeof(char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); @@ -181,7 +205,7 @@ int eflags; break; /* defeat */ /* try it on a shorter possibility */ #ifndef NDEBUG - for (i = 1; i <= m->g->nsub; i++) { + for (i = 1; (size_t) i <= m->g->nsub; i++) { assert(m->pmatch[i].rm_so == -1); assert(m->pmatch[i].rm_eo == -1); } @@ -201,13 +225,13 @@ int eflags; /* fill in the details if requested */ if (nmatch > 0) { - pmatch[0].rm_so = m->coldp - m->offp; - pmatch[0].rm_eo = endp - m->offp; + pmatch[0].rm_so = 
(regoff_t)(m->coldp - m->offp); + pmatch[0].rm_eo = (regoff_t)(endp - m->offp); } if (nmatch > 1) { assert(m->pmatch != NULL); - for (i = 1; i < nmatch; i++) - if (i <= m->g->nsub) + for (i = 1; (size_t) i < nmatch; i++) + if ((size_t) i <= m->g->nsub) pmatch[i] = m->pmatch[i]; else { pmatch[i].rm_so = -1; @@ -225,30 +249,28 @@ int eflags; /* - dissect - figure out what matched what, no back references - == static char *dissect(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); */ static char * /* == stop (success) always */ dissect(m, start, stop, startst, stopst) -register struct match *m; +struct match *m; char *start; char *stop; sopno startst; sopno stopst; { - register int i; - register sopno ss; /* start sop of current subRE */ - register sopno es; /* end sop of current subRE */ - register char *sp; /* start of string matched by it */ - register char *stp; /* string matched by it cannot pass here */ - register char *rest; /* start of rest of string */ - register char *tail; /* string unmatched by rest of RE */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *sep; /* end of string matched by subsubRE */ - register char *oldssp; /* previous ssp */ - register char *dp; + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + char *sp; /* start of string matched by it */ + char *stp; /* string matched by it cannot pass here */ + char *rest; /* start of rest of string */ + char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + char *ssp; /* start of string matched by subsubRE */ + char *sep; /* end of string matched by subsubRE */ + char *oldssp; /* previous ssp */ + char *dp; AT("diss", start, stop, startst, stopst); sp = start; @@ -393,13 +415,13 @@ sopno stopst; break; case 
OLPAREN: i = OPND(m->g->strip[ss]); - assert(0 < i && i <= m->g->nsub); - m->pmatch[i].rm_so = sp - m->offp; + assert(0 < i && (size_t) i <= m->g->nsub); + m->pmatch[i].rm_so = (regoff_t)(sp - m->offp); break; case ORPAREN: i = OPND(m->g->strip[ss]); - assert(0 < i && i <= m->g->nsub); - m->pmatch[i].rm_eo = sp - m->offp; + assert(0 < i && (size_t) i <= m->g->nsub); + m->pmatch[i].rm_eo = (regoff_t)(sp - m->offp); break; default: /* uh oh */ assert(nope); @@ -413,30 +435,28 @@ sopno stopst; /* - backref - figure out what matched what, figuring in back references - == static char *backref(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst, sopno lev); */ static char * /* == stop (success) or NULL (failure) */ backref(m, start, stop, startst, stopst, lev) -register struct match *m; +struct match *m; char *start; char *stop; sopno startst; sopno stopst; sopno lev; /* PLUS nesting level */ { - register int i; - register sopno ss; /* start sop of current subRE */ - register char *sp; /* start of string matched by it */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *dp; - register size_t len; - register int hard; - register sop s; - register regoff_t offsave; - register cset *cs; + int i; + sopno ss; /* start sop of current subRE */ + char *sp; /* start of string matched by it */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + char *ssp; /* start of string matched by subsubRE */ + char *dp; + size_t len; + int hard; + sop s; + regoff_t offsave; + cset *cs; AT("back", start, stop, startst, stopst); sp = start; @@ -524,12 +544,12 @@ sopno lev; /* PLUS nesting level */ switch (OP(s)) { case OBACK_: /* the vilest depths */ i = OPND(s); - assert(0 < i && i <= m->g->nsub); + assert(0 < i && (size_t) i <= m->g->nsub); if (m->pmatch[i].rm_eo == -1) return(NULL); 
assert(m->pmatch[i].rm_so != -1); - len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; - assert(stop - m->beginp >= len); + len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so); + assert((size_t) (stop - m->beginp) >= len); if (sp > stop - len) return(NULL); /* not enough left to match */ ssp = m->offp + m->pmatch[i].rm_so; @@ -585,9 +605,9 @@ sopno lev; /* PLUS nesting level */ break; case OLPAREN: /* must undo assignment if rest fails */ i = OPND(s); - assert(0 < i && i <= m->g->nsub); + assert(0 < i && (size_t) i <= m->g->nsub); offsave = m->pmatch[i].rm_so; - m->pmatch[i].rm_so = sp - m->offp; + m->pmatch[i].rm_so = (regoff_t)(sp - m->offp); dp = backref(m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); @@ -596,9 +616,9 @@ sopno lev; /* PLUS nesting level */ break; case ORPAREN: /* must undo assignment if rest fails */ i = OPND(s); - assert(0 < i && i <= m->g->nsub); + assert(0 < i && (size_t) i <= m->g->nsub); offsave = m->pmatch[i].rm_eo; - m->pmatch[i].rm_eo = sp - m->offp; + m->pmatch[i].rm_eo = (regoff_t)(sp - m->offp); dp = backref(m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); @@ -618,26 +638,24 @@ sopno lev; /* PLUS nesting level */ /* - fast - step through the string at top speed - == static char *fast(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); */ static char * /* where tentative match ended, or NULL */ fast(m, start, stop, startst, stopst) -register struct match *m; +struct match *m; char *start; char *stop; sopno startst; sopno stopst; { - register states st = m->st; - register states fresh = m->fresh; - register states tmp = m->tmp; - register char *p = start; - register int c = (start == m->beginp) ? 
OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register char *coldp; /* last p after which no match was underway */ + states st = m->st; + states fresh = m->fresh; + states tmp = m->tmp; + char *p = start; + int c = (start == m->beginp) ? OUT : *(start-1); + int lastc; /* previous c */ + int flagch; + int i; + char *coldp; /* last p after which no match was underway */ CLEAR(st); SET1(st, startst); @@ -709,26 +727,23 @@ sopno stopst; /* - slow - step through the string more deliberately - == static char *slow(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); */ static char * /* where it ended */ slow(m, start, stop, startst, stopst) -register struct match *m; +struct match *m; char *start; char *stop; sopno startst; sopno stopst; { - register states st = m->st; - register states empty = m->empty; - register states tmp = m->tmp; - register char *p = start; - register int c = (start == m->beginp) ? OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register char *matchp; /* last p at which a match ended */ + states st = m->st; + states empty = m->empty; + char *p = start; + int c = (start == m->beginp) ? 
OUT : *(start-1); + int lastc; /* previous c */ + int flagch; + int i; + char *matchp; /* last p at which a match ended */ AT("slow", start, stop, startst, stopst); CLEAR(st); @@ -781,10 +796,8 @@ sopno stopst; break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ - ASSIGN(tmp, st); - ASSIGN(st, empty); assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); + st = step(m->g, startst, stopst, st, c, empty); SP("saft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); p++; @@ -796,33 +809,22 @@ sopno stopst; /* - step - map set of states reachable before char to set reachable after - == static states step(register struct re_guts *g, sopno start, sopno stop, \ - == register states bef, int ch, register states aft); - == #define BOL (OUT+1) - == #define EOL (BOL+1) - == #define BOLEOL (BOL+2) - == #define NOTHING (BOL+3) - == #define BOW (BOL+4) - == #define EOW (BOL+5) - == #define CODEMAX (BOL+5) // highest code used - == #define NONCHAR(c) ((c) > CHAR_MAX) - == #define NNONCHAR (CODEMAX-CHAR_MAX) */ static states step(g, start, stop, bef, ch, aft) -register struct re_guts *g; +struct re_guts *g; sopno start; /* start state within strip */ sopno stop; /* state after stop state within strip */ -register states bef; /* states reachable before */ +states bef; /* states reachable before */ int ch; /* character or NONCHAR code */ -register states aft; /* states already known reachable after */ +states aft; /* states already known reachable after */ { - register cset *cs; - register sop s; - register sopno pc; - register onestate here; /* note, macros know this name */ - register sopno look; - register long i; + cset *cs; + sop s; + sopno pc; + onestate here; /* note, macros know this name */ + sopno look; + long i; for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { s = g->strip[pc]; @@ -925,10 +927,6 @@ register states aft; /* states already known reachable after */ #ifdef REDEBUG /* - print - print a set of 
states - == #ifdef REDEBUG - == static void print(struct match *m, char *caption, states st, \ - == int ch, FILE *d); - == #endif */ static void print(m, caption, st, ch, d) @@ -938,9 +936,9 @@ states st; int ch; FILE *d; { - register struct re_guts *g = m->g; - register int i; - register int first = 1; + struct re_guts *g = m->g; + int i; + int first = 1; if (!(m->eflags&REG_TRACE)) return; @@ -958,10 +956,6 @@ FILE *d; /* - at - print current situation - == #ifdef REDEBUG - == static void at(struct match *m, char *title, char *start, char *stop, \ - == sopno startst, sopno stopst); - == #endif */ static void at(m, title, start, stop, startst, stopst) @@ -984,9 +978,6 @@ sopno stopst; #define PCHARDONE /* never again */ /* - pchar - make a character printable - == #ifdef REDEBUG - == static char *pchar(int ch); - == #endif * * Is this identical to regchar() over in debug.c? Well, yes. But a * duplicate here avoids having a debugging-capable regexec.o tied to diff --git a/vcnet/regex/regex.def b/vcnet/regex/librxspencer.def similarity index 55% rename from vcnet/regex/regex.def rename to vcnet/regex/librxspencer.def index 440f348206..12ef7b3bf6 100644 --- a/vcnet/regex/regex.def +++ b/vcnet/regex/librxspencer.def @@ -1,7 +1,5 @@ -LIBRARY regex -VERSION 1.0 -EXPORTS -regcomp -regerror -regexec -regfree +EXPORTS +regcomp +regerror +regexec +regfree diff --git a/vcnet/regex/main.c b/vcnet/regex/main.c index 0221e7713d..7481f73cc4 100644 --- a/vcnet/regex/main.c +++ b/vcnet/regex/main.c @@ -1,28 +1,38 @@ #include +#include #include #include -#include #include -#include "main.ih" +#include "regex.h" -char *progname; int debug = 0; int line = 0; int status = 0; int copts = REG_EXTENDED; int eopts = 0; +char *fopts = 0; regoff_t startoff = 0; regoff_t endoff = 0; - extern int split(); extern void regprint(); +static void regress(FILE *in); +static void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); +static char *check(char *str, regmatch_t sub, char 
*should); +static int parseopts(int argc, char *argv[]); +static int options(int type, char *s); +static int opt(int c, char *s); +static void fixstr(char *p); +static char *eprint(int err); +static int efind(char *name); + /* - main - do the simple case, hand off to regress() for regression */ +int main(argc, argv) int argc; char *argv[]; @@ -32,43 +42,19 @@ char *argv[]; regmatch_t subs[NS]; char erbuf[100]; int err; - size_t len; - int c; - int errflg = 0; - register int i; - extern int optind; - extern char *optarg; - - progname = argv[0]; - - while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) - switch (c) { - case 'c': /* compile options */ - copts = options('c', optarg); - break; - case 'e': /* execute options */ - eopts = options('e', optarg); - break; - case 'S': /* start offset */ - startoff = (regoff_t)atoi(optarg); - break; - case 'E': /* end offset */ - endoff = (regoff_t)atoi(optarg); - break; - case 'x': /* Debugging. */ - debug++; - break; - case '?': - default: - errflg++; - break; + int i; + int optind = parseopts(argc, argv); + + if (fopts != 0) { + FILE *f = fopen(fopts, "r"); + if (f == NULL) { + fputs("unable to open input\n", stderr); + exit(1); } - if (errflg) { - fprintf(stderr, "usage: %s ", progname); - fprintf(stderr, "[-c copt][-C][-d] [re]\n"); - exit(2); + regress(f); + exit(status); } - + if (optind >= argc) { regress(stdin); exit(status); @@ -76,9 +62,9 @@ char *argv[]; err = regcomp(&re, argv[optind++], copts); if (err) { - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "error %s, %d/%d `%s'\n", - eprint(err), len, sizeof(erbuf), erbuf); + size_t len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "error %s, %lu/%d `%s'\n", + eprint(err), (unsigned long)len, (int)sizeof(erbuf), erbuf); exit(status); } regprint(&re, stdout); @@ -90,17 +76,17 @@ char *argv[]; if (eopts&REG_STARTEND) { subs[0].rm_so = startoff; - subs[0].rm_eo = strlen(argv[optind]) - endoff; + subs[0].rm_eo = 
(regoff_t)strlen(argv[optind]) - endoff; } err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); if (err) { - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "error %s, %d/%d `%s'\n", - eprint(err), len, sizeof(erbuf), erbuf); + size_t len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "error %s, %lu/%d `%s'\n", + eprint(err), (unsigned long)len, (int)sizeof(erbuf), erbuf); exit(status); } if (!(copts&REG_NOSUB)) { - len = (int)(subs[0].rm_eo - subs[0].rm_so); + int len = (int)(subs[0].rm_eo - subs[0].rm_so); if (subs[0].rm_so != -1) { if (len != 0) printf("match `%.*s'\n", len, @@ -120,9 +106,8 @@ char *argv[]; /* - regress - main loop of regression test - == void regress(FILE *in); */ -void +static void regress(in) FILE *in; { @@ -197,9 +182,8 @@ FILE *in; /* - try - try it, and report on problems - == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); */ -void +static void try(f0, f1, f2, f3, f4, opts) char *f0; char *f1; @@ -216,9 +200,9 @@ int opts; /* may not match f1 */ int nshould; char erbuf[100]; int err; - int len; + size_t len; char *type = (opts & REG_EXTENDED) ? 
"ERE" : "BRE"; - register int i; + int i; char *grump; char f0copy[1000]; char f2copy[1000]; @@ -230,9 +214,9 @@ int opts; /* may not match f1 */ if (err != 0 && (!opt('C', f1) || err != efind(f2))) { /* unexpected error or wrong error */ len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", - line, type, eprint(err), len, - sizeof(erbuf), erbuf); + fprintf(stderr, "%d: %s error %s, %lu/%d `%s'\n", + line, type, eprint(err), (unsigned long)len, + (int)sizeof(erbuf), erbuf); status = 1; } else if (err == 0 && opt('C', f1)) { /* unexpected success */ @@ -253,17 +237,17 @@ int opts; /* may not match f1 */ if (options('e', f1)&REG_STARTEND) { if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) fprintf(stderr, "%d: bad STARTEND syntax\n", line); - subs[0].rm_so = strchr(f2, '(') - f2 + 1; - subs[0].rm_eo = strchr(f2, ')') - f2; + subs[0].rm_so = (regoff_t)(strchr(f2, '(') - f2 + 1); + subs[0].rm_eo = (regoff_t)(strchr(f2, ')') - f2); } err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { /* unexpected error or wrong error */ len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", - line, type, eprint(err), len, - sizeof(erbuf), erbuf); + fprintf(stderr, "%d: %s exec error %s, %lu/%d `%s'\n", + line, type, eprint(err), (unsigned long)len, + (int)sizeof(erbuf), erbuf); status = 1; } else if (err != 0) { /* nothing more to check */ @@ -306,18 +290,74 @@ int opts; /* may not match f1 */ regfree(&re); } +/* + - parseopts - half-baked option processing to avoid using getopt, which isn't always available on Windows. 
+ */ +static int +parseopts(argc, argv) +int argc; +char *argv[]; +{ + int i, j; + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-' || argv[i][1] == 0) { + break; + } + for (j = 1; argv[i][j] != 0; j++) { + char opt = argv[i][j]; + if (opt == 'x') { + debug++; + } else { + char *arg; + if (argv[i][j+1] != 0) { + arg = argv[i] + j+1; + } else { + if (i == argc-1) { + fprintf(stderr, "option requires an argument -- '%c'\n", opt); + exit(2); + } + arg = argv[i+1]; + i++; + } + switch (opt) { + case 'c': + copts = options(opt, arg); + break; + case 'e': + eopts = options(opt, arg); + break; + case 'f': + fopts = arg; + break; + case 'S': + startoff = (regoff_t)atoi(arg); + break; + case 'E': + endoff = (regoff_t)atoi(arg); + break; + default: + fprintf(stderr, "usage: %s ", argv[0]); + fprintf(stderr, "[-x][-c copt][-e eopt][-f file][-S startoff][-E endoff] [re]\n"); + exit(2); + } + break; + } + } + } + return i; +} + /* - options - pick options out of a regression-test string - == int options(int type, char *s); */ -int +static int options(type, s) int type; /* 'c' compile, 'e' exec */ char *s; { - register char *p; - register int o = (type == 'c') ? copts : eopts; - register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; + char *p; + int o = (type == 'c') ? copts : eopts; + char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; for (p = s; *p != '\0'; p++) if (strchr(legal, *p) != NULL) @@ -365,9 +405,8 @@ char *s; /* - opt - is a particular option in a regression string? 
- == int opt(int c, char *s); */ -int /* predicate */ +static int /* predicate */ opt(c, s) int c; char *s; @@ -377,11 +416,10 @@ char *s; /* - fixstr - transform magic characters in strings - == void fixstr(register char *p); */ -void +static void fixstr(p) -register char *p; +char *p; { if (p == NULL) return; @@ -399,19 +437,18 @@ register char *p; /* - check - check a substring match - == char *check(char *str, regmatch_t sub, char *should); */ -char * /* NULL or complaint */ +static char * /* NULL or complaint */ check(str, sub, should) char *str; regmatch_t sub; char *should; { - register int len; - register int shlen; - register char *p; + int len; + size_t shlen; + char *p; static char grump[500]; - register char *at = NULL; + char *at = NULL; if (should != NULL && strcmp(should, "-") == 0) should = NULL; @@ -437,14 +474,13 @@ char *should; return("did not match"); /* check for in range */ - if (sub.rm_eo > strlen(str)) { + if ((size_t) sub.rm_eo > strlen(str)) { sprintf(grump, "start %ld end %ld, past end of string", (long)sub.rm_so, (long)sub.rm_eo); return(grump); } len = (int)(sub.rm_eo - sub.rm_so); - shlen = (int)strlen(should); p = str + sub.rm_so; /* check for not supposed to match */ @@ -453,8 +489,9 @@ char *should; return(grump); } + shlen = strlen(should); /* check for wrong match */ - if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { + if ((size_t)len != shlen || strncmp(p, should, shlen) != 0) { sprintf(grump, "matched `%.*s' instead", len, p); return(grump); } @@ -476,7 +513,6 @@ char *should; /* - eprint - convert error number to name - == static char *eprint(int err); */ static char * eprint(err) @@ -492,14 +528,12 @@ int err; /* - efind - convert error name to number - == static int efind(char *name); */ static int efind(name) char *name; { static char efbuf[100]; - size_t n; regex_t re; sprintf(efbuf, "REG_%s", name); diff --git a/vcnet/regex/regcomp.c b/vcnet/regex/regcomp.c index 22e74336aa..e16e0151b7 100644 --- 
a/vcnet/regex/regcomp.c +++ b/vcnet/regex/regcomp.c @@ -1,16 +1,20 @@ +#include #include #include #include #include #include #include -#include -#include "utils.h" +#include "regex.h" #include "regex2.h" -#include "cclass.h" -#include "cname.h" +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define INFINITY (DUPMAX + 1) /* * parse structure, passed up and down to avoid global variables and @@ -30,10 +34,46 @@ struct parse { sopno pend[NPAREN]; /* -> ) ([0] unused) */ }; -#include "regcomp.ih" - static char nuls[10]; /* place to point scanner in event of error */ +static void p_ere(struct parse *p, int stop); +static void p_ere_exp(struct parse *p); +static void p_str(struct parse *p); +static void p_bre(struct parse *p, int end1, + int end2); +static int p_simp_re(struct parse *p, int starordinary); +static int p_count(struct parse *p); +static void p_bracket(struct parse *p); +static void p_b_term(struct parse *p, cset *cs); +static void p_b_cclass(struct parse *p, cset *cs); +static void p_b_eclass(struct parse *p, cset *cs); +static char p_b_symbol(struct parse *p); +static char p_b_coll_elem(struct parse *p, int endc); +static char othercase(int ch); +static void bothcases(struct parse *p, int ch); +static void ordinary(struct parse *p, int ch); +static void nonnewline(struct parse *p); +static void repeat(struct parse *p, sopno start, int from, int to); +static void seterr(struct parse *p, int e); +static cset *allocset(struct parse *p); +static void freeset(struct parse *p, cset *cs); +static int freezeset(struct parse *p, cset *cs); +static int firstch(struct parse *p, cset *cs); +static int nch(struct parse *p, cset *cs); +static void mcadd(struct parse *p, cset *cs, + char *cp); +static int isinsets(struct re_guts *g, int c); +static int samesets(struct re_guts *g, int c1, int c2); +static void categorize(struct parse *p, struct re_guts *g); +static sopno dupl(struct parse *p, sopno start, sopno finish); 
+static void doemit(struct parse *p, sop op, sop opnd); +static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos); +static void dofwd(struct parse *p, sopno pos, sop value); +static void enlarge(struct parse *p, sopno size); +static void stripsnug(struct parse *p, struct re_guts *g); +static void findmust(struct parse *p, struct re_guts *g); +static sopno pluscount(struct parse *p, struct re_guts *g); + /* * macros for use with parse structure * BEWARE: these know that the parse structure is named `p' !!! @@ -50,12 +90,9 @@ static char nuls[10]; /* place to point scanner in event of error */ #define NEXT2() (p->next += 2) #define NEXTn(n) (p->next += (n)) #define GETNEXT() (*p->next++) -#define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) ((co) || SETERROR(e)) -#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define REQUIRE(co, e) ((co) ? (void) 0 : seterr(p, (e))) #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) -#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) -#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (sop)(sopnd)) #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) @@ -70,18 +107,6 @@ static int never = 0; /* for use in asserts; shuts lint up */ #define never 0 /* some s have bugs too */ #endif -/* - - regcomp - interface for parser and compilation - = extern int regcomp(regex_t *, const char *, int); - = #define REG_BASIC 0000 - = #define REG_EXTENDED 0001 - = #define REG_ICASE 0002 - = #define REG_NOSUB 0004 - = #define REG_NEWLINE 0010 - = #define REG_NOSPEC 0020 - = #define REG_PEND 0040 - = #define REG_DUMP 0200 - */ int /* 0 success, otherwise REG_something */ regcomp(preg, pattern, cflags) regex_t *preg; @@ -89,24 +114,18 @@ const char *pattern; int cflags; { struct parse pa; - register struct re_guts *g; - register struct 
parse *p = &pa; - register int i; - register size_t len; -#ifdef REDEBUG -# define GOODFLAGS(f) (f) -#else -# define GOODFLAGS(f) ((f)&~REG_DUMP) -#endif + struct re_guts *g; + struct parse *p = &pa; + int i; + size_t len; - cflags = GOODFLAGS(cflags); if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) return(REG_INVARG); if (cflags&REG_PEND) { if (preg->re_endp < pattern) return(REG_INVARG); - len = preg->re_endp - pattern; + len = (size_t) (preg->re_endp - pattern); } else len = strlen((char *)pattern); @@ -115,8 +134,16 @@ int cflags; (NC-1)*sizeof(cat_t)); if (g == NULL) return(REG_ESPACE); - p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ - p->strip = (sop *)malloc(p->ssize * sizeof(sop)); + { + /* Patched for CERT Vulnerability Note VU#695940, Feb 2015. */ + size_t new_ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + if (new_ssize < len || new_ssize > LONG_MAX / sizeof(sop)) { + free((char *) g); + return REG_INVARG; + } + p->ssize = (sopno)new_ssize; + } + p->strip = (sop *)malloc((size_t)p->ssize * sizeof(sop)); p->slen = 0; if (p->strip == NULL) { free((char *)g); @@ -173,7 +200,7 @@ int cflags; #ifndef REDEBUG /* not debugging, so can't rely on the assert() in regexec() */ if (g->iflags&BAD) - SETERROR(REG_ASSERT); + seterr(p, REG_ASSERT); #endif /* win or lose, we're done */ @@ -184,18 +211,17 @@ int cflags; /* - p_ere - ERE parser top level, concatenation and alternation - == static void p_ere(register struct parse *p, int stop); */ static void p_ere(p, stop) -register struct parse *p; +struct parse *p; int stop; /* character this ERE should end at */ { - register char c; - register sopno prevback; - register sopno prevfwd; - register sopno conc; - register int first = 1; /* is this the first alternative? */ + char c; + sopno prevback; + sopno prevfwd; + sopno conc; + int first = 1; /* is this the first alternative? 
*/ for (;;) { /* do a bunch of concatenated expressions */ @@ -230,17 +256,16 @@ int stop; /* character this ERE should end at */ /* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op - == static void p_ere_exp(register struct parse *p); */ static void p_ere_exp(p) -register struct parse *p; +struct parse *p; { - register char c; - register sopno pos; - register int count; - register int count2; - register sopno subno; + char c; + sopno pos; + int count; + int count2; + sopno subno; int wascaret = 0; assert(MORE()); /* caller should have ensured this */ @@ -251,7 +276,7 @@ register struct parse *p; case '(': REQUIRE(MORE(), REG_EPAREN); p->g->nsub++; - subno = p->g->nsub; + subno = (sopno)p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); @@ -273,7 +298,7 @@ register struct parse *p; * all. So an unmatched ) is legal POSIX, at least until * we can get it fixed. */ - SETERROR(REG_EPAREN); + seterr(p, REG_EPAREN); break; #endif case '^': @@ -288,12 +313,12 @@ register struct parse *p; p->g->neol++; break; case '|': - SETERROR(REG_EMPTY); + seterr(p, REG_EMPTY); break; case '*': case '+': case '?': - SETERROR(REG_BADRPT); + seterr(p, REG_BADRPT); break; case '.': if (p->g->cflags&REG_NEWLINE) @@ -363,7 +388,7 @@ register struct parse *p; while (MORE() && PEEK() != '}') NEXT(); REQUIRE(MORE(), REG_EBRACE); - SETERROR(REG_BADBR); + seterr(p, REG_BADBR); } break; } @@ -374,16 +399,15 @@ register struct parse *p; if (!( c == '*' || c == '+' || c == '?' 
|| (c == '{' && MORE2() && isdigit(PEEK2())) ) ) return; - SETERROR(REG_BADRPT); + seterr(p, REG_BADRPT); } /* - p_str - string (no metacharacters) "parser" - == static void p_str(register struct parse *p); */ static void p_str(p) -register struct parse *p; +struct parse *p; { REQUIRE(MORE(), REG_EMPTY); while (MORE()) @@ -392,8 +416,6 @@ register struct parse *p; /* - p_bre - BRE parser top level, anchoring and concatenation - == static void p_bre(register struct parse *p, register int end1, \ - == register int end2); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -404,13 +426,13 @@ register struct parse *p; */ static void p_bre(p, end1, end2) -register struct parse *p; -register int end1; /* first terminating character */ -register int end2; /* second terminating character */ +struct parse *p; +int end1; /* first terminating character */ +int end2; /* second terminating character */ { - register sopno start = HERE(); - register int first = 1; /* first subexpression? */ - register int wasdollar = 0; + sopno start = HERE(); + int first = 1; /* first subexpression? */ + int wasdollar = 0; if (EAT('^')) { EMIT(OBOL, 0); @@ -433,19 +455,18 @@ register int end2; /* second terminating character */ /* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(register struct parse *p, int starordinary); */ static int /* was the simple RE an unbackslashed $? */ p_simp_re(p, starordinary) -register struct parse *p; +struct parse *p; int starordinary; /* is a leading * an ordinary character? 
*/ { - register int c; - register int count; - register int count2; - register sopno pos; - register int i; - register sopno subno; + int c; + int count; + int count2; + sopno pos; + int i; + sopno subno; # define BACKSL (1<g->nsub++; - subno = p->g->nsub; + subno = (sopno)p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); @@ -487,7 +508,7 @@ int starordinary; /* is a leading * an ordinary character? */ break; case BACKSL|')': /* should not get here -- must be user */ case BACKSL|'}': - SETERROR(REG_EPAREN); + seterr(p, REG_EPAREN); break; case BACKSL|'1': case BACKSL|'2': @@ -501,7 +522,7 @@ int starordinary; /* is a leading * an ordinary character? */ i = (c&~BACKSL) - '0'; assert(i < NPAREN); if (p->pend[i] != 0) { - assert(i <= p->g->nsub); + assert((size_t) i <= p->g->nsub); EMIT(OBACK_, i); assert(p->pbegin[i] != 0); assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); @@ -509,7 +530,7 @@ int starordinary; /* is a leading * an ordinary character? */ (void) dupl(p, p->pbegin[i]+1, p->pend[i]); EMIT(O_BACK, i); } else - SETERROR(REG_ESUBREG); + seterr(p, REG_ESUBREG); p->g->backrefs = 1; break; case '*': @@ -541,7 +562,7 @@ int starordinary; /* is a leading * an ordinary character? */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); - SETERROR(REG_BADBR); + seterr(p, REG_BADBR); } } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ return(1); @@ -551,14 +572,13 @@ int starordinary; /* is a leading * an ordinary character? 
*/ /* - p_count - parse a repetition count - == static int p_count(register struct parse *p); */ static int /* the value */ p_count(p) -register struct parse *p; +struct parse *p; { - register int count = 0; - register int ndigits = 0; + int count = 0; + int ndigits = 0; while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); @@ -571,17 +591,16 @@ register struct parse *p; /* - p_bracket - parse a bracketed character list - == static void p_bracket(register struct parse *p); * - * Note a significant property of this code: if the allocset() did SETERROR, + * Note a significant property of this code: if the allocset() did seterr, * no set operations are done. */ static void p_bracket(p) -register struct parse *p; +struct parse *p; { - register cset *cs = allocset(p); - register int invert = 0; + cset *cs = allocset(p); + int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { @@ -611,8 +630,8 @@ register struct parse *p; return; if (p->g->cflags&REG_ICASE) { - register int i; - register int ci; + int i; + int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && isalpha(i)) { @@ -620,11 +639,14 @@ register struct parse *p; if (ci != i) CHadd(cs, ci); } + assert(cs->multis == NULL); /* xxx */ +#if 0 if (cs->multis != NULL) mccase(p, cs); +#endif } if (invert) { - register int i; + int i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) @@ -633,8 +655,11 @@ register struct parse *p; CHadd(cs, i); if (p->g->cflags&REG_NEWLINE) CHsub(cs, '\n'); + assert(cs->multis == NULL); /* xxx */ +#if 0 if (cs->multis != NULL) mcinvert(p, cs); +#endif } assert(cs->multis == NULL); /* xxx */ @@ -648,16 +673,15 @@ register struct parse *p; /* - p_b_term - parse one term of a bracketed character list - == static void p_b_term(register struct parse *p, register cset *cs); */ static void p_b_term(p, cs) -register struct parse *p; -register cset *cs; +struct 
parse *p; +cset *cs; { - register char c; - register char start, finish; - register int i; + char c; + char start, finish; + int i; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { @@ -665,7 +689,7 @@ register cset *cs; c = (MORE2()) ? PEEK2() : '\0'; break; case '-': - SETERROR(REG_ERANGE); + seterr(p, REG_ERANGE); return; /* NOTE RETURN */ break; default: @@ -712,30 +736,50 @@ register cset *cs; } } +/* Character-class table. */ +static struct cclass { + char *name; + char *chars; + char *multis; +} cclasses[] = { + {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ""}, + {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", ""}, + {"blank", " \t", ""}, + {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", ""}, + {"digit", "0123456789", ""}, + {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ""}, + {"lower", "abcdefghijklmnopqrstuvwxyz", ""}, + {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", ""}, + {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ""}, + {"space", "\t\n\v\f\r ", ""}, + {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", ""}, + {"xdigit", "0123456789ABCDEFabcdef", ""}, + {NULL, 0, ""} +}; + /* - p_b_cclass - parse a character-class name and deal with it - == static void p_b_cclass(register struct parse *p, register cset *cs); */ static void p_b_cclass(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { - register char *sp = p->next; - register struct cclass *cp; - register size_t len; - register char *u; - register char c; + char *sp = p->next; + struct cclass *cp; + size_t len; + char *u; + char c; while (MORE() && isalpha(PEEK())) NEXT(); - len = p->next - sp; + len = (size_t)(p->next - sp); for (cp = cclasses; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') break; 
if (cp->name == NULL) { /* oops, didn't find it */ - SETERROR(REG_ECTYPE); + seterr(p, REG_ECTYPE); return; } @@ -748,16 +792,15 @@ register cset *cs; /* - p_b_eclass - parse an equivalence-class name and deal with it - == static void p_b_eclass(register struct parse *p, register cset *cs); * * This implementation is incomplete. xxx */ static void p_b_eclass(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { - register char c; + char c; c = p_b_coll_elem(p, '='); CHadd(cs, c); @@ -765,13 +808,12 @@ register cset *cs; /* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol - == static char p_b_symbol(register struct parse *p); */ static char /* value of symbol */ p_b_symbol(p) -register struct parse *p; +struct parse *p; { - register char value; + char value; REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) @@ -783,38 +825,139 @@ register struct parse *p; return(value); } +/* character-name table */ +static struct cname { + char *name; + char code; +} cnames[] = { + {"NUL", '\0'}, + {"SOH", '\001'}, + {"STX", '\002'}, + {"ETX", '\003'}, + {"EOT", '\004'}, + {"ENQ", '\005'}, + {"ACK", '\006'}, + {"BEL", '\007'}, + {"alert", '\007'}, + {"BS", '\010'}, + {"backspace", '\b'}, + {"HT", '\011'}, + {"tab", '\t'}, + {"LF", '\012'}, + {"newline", '\n'}, + {"VT", '\013'}, + {"vertical-tab", '\v'}, + {"FF", '\014'}, + {"form-feed", '\f'}, + {"CR", '\015'}, + {"carriage-return", '\r'}, + {"SO", '\016'}, + {"SI", '\017'}, + {"DLE", '\020'}, + {"DC1", '\021'}, + {"DC2", '\022'}, + {"DC3", '\023'}, + {"DC4", '\024'}, + {"NAK", '\025'}, + {"SYN", '\026'}, + {"ETB", '\027'}, + {"CAN", '\030'}, + {"EM", '\031'}, + {"SUB", '\032'}, + {"ESC", '\033'}, + {"IS4", '\034'}, + {"FS", '\034'}, + {"IS3", '\035'}, + {"GS", '\035'}, + {"IS2", '\036'}, + {"RS", '\036'}, + {"IS1", '\037'}, + {"US", '\037'}, + {"space", ' '}, + {"exclamation-mark", '!'}, + {"quotation-mark", '"'}, + {"number-sign", '#'}, + {"dollar-sign", '$'}, + 
{"percent-sign", '%'}, + {"ampersand", '&'}, + {"apostrophe", '\''}, + {"left-parenthesis", '('}, + {"right-parenthesis", ')'}, + {"asterisk", '*'}, + {"plus-sign", '+'}, + {"comma", ','}, + {"hyphen", '-'}, + {"hyphen-minus", '-'}, + {"period", '.'}, + {"full-stop", '.'}, + {"slash", '/'}, + {"solidus", '/'}, + {"zero", '0'}, + {"one", '1'}, + {"two", '2'}, + {"three", '3'}, + {"four", '4'}, + {"five", '5'}, + {"six", '6'}, + {"seven", '7'}, + {"eight", '8'}, + {"nine", '9'}, + {"colon", ':'}, + {"semicolon", ';'}, + {"less-than-sign", '<'}, + {"equals-sign", '='}, + {"greater-than-sign", '>'}, + {"question-mark", '?'}, + {"commercial-at", '@'}, + {"left-square-bracket", '['}, + {"backslash", '\\'}, + {"reverse-solidus", '\\'}, + {"right-square-bracket", ']'}, + {"circumflex", '^'}, + {"circumflex-accent", '^'}, + {"underscore", '_'}, + {"low-line", '_'}, + {"grave-accent", '`'}, + {"left-brace", '{'}, + {"left-curly-bracket", '{'}, + {"vertical-line", '|'}, + {"right-brace", '}'}, + {"right-curly-bracket", '}'}, + {"tilde", '~'}, + {"DEL", '\177'}, + {NULL, 0}, +}; + /* - p_b_coll_elem - parse a collating-element name and look it up - == static char p_b_coll_elem(register struct parse *p, int endc); */ static char /* value of collating element */ p_b_coll_elem(p, endc) -register struct parse *p; +struct parse *p; int endc; /* name ended by endc,']' */ { - register char *sp = p->next; - register struct cname *cp; - register int len; + char *sp = p->next; + struct cname *cp; + size_t len; while (MORE() && !SEETWO(endc, ']')) NEXT(); if (!MORE()) { - SETERROR(REG_EBRACK); + seterr(p, REG_EBRACK); return(0); } - len = p->next - sp; + len = (size_t)(p->next - sp); for (cp = cnames; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') return(cp->code); /* known name */ if (len == 1) return(*sp); /* single character */ - SETERROR(REG_ECOLLATE); /* neither */ + seterr(p, REG_ECOLLATE); /* neither */ return(0); } /* - othercase - return 
the case counterpart of an alphabetic - == static char othercase(int ch); */ static char /* if no counterpart, return ch */ othercase(ch) @@ -822,32 +965,31 @@ int ch; { assert(isalpha(ch)); if (isupper(ch)) - return(tolower(ch)); + return((char)tolower(ch)); else if (islower(ch)) - return(toupper(ch)); + return((char)toupper(ch)); else /* peculiar, but could happen */ - return(ch); + return((char)ch); } /* - bothcases - emit a dualcase version of a two-case character - == static void bothcases(register struct parse *p, int ch); * * Boy, is this implementation ever a kludge... */ static void bothcases(p, ch) -register struct parse *p; +struct parse *p; int ch; { - register char *oldnext = p->next; - register char *oldend = p->end; + char *oldnext = p->next; + char *oldend = p->end; char bracket[3]; assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; p->end = bracket+2; - bracket[0] = ch; + bracket[0] = (char)ch; bracket[1] = ']'; bracket[2] = '\0'; p_bracket(p); @@ -858,36 +1000,34 @@ int ch; /* - ordinary - emit an ordinary character - == static void ordinary(register struct parse *p, register int ch); */ static void ordinary(p, ch) -register struct parse *p; -register int ch; +struct parse *p; +int ch; { - register cat_t *cap = p->g->categories; + cat_t *cap = p->g->categories; if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) bothcases(p, ch); else { - EMIT(OCHAR, (unsigned char)ch); + EMIT(OCHAR, (sopno)ch); if (cap[ch] == 0) - cap[ch] = p->g->ncategories++; + cap[ch] = (cat_t)(p->g->ncategories++); } } /* - nonnewline - emit REG_NEWLINE version of OANY - == static void nonnewline(register struct parse *p); * * Boy, is this implementation ever a kludge... 
*/ static void nonnewline(p) -register struct parse *p; +struct parse *p; { - register char *oldnext = p->next; - register char *oldend = p->end; + char *oldnext = p->next; + char *oldend = p->end; char bracket[4]; p->next = bracket; @@ -904,21 +1044,20 @@ register struct parse *p; /* - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(register struct parse *p, sopno start, int from, int to); */ static void repeat(p, start, from, to) -register struct parse *p; +struct parse *p; sopno start; /* operand from here to end of strip */ int from; /* repeated from this number */ int to; /* to this number of times (maybe INFINITY) */ { - register sopno finish = HERE(); + sopno finish = HERE(); # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) - register sopno copy; + sopno copy; if (p->error != 0) /* head off possible runaway recursion */ return; @@ -969,41 +1108,38 @@ int to; /* to this number of times (maybe INFINITY) */ repeat(p, copy, from-1, to); break; default: /* "can't happen" */ - SETERROR(REG_ASSERT); /* just in case */ + seterr(p, REG_ASSERT); /* just in case */ break; } } /* - seterr - set an error condition - == static int seterr(register struct parse *p, int e); */ -static int /* useless but makes type checking happy */ +static void seterr(p, e) -register struct parse *p; +struct parse *p; int e; { if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ p->end = nuls; - return(0); /* make the return value well-defined */ } /* - allocset - allocate a set of characters for [] - == static cset *allocset(register struct parse *p); */ static cset * allocset(p) -register struct parse *p; +struct parse *p; { - register int no = p->g->ncsets++; - register size_t nc; - register size_t nbytes; - register cset *cs; - register size_t css = (size_t)p->g->csetsize; - register int i; + int no 
= p->g->ncsets++; + int nc; + int nbytes; + cset *cs; + int css = p->g->csetsize; + int i; if (no >= p->ncsalloc) { /* need another column of space */ p->ncsalloc += CHAR_BIT; @@ -1011,25 +1147,25 @@ register struct parse *p; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; if (p->g->sets == NULL) - p->g->sets = (cset *)malloc(nc * sizeof(cset)); + p->g->sets = (cset *)malloc((size_t)nc * sizeof(cset)); else p->g->sets = (cset *)realloc((char *)p->g->sets, - nc * sizeof(cset)); + (size_t)nc * sizeof(cset)); if (p->g->setbits == NULL) - p->g->setbits = (uch *)malloc(nbytes); + p->g->setbits = (uch *)malloc((size_t)nbytes); else { p->g->setbits = (uch *)realloc((char *)p->g->setbits, - nbytes); + (size_t)nbytes); /* xxx this isn't right if setbits is now NULL */ for (i = 0; i < no; i++) p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); } if (p->g->sets != NULL && p->g->setbits != NULL) (void) memset((char *)p->g->setbits + (nbytes - css), - 0, css); + 0, (size_t)css); else { no = 0; - SETERROR(REG_ESPACE); + seterr(p, REG_ESPACE); /* caller's responsibility not to do set ops */ } } @@ -1037,7 +1173,7 @@ register struct parse *p; assert(p->g->sets != NULL); /* xxx */ cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); - cs->mask = 1 << ((no) % CHAR_BIT); + cs->mask = (uch) (1 << ((no) % CHAR_BIT)); cs->hash = 0; cs->smultis = 0; cs->multis = NULL; @@ -1047,16 +1183,15 @@ register struct parse *p; /* - freeset - free a now-unused set - == static void freeset(register struct parse *p, register cset *cs); */ static void freeset(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { - register size_t i; - register cset *top = &p->g->sets[p->g->ncsets]; - register size_t css = (size_t)p->g->csetsize; + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) CHsub(cs, i); @@ -1066,7 +1201,6 @@ register cset *cs; /* - freezeset - final processing on a set 
of characters - == static int freezeset(register struct parse *p, register cset *cs); * * The main task here is merging identical sets. This is usually a waste * of time (although the hash code minimizes the overhead), but can win @@ -1076,14 +1210,14 @@ register cset *cs; */ static int /* set number */ freezeset(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { - register uch h = cs->hash; - register size_t i; - register cset *top = &p->g->sets[p->g->ncsets]; - register cset *cs2; - register size_t css = (size_t)p->g->csetsize; + uch h = cs->hash; + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + cset *cs2; + size_t css = (size_t)p->g->csetsize; /* look for an earlier one which is the same */ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) @@ -1106,15 +1240,14 @@ register cset *cs; /* - firstch - return first character in a set (which must have at least one) - == static int firstch(register struct parse *p, register cset *cs); */ static int /* character; there is no "none" value */ firstch(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { - register size_t i; - register size_t css = (size_t)p->g->csetsize; + size_t i; + size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) if (CHIN(cs, i)) @@ -1125,16 +1258,15 @@ register cset *cs; /* - nch - number of characters in a set - == static int nch(register struct parse *p, register cset *cs); */ static int nch(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { - register size_t i; - register size_t css = (size_t)p->g->csetsize; - register int n = 0; + size_t i; + size_t css = (size_t)p->g->csetsize; + int n = 0; for (i = 0; i < css; i++) if (CHIN(cs, i)) @@ -1144,16 +1276,14 @@ register cset *cs; /* - mcadd - add a collating element to a cset - == static void mcadd(register struct parse *p, register cset *cs, \ - == register char *cp); */ static void mcadd(p, cs, cp) -register struct parse *p; -register 
cset *cs; -register char *cp; +struct parse *p; +cset *cs; +char *cp; { - register size_t oldend = cs->smultis; + size_t oldend = cs->smultis; cs->smultis += strlen(cp) + 1; if (cs->multis == NULL) @@ -1161,7 +1291,7 @@ register char *cp; else cs->multis = realloc(cs->multis, cs->smultis); if (cs->multis == NULL) { - SETERROR(REG_ESPACE); + seterr(p, REG_ESPACE); return; } @@ -1169,17 +1299,17 @@ register char *cp; cs->multis[cs->smultis - 1] = '\0'; } +#if 0 /* - mcsub - subtract a collating element from a cset - == static void mcsub(register cset *cs, register char *cp); */ static void mcsub(cs, cp) -register cset *cs; -register char *cp; +cset *cs; +char *cp; { - register char *fp = mcfind(cs, cp); - register size_t len = strlen(fp); + char *fp = mcfind(cs, cp); + size_t len = strlen(fp); assert(fp != NULL); (void) memmove(fp, fp + len + 1, @@ -1198,26 +1328,24 @@ register char *cp; /* - mcin - is a collating element in a cset? - == static int mcin(register cset *cs, register char *cp); */ static int mcin(cs, cp) -register cset *cs; -register char *cp; +cset *cs; +char *cp; { return(mcfind(cs, cp) != NULL); } /* - mcfind - find a collating element in a cset - == static char *mcfind(register cset *cs, register char *cp); */ static char * mcfind(cs, cp) -register cset *cs; -register char *cp; +cset *cs; +char *cp; { - register char *p; + char *p; if (cs->multis == NULL) return(NULL); @@ -1229,47 +1357,45 @@ register char *cp; /* - mcinvert - invert the list of collating elements in a cset - == static void mcinvert(register struct parse *p, register cset *cs); * * This would have to know the set of possibilities. Implementation * is deferred. 
*/ static void mcinvert(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { assert(cs->multis == NULL); /* xxx */ } /* - mccase - add case counterparts of the list of collating elements in a cset - == static void mccase(register struct parse *p, register cset *cs); * * This would have to know the set of possibilities. Implementation * is deferred. */ static void mccase(p, cs) -register struct parse *p; -register cset *cs; +struct parse *p; +cset *cs; { assert(cs->multis == NULL); /* xxx */ } +#endif /* - isinsets - is this character in any sets? - == static int isinsets(register struct re_guts *g, int c); */ static int /* predicate */ isinsets(g, c) -register struct re_guts *g; +struct re_guts *g; int c; { - register uch *col; - register int i; - register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - register unsigned uc = (unsigned char)c; + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc = (unsigned char)c; for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) if (col[uc] != 0) @@ -1279,19 +1405,18 @@ int c; /* - samesets - are these two characters in exactly the same sets? 
- == static int samesets(register struct re_guts *g, int c1, int c2); */ static int /* predicate */ samesets(g, c1, c2) -register struct re_guts *g; +struct re_guts *g; int c1; int c2; { - register uch *col; - register int i; - register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - register unsigned uc1 = (unsigned char)c1; - register unsigned uc2 = (unsigned char)c2; + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc1 = (unsigned char)c1; + unsigned uc2 = (unsigned char)c2; for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) if (col[uc1] != col[uc2]) @@ -1301,17 +1426,16 @@ int c2; /* - categorize - sort out character categories - == static void categorize(struct parse *p, register struct re_guts *g); */ static void categorize(p, g) struct parse *p; -register struct re_guts *g; +struct re_guts *g; { - register cat_t *cats = g->categories; - register int c; - register int c2; - register cat_t cat; + cat_t *cats = g->categories; + int c; + int c2; + cat_t cat; /* avoid making error situations worse */ if (p->error != 0) @@ -1319,7 +1443,7 @@ register struct re_guts *g; for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (cats[c] == 0 && isinsets(g, c)) { - cat = g->ncategories++; + cat = (cat_t)g->ncategories++; cats[c] = cat; for (c2 = c+1; c2 <= CHAR_MAX; c2++) if (cats[c2] == 0 && samesets(g, c, c2)) @@ -1329,23 +1453,22 @@ register struct re_guts *g; /* - dupl - emit a duplicate of a bunch of sops - == static sopno dupl(register struct parse *p, sopno start, sopno finish); */ static sopno /* start of duplicate */ dupl(p, start, finish) -register struct parse *p; +struct parse *p; sopno start; /* from here */ sopno finish; /* to this less one */ { - register sopno ret = HERE(); - register sopno len = finish - start; + sopno ret = HERE(); + sopno len = finish - start; assert(finish >= start); if (len == 0) return(ret); enlarge(p, p->ssize + len); /* this many unexpected additions */ assert(p->ssize >= p->slen + len); - 
(void) memmove((char *)(p->strip + p->slen), + (void) memcpy((char *)(p->strip + p->slen), (char *)(p->strip + start), (size_t)len*sizeof(sop)); p->slen += len; return(ret); @@ -1353,7 +1476,6 @@ sopno finish; /* to this less one */ /* - doemit - emit a strip operator - == static void doemit(register struct parse *p, sop op, size_t opnd); * * It might seem better to implement this as a macro with a function as * hard-case backup, but it's just too big and messy unless there are @@ -1361,9 +1483,9 @@ sopno finish; /* to this less one */ */ static void doemit(p, op, opnd) -register struct parse *p; +struct parse *p; sop op; -size_t opnd; +sop opnd; { /* avoid making error situations worse */ if (p->error != 0) @@ -1383,18 +1505,17 @@ size_t opnd; /* - doinsert - insert a sop into the strip - == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); */ static void doinsert(p, op, opnd, pos) -register struct parse *p; +struct parse *p; sop op; -size_t opnd; +sopno opnd; sopno pos; { - register sopno sn; - register sop s; - register int i; + sopno sn; + sop s; + int i; /* avoid making error situations worse */ if (p->error != 0) @@ -1417,18 +1538,17 @@ sopno pos; } memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], - (HERE()-pos-1)*sizeof(sop)); + (size_t)(HERE()-pos-1)*sizeof(sop)); p->strip[pos] = s; } /* - dofwd - complete a forward reference - == static void dofwd(register struct parse *p, sopno pos, sop value); */ static void dofwd(p, pos, value) -register struct parse *p; -register sopno pos; +struct parse *p; +sopno pos; sop value; { /* avoid making error situations worse */ @@ -1441,21 +1561,20 @@ sop value; /* - enlarge - enlarge the strip - == static void enlarge(register struct parse *p, sopno size); */ static void enlarge(p, size) -register struct parse *p; -register sopno size; +struct parse *p; +sopno size; { - register sop *sp; + sop *sp; if (p->ssize >= size) return; - sp = (sop *)realloc(p->strip, size*sizeof(sop)); + sp 
= (sop *)realloc(p->strip, (size_t)size*sizeof(sop)); if (sp == NULL) { - SETERROR(REG_ESPACE); + seterr(p, REG_ESPACE); return; } p->strip = sp; @@ -1464,24 +1583,22 @@ register sopno size; /* - stripsnug - compact the strip - == static void stripsnug(register struct parse *p, register struct re_guts *g); */ static void stripsnug(p, g) -register struct parse *p; -register struct re_guts *g; +struct parse *p; +struct re_guts *g; { g->nstates = p->slen; - g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); + g->strip = (sop *)realloc((char *)p->strip, (size_t)p->slen * sizeof(sop)); if (g->strip == NULL) { - SETERROR(REG_ESPACE); + seterr(p, REG_ESPACE); g->strip = p->strip; } } /* - findmust - fill in must and mlen with longest mandatory literal string - == static void findmust(register struct parse *p, register struct re_guts *g); * * This algorithm could do fancy things like analyzing the operands of | * for common subsequences. Someday. This code is simple and finds most @@ -1492,15 +1609,15 @@ register struct re_guts *g; static void findmust(p, g) struct parse *p; -register struct re_guts *g; +struct re_guts *g; { - register sop *scan; + sop *scan; sop *start; - register sop *newstart; - register sopno newlen; - register sop s; - register char *cp; - register sopno i; + sop *newstart; + int newlen; + sop s; + char *cp; + sopno i; /* avoid making error situations worse */ if (p->error != 0) @@ -1568,17 +1685,16 @@ register struct re_guts *g; /* - pluscount - count + nesting - == static sopno pluscount(register struct parse *p, register struct re_guts *g); */ static sopno /* nesting depth */ pluscount(p, g) struct parse *p; -register struct re_guts *g; +struct re_guts *g; { - register sop *scan; - register sop s; - register sopno plusnest = 0; - register sopno maxnest = 0; + sop *scan; + sop s; + sopno plusnest = 0; + sopno maxnest = 0; if (p->error != 0) return(0); /* there may not be an OEND */ diff --git a/vcnet/regex/regerror.c 
b/vcnet/regex/regerror.c index 9ddd25ca9b..34a9daf425 100644 --- a/vcnet/regex/regerror.c +++ b/vcnet/regex/regerror.c @@ -1,126 +1,77 @@ -#include +#include #include #include -#include -#include -#include -#include -#include "utils.h" -#include "regerror.ih" +#include "regex.h" -/* - = #define REG_OKAY 0 - = #define REG_NOMATCH 1 - = #define REG_BADPAT 2 - = #define REG_ECOLLATE 3 - = #define REG_ECTYPE 4 - = #define REG_EESCAPE 5 - = #define REG_ESUBREG 6 - = #define REG_EBRACK 7 - = #define REG_EPAREN 8 - = #define REG_EBRACE 9 - = #define REG_BADBR 10 - = #define REG_ERANGE 11 - = #define REG_ESPACE 12 - = #define REG_BADRPT 13 - = #define REG_EMPTY 14 - = #define REG_ASSERT 15 - = #define REG_INVARG 16 - = #define REG_ATOI 255 // convert name to number (!) - = #define REG_ITOA 0400 // convert number to name (!) - */ static struct rerr { int code; char *name; char *explain; } rerrs[] = { - REG_OKAY, "REG_OKAY", "no errors detected", - REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", - REG_BADPAT, "REG_BADPAT", "invalid regular expression", - REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", - REG_ECTYPE, "REG_ECTYPE", "invalid character class", - REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", - REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", - REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", - REG_EPAREN, "REG_EPAREN", "parentheses not balanced", - REG_EBRACE, "REG_EBRACE", "braces not balanced", - REG_BADBR, "REG_BADBR", "invalid repetition count(s)", - REG_ERANGE, "REG_ERANGE", "invalid character range", - REG_ESPACE, "REG_ESPACE", "out of memory", - REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", - REG_EMPTY, "REG_EMPTY", "empty (sub)expression", - REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", - REG_INVARG, "REG_INVARG", "invalid argument to regex routine", - -1, "", "*** unknown regexp error code ***", + {REG_OKAY, "REG_OKAY", "no errors detected"}, + {REG_NOMATCH, 
"REG_NOMATCH", "regexec() failed to match"}, + {REG_BADPAT, "REG_BADPAT", "invalid regular expression"}, + {REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"}, + {REG_ECTYPE, "REG_ECTYPE", "invalid character class"}, + {REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)"}, + {REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"}, + {REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced"}, + {REG_EPAREN, "REG_EPAREN", "parentheses not balanced"}, + {REG_EBRACE, "REG_EBRACE", "braces not balanced"}, + {REG_BADBR, "REG_BADBR", "invalid repetition count(s)"}, + {REG_ERANGE, "REG_ERANGE", "invalid character range"}, + {REG_ESPACE, "REG_ESPACE", "out of memory"}, + {REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid"}, + {REG_EMPTY, "REG_EMPTY", "empty (sub)expression"}, + {REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"}, + {REG_INVARG, "REG_INVARG", "invalid argument to regex routine"}, + {-1, "", "*** unknown regexp error code ***"}, }; +static size_t +set_result(char *errbuf, size_t errbuf_size, char *result) { + size_t result_len = strlen(result); + if (errbuf_size > 0) { + size_t copy_len = result_len < errbuf_size ? 
result_len : errbuf_size - 1; + memcpy(errbuf, result, copy_len); + errbuf[copy_len] = 0; + } + return result_len + 1; +} + /* - regerror - the interface to error numbers - = extern size_t regerror(int, const regex_t *, char *, size_t); */ -/* ARGSUSED */ size_t -regerror( -int errcode, -const regex_t *preg, -char *errbuf, -size_t errbuf_size) +regerror(int errorcode, const regex_t *preg, char *errbuf, size_t errbuf_size) { - register struct rerr *r; - register size_t len; - register int target = errcode &~ REG_ITOA; - register char *s; + struct rerr *r; char convbuf[50]; - if (errcode == REG_ATOI) - s = regatoi(preg, convbuf); + if (errorcode == REG_ATOI) { + if (preg == NULL || preg->re_endp == NULL) + return set_result(errbuf, errbuf_size, "0"); + for (r = rerrs; r->code >= 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code < 0) + return set_result(errbuf, errbuf_size, "0"); + snprintf(convbuf, sizeof convbuf, "%d", r->code); + return set_result(errbuf, errbuf_size, convbuf); + } else { + int target = errorcode &~ REG_ITOA; + for (r = rerrs; r->code >= 0; r++) if (r->code == target) break; - - if (errcode&REG_ITOA) { + if (errorcode & REG_ITOA) { if (r->code >= 0) - (void) strcpy(convbuf, r->name); - else - sprintf(convbuf, "REG_0x%x", target); - assert(strlen(convbuf) < sizeof(convbuf)); - s = convbuf; - } else - s = r->explain; - } - - len = strlen(s) + 1; - if (errbuf_size > 0) { - if (errbuf_size > len) - (void) strcpy(errbuf, s); - else { - (void) strncpy(errbuf, s, errbuf_size-1); - errbuf[errbuf_size-1] = '\0'; + return set_result(errbuf, errbuf_size, r->name); + snprintf(convbuf, sizeof convbuf, "REG_0x%x", target); + return set_result(errbuf, errbuf_size, convbuf); } + return set_result(errbuf, errbuf_size, r->explain); } - - return(len); -} - -/* - - regatoi - internal routine to implement REG_ATOI - == static char *regatoi(const regex_t *preg, char *localbuf); - */ -static char * -regatoi(preg, localbuf) -const regex_t *preg; -char
*localbuf; -{ - register struct rerr *r; - - for (r = rerrs; r->code >= 0; r++) - if (strcmp(r->name, preg->re_endp) == 0) - break; - if (r->code < 0) - return("0"); - - sprintf(localbuf, "%d", r->code); - return(localbuf); } diff --git a/vcnet/regex/regex.h b/vcnet/regex/regex.h index 6918a55212..c312fe0194 100644 --- a/vcnet/regex/regex.h +++ b/vcnet/regex/regex.h @@ -1,26 +1,31 @@ #ifndef _REGEX_H_ -#define _REGEX_H_ /* never again */ -/* ========= begin header generated by ./mkh ========= */ +#define _REGEX_H_ + #ifdef __cplusplus extern "C" { #endif -/* === regex2.h === */ -typedef long regoff_t; +#include + +typedef off_t regoff_t; + typedef struct { int re_magic; size_t re_nsub; /* number of parenthesized subexpressions */ const char *re_endp; /* end pointer for REG_PEND */ struct re_guts *re_g; /* none of your business :-) */ } regex_t; + typedef struct { regoff_t rm_so; /* start of match */ regoff_t rm_eo; /* end of match */ } regmatch_t; - -/* === regcomp.c === */ extern int regcomp(regex_t *, const char *, int); +extern size_t regerror(int, const regex_t *, char *, size_t); +extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); +extern void regfree(regex_t *); + #define REG_BASIC 0000 #define REG_EXTENDED 0001 #define REG_ICASE 0002 @@ -28,10 +33,6 @@ extern int regcomp(regex_t *, const char *, int); #define REG_NEWLINE 0010 #define REG_NOSPEC 0020 #define REG_PEND 0040 -#define REG_DUMP 0200 - - -/* === regerror.c === */ #define REG_OKAY 0 #define REG_NOMATCH 1 #define REG_BADPAT 2 @@ -51,11 +52,6 @@ extern int regcomp(regex_t *, const char *, int); #define REG_INVARG 16 #define REG_ATOI 255 /* convert name to number (!) */ #define REG_ITOA 0400 /* convert number to name (!) 
*/ -extern size_t regerror(int, const regex_t *, char *, size_t); - - -/* === regexec.c === */ -extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); #define REG_NOTBOL 00001 #define REG_NOTEOL 00002 #define REG_STARTEND 00004 @@ -63,12 +59,7 @@ extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); #define REG_LARGE 01000 /* force large representation */ #define REG_BACKR 02000 /* force use of backref code */ - -/* === regfree.c === */ -extern void regfree(regex_t *); - #ifdef __cplusplus } #endif -/* ========= end header generated by ./mkh ========= */ #endif diff --git a/vcnet/regex/regex2.h b/vcnet/regex/regex2.h index 58fd8d8a43..278a5d6b27 100644 --- a/vcnet/regex/regex2.h +++ b/vcnet/regex/regex2.h @@ -1,17 +1,3 @@ -/* - * First, the stuff that ends up in the outside-world include file - = typedef off_t regoff_t; - = typedef struct { - = int re_magic; - = size_t re_nsub; // number of parenthesized subexpressions - = const char *re_endp; // end pointer for REG_PEND - = struct re_guts *re_g; // none of your business :-) - = } regex_t; - = typedef struct { - = regoff_t rm_so; // start of match - = regoff_t rm_eo; // end of match - = } regmatch_t; - */ /* * internals of regex_t */ @@ -38,12 +24,15 @@ */ typedef long sop; /* strip operator */ typedef long sopno; +typedef unsigned char uch; + #define OPRMASK 0x7c000000 #define OPDMASK 0x03ffffff #define OPSHIFT (26) #define OP(n) ((n)&OPRMASK) #define OPND(n) ((n)&OPDMASK) #define SOP(op, opnd) ((op)|(opnd)) + /* operators meaning operand */ /* (back, fwd are offsets) */ #define OEND (1< char[smulti] ab\0cd\0ef\0\0 */ } cset; + /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ -#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) -#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) +#define CHadd(cs, c) ((cs)->ptr[(int)(c)] |= (cs)->mask, (cs)->hash = (uch)((cs)->hash + (c))) +#define CHsub(cs, c) 
((cs)->ptr[(int)(c)] &= (uch)~(cs)->mask, (cs)->hash = (uch)((cs)->hash - (c))) #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) #define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ #define MCsub(p, cs, cp) mcsub(p, cs, cp) #define MCin(p, cs, cp) mcin(p, cs, cp) +#define NC (CHAR_MAX - CHAR_MIN + 1) /* stuff for character categories */ typedef unsigned char cat_t; @@ -132,3 +123,10 @@ struct re_guts { /* misc utilities */ #define OUT (CHAR_MAX+1) /* a non-character value */ #define ISWORD(c) (isalnum(c) || (c) == '_') + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif diff --git a/vcnet/regex/regexec.c b/vcnet/regex/regexec.c index dcb11b285c..0c4f1f97c6 100644 --- a/vcnet/regex/regexec.c +++ b/vcnet/regex/regexec.c @@ -5,15 +5,15 @@ * macros that code uses. This lets the same code operate on two different * representations for state sets. */ +#include #include #include #include #include #include #include -#include -#include "utils.h" +#include "regex.h" #include "regex2.h" static int nope = 0; /* for use in asserts; shuts lint up */ @@ -68,20 +68,20 @@ static int nope = 0; /* for use in asserts; shuts lint up */ /* macros for manipulating states, large version */ #define states char * -#define CLEAR(v) memset(v, 0, m->g->nstates) +#define CLEAR(v) memset(v, 0, (size_t)m->g->nstates) #define SET0(v, n) ((v)[n] = 0) #define SET1(v, n) ((v)[n] = 1) #define ISSET(v, n) ((v)[n]) -#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) -#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define ASSIGN(d, s) memcpy(d, s, (size_t)m->g->nstates) +#define EQ(a, b) (memcmp(a, b, (size_t)m->g->nstates) == 0) #define STATEVARS int vn; char *space -#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ +#define STATESETUP(m, nv) { (m)->space = malloc((size_t)((nv)*(m)->g->nstates)); \ if ((m)->space == NULL) return(REG_ESPACE); \ 
(m)->vn = 0; } #define STATETEARDOWN(m) { free((m)->space); } #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) #define onestate int -#define INIT(o, n) ((o) = (n)) +#define INIT(o, n) ((o) = (int)(n)) #define INC(o) ((o)++) #define ISSTATEIN(v, o) ((v)[o]) /* some abbreviations; note that some of these know variable names! */ @@ -96,14 +96,6 @@ static int nope = 0; /* for use in asserts; shuts lint up */ /* - regexec - interface for matching - = extern int regexec(const regex_t *, const char *, size_t, \ - = regmatch_t [], int); - = #define REG_NOTBOL 00001 - = #define REG_NOTEOL 00002 - = #define REG_STARTEND 00004 - = #define REG_TRACE 00400 // tracing of execution - = #define REG_LARGE 01000 // force large representation - = #define REG_BACKR 02000 // force use of backref code * * We put this here so we can exploit knowledge of the state representation * when choosing which matcher to call. Also, by this point the matchers @@ -117,7 +109,7 @@ size_t nmatch; regmatch_t pmatch[]; int eflags; { - register struct re_guts *g = preg->re_g; + struct re_guts *g = preg->re_g; #ifdef REDEBUG # define GOODFLAGS(f) (f) #else @@ -131,7 +123,7 @@ int eflags; return(REG_BADPAT); eflags = GOODFLAGS(eflags); - if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) + if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); else return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); diff --git a/vcnet/regex/regfree.c b/vcnet/regex/regfree.c index 9a6acf1733..b3908f0314 100644 --- a/vcnet/regex/regfree.c +++ b/vcnet/regex/regfree.c @@ -1,20 +1,18 @@ #include #include #include -#include -#include "utils.h" +#include "regex.h" #include "regex2.h" /* - regfree - free everything - = extern void regfree(regex_t *); */ void regfree(preg) regex_t *preg; { - register struct re_guts *g; + struct re_guts *g; if (preg->re_magic != MAGIC1) /* oops */ return; /* nice to complain, but hard */
diff --git a/vcnet/regex/regex.3 b/vcnet/regex/rxspencer.3 similarity index 99% rename from vcnet/regex/regex.3 rename to vcnet/regex/rxspencer.3 index bc747096d6..3016432a61 100644 --- a/vcnet/regex/regex.3 +++ b/vcnet/regex/rxspencer.3 @@ -2,7 +2,7 @@ .BY "Henry Spencer" .de ZR .\" one other place knows this name: the SEE ALSO section -.IR regex (7) \\$1 +.IR rxspencer (7) \\$1 .. .SH NAME regcomp, regexec, regerror, regfree \- regular-expression library @@ -421,7 +421,7 @@ A `{' \fInot\fR followed by a digit is considered an ordinary character. `^' and `$' beginning and ending subexpressions in obsolete (``basic'') REs are anchors, not ordinary characters. .SH SEE ALSO -grep(1), regex(7) +grep(1), rxspencer(7) .PP POSIX 1003.2, sections 2.8 (Regular Expression Notation) and diff --git a/vcnet/regex/regex.7 b/vcnet/regex/rxspencer.7 similarity index 99% rename from vcnet/regex/regex.7 rename to vcnet/regex/rxspencer.7 index 0fa180269e..387790a0f1 100644 --- a/vcnet/regex/regex.7 +++ b/vcnet/regex/rxspencer.7 @@ -207,7 +207,7 @@ matched by the \fId\fRth parenthesized subexpression left to right), so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. .SH SEE ALSO -regex(3) +rxspencer(3) .PP POSIX 1003.2, section 2.8 (Regular Expression Notation). 
.SH HISTORY diff --git a/vcnet/regex/split.c b/vcnet/regex/split.c index 188bdb775b..3713e89aaa 100644 --- a/vcnet/regex/split.c +++ b/vcnet/regex/split.c @@ -12,14 +12,14 @@ char *fields[]; /* list is not NULL-terminated */ int nfields; /* number of entries available in fields[] */ char *sep; /* "" white, "c" single char, "ab" [ab]+ */ { - register char *p = string; - register char c; /* latest character */ - register char sepc = sep[0]; - register char sepc2; - register int fn; - register char **fp = fields; - register char *sepp; - register int trimtrail; + char *p = string; + char c; /* latest character */ + char sepc = sep[0]; + char sepc2; + int fn; + char **fp = fields; + char *sepp; + int trimtrail; /* white space */ if (sepc == '\0') { @@ -153,7 +153,7 @@ int argc; char *argv[]; { char buf[512]; - register int n; + int n; # define MNF 10 char *fields[MNF]; @@ -179,25 +179,27 @@ char *argv[]; exit(0); } +static void dosplit(string, seps) char *string; char *seps; { # define NF 5 char *fields[NF]; - register int nf; + int nf; nf = split(string, fields, NF, seps); print(nf, NF, fields); } +static void print(nf, nfp, fields) int nf; int nfp; char *fields[]; { - register int fn; - register int bound; + int fn; + int bound; bound = (nf > nfp) ? nfp : nf; printf("%d:\t", nf); @@ -274,15 +276,16 @@ struct { NULL, NULL, 0, { NULL }, }; +static void regress() { char buf[512]; - register int n; + int n; char *fields[RNF+1]; - register int nf; - register int i; - register int printit; - register char *f; + int nf; + int i; + int printit; + char *f; for (n = 0; tests[n].str != NULL; n++) { (void) strcpy(buf, tests[n].str); -- 2.47.2