]>
git.ipfire.org Git - people/ms/u-boot.git/blob - lib/slre.c
2 * Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com>
5 * "THE BEER-WARE LICENSE" (Revision 42):
6 * Sergey Lyubka wrote this file. As long as you retain this notice you
7 * can do whatever you want with this stuff. If we meet some day, and you think
8 * this stuff is worth it, you can buy me a beer in return.
12 * Downloaded Sat Nov 5 17:43:06 CET 2011 at
13 * http://slre.sourceforge.net/1.0/slre.c
24 #include <linux/ctype.h>
25 #endif /* SLRE_TEST */
31 enum {END
, BRANCH
, ANY
, EXACT
, ANYOF
, ANYBUT
, OPEN
, CLOSE
, BOL
, EOL
,
32 STAR
, PLUS
, STARQ
, PLUSQ
, QUEST
, SPACE
, NONSPACE
, DIGIT
};
40 {"END", 0, ""}, /* End of code block or program */
41 {"BRANCH", 2, "oo"}, /* Alternative operator, "|" */
42 {"ANY", 0, ""}, /* Match any character, "." */
43 {"EXACT", 2, "d"}, /* Match exact string */
44 {"ANYOF", 2, "D"}, /* Match any from set, "[]" */
45 {"ANYBUT", 2, "D"}, /* Match any but from set, "[^]"*/
46 {"OPEN ", 1, "i"}, /* Capture start, "(" */
47 {"CLOSE", 1, "i"}, /* Capture end, ")" */
48 {"BOL", 0, ""}, /* Beginning of string, "^" */
49 {"EOL", 0, ""}, /* End of string, "$" */
50 {"STAR", 1, "o"}, /* Match zero or more times "*" */
51 {"PLUS", 1, "o"}, /* Match one or more times, "+" */
52 {"STARQ", 1, "o"}, /* Non-greedy STAR, "*?" */
53 {"PLUSQ", 1, "o"}, /* Non-greedy PLUS, "+?" */
54 {"QUEST", 1, "o"}, /* Match zero or one time, "?" */
55 {"SPACE", 0, ""}, /* Match whitespace, "\s" */
56 {"NONSPACE", 0, ""}, /* Match non-space, "\S" */
57 {"DIGIT", 0, ""} /* Match digit, "\d" */
59 #endif /* SLRE_TEST */
62 * Commands and operands are all unsigned char (1 byte long). All code offsets
63 * are relative to current address, and positive (always point forward). Data
64 * offsets are absolute. Commands with operands:
66 * BRANCH offset1 offset2
67 * Try to match the code block that follows the BRANCH instruction
68 * (code block ends with END). If no match, try to match code block that
69 * starts at offset1. If either of these match, jump to offset2.
71 * EXACT data_offset data_length
72 * Try to match exact string. String is recorded in data section from
73 * data_offset, and has length data_length.
76 * CLOSE capture_number
77 * If the user have passed 'struct cap' array for captures, OPEN
78 * records the beginning of the matched substring (cap->ptr), CLOSE
79 * sets the length (cap->len) for respective capture_number.
84 * *, +, ?, respectively. Try to gobble as much as possible from the
85 * matched buffer, until code block that follows these instructions
86 * matches. When the longest possible string is matched,
89 * STARQ, PLUSQ are non-greedy versions of STAR and PLUS.
92 static const char *meta_chars
= "|.^$*+?()[\\";
97 print_character_set(FILE *fp
, const unsigned char *p
, int len
)
101 for (i
= 0; i
< len
; i
++) {
103 (void) fputc(',', fp
);
107 (void) fprintf(fp
, "\\x%02x", p
[i
]);
109 (void) fprintf(fp
, "%s", opcodes
[p
[i
]].name
);
110 } else if (isprint(p
[i
])) {
111 (void) fputc(p
[i
], fp
);
113 (void) fprintf(fp
, "\\x%02x", p
[i
]);
119 slre_dump(const struct slre
*r
, FILE *fp
)
121 int i
, j
, ch
, op
, pc
;
123 for (pc
= 0; pc
< r
->code_size
; pc
++) {
126 (void) fprintf(fp
, "%3d %s ", pc
, opcodes
[op
].name
);
128 for (i
= 0; opcodes
[op
].flags
[i
] != '\0'; i
++)
129 switch (opcodes
[op
].flags
[i
]) {
131 (void) fprintf(fp
, "%d ", r
->code
[pc
+ 1]);
135 (void) fprintf(fp
, "%d ",
136 pc
+ r
->code
[pc
+ 1] - i
);
140 print_character_set(fp
, r
->data
+
141 r
->code
[pc
+ 1], r
->code
[pc
+ 2]);
145 (void) fputc('"', fp
);
146 for (j
= 0; j
< r
->code
[pc
+ 2]; j
++) {
147 ch
= r
->data
[r
->code
[pc
+ 1] + j
];
149 (void) fputc(ch
, fp
);
155 (void) fputc('"', fp
);
160 (void) fputc('\n', fp
);
163 #endif /* SLRE_TEST */
166 set_jump_offset(struct slre
*r
, int pc
, int offset
)
168 assert(offset
< r
->code_size
);
170 if (r
->code_size
- offset
> 0xff)
171 r
->err_str
= "Jump offset is too big";
173 r
->code
[pc
] = (unsigned char) (r
->code_size
- offset
);
177 emit(struct slre
*r
, int code
)
179 if (r
->code_size
>= (int) (sizeof(r
->code
) / sizeof(r
->code
[0])))
180 r
->err_str
= "RE is too long (code overflow)";
182 r
->code
[r
->code_size
++] = (unsigned char) code
;
186 store_char_in_data(struct slre
*r
, int ch
)
188 if (r
->data_size
>= (int) sizeof(r
->data
))
189 r
->err_str
= "RE is too long (data overflow)";
191 r
->data
[r
->data_size
++] = ch
;
195 exact(struct slre
*r
, const char **re
)
197 int old_data_size
= r
->data_size
;
199 while (**re
!= '\0' && (strchr(meta_chars
, **re
)) == NULL
)
200 store_char_in_data(r
, *(*re
)++);
203 emit(r
, old_data_size
);
204 emit(r
, r
->data_size
- old_data_size
);
208 get_escape_char(const char **re
)
243 anyof(struct slre
*r
, const char **re
)
245 int esc
, old_data_size
= r
->data_size
, op
= ANYOF
;
257 emit(r
, old_data_size
);
258 emit(r
, r
->data_size
- old_data_size
);
263 esc
= get_escape_char(re
);
264 if ((esc
& 0xff) == 0) {
265 store_char_in_data(r
, 0);
266 store_char_in_data(r
, esc
>> 8);
268 store_char_in_data(r
, esc
);
272 store_char_in_data(r
, (*re
)[-1]);
276 r
->err_str
= "No closing ']' bracket";
280 relocate(struct slre
*r
, int begin
, int shift
)
283 memmove(r
->code
+ begin
+ shift
, r
->code
+ begin
, r
->code_size
- begin
);
284 r
->code_size
+= shift
;
288 quantifier(struct slre
*r
, int prev
, int op
)
290 if (r
->code
[prev
] == EXACT
&& r
->code
[prev
+ 2] > 1) {
293 emit(r
, r
->code
[prev
+ 1] + r
->code
[prev
+ 2]);
295 prev
= r
->code_size
- 3;
297 relocate(r
, prev
, 2);
299 set_jump_offset(r
, prev
+ 1, prev
);
303 exact_one_char(struct slre
*r
, int ch
)
306 emit(r
, r
->data_size
);
308 store_char_in_data(r
, ch
);
312 fixup_branch(struct slre
*r
, int fixup
)
316 set_jump_offset(r
, fixup
, fixup
- 2);
321 compile(struct slre
*r
, const char **re
)
323 int op
, esc
, branch_start
, last_op
, fixup
, cap_no
, level
;
327 branch_start
= last_op
= r
->code_size
;
343 last_op
= r
->code_size
;
347 last_op
= r
->code_size
;
351 last_op
= r
->code_size
;
352 esc
= get_escape_char(re
);
356 exact_one_char(r
, esc
);
359 last_op
= r
->code_size
;
360 cap_no
= ++r
->num_caps
;
365 if (*(*re
)++ != ')') {
366 r
->err_str
= "No closing bracket";
375 fixup_branch(r
, fixup
);
377 r
->err_str
= "Unbalanced brackets";
385 op
= (*re
)[-1] == '*' ? STAR
: PLUS
;
388 op
= op
== STAR
? STARQ
: PLUSQ
;
390 quantifier(r
, last_op
, op
);
393 quantifier(r
, last_op
, QUEST
);
396 fixup_branch(r
, fixup
);
397 relocate(r
, branch_start
, 3);
398 r
->code
[branch_start
] = BRANCH
;
399 set_jump_offset(r
, branch_start
+ 1, branch_start
);
400 fixup
= branch_start
+ 2;
401 r
->code
[fixup
] = 0xff;
405 last_op
= r
->code_size
;
412 slre_compile(struct slre
*r
, const char *re
)
415 r
->code_size
= r
->data_size
= r
->num_caps
= r
->anchored
= 0;
420 emit(r
, OPEN
); /* This will capture what matches full RE */
426 if (r
->code
[2] == BRANCH
)
433 return (r
->err_str
== NULL
? 1 : 0);
436 static int match(const struct slre
*, int,
437 const char *, int, int *, struct cap
*);
440 loop_greedy(const struct slre
*r
, int pc
, const char *s
, int len
, int *ofs
)
442 int saved_offset
, matched_offset
;
444 saved_offset
= matched_offset
= *ofs
;
446 while (match(r
, pc
+ 2, s
, len
, ofs
, NULL
)) {
448 if (match(r
, pc
+ r
->code
[pc
+ 1], s
, len
, ofs
, NULL
))
449 matched_offset
= saved_offset
;
453 *ofs
= matched_offset
;
457 loop_non_greedy(const struct slre
*r
, int pc
, const char *s
, int len
, int *ofs
)
459 int saved_offset
= *ofs
;
461 while (match(r
, pc
+ 2, s
, len
, ofs
, NULL
)) {
463 if (match(r
, pc
+ r
->code
[pc
+ 1], s
, len
, ofs
, NULL
))
471 is_any_of(const unsigned char *p
, int len
, const char *s
, int *ofs
)
477 for (i
= 0; i
< len
; i
++)
487 is_any_but(const unsigned char *p
, int len
, const char *s
, int *ofs
)
493 for (i
= 0; i
< len
; i
++) {
503 match(const struct slre
*r
, int pc
, const char *s
, int len
,
504 int *ofs
, struct cap
*caps
)
506 int n
, saved_offset
, res
= 1;
508 while (res
&& r
->code
[pc
] != END
) {
510 assert(pc
< r
->code_size
);
511 assert(pc
< (int) (sizeof(r
->code
) / sizeof(r
->code
[0])));
513 switch (r
->code
[pc
]) {
516 res
= match(r
, pc
+ 3, s
, len
, ofs
, caps
);
519 res
= match(r
, pc
+ r
->code
[pc
+ 1],
522 pc
+= r
->code
[pc
+ 2];
526 n
= r
->code
[pc
+ 2]; /* String length */
527 if (n
<= len
- *ofs
&& !memcmp(s
+ *ofs
, r
->data
+
528 r
->code
[pc
+ 1], n
)) {
537 if (!match(r
, pc
+ 2, s
, len
, ofs
, caps
))
539 pc
+= r
->code
[pc
+ 1];
543 loop_greedy(r
, pc
, s
, len
, ofs
);
544 pc
+= r
->code
[pc
+ 1];
548 loop_non_greedy(r
, pc
, s
, len
, ofs
);
549 pc
+= r
->code
[pc
+ 1];
552 res
= match(r
, pc
+ 2, s
, len
, ofs
, caps
);
556 loop_greedy(r
, pc
, s
, len
, ofs
);
557 pc
+= r
->code
[pc
+ 1];
560 res
= match(r
, pc
+ 2, s
, len
, ofs
, caps
);
564 loop_non_greedy(r
, pc
, s
, len
, ofs
);
565 pc
+= r
->code
[pc
+ 1];
569 if (*ofs
< len
&& isspace(((unsigned char *)s
)[*ofs
])) {
578 !isspace(((unsigned char *)s
)[*ofs
])) {
586 if (*ofs
< len
&& isdigit(((unsigned char *)s
)[*ofs
])) {
603 res
= is_any_of(r
->data
+ r
->code
[pc
+ 1],
604 r
->code
[pc
+ 2], s
, ofs
);
610 res
= is_any_but(r
->data
+ r
->code
[pc
+ 1],
611 r
->code
[pc
+ 2], s
, ofs
);
615 res
= *ofs
== 0 ? 1 : 0;
619 res
= *ofs
== len
? 1 : 0;
624 caps
[r
->code
[pc
+ 1]].ptr
= s
+ *ofs
;
629 caps
[r
->code
[pc
+ 1]].len
= (s
+ *ofs
) -
630 caps
[r
->code
[pc
+ 1]].ptr
;
637 printf("unknown cmd (%d) at %d\n", r
->code
[pc
], pc
);
647 slre_match(const struct slre
*r
, const char *buf
, int len
,
650 int i
, ofs
= 0, res
= 0;
653 res
= match(r
, 0, buf
, len
, &ofs
, caps
);
655 for (i
= 0; i
< len
&& res
== 0; i
++) {
657 res
= match(r
, 0, buf
, len
, &ofs
, caps
);
667 int main(int argc
, char *argv
[])
670 struct cap caps
[N_CAPS
];
671 unsigned char data
[1 * 1024 * 1024];
676 fprintf(stderr
, "Usage: %s 'slre' <file>\n", argv
[0]);
680 fp
= fopen(argv
[2], "rb");
682 fprintf(stderr
, "Error: cannot open %s:%s\n",
683 argv
[2], strerror(errno
));
687 if (!slre_compile(&slre
, argv
[1])) {
688 fprintf(stderr
, "Error compiling slre: %s\n", slre
.err_str
);
692 slre_dump(&slre
, stderr
);
694 while (fgets(data
, sizeof(data
), fp
) != NULL
) {
697 if ((len
> 0) && (data
[len
-1] == '\n')) {
702 printf("Data = \"%s\"\n", data
);
704 (void) memset(caps
, 0, sizeof(caps
));
708 res
= slre_match(&slre
, data
, len
, caps
);
709 printf("Result [%d]: %d\n", i
, res
);
711 for (i
= 0; i
< N_CAPS
; i
++) {
712 if (caps
[i
].len
> 0) {
713 printf("Substring %d: len=%d [%.*s]\n", i
,
715 caps
[i
].len
, caps
[i
].ptr
);
718 printf("----------------------------------------------------\n");
724 #endif /* SLRE_TEST */