]>
Commit | Line | Data |
---|---|---|
d73c4440 RL |
1 | #! /usr/bin/env perl |
2 | # Copyright 2018 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
9059ab42 | 4 | # Licensed under the Apache License 2.0 (the "License"). You may not use |
d73c4440 RL |
5 | # this file except in compliance with the License. You can obtain a copy |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | ||
9 | package OpenSSL::ParseC; | |
10 | ||
11 | use strict; | |
12 | use warnings; | |
13 | ||
14 | use Exporter; | |
15 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
16 | $VERSION = "0.9"; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw(parse); | |
19 | ||
20 | # Global handler data | |
21 | my @preprocessor_conds; # A list of simple preprocessor conditions, | |
22 | # each item being a list of macros defined | |
23 | # or not defined. | |
24 | ||
25 | # Handler helpers | |
26 | sub all_conds { | |
27 | return map { ( @$_ ) } @preprocessor_conds; | |
28 | } | |
29 | ||
30 | # A list of handlers that will look at a "complete" string and try to | |
31 | # figure out what to make of it. | |
32 | # Each handler is a hash with the following keys: | |
33 | # | |
34 | # regexp a regexp to compare the "complete" string with. | |
35 | # checker a function that does a more complex comparison. | |
36 | # Use this instead of regexp if that isn't enough. | |
37 | # massager massages the "complete" string into an array with | |
38 | # the following elements: | |
39 | # | |
40 | # [0] String that needs further processing (this | |
41 | # applies to typedefs of structs), or empty. | |
42 | # [1] The name of what was found. | |
43 | # [2] A character that denotes what type of thing | |
44 | # this is: 'F' for function, 'S' for struct, | |
45 | # 'T' for typedef, 'M' for macro, 'V' for | |
46 | # variable. | |
47 | # [3] Return type (only for type 'F' and 'V') | |
48 | # [4] Value (for type 'M') or signature (for type 'F', | |
49 | # 'V', 'T' or 'S') | |
50 | # [5...] The list of preprocessor conditions this is | |
51 | # found in, as in checks for macro definitions | |
52 | # (stored as the macro's name) or the absence | |
53 | # of definition (stored as the macro's name | |
54 | # prefixed with a '!' | |
55 | # | |
56 | # If the massager returns an empty list, it means the | |
57 | # "complete" string has side effects but should otherwise | |
58 | # be ignored. | |
59 | # If the massager is undefined, the "complete" string | |
60 | # should be ignored. | |
61 | my @opensslcpphandlers = ( | |
62 | ################################################################## | |
63 | # OpenSSL CPP specials | |
64 | # | |
65 | # These are used to convert certain pre-precessor expressions into | |
66 | # others that @cpphandlers have a better chance to understand. | |
67 | ||
fcd2d5a6 | 68 | { regexp => qr/#if (!?)OPENSSL_API_([0-9_]+)$/, |
d73c4440 | 69 | massager => sub { |
fcd2d5a6 | 70 | my $cnd = $1 eq '!' ? 'ndef' : 'def'; |
d73c4440 | 71 | return (<<"EOF"); |
fcd2d5a6 | 72 | #if$cnd DEPRECATEDIN_$2 |
d73c4440 RL |
73 | EOF |
74 | } | |
75 | } | |
76 | ); | |
77 | my @cpphandlers = ( | |
78 | ################################################################## | |
79 | # CPP stuff | |
80 | ||
81 | { regexp => qr/#ifdef ?(.*)/, | |
82 | massager => sub { | |
83 | my %opts; | |
84 | if (ref($_[$#_]) eq "HASH") { | |
85 | %opts = %{$_[$#_]}; | |
86 | pop @_; | |
87 | } | |
88 | push @preprocessor_conds, [ $1 ]; | |
89 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
90 | if $opts{debug}; | |
91 | return (); | |
92 | }, | |
93 | }, | |
94 | { regexp => qr/#ifndef ?(.*)/, | |
95 | massager => sub { | |
96 | my %opts; | |
97 | if (ref($_[$#_]) eq "HASH") { | |
98 | %opts = %{$_[$#_]}; | |
99 | pop @_; | |
100 | } | |
101 | push @preprocessor_conds, [ '!'.$1 ]; | |
102 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
103 | if $opts{debug}; | |
104 | return (); | |
105 | }, | |
106 | }, | |
107 | { regexp => qr/#if (0|1)/, | |
108 | massager => sub { | |
109 | my %opts; | |
110 | if (ref($_[$#_]) eq "HASH") { | |
111 | %opts = %{$_[$#_]}; | |
112 | pop @_; | |
113 | } | |
114 | if ($1 eq "1") { | |
115 | push @preprocessor_conds, [ "TRUE" ]; | |
116 | } else { | |
117 | push @preprocessor_conds, [ "!TRUE" ]; | |
118 | } | |
119 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
120 | if $opts{debug}; | |
121 | return (); | |
122 | }, | |
123 | }, | |
124 | { regexp => qr/#if ?(.*)/, | |
125 | massager => sub { | |
126 | my %opts; | |
127 | if (ref($_[$#_]) eq "HASH") { | |
128 | %opts = %{$_[$#_]}; | |
129 | pop @_; | |
130 | } | |
131 | my @results = (); | |
132 | my $conds = $1; | |
133 | if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) { | |
134 | push @results, $1; # Handle the simple case | |
135 | my $rest = $2; | |
136 | my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/; | |
137 | print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" | |
138 | if $opts{debug}; | |
139 | if ($rest =~ m/$re/) { | |
140 | my @rest = split /\|\|/, $rest; | |
141 | shift @rest; | |
142 | foreach (@rest) { | |
143 | m|^defined<<<\(([^\)]*)\)>>>$|; | |
144 | die "Something wrong...$opts{PLACE}" if $1 eq ""; | |
145 | push @results, $1; | |
146 | } | |
147 | } else { | |
148 | $conds =~ s/<<<|>>>//g; | |
149 | warn "Warning: complicated #if expression(1): $conds$opts{PLACE}" | |
150 | if $opts{warnings}; | |
151 | } | |
152 | } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) { | |
153 | push @results, '!'.$1; # Handle the simple case | |
154 | my $rest = $2; | |
155 | my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/; | |
156 | print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" | |
157 | if $opts{debug}; | |
158 | if ($rest =~ m/$re/) { | |
159 | my @rest = split /\&\&/, $rest; | |
160 | shift @rest; | |
161 | foreach (@rest) { | |
162 | m|^!defined<<<\(([^\)]*)\)>>>$|; | |
163 | die "Something wrong...$opts{PLACE}" if $1 eq ""; | |
164 | push @results, '!'.$1; | |
165 | } | |
166 | } else { | |
167 | $conds =~ s/<<<|>>>//g; | |
168 | warn "Warning: complicated #if expression(2): $conds$opts{PLACE}" | |
169 | if $opts{warnings}; | |
170 | } | |
171 | } else { | |
172 | $conds =~ s/<<<|>>>//g; | |
173 | warn "Warning: complicated #if expression(3): $conds$opts{PLACE}" | |
174 | if $opts{warnings}; | |
175 | } | |
176 | print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n" | |
177 | if $opts{debug}; | |
178 | push @preprocessor_conds, [ @results ]; | |
179 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
180 | if $opts{debug}; | |
181 | return (); | |
182 | }, | |
183 | }, | |
184 | { regexp => qr/#elif (.*)/, | |
185 | massager => sub { | |
186 | my %opts; | |
187 | if (ref($_[$#_]) eq "HASH") { | |
188 | %opts = %{$_[$#_]}; | |
189 | pop @_; | |
190 | } | |
191 | die "An #elif without corresponding condition$opts{PLACE}" | |
192 | if !@preprocessor_conds; | |
193 | pop @preprocessor_conds; | |
194 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
195 | if $opts{debug}; | |
196 | return (<<"EOF"); | |
197 | #if $1 | |
198 | EOF | |
199 | }, | |
200 | }, | |
201 | { regexp => qr/#else/, | |
202 | massager => sub { | |
203 | my %opts; | |
204 | if (ref($_[$#_]) eq "HASH") { | |
205 | %opts = %{$_[$#_]}; | |
206 | pop @_; | |
207 | } | |
208 | die "An #else without corresponding condition$opts{PLACE}" | |
209 | if !@preprocessor_conds; | |
210 | # Invert all conditions on the last level | |
211 | my $stuff = pop @preprocessor_conds; | |
212 | push @preprocessor_conds, [ | |
213 | map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff | |
214 | ]; | |
215 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
216 | if $opts{debug}; | |
217 | return (); | |
218 | }, | |
219 | }, | |
220 | { regexp => qr/#endif ?/, | |
221 | massager => sub { | |
222 | my %opts; | |
223 | if (ref($_[$#_]) eq "HASH") { | |
224 | %opts = %{$_[$#_]}; | |
225 | pop @_; | |
226 | } | |
227 | die "An #endif without corresponding condition$opts{PLACE}" | |
228 | if !@preprocessor_conds; | |
229 | pop @preprocessor_conds; | |
230 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
231 | if $opts{debug}; | |
232 | return (); | |
233 | }, | |
234 | }, | |
235 | { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/, | |
236 | massager => sub { | |
237 | my $name = $1; | |
238 | my $params = $2; | |
239 | my $spaceval = $3||""; | |
240 | my $val = $4||""; | |
241 | return ("", | |
242 | $1, 'M', "", $params ? "$name$params$spaceval" : $val, | |
243 | all_conds()); } | |
244 | }, | |
245 | { regexp => qr/#.*/, | |
246 | massager => sub { return (); } | |
247 | }, | |
248 | ); | |
249 | ||
250 | my @opensslchandlers = ( | |
251 | ################################################################## | |
252 | # OpenSSL C specials | |
253 | # | |
254 | # They are really preprocessor stuff, but they look like C stuff | |
255 | # to this parser. All of these do replacements, anything else is | |
256 | # an error. | |
257 | ||
d73c4440 RL |
258 | ##### |
259 | # Deprecated stuff, by OpenSSL release. | |
260 | ||
261 | # We trick the parser by pretending that the declaration is wrapped in a | |
262 | # check if the DEPRECATEDIN macro is defined or not. Callers of parse() | |
263 | # will have to decide what to do with it. | |
2284f64c | 264 | { regexp => qr/(DEPRECATEDIN_\d+(?:_\d+_\d+)?)<<<\((.*)\)>>>/, |
d73c4440 RL |
265 | massager => sub { return (<<"EOF"); |
266 | #ifndef $1 | |
267 | $2; | |
268 | #endif | |
269 | EOF | |
270 | }, | |
271 | }, | |
272 | ||
273 | ##### | |
274 | # LHASH stuff | |
275 | ||
276 | # LHASH_OF(foo) is used as a type, but the chandlers won't take it | |
277 | # gracefully, so we expand it here. | |
278 | { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/, | |
279 | massager => sub { return ("$1struct lhash_st_$2$3"); } | |
280 | }, | |
281 | { regexp => qr/DEFINE_LHASH_OF<<<\((.*)\)>>>/, | |
282 | massager => sub { | |
283 | return (<<"EOF"); | |
284 | static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *), | |
285 | int (*cfn)(const $1 *, const $1 *)); | |
286 | static ossl_inline void lh_$1_free(LHASH_OF($1) *lh); | |
287 | static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d); | |
288 | static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d); | |
289 | static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d); | |
290 | static ossl_inline int lh_$1_error(LHASH_OF($1) *lh); | |
291 | static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh); | |
292 | static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out); | |
293 | static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh, | |
294 | BIO *out); | |
295 | static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out); | |
296 | static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh); | |
297 | static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl); | |
298 | static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *)); | |
299 | LHASH_OF($1) | |
300 | EOF | |
301 | } | |
302 | }, | |
303 | ||
304 | ##### | |
305 | # STACK stuff | |
306 | ||
307 | # STACK_OF(foo) is used as a type, but the chandlers won't take it | |
308 | # gracefully, so we expand it here. | |
309 | { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/, | |
310 | massager => sub { return ("$1struct stack_st_$2$3"); } | |
311 | }, | |
312 | # { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/, | |
313 | # massager => sub { | |
314 | # my $before = $1; | |
315 | # my $stack_of = "struct stack_st_$2"; | |
316 | # my $after = $3; | |
317 | # if ($after =~ m|^\w|) { $after = " ".$after; } | |
318 | # return ("$before$stack_of$after"); | |
319 | # } | |
320 | # }, | |
9fdcc21f | 321 | { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/, |
d73c4440 RL |
322 | massager => sub { |
323 | return (<<"EOF"); | |
324 | STACK_OF($1); | |
325 | typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b); | |
326 | typedef void (*sk_$1_freefunc)($3 *a); | |
327 | typedef $3 * (*sk_$1_copyfunc)(const $3 *a); | |
328 | static ossl_inline int sk_$1_num(const STACK_OF($1) *sk); | |
329 | static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx); | |
330 | static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare); | |
331 | static ossl_inline STACK_OF($1) *sk_$1_new_null(void); | |
332 | static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare, | |
333 | int n); | |
334 | static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n); | |
335 | static ossl_inline void sk_$1_free(STACK_OF($1) *sk); | |
336 | static ossl_inline void sk_$1_zero(STACK_OF($1) *sk); | |
337 | static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i); | |
338 | static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr); | |
339 | static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr); | |
340 | static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr); | |
341 | static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk); | |
342 | static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk); | |
343 | static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk, | |
344 | sk_$1_freefunc freefunc); | |
345 | static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx); | |
346 | static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr); | |
347 | static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr); | |
348 | static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr); | |
349 | static ossl_inline void sk_$1_sort(STACK_OF($1) *sk); | |
350 | static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk); | |
351 | static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk); | |
352 | static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk, | |
353 | sk_$1_copyfunc copyfunc, | |
354 | sk_$1_freefunc freefunc); | |
355 | static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk, | |
356 | sk_$1_compfunc compare); | |
357 | EOF | |
358 | } | |
359 | }, | |
9fdcc21f | 360 | { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
361 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); }, |
362 | }, | |
363 | { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/, | |
364 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); }, | |
365 | }, | |
9fdcc21f | 366 | { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
367 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); }, |
368 | }, | |
369 | { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/, | |
370 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); }, | |
371 | }, | |
372 | { regexp => qr/PREDECLARE_STACK_OF<<<\((.*)\)>>>/, | |
373 | massager => sub { return ("STACK_OF($1);"); } | |
374 | }, | |
375 | { regexp => qr/DECLARE_STACK_OF<<<\((.*)\)>>>/, | |
376 | massager => sub { return ("STACK_OF($1);"); } | |
377 | }, | |
9fdcc21f | 378 | { regexp => qr/DECLARE_SPECIAL_STACK_OF<<<\((.*?),\s*(.*?)\)>>>/, |
d73c4440 RL |
379 | massager => sub { return ("STACK_OF($1);"); } |
380 | }, | |
381 | ||
382 | ##### | |
383 | # ASN1 stuff | |
384 | ||
385 | { regexp => qr/TYPEDEF_D2I_OF<<<\((.*)\)>>>/, | |
386 | massager => sub { | |
387 | return ("typedef $1 *d2i_of_$1($1 **,const unsigned char **,long)"); | |
388 | }, | |
389 | }, | |
390 | { regexp => qr/TYPEDEF_I2D_OF<<<\((.*)\)>>>/, | |
391 | massager => sub { | |
392 | return ("typedef $1 *i2d_of_$1($1 *,unsigned char **)"); | |
393 | }, | |
394 | }, | |
395 | { regexp => qr/TYPEDEF_D2I2D_OF<<<\((.*)\)>>>/, | |
396 | massager => sub { | |
397 | return ("TYPEDEF_D2I_OF($1); TYPEDEF_I2D_OF($1)"); | |
398 | }, | |
399 | }, | |
400 | { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/, | |
401 | massager => sub { | |
402 | return (<<"EOF"); | |
d73c4440 | 403 | const ASN1_ITEM *$1_it(void); |
d73c4440 RL |
404 | EOF |
405 | }, | |
406 | }, | |
9fdcc21f DO |
407 | { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/, |
408 | massager => sub { | |
409 | return (<<"EOF"); | |
410 | int d2i_$2(void); | |
411 | int i2d_$2(void); | |
412 | EOF | |
413 | }, | |
414 | }, | |
415 | { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/, | |
d73c4440 RL |
416 | massager => sub { |
417 | return (<<"EOF"); | |
418 | int d2i_$3(void); | |
419 | int i2d_$3(void); | |
420 | DECLARE_ASN1_ITEM($2) | |
421 | EOF | |
422 | }, | |
423 | }, | |
9fdcc21f | 424 | { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
425 | massager => sub { |
426 | return (<<"EOF"); | |
427 | int d2i_$2(void); | |
428 | int i2d_$2(void); | |
429 | DECLARE_ASN1_ITEM($2) | |
9fdcc21f DO |
430 | EOF |
431 | }, | |
432 | }, | |
433 | { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, | |
434 | massager => sub { | |
435 | return (<<"EOF"); | |
436 | int $2_free(void); | |
437 | int $2_new(void); | |
d73c4440 RL |
438 | EOF |
439 | }, | |
440 | }, | |
441 | { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/, | |
442 | massager => sub { | |
443 | return (<<"EOF"); | |
444 | int $1_free(void); | |
445 | int $1_new(void); | |
446 | EOF | |
447 | }, | |
448 | }, | |
9fdcc21f | 449 | { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
450 | massager => sub { |
451 | return (<<"EOF"); | |
452 | int d2i_$2(void); | |
453 | int i2d_$2(void); | |
454 | int $2_free(void); | |
455 | int $2_new(void); | |
456 | DECLARE_ASN1_ITEM($2) | |
457 | EOF | |
458 | }, | |
459 | }, | |
9fdcc21f | 460 | { regexp => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/, |
d73c4440 RL |
461 | massager => sub { return (<<"EOF"); |
462 | int d2i_$1(void); | |
463 | int i2d_$1(void); | |
464 | int $1_free(void); | |
465 | int $1_new(void); | |
466 | DECLARE_ASN1_ITEM($1) | |
467 | EOF | |
468 | } | |
469 | }, | |
470 | { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/, | |
471 | massager => sub { | |
472 | return (<<"EOF"); | |
473 | int i2d_$1_NDEF(void); | |
474 | EOF | |
475 | } | |
476 | }, | |
477 | { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/, | |
478 | massager => sub { | |
479 | return (<<"EOF"); | |
480 | int $1_print_ctx(void); | |
481 | EOF | |
482 | } | |
483 | }, | |
9fdcc21f | 484 | { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
485 | massager => sub { |
486 | return (<<"EOF"); | |
487 | int $2_print_ctx(void); | |
488 | EOF | |
489 | } | |
490 | }, | |
491 | { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/, | |
492 | massager => sub { return (); } | |
493 | }, | |
9fdcc21f DO |
494 | { regexp => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/, |
495 | massager => sub { | |
496 | return (<<"EOF"); | |
497 | int $1_dup(void); | |
498 | EOF | |
499 | } | |
500 | }, | |
501 | { regexp => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/, | |
502 | massager => sub { | |
503 | return (<<"EOF"); | |
504 | int $2_dup(void); | |
505 | EOF | |
506 | } | |
507 | }, | |
d73c4440 RL |
508 | { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/, |
509 | massager => sub { return (); } | |
510 | }, | |
511 | { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/, | |
512 | massager => sub { return (<<"EOF"); | |
513 | #ifndef OPENSSL_NO_STDIO | |
514 | int PEM_read_$1(void); | |
515 | int PEM_write_$1(void); | |
516 | #endif | |
517 | int PEM_read_bio_$1(void); | |
518 | int PEM_write_bio_$1(void); | |
519 | EOF | |
520 | }, | |
521 | }, | |
522 | ||
523 | ##### | |
524 | # PEM stuff | |
525 | { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/, | |
526 | massager => sub { return (<<"EOF"); | |
527 | #ifndef OPENSSL_NO_STDIO | |
528 | int PEM_write_$1(void); | |
529 | #endif | |
530 | int PEM_write_bio_$1(void); | |
531 | EOF | |
532 | }, | |
533 | }, | |
534 | { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/, | |
535 | massager => sub { return (<<"EOF"); | |
536 | #ifndef OPENSSL_NO_STDIO | |
537 | int PEM_read_$1(void); | |
538 | #endif | |
539 | int PEM_read_bio_$1(void); | |
540 | EOF | |
541 | }, | |
542 | }, | |
543 | ||
544 | # Spurious stuff found in the OpenSSL headers | |
545 | # Usually, these are just macros that expand to, well, something | |
546 | { regexp => qr/__NDK_FPABI__/, | |
547 | massager => sub { return (); } | |
548 | }, | |
549 | ); | |
550 | ||
551 | my $anoncnt = 0; | |
552 | ||
553 | my @chandlers = ( | |
554 | ################################################################## | |
555 | # C stuff | |
556 | ||
557 | # extern "C" of individual items | |
558 | # Note that the main parse function has a special hack for 'extern "C" {' | |
559 | # which can't be done in handlers | |
560 | # We simply ignore it. | |
561 | { regexp => qr/extern "C" (.*;)/, | |
562 | massager => sub { return ($1); }, | |
563 | }, | |
4c2883a9 RL |
564 | # any other extern is just ignored |
565 | { regexp => qr/^\s* # Any spaces before | |
566 | extern # The keyword we look for | |
567 | \b # word to non-word boundary | |
568 | .* # Anything after | |
569 | ; | |
570 | /x, | |
571 | massager => sub { return (); }, | |
572 | }, | |
d73c4440 RL |
573 | # union, struct and enum definitions |
574 | # Because this one might appear a little everywhere within type | |
575 | # definitions, we take it out and replace it with just | |
576 | # 'union|struct|enum name' while registering it. | |
577 | # This makes use of the parser trick to surround the outer braces | |
578 | # with <<< and >>> | |
579 | { regexp => qr/(.*) # Anything before ($1) | |
580 | \b # word to non-word boundary | |
581 | (union|struct|enum) # The word used ($2) | |
582 | (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3) | |
583 | <<<(\{.*?\})>>> # Struct or enum definition ($4) | |
584 | (.*) # Anything after ($5) | |
585 | ; | |
586 | /x, | |
587 | massager => sub { | |
588 | my $before = $1; | |
589 | my $word = $2; | |
590 | my $name = $3 | |
591 | || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct | |
592 | my $definition = $4; | |
593 | my $after = $5; | |
594 | my $type = $word eq "struct" ? 'S' : 'E'; | |
595 | if ($before ne "" || $after ne ";") { | |
596 | if ($after =~ m|^\w|) { $after = " ".$after; } | |
597 | return ("$before$word $name$after;", | |
598 | "$word $name", $type, "", "$word$definition", all_conds()); | |
599 | } | |
600 | # If there was no before nor after, make the return much simple | |
601 | return ("", "$word $name", $type, "", "$word$definition", all_conds()); | |
602 | } | |
603 | }, | |
604 | # Named struct and enum forward declarations | |
605 | # We really just ignore them, but we need to parse them or the variable | |
606 | # declaration handler further down will think it's a variable declaration. | |
607 | { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/, | |
608 | massager => sub { return (); } | |
609 | }, | |
610 | # Function returning function pointer declaration | |
611 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) | |
612 | ((?:\w|\*|\s)*?) # Return type ($2) | |
613 | \s? # Possible space | |
614 | <<<\(\* | |
615 | ([[:alpha:]_]\w*) # Function name ($3) | |
616 | (\(.*\)) # Parameters ($4) | |
617 | \)>>> | |
618 | <<<(\(.*\))>>> # F.p. parameters ($5) | |
619 | ; | |
620 | /x, | |
621 | massager => sub { | |
622 | return ("", $3, 'F', "", "$2(*$4)$5", all_conds()) | |
623 | if defined $1; | |
624 | return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); } | |
625 | }, | |
626 | # Function pointer declaration, or typedef thereof | |
627 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) | |
628 | ((?:\w|\*|\s)*?) # Return type ($2) | |
629 | <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3) | |
630 | <<<(\(.*\))>>> # F.p. parameters ($4) | |
631 | ; | |
632 | /x, | |
633 | massager => sub { | |
634 | return ("", $3, 'T', "", "$2(*)$4", all_conds()) | |
635 | if defined $1; | |
636 | return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds()); | |
637 | }, | |
638 | }, | |
639 | # Function declaration, or typedef thereof | |
640 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) | |
641 | ((?:\w|\*|\s)*?) # Return type ($2) | |
642 | \s? # Possible space | |
643 | ([[:alpha:]_]\w*) # Function name ($3) | |
644 | <<<(\(.*\))>>> # Parameters ($4) | |
645 | ; | |
646 | /x, | |
647 | massager => sub { | |
648 | return ("", $3, 'T', "", "$2$4", all_conds()) | |
649 | if defined $1; | |
650 | return ("", $3, 'F', $2, "$2$4", all_conds()); | |
651 | }, | |
652 | }, | |
653 | # Variable declaration, including arrays, or typedef thereof | |
654 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) | |
655 | ((?:\w|\*|\s)*?) # Type ($2) | |
656 | \s? # Possible space | |
657 | ([[:alpha:]_]\w*) # Variable name ($3) | |
658 | ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4) | |
659 | ; | |
660 | /x, | |
661 | massager => sub { | |
662 | return ("", $3, 'T', "", $2.($4||""), all_conds()) | |
663 | if defined $1; | |
664 | return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds()); | |
665 | }, | |
666 | }, | |
667 | ); | |
668 | ||
669 | # End handlers are almost the same as handlers, except they are run through | |
670 | # ONCE when the input has been parsed through. These are used to check for | |
671 | # remaining stuff, such as an unfinished #ifdef and stuff like that that the | |
672 | # main parser can't check on its own. | |
673 | my @endhandlers = ( | |
674 | { massager => sub { | |
675 | my %opts = %{$_[0]}; | |
676 | ||
677 | die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE} | |
678 | if @preprocessor_conds; | |
679 | } | |
680 | } | |
681 | ); | |
682 | ||
683 | # takes a list of strings that can each contain one or several lines of code | |
684 | # also takes a hash of options as last argument. | |
685 | # | |
686 | # returns a list of hashes with information: | |
687 | # | |
688 | # name name of the thing | |
689 | # type type, see the massage handler function | |
690 | # returntype return type of functions and variables | |
691 | # value value for macros, signature for functions, variables | |
692 | # and structs | |
693 | # conds preprocessor conditions (array ref) | |
694 | ||
695 | sub parse { | |
696 | my %opts; | |
697 | if (ref($_[$#_]) eq "HASH") { | |
698 | %opts = %{$_[$#_]}; | |
699 | pop @_; | |
700 | } | |
701 | my %state = ( | |
702 | in_extern_C => 0, # An exception to parenthesis processing. | |
703 | cpp_parens => [], # A list of ending parens and braces found in | |
704 | # preprocessor directives | |
705 | c_parens => [], # A list of ending parens and braces found in | |
706 | # C statements | |
707 | in_string => "", # empty string when outside a string, otherwise | |
708 | # "'" or '"' depending on the starting quote. | |
709 | in_comment => "", # empty string when outside a comment, otherwise | |
710 | # "/*" or "//" depending on the type of comment | |
711 | # found. The latter will never be multiline | |
712 | # NOTE: in_string and in_comment will never be | |
713 | # true (in perl semantics) at the same time. | |
714 | current_line => 0, | |
715 | ); | |
716 | my @result = (); | |
717 | my $normalized_line = ""; # $input_line, but normalized. In essence, this | |
718 | # means that ALL whitespace is removed unless | |
719 | # it absolutely has to be present, and in that | |
720 | # case, there's only one space. | |
721 | # The cases where a space needs to stay present | |
722 | # are: | |
723 | # 1. between words | |
724 | # 2. between words and number | |
725 | # 3. after the first word of a preprocessor | |
726 | # directive. | |
727 | # 4. for the #define directive, between the macro | |
728 | # name/args and its value, so we end up with: | |
729 | # #define FOO val | |
730 | # #define BAR(x) something(x) | |
731 | my $collected_stmt = ""; # Where we're building up a C line until it's a | |
732 | # complete definition/declaration, as determined | |
733 | # by any handler being capable of matching it. | |
734 | ||
735 | # We use $_ shamelessly when looking through @lines. | |
736 | # In case we find a \ at the end, we keep filling it up with more lines. | |
737 | $_ = undef; | |
738 | ||
739 | foreach my $line (@_) { | |
740 | # split tries to be smart when a string ends with the thing we split on | |
741 | $line .= "\n" unless $line =~ m|\R$|; | |
742 | $line .= "#"; | |
743 | ||
744 | # We use ¦undef¦ as a marker for a new line from the file. | |
745 | # Since we convert one line to several and unshift that into @lines, | |
746 | # that's the only safe way we have to track the original lines | |
747 | my @lines = map { ( undef, $_ ) } split $/, $line; | |
748 | ||
749 | # Remember that extra # we added above? Now we remove it | |
750 | pop @lines; | |
751 | pop @lines; # Don't forget the undef | |
752 | ||
753 | while (@lines) { | |
754 | if (!defined($lines[0])) { | |
755 | shift @lines; | |
756 | $state{current_line}++; | |
757 | if (!defined($_)) { | |
758 | $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n"; | |
759 | $opts{PLACE2} = $opts{filename}.":".$state{current_line}; | |
760 | } | |
761 | next; | |
762 | } | |
763 | ||
764 | $_ = "" unless defined $_; | |
765 | $_ .= shift @lines; | |
766 | ||
767 | if (m|\\$|) { | |
768 | $_ = $`; | |
769 | next; | |
770 | } | |
771 | ||
772 | if ($opts{debug}) { | |
773 | print STDERR "DEBUG:----------------------------\n"; | |
774 | print STDERR "DEBUG: \$_ = '$_'\n"; | |
775 | } | |
776 | ||
777 | ########################################################## | |
778 | # Now that we have a full line, let's process through it | |
779 | while(1) { | |
780 | unless ($state{in_comment}) { | |
781 | # Begin with checking if the current $normalized_line | |
782 | # contains a preprocessor directive | |
783 | # This is only done if we're not inside a comment and | |
784 | # if it's a preprocessor directive and it's finished. | |
785 | if ($normalized_line =~ m|^#| && $_ eq "") { | |
786 | print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n" | |
787 | if $opts{debug}; | |
788 | $opts{debug_type} = "OPENSSL CPP"; | |
789 | my @r = ( _run_handlers($normalized_line, | |
790 | @opensslcpphandlers, | |
791 | \%opts) ); | |
792 | if (shift @r) { | |
793 | # Checking if there are lines to inject. | |
794 | if (@r) { | |
795 | @r = split $/, (pop @r).$_; | |
796 | print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n" | |
797 | if $opts{debug} && @r; | |
798 | @lines = ( @r, @lines ); | |
799 | ||
800 | $_ = ""; | |
801 | } | |
802 | } else { | |
803 | print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n" | |
804 | if $opts{debug}; | |
805 | $opts{debug_type} = "CPP"; | |
806 | my @r = ( _run_handlers($normalized_line, | |
807 | @cpphandlers, | |
808 | \%opts) ); | |
809 | if (shift @r) { | |
810 | if (ref($r[0]) eq "HASH") { | |
811 | push @result, shift @r; | |
812 | } | |
813 | ||
814 | # Now, check if there are lines to inject. | |
815 | # Really, this should never happen, it IS a | |
816 | # preprocessor directive after all... | |
817 | if (@r) { | |
818 | @r = split $/, pop @r; | |
819 | print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n" | |
820 | if $opts{debug} && @r; | |
821 | @lines = ( @r, @lines ); | |
822 | $_ = ""; | |
823 | } | |
824 | } | |
825 | } | |
826 | ||
827 | # Note: we simply ignore all directives that no | |
828 | # handler matches | |
829 | $normalized_line = ""; | |
830 | } | |
831 | ||
832 | # If the two strings end and start with a character that | |
833 | # shouldn't get concatenated, add a space | |
834 | my $space = | |
835 | ($collected_stmt =~ m/(?:"|')$/ | |
836 | || ($collected_stmt =~ m/(?:\w|\d)$/ | |
837 | && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : ""; | |
838 | ||
839 | # Now, unless we're building up a preprocessor directive or | |
840 | # are in the middle of a string, or the parens et al aren't | |
841 | # balanced up yet, let's try and see if there's a OpenSSL | |
842 | # or C handler that can make sense of what we have so far. | |
843 | if ( $normalized_line !~ m|^#| | |
844 | && ($collected_stmt ne "" || $normalized_line ne "") | |
845 | && ! @{$state{c_parens}} | |
846 | && ! $state{in_string} ) { | |
847 | if ($opts{debug}) { | |
848 | print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n"; | |
849 | print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n"; | |
850 | } | |
851 | $opts{debug_type} = "OPENSSL C"; | |
852 | my @r = ( _run_handlers($collected_stmt | |
853 | .$space | |
854 | .$normalized_line, | |
855 | @opensslchandlers, | |
856 | \%opts) ); | |
857 | if (shift @r) { | |
858 | # Checking if there are lines to inject. | |
859 | if (@r) { | |
860 | @r = split $/, (pop @r).$_; | |
861 | print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n" | |
862 | if $opts{debug} && @r; | |
863 | @lines = ( @r, @lines ); | |
864 | ||
865 | $_ = ""; | |
866 | } | |
867 | $normalized_line = ""; | |
868 | $collected_stmt = ""; | |
869 | } else { | |
870 | if ($opts{debug}) { | |
871 | print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n"; | |
872 | print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n"; | |
873 | } | |
874 | $opts{debug_type} = "C"; | |
875 | my @r = ( _run_handlers($collected_stmt | |
876 | .$space | |
877 | .$normalized_line, | |
878 | @chandlers, | |
879 | \%opts) ); | |
880 | if (shift @r) { | |
881 | if (ref($r[0]) eq "HASH") { | |
882 | push @result, shift @r; | |
883 | } | |
884 | ||
885 | # Checking if there are lines to inject. | |
886 | if (@r) { | |
887 | @r = split $/, (pop @r).$_; | |
888 | print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n" | |
889 | if $opts{debug} && @r; | |
890 | @lines = ( @r, @lines ); | |
891 | ||
892 | $_ = ""; | |
893 | } | |
894 | $normalized_line = ""; | |
895 | $collected_stmt = ""; | |
896 | } | |
897 | } | |
898 | } | |
899 | if ($_ eq "") { | |
900 | $collected_stmt .= $space.$normalized_line; | |
901 | $normalized_line = ""; | |
902 | } | |
903 | } | |
904 | ||
905 | if ($_ eq "") { | |
906 | $_ = undef; | |
907 | last; | |
908 | } | |
909 | ||
910 | # Take care of inside string first. | |
911 | if ($state{in_string}) { | |
912 | if (m/ (?:^|(?<!\\)) # Make sure it's not escaped | |
913 | $state{in_string} # Look for matching quote | |
914 | /x) { | |
915 | $normalized_line .= $`.$&; | |
916 | $state{in_string} = ""; | |
917 | $_ = $'; | |
918 | next; | |
919 | } else { | |
920 | die "Unfinished string without continuation found$opts{PLACE}\n"; | |
921 | } | |
922 | } | |
923 | # ... or inside comments, whichever happens to apply | |
924 | elsif ($state{in_comment}) { | |
925 | ||
926 | # This should never happen | |
927 | die "Something went seriously wrong, multiline //???$opts{PLACE}\n" | |
928 | if ($state{in_comment} eq "//"); | |
929 | ||
930 | # A note: comments are simply discarded. | |
931 | ||
932 | if (m/ (?:^|(?<!\\)) # Make sure it's not escaped | |
933 | \*\/ # Look for C comment end | |
934 | /x) { | |
935 | $state{in_comment} = ""; | |
936 | $_ = $'; | |
937 | print STDERR "DEBUG: Found end of comment, followed by '$_'\n" | |
938 | if $opts{debug}; | |
939 | next; | |
940 | } else { | |
941 | $_ = ""; | |
942 | next; | |
943 | } | |
944 | } | |
945 | ||
946 | # At this point, it's safe to remove leading whites, but | |
947 | # we need to be careful with some preprocessor lines | |
948 | if (m|^\s+|) { | |
949 | my $rest = $'; | |
950 | my $space = ""; | |
951 | $space = " " | |
952 | if ($normalized_line =~ m/^ | |
953 | \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)? | |
954 | | \#[a-z]+ | |
955 | $/x); | |
956 | print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n" | |
957 | if $opts{debug}; | |
958 | $_ = $space.$rest; | |
959 | } | |
960 | ||
961 | my $parens = | |
962 | $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens'; | |
963 | (my $paren_singular = $parens) =~ s|s$||; | |
964 | ||
965 | # Now check for specific tokens, and if they are parens, | |
966 | # check them against $state{$parens}. Note that we surround | |
967 | # the outermost parens with extra "<<<" and ">>>". Those | |
968 | # are for the benefit of handlers who to need to detect | |
969 | # them, and they will be removed from the final output. | |
970 | if (m|^[\{\[\(]|) { | |
971 | my $body = $&; | |
972 | $_ = $'; | |
973 | if (!@{$state{$parens}}) { | |
974 | if ("$normalized_line$body" =~ m|^extern "C"\{$|) { | |
975 | $state{in_extern_C} = 1; | |
976 | print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n" | |
977 | if $opts{debug}; | |
978 | $normalized_line = ""; | |
979 | } else { | |
980 | $normalized_line .= "<<<".$body; | |
981 | } | |
982 | } else { | |
983 | $normalized_line .= $body; | |
984 | } | |
985 | ||
986 | if ($normalized_line ne "") { | |
987 | print STDERR "DEBUG: found $paren_singular start '$body'\n" | |
988 | if $opts{debug}; | |
989 | $body =~ tr|\{\[\(|\}\]\)|; | |
990 | print STDERR "DEBUG: pushing $paren_singular end '$body'\n" | |
991 | if $opts{debug}; | |
992 | push @{$state{$parens}}, $body; | |
993 | } | |
994 | } elsif (m|^[\}\]\)]|) { | |
995 | $_ = $'; | |
996 | ||
997 | if (!@{$state{$parens}} | |
998 | && $& eq '}' && $state{in_extern_C}) { | |
999 | print STDERR "DEBUG: found end of 'extern \"C\"'\n" | |
1000 | if $opts{debug}; | |
1001 | $state{in_extern_C} = 0; | |
1002 | } else { | |
1003 | print STDERR "DEBUG: Trying to match '$&' against '" | |
1004 | ,join("', '", @{$state{$parens}}) | |
1005 | ,"'\n" | |
1006 | if $opts{debug}; | |
1007 | die "Unmatched parentheses$opts{PLACE}\n" | |
1008 | unless (@{$state{$parens}} | |
1009 | && pop @{$state{$parens}} eq $&); | |
1010 | if (!@{$state{$parens}}) { | |
1011 | $normalized_line .= $&.">>>"; | |
1012 | } else { | |
1013 | $normalized_line .= $&; | |
1014 | } | |
1015 | } | |
1016 | } elsif (m|^["']|) { # string start | |
1017 | my $body = $&; | |
1018 | $_ = $'; | |
1019 | ||
1020 | # We want to separate strings from \w and \d with one space. | |
1021 | $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/; | |
1022 | $normalized_line .= $body; | |
1023 | $state{in_string} = $body; | |
1024 | } elsif (m|^\/\*|) { # C style comment | |
1025 | print STDERR "DEBUG: found start of C style comment\n" | |
1026 | if $opts{debug}; | |
1027 | $state{in_comment} = $&; | |
1028 | $_ = $'; | |
1029 | } elsif (m|^\/\/|) { # C++ style comment | |
1030 | print STDERR "DEBUG: found C++ style comment\n" | |
1031 | if $opts{debug}; | |
1032 | $_ = ""; # (just discard it entirely) | |
1033 | } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ ) | |
1034 | (?i: U | L | UL | LL | ULL )? | |
1035 | | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)? | |
1036 | ) /x) { | |
1037 | print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n" | |
1038 | if $opts{debug}; | |
1039 | $normalized_line .= $&; | |
1040 | $_ = $'; | |
1041 | } elsif (m/^[[:alpha:]_]\w*/) { | |
1042 | my $body = $&; | |
1043 | my $rest = $'; | |
1044 | my $space = ""; | |
1045 | ||
1046 | # Now, only add a space if it's needed to separate | |
1047 | # two \w characters, and we also surround strings with | |
1048 | # a space. In this case, that's if $normalized_line ends | |
1049 | # with a \w, \d, " or '. | |
1050 | $space = " " | |
1051 | if ($normalized_line =~ m/("|')$/ | |
1052 | || ($normalized_line =~ m/(\w|\d)$/ | |
1053 | && $body =~ m/^(\w|\d)/)); | |
1054 | ||
1055 | print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n" | |
1056 | if $opts{debug}; | |
1057 | $normalized_line .= $space.$body; | |
1058 | $_ = $rest; | |
1059 | } elsif (m|^(?:\\)?.|) { # Catch-all | |
1060 | $normalized_line .= $&; | |
1061 | $_ = $'; | |
1062 | } | |
1063 | } | |
1064 | } | |
1065 | } | |
1066 | foreach my $handler (@endhandlers) { | |
1067 | if ($handler->{massager}) { | |
1068 | $handler->{massager}->(\%opts); | |
1069 | } | |
1070 | } | |
1071 | return @result; | |
1072 | } | |
1073 | ||
1074 | # arg1: line to check | |
1075 | # arg2...: handlers to check | |
1076 | # return undef when no handler matched | |
1077 | sub _run_handlers { | |
1078 | my %opts; | |
1079 | if (ref($_[$#_]) eq "HASH") { | |
1080 | %opts = %{$_[$#_]}; | |
1081 | pop @_; | |
1082 | } | |
1083 | my $line = shift; | |
1084 | my @handlers = @_; | |
1085 | ||
1086 | foreach my $handler (@handlers) { | |
1087 | if ($handler->{regexp} | |
1088 | && $line =~ m|^$handler->{regexp}$|) { | |
1089 | if ($handler->{massager}) { | |
1090 | if ($opts{debug}) { | |
1091 | print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n"; | |
1092 | print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n"; | |
1093 | } | |
1094 | my $saved_line = $line; | |
1095 | my @massaged = | |
1096 | map { s/(<<<|>>>)//g; $_ } | |
1097 | $handler->{massager}->($saved_line, \%opts); | |
1098 | print STDERR "DEBUG[",$opts{debug_type},"]: Got back '" | |
1099 | , join("', '", @massaged), "'\n" | |
1100 | if $opts{debug}; | |
1101 | ||
1102 | # Because we may get back new lines to be | |
1103 | # injected before whatever else that follows, | |
1104 | # and the injected stuff might include | |
1105 | # preprocessor lines, we need to inject them | |
1106 | # in @lines and set $_ to the empty string to | |
1107 | # break out from the inner loops | |
1108 | my $injected_lines = shift @massaged || ""; | |
1109 | ||
1110 | if (@massaged) { | |
1111 | return (1, | |
1112 | { | |
1113 | name => shift @massaged, | |
1114 | type => shift @massaged, | |
1115 | returntype => shift @massaged, | |
1116 | value => shift @massaged, | |
1117 | conds => [ @massaged ] | |
1118 | }, | |
1119 | $injected_lines | |
1120 | ); | |
1121 | } else { | |
1122 | print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n" | |
1123 | if $opts{debug} && $injected_lines eq ""; | |
1124 | return (1, $injected_lines); | |
1125 | } | |
1126 | } | |
1127 | return (1); | |
1128 | } | |
1129 | } | |
1130 | return (0); | |
1131 | } |