]>
Commit | Line | Data |
---|---|---|
d73c4440 | 1 | #! /usr/bin/env perl |
a28d06f3 | 2 | # Copyright 2018-2021 The OpenSSL Project Authors. All Rights Reserved. |
d73c4440 | 3 | # |
9059ab42 | 4 | # Licensed under the Apache License 2.0 (the "License"). You may not use |
d73c4440 RL |
5 | # this file except in compliance with the License. You can obtain a copy |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | ||
9 | package OpenSSL::ParseC; | |
10 | ||
11 | use strict; | |
12 | use warnings; | |
13 | ||
14 | use Exporter; | |
15 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
16 | $VERSION = "0.9"; | |
17 | @ISA = qw(Exporter); | |
18 | @EXPORT = qw(parse); | |
19 | ||
20 | # Global handler data | |
21 | my @preprocessor_conds; # A list of simple preprocessor conditions, | |
22 | # each item being a list of macros defined | |
23 | # or not defined. | |
24 | ||
25 | # Handler helpers | |
26 | sub all_conds { | |
27 | return map { ( @$_ ) } @preprocessor_conds; | |
28 | } | |
29 | ||
30 | # A list of handlers that will look at a "complete" string and try to | |
31 | # figure out what to make of it. | |
32 | # Each handler is a hash with the following keys: | |
33 | # | |
34 | # regexp a regexp to compare the "complete" string with. | |
35 | # checker a function that does a more complex comparison. | |
36 | # Use this instead of regexp if that isn't enough. | |
37 | # massager massages the "complete" string into an array with | |
38 | # the following elements: | |
39 | # | |
40 | # [0] String that needs further processing (this | |
41 | # applies to typedefs of structs), or empty. | |
42 | # [1] The name of what was found. | |
43 | # [2] A character that denotes what type of thing | |
44 | # this is: 'F' for function, 'S' for struct, | |
45 | # 'T' for typedef, 'M' for macro, 'V' for | |
46 | # variable. | |
47 | # [3] Return type (only for type 'F' and 'V') | |
48 | # [4] Value (for type 'M') or signature (for type 'F', | |
49 | # 'V', 'T' or 'S') | |
50 | # [5...] The list of preprocessor conditions this is | |
51 | # found in, as in checks for macro definitions | |
52 | # (stored as the macro's name) or the absence | |
53 | # of definition (stored as the macro's name | |
54 | # prefixed with a '!' | |
55 | # | |
56 | # If the massager returns an empty list, it means the | |
57 | # "complete" string has side effects but should otherwise | |
58 | # be ignored. | |
59 | # If the massager is undefined, the "complete" string | |
60 | # should be ignored. | |
61 | my @opensslcpphandlers = ( | |
62 | ################################################################## | |
63 | # OpenSSL CPP specials | |
64 | # | |
65 | # These are used to convert certain pre-precessor expressions into | |
66 | # others that @cpphandlers have a better chance to understand. | |
67 | ||
a6a4d0ac RL |
68 | # This changes any OPENSSL_NO_DEPRECATED_x_y[_z] check to a check of |
69 | # OPENSSL_NO_DEPRECATEDIN_x_y[_z]. That's due to <openssl/macros.h> | |
70 | # creating OPENSSL_NO_DEPRECATED_x_y[_z], but the ordinals files using | |
71 | # DEPRECATEDIN_x_y[_z]. | |
72 | { regexp => qr/#if(def|ndef) OPENSSL_NO_DEPRECATED_(\d+_\d+(?:_\d+)?)$/, | |
d73c4440 | 73 | massager => sub { |
d73c4440 | 74 | return (<<"EOF"); |
a6a4d0ac | 75 | #if$1 OPENSSL_NO_DEPRECATEDIN_$2 |
d73c4440 RL |
76 | EOF |
77 | } | |
053730c5 | 78 | } |
d73c4440 RL |
79 | ); |
80 | my @cpphandlers = ( | |
81 | ################################################################## | |
82 | # CPP stuff | |
83 | ||
84 | { regexp => qr/#ifdef ?(.*)/, | |
85 | massager => sub { | |
86 | my %opts; | |
87 | if (ref($_[$#_]) eq "HASH") { | |
88 | %opts = %{$_[$#_]}; | |
89 | pop @_; | |
90 | } | |
91 | push @preprocessor_conds, [ $1 ]; | |
92 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
93 | if $opts{debug}; | |
94 | return (); | |
95 | }, | |
96 | }, | |
97 | { regexp => qr/#ifndef ?(.*)/, | |
98 | massager => sub { | |
99 | my %opts; | |
100 | if (ref($_[$#_]) eq "HASH") { | |
101 | %opts = %{$_[$#_]}; | |
102 | pop @_; | |
103 | } | |
104 | push @preprocessor_conds, [ '!'.$1 ]; | |
105 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
106 | if $opts{debug}; | |
107 | return (); | |
108 | }, | |
109 | }, | |
110 | { regexp => qr/#if (0|1)/, | |
111 | massager => sub { | |
112 | my %opts; | |
113 | if (ref($_[$#_]) eq "HASH") { | |
114 | %opts = %{$_[$#_]}; | |
115 | pop @_; | |
116 | } | |
117 | if ($1 eq "1") { | |
118 | push @preprocessor_conds, [ "TRUE" ]; | |
119 | } else { | |
120 | push @preprocessor_conds, [ "!TRUE" ]; | |
121 | } | |
122 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
123 | if $opts{debug}; | |
124 | return (); | |
125 | }, | |
126 | }, | |
127 | { regexp => qr/#if ?(.*)/, | |
128 | massager => sub { | |
129 | my %opts; | |
130 | if (ref($_[$#_]) eq "HASH") { | |
131 | %opts = %{$_[$#_]}; | |
132 | pop @_; | |
133 | } | |
134 | my @results = (); | |
135 | my $conds = $1; | |
136 | if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) { | |
137 | push @results, $1; # Handle the simple case | |
138 | my $rest = $2; | |
139 | my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/; | |
140 | print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" | |
141 | if $opts{debug}; | |
142 | if ($rest =~ m/$re/) { | |
143 | my @rest = split /\|\|/, $rest; | |
144 | shift @rest; | |
145 | foreach (@rest) { | |
146 | m|^defined<<<\(([^\)]*)\)>>>$|; | |
147 | die "Something wrong...$opts{PLACE}" if $1 eq ""; | |
148 | push @results, $1; | |
149 | } | |
150 | } else { | |
151 | $conds =~ s/<<<|>>>//g; | |
152 | warn "Warning: complicated #if expression(1): $conds$opts{PLACE}" | |
153 | if $opts{warnings}; | |
154 | } | |
155 | } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) { | |
156 | push @results, '!'.$1; # Handle the simple case | |
157 | my $rest = $2; | |
158 | my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/; | |
159 | print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" | |
160 | if $opts{debug}; | |
161 | if ($rest =~ m/$re/) { | |
162 | my @rest = split /\&\&/, $rest; | |
163 | shift @rest; | |
164 | foreach (@rest) { | |
165 | m|^!defined<<<\(([^\)]*)\)>>>$|; | |
166 | die "Something wrong...$opts{PLACE}" if $1 eq ""; | |
167 | push @results, '!'.$1; | |
168 | } | |
169 | } else { | |
170 | $conds =~ s/<<<|>>>//g; | |
171 | warn "Warning: complicated #if expression(2): $conds$opts{PLACE}" | |
172 | if $opts{warnings}; | |
173 | } | |
174 | } else { | |
175 | $conds =~ s/<<<|>>>//g; | |
176 | warn "Warning: complicated #if expression(3): $conds$opts{PLACE}" | |
177 | if $opts{warnings}; | |
178 | } | |
179 | print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n" | |
180 | if $opts{debug}; | |
181 | push @preprocessor_conds, [ @results ]; | |
182 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
183 | if $opts{debug}; | |
184 | return (); | |
185 | }, | |
186 | }, | |
187 | { regexp => qr/#elif (.*)/, | |
188 | massager => sub { | |
189 | my %opts; | |
190 | if (ref($_[$#_]) eq "HASH") { | |
191 | %opts = %{$_[$#_]}; | |
192 | pop @_; | |
193 | } | |
194 | die "An #elif without corresponding condition$opts{PLACE}" | |
195 | if !@preprocessor_conds; | |
196 | pop @preprocessor_conds; | |
197 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
198 | if $opts{debug}; | |
199 | return (<<"EOF"); | |
200 | #if $1 | |
201 | EOF | |
202 | }, | |
203 | }, | |
204 | { regexp => qr/#else/, | |
205 | massager => sub { | |
206 | my %opts; | |
207 | if (ref($_[$#_]) eq "HASH") { | |
208 | %opts = %{$_[$#_]}; | |
209 | pop @_; | |
210 | } | |
211 | die "An #else without corresponding condition$opts{PLACE}" | |
212 | if !@preprocessor_conds; | |
213 | # Invert all conditions on the last level | |
214 | my $stuff = pop @preprocessor_conds; | |
215 | push @preprocessor_conds, [ | |
216 | map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff | |
217 | ]; | |
218 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
219 | if $opts{debug}; | |
220 | return (); | |
221 | }, | |
222 | }, | |
223 | { regexp => qr/#endif ?/, | |
224 | massager => sub { | |
225 | my %opts; | |
226 | if (ref($_[$#_]) eq "HASH") { | |
227 | %opts = %{$_[$#_]}; | |
228 | pop @_; | |
229 | } | |
230 | die "An #endif without corresponding condition$opts{PLACE}" | |
231 | if !@preprocessor_conds; | |
232 | pop @preprocessor_conds; | |
233 | print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" | |
234 | if $opts{debug}; | |
235 | return (); | |
236 | }, | |
237 | }, | |
238 | { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/, | |
239 | massager => sub { | |
240 | my $name = $1; | |
241 | my $params = $2; | |
242 | my $spaceval = $3||""; | |
243 | my $val = $4||""; | |
244 | return ("", | |
245 | $1, 'M', "", $params ? "$name$params$spaceval" : $val, | |
246 | all_conds()); } | |
247 | }, | |
248 | { regexp => qr/#.*/, | |
249 | massager => sub { return (); } | |
250 | }, | |
251 | ); | |
252 | ||
253 | my @opensslchandlers = ( | |
254 | ################################################################## | |
255 | # OpenSSL C specials | |
256 | # | |
257 | # They are really preprocessor stuff, but they look like C stuff | |
258 | # to this parser. All of these do replacements, anything else is | |
259 | # an error. | |
260 | ||
d73c4440 RL |
261 | ##### |
262 | # Deprecated stuff, by OpenSSL release. | |
263 | ||
053730c5 RL |
264 | # OSSL_DEPRECATEDIN_x_y[_z] is simply ignored. Such declarations are |
265 | # supposed to be guarded with an '#ifdef OPENSSL_NO_DEPRECATED_x_y[_z]' | |
266 | { regexp => qr/OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/, | |
267 | massager => sub { return $1; }, | |
268 | }, | |
269 | { regexp => qr/(.*?)\s+OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/, | |
270 | massager => sub { return "$1 $2"; }, | |
271 | }, | |
272 | ||
d406f0fe RL |
273 | ##### |
274 | # Core stuff | |
275 | ||
276 | # OSSL_CORE_MAKE_FUNC is a macro to create the necessary data and inline | |
277 | # function the libcrypto<->provider interface | |
278 | { regexp => qr/OSSL_CORE_MAKE_FUNC<<<\((.*?),(.*?),(.*?)\)>>>/, | |
279 | massager => sub { | |
280 | return (<<"EOF"); | |
281 | typedef $1 OSSL_FUNC_$2_fn$3; | |
282 | static ossl_inline OSSL_FUNC_$2_fn *OSSL_FUNC_$2(const OSSL_DISPATCH *opf); | |
283 | EOF | |
284 | }, | |
285 | }, | |
286 | ||
d73c4440 RL |
287 | ##### |
288 | # LHASH stuff | |
289 | ||
290 | # LHASH_OF(foo) is used as a type, but the chandlers won't take it | |
291 | # gracefully, so we expand it here. | |
292 | { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/, | |
293 | massager => sub { return ("$1struct lhash_st_$2$3"); } | |
294 | }, | |
5317b6ee | 295 | { regexp => qr/DEFINE_LHASH_OF(?:_INTERNAL|_EX)?<<<\((.*)\)>>>/, |
d73c4440 RL |
296 | massager => sub { |
297 | return (<<"EOF"); | |
298 | static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *), | |
299 | int (*cfn)(const $1 *, const $1 *)); | |
300 | static ossl_inline void lh_$1_free(LHASH_OF($1) *lh); | |
301 | static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d); | |
302 | static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d); | |
303 | static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d); | |
304 | static ossl_inline int lh_$1_error(LHASH_OF($1) *lh); | |
305 | static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh); | |
306 | static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out); | |
307 | static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh, | |
308 | BIO *out); | |
309 | static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out); | |
310 | static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh); | |
311 | static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl); | |
312 | static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *)); | |
313 | LHASH_OF($1) | |
314 | EOF | |
315 | } | |
316 | }, | |
317 | ||
318 | ##### | |
319 | # STACK stuff | |
320 | ||
321 | # STACK_OF(foo) is used as a type, but the chandlers won't take it | |
322 | # gracefully, so we expand it here. | |
323 | { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/, | |
324 | massager => sub { return ("$1struct stack_st_$2$3"); } | |
325 | }, | |
326 | # { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/, | |
327 | # massager => sub { | |
328 | # my $before = $1; | |
329 | # my $stack_of = "struct stack_st_$2"; | |
330 | # my $after = $3; | |
331 | # if ($after =~ m|^\w|) { $after = " ".$after; } | |
332 | # return ("$before$stack_of$after"); | |
333 | # } | |
334 | # }, | |
9fdcc21f | 335 | { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/, |
d73c4440 RL |
336 | massager => sub { |
337 | return (<<"EOF"); | |
338 | STACK_OF($1); | |
339 | typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b); | |
340 | typedef void (*sk_$1_freefunc)($3 *a); | |
341 | typedef $3 * (*sk_$1_copyfunc)(const $3 *a); | |
342 | static ossl_inline int sk_$1_num(const STACK_OF($1) *sk); | |
343 | static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx); | |
344 | static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare); | |
345 | static ossl_inline STACK_OF($1) *sk_$1_new_null(void); | |
346 | static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare, | |
347 | int n); | |
348 | static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n); | |
349 | static ossl_inline void sk_$1_free(STACK_OF($1) *sk); | |
350 | static ossl_inline void sk_$1_zero(STACK_OF($1) *sk); | |
351 | static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i); | |
352 | static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr); | |
353 | static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr); | |
354 | static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr); | |
355 | static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk); | |
356 | static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk); | |
357 | static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk, | |
358 | sk_$1_freefunc freefunc); | |
359 | static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx); | |
360 | static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr); | |
361 | static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr); | |
362 | static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr); | |
363 | static ossl_inline void sk_$1_sort(STACK_OF($1) *sk); | |
364 | static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk); | |
365 | static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk); | |
366 | static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk, | |
367 | sk_$1_copyfunc copyfunc, | |
368 | sk_$1_freefunc freefunc); | |
369 | static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk, | |
370 | sk_$1_compfunc compare); | |
262cda1c MC |
371 | EOF |
372 | } | |
373 | }, | |
374 | { regexp => qr/SKM_DEFINE_STACK_OF_INTERNAL<<<\((.*),\s*(.*),\s*(.*)\)>>>/, | |
375 | massager => sub { | |
376 | return (<<"EOF"); | |
377 | STACK_OF($1); | |
378 | typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b); | |
379 | typedef void (*sk_$1_freefunc)($3 *a); | |
380 | typedef $3 * (*sk_$1_copyfunc)(const $3 *a); | |
381 | static ossl_unused ossl_inline $2 *ossl_check_$1_type($2 *ptr); | |
382 | static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_$1_sk_type(const STACK_OF($1) *sk); | |
383 | static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_$1_compfunc_type(sk_$1_compfunc cmp); | |
384 | static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_$1_copyfunc_type(sk_$1_copyfunc cpy); | |
385 | static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_$1_freefunc_type(sk_$1_freefunc fr); | |
d73c4440 RL |
386 | EOF |
387 | } | |
388 | }, | |
9fdcc21f | 389 | { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
390 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); }, |
391 | }, | |
392 | { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/, | |
393 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); }, | |
394 | }, | |
9fdcc21f | 395 | { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
396 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); }, |
397 | }, | |
398 | { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/, | |
399 | massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); }, | |
400 | }, | |
d73c4440 RL |
401 | |
402 | ##### | |
403 | # ASN1 stuff | |
d73c4440 RL |
404 | { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/, |
405 | massager => sub { | |
406 | return (<<"EOF"); | |
d73c4440 | 407 | const ASN1_ITEM *$1_it(void); |
d73c4440 RL |
408 | EOF |
409 | }, | |
410 | }, | |
9fdcc21f DO |
411 | { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/, |
412 | massager => sub { | |
413 | return (<<"EOF"); | |
414 | int d2i_$2(void); | |
415 | int i2d_$2(void); | |
416 | EOF | |
417 | }, | |
418 | }, | |
419 | { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/, | |
d73c4440 RL |
420 | massager => sub { |
421 | return (<<"EOF"); | |
422 | int d2i_$3(void); | |
423 | int i2d_$3(void); | |
424 | DECLARE_ASN1_ITEM($2) | |
425 | EOF | |
426 | }, | |
427 | }, | |
9fdcc21f | 428 | { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
429 | massager => sub { |
430 | return (<<"EOF"); | |
431 | int d2i_$2(void); | |
432 | int i2d_$2(void); | |
433 | DECLARE_ASN1_ITEM($2) | |
9fdcc21f DO |
434 | EOF |
435 | }, | |
436 | }, | |
437 | { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, | |
438 | massager => sub { | |
439 | return (<<"EOF"); | |
440 | int $2_free(void); | |
441 | int $2_new(void); | |
d73c4440 RL |
442 | EOF |
443 | }, | |
444 | }, | |
445 | { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/, | |
446 | massager => sub { | |
447 | return (<<"EOF"); | |
448 | int $1_free(void); | |
449 | int $1_new(void); | |
450 | EOF | |
451 | }, | |
452 | }, | |
9fdcc21f | 453 | { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
454 | massager => sub { |
455 | return (<<"EOF"); | |
456 | int d2i_$2(void); | |
457 | int i2d_$2(void); | |
458 | int $2_free(void); | |
459 | int $2_new(void); | |
460 | DECLARE_ASN1_ITEM($2) | |
461 | EOF | |
462 | }, | |
463 | }, | |
9fdcc21f | 464 | { regexp => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/, |
d73c4440 RL |
465 | massager => sub { return (<<"EOF"); |
466 | int d2i_$1(void); | |
467 | int i2d_$1(void); | |
468 | int $1_free(void); | |
469 | int $1_new(void); | |
470 | DECLARE_ASN1_ITEM($1) | |
471 | EOF | |
472 | } | |
473 | }, | |
474 | { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/, | |
475 | massager => sub { | |
476 | return (<<"EOF"); | |
477 | int i2d_$1_NDEF(void); | |
478 | EOF | |
479 | } | |
480 | }, | |
481 | { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/, | |
482 | massager => sub { | |
483 | return (<<"EOF"); | |
484 | int $1_print_ctx(void); | |
485 | EOF | |
486 | } | |
487 | }, | |
9fdcc21f | 488 | { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/, |
d73c4440 RL |
489 | massager => sub { |
490 | return (<<"EOF"); | |
491 | int $2_print_ctx(void); | |
492 | EOF | |
493 | } | |
494 | }, | |
495 | { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/, | |
496 | massager => sub { return (); } | |
497 | }, | |
9fdcc21f DO |
498 | { regexp => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/, |
499 | massager => sub { | |
500 | return (<<"EOF"); | |
501 | int $1_dup(void); | |
502 | EOF | |
503 | } | |
504 | }, | |
505 | { regexp => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/, | |
506 | massager => sub { | |
507 | return (<<"EOF"); | |
508 | int $2_dup(void); | |
509 | EOF | |
510 | } | |
511 | }, | |
895419b7 RL |
512 | # Universal translator of attributed PEM declarators |
513 | { regexp => qr/ | |
514 | DECLARE_ASN1 | |
515 | (_ENCODE_FUNCTIONS_only|_ENCODE_FUNCTIONS|_ENCODE_FUNCTIONS_name | |
516 | |_ALLOC_FUNCTIONS_name|_ALLOC_FUNCTIONS|_FUNCTIONS_name|_FUNCTIONS | |
517 | |_NDEF_FUNCTION|_PRINT_FUNCTION|_PRINT_FUNCTION_name | |
518 | |_DUP_FUNCTION|_DUP_FUNCTION_name) | |
519 | _attr | |
520 | <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>> | |
521 | /x, | |
522 | massager => sub { return (<<"EOF"); | |
523 | DECLARE_ASN1$1($3) | |
524 | EOF | |
525 | }, | |
526 | }, | |
d73c4440 RL |
527 | { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/, |
528 | massager => sub { return (); } | |
529 | }, | |
3ad9c478 RL |
530 | |
531 | ##### | |
532 | # PEM stuff | |
d73c4440 RL |
533 | { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/, |
534 | massager => sub { return (<<"EOF"); | |
535 | #ifndef OPENSSL_NO_STDIO | |
536 | int PEM_read_$1(void); | |
537 | int PEM_write_$1(void); | |
538 | #endif | |
539 | int PEM_read_bio_$1(void); | |
540 | int PEM_write_bio_$1(void); | |
9256e8a2 RL |
541 | EOF |
542 | }, | |
543 | }, | |
544 | { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)_ex<<<\((.*?),.*\)>>>/, | |
545 | massager => sub { return (<<"EOF"); | |
546 | #ifndef OPENSSL_NO_STDIO | |
547 | int PEM_read_$1(void); | |
548 | int PEM_write_$1(void); | |
549 | int PEM_read_$1_ex(void); | |
550 | int PEM_write_$1_ex(void); | |
551 | #endif | |
552 | int PEM_read_bio_$1(void); | |
553 | int PEM_write_bio_$1(void); | |
554 | int PEM_read_bio_$1_ex(void); | |
555 | int PEM_write_bio_$1_ex(void); | |
d73c4440 RL |
556 | EOF |
557 | }, | |
558 | }, | |
d73c4440 RL |
559 | { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/, |
560 | massager => sub { return (<<"EOF"); | |
561 | #ifndef OPENSSL_NO_STDIO | |
562 | int PEM_write_$1(void); | |
563 | #endif | |
564 | int PEM_write_bio_$1(void); | |
9256e8a2 RL |
565 | EOF |
566 | }, | |
567 | }, | |
568 | { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)_ex<<<\((.*?),.*\)>>>/, | |
569 | massager => sub { return (<<"EOF"); | |
570 | #ifndef OPENSSL_NO_STDIO | |
571 | int PEM_write_$1(void); | |
572 | int PEM_write_$1_ex(void); | |
573 | #endif | |
574 | int PEM_write_bio_$1(void); | |
575 | int PEM_write_bio_$1_ex(void); | |
d73c4440 RL |
576 | EOF |
577 | }, | |
578 | }, | |
579 | { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/, | |
580 | massager => sub { return (<<"EOF"); | |
581 | #ifndef OPENSSL_NO_STDIO | |
582 | int PEM_read_$1(void); | |
583 | #endif | |
584 | int PEM_read_bio_$1(void); | |
9256e8a2 RL |
585 | EOF |
586 | }, | |
587 | }, | |
588 | { regexp => qr/DECLARE_PEM(?|_read|_read_cb)_ex<<<\((.*?),.*\)>>>/, | |
589 | massager => sub { return (<<"EOF"); | |
590 | #ifndef OPENSSL_NO_STDIO | |
591 | int PEM_read_$1(void); | |
592 | int PEM_read_$1_ex(void); | |
593 | #endif | |
594 | int PEM_read_bio_$1(void); | |
595 | int PEM_read_bio_$1_ex(void); | |
3ad9c478 RL |
596 | EOF |
597 | }, | |
598 | }, | |
599 | # Universal translator of attributed PEM declarators | |
600 | { regexp => qr/ | |
601 | DECLARE_PEM | |
9256e8a2 RL |
602 | ((?:_rw|_rw_cb|_rw_const|_write|_write_cb|_write_const|_read|_read_cb) |
603 | (?:_ex)?) | |
3ad9c478 RL |
604 | _attr |
605 | <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>> | |
606 | /x, | |
607 | massager => sub { return (<<"EOF"); | |
608 | DECLARE_PEM$1($3) | |
d73c4440 RL |
609 | EOF |
610 | }, | |
611 | }, | |
612 | ||
7a2ad00f RL |
613 | # OpenSSL's declaration of externs with possible export linkage |
614 | # (really only relevant on Windows) | |
615 | { regexp => qr/OPENSSL_(?:EXPORT|EXTERN)/, | |
616 | massager => sub { return ("extern"); } | |
617 | }, | |
618 | ||
d73c4440 RL |
619 | # Spurious stuff found in the OpenSSL headers |
620 | # Usually, these are just macros that expand to, well, something | |
621 | { regexp => qr/__NDK_FPABI__/, | |
622 | massager => sub { return (); } | |
623 | }, | |
624 | ); | |
625 | ||
626 | my $anoncnt = 0; | |
627 | ||
628 | my @chandlers = ( | |
629 | ################################################################## | |
630 | # C stuff | |
631 | ||
632 | # extern "C" of individual items | |
633 | # Note that the main parse function has a special hack for 'extern "C" {' | |
634 | # which can't be done in handlers | |
635 | # We simply ignore it. | |
dc5ce519 | 636 | { regexp => qr/^extern "C" (.*(?:;|>>>))/, |
d73c4440 RL |
637 | massager => sub { return ($1); }, |
638 | }, | |
4c2883a9 RL |
639 | # any other extern is just ignored |
640 | { regexp => qr/^\s* # Any spaces before | |
641 | extern # The keyword we look for | |
642 | \b # word to non-word boundary | |
643 | .* # Anything after | |
644 | ; | |
645 | /x, | |
646 | massager => sub { return (); }, | |
647 | }, | |
d73c4440 RL |
648 | # union, struct and enum definitions |
649 | # Because this one might appear a little everywhere within type | |
650 | # definitions, we take it out and replace it with just | |
651 | # 'union|struct|enum name' while registering it. | |
652 | # This makes use of the parser trick to surround the outer braces | |
653 | # with <<< and >>> | |
654 | { regexp => qr/(.*) # Anything before ($1) | |
655 | \b # word to non-word boundary | |
656 | (union|struct|enum) # The word used ($2) | |
657 | (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3) | |
658 | <<<(\{.*?\})>>> # Struct or enum definition ($4) | |
659 | (.*) # Anything after ($5) | |
660 | ; | |
661 | /x, | |
662 | massager => sub { | |
663 | my $before = $1; | |
664 | my $word = $2; | |
665 | my $name = $3 | |
666 | || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct | |
667 | my $definition = $4; | |
668 | my $after = $5; | |
669 | my $type = $word eq "struct" ? 'S' : 'E'; | |
670 | if ($before ne "" || $after ne ";") { | |
671 | if ($after =~ m|^\w|) { $after = " ".$after; } | |
672 | return ("$before$word $name$after;", | |
673 | "$word $name", $type, "", "$word$definition", all_conds()); | |
674 | } | |
675 | # If there was no before nor after, make the return much simple | |
676 | return ("", "$word $name", $type, "", "$word$definition", all_conds()); | |
677 | } | |
678 | }, | |
679 | # Named struct and enum forward declarations | |
680 | # We really just ignore them, but we need to parse them or the variable | |
681 | # declaration handler further down will think it's a variable declaration. | |
682 | { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/, | |
683 | massager => sub { return (); } | |
684 | }, | |
685 | # Function returning function pointer declaration | |
fc661b50 | 686 | # This sort of declaration may have a body (inline functions, for example) |
d73c4440 RL |
687 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) |
688 | ((?:\w|\*|\s)*?) # Return type ($2) | |
689 | \s? # Possible space | |
690 | <<<\(\* | |
691 | ([[:alpha:]_]\w*) # Function name ($3) | |
692 | (\(.*\)) # Parameters ($4) | |
693 | \)>>> | |
694 | <<<(\(.*\))>>> # F.p. parameters ($5) | |
fc661b50 | 695 | (?:<<<\{.*\}>>>|;) # Body or semicolon |
d73c4440 RL |
696 | /x, |
697 | massager => sub { | |
fc661b50 | 698 | return ("", $3, 'T', "", "$2(*$4)$5", all_conds()) |
d73c4440 RL |
699 | if defined $1; |
700 | return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); } | |
701 | }, | |
702 | # Function pointer declaration, or typedef thereof | |
fc661b50 | 703 | # This sort of declaration never has a function body |
d73c4440 RL |
704 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) |
705 | ((?:\w|\*|\s)*?) # Return type ($2) | |
706 | <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3) | |
707 | <<<(\(.*\))>>> # F.p. parameters ($4) | |
708 | ; | |
709 | /x, | |
710 | massager => sub { | |
711 | return ("", $3, 'T', "", "$2(*)$4", all_conds()) | |
712 | if defined $1; | |
713 | return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds()); | |
714 | }, | |
715 | }, | |
716 | # Function declaration, or typedef thereof | |
fc661b50 | 717 | # This sort of declaration may have a body (inline functions, for example) |
d73c4440 RL |
718 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) |
719 | ((?:\w|\*|\s)*?) # Return type ($2) | |
720 | \s? # Possible space | |
721 | ([[:alpha:]_]\w*) # Function name ($3) | |
722 | <<<(\(.*\))>>> # Parameters ($4) | |
fc661b50 | 723 | (?:<<<\{.*\}>>>|;) # Body or semicolon |
d73c4440 RL |
724 | /x, |
725 | massager => sub { | |
726 | return ("", $3, 'T', "", "$2$4", all_conds()) | |
727 | if defined $1; | |
728 | return ("", $3, 'F', $2, "$2$4", all_conds()); | |
729 | }, | |
730 | }, | |
731 | # Variable declaration, including arrays, or typedef thereof | |
732 | { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) | |
733 | ((?:\w|\*|\s)*?) # Type ($2) | |
734 | \s? # Possible space | |
735 | ([[:alpha:]_]\w*) # Variable name ($3) | |
736 | ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4) | |
737 | ; | |
738 | /x, | |
739 | massager => sub { | |
740 | return ("", $3, 'T', "", $2.($4||""), all_conds()) | |
741 | if defined $1; | |
742 | return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds()); | |
743 | }, | |
744 | }, | |
745 | ); | |
746 | ||
747 | # End handlers are almost the same as handlers, except they are run through | |
748 | # ONCE when the input has been parsed through. These are used to check for | |
749 | # remaining stuff, such as an unfinished #ifdef and stuff like that that the | |
750 | # main parser can't check on its own. | |
751 | my @endhandlers = ( | |
752 | { massager => sub { | |
753 | my %opts = %{$_[0]}; | |
754 | ||
755 | die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE} | |
756 | if @preprocessor_conds; | |
757 | } | |
758 | } | |
759 | ); | |
760 | ||
761 | # takes a list of strings that can each contain one or several lines of code | |
762 | # also takes a hash of options as last argument. | |
763 | # | |
764 | # returns a list of hashes with information: | |
765 | # | |
766 | # name name of the thing | |
767 | # type type, see the massage handler function | |
768 | # returntype return type of functions and variables | |
769 | # value value for macros, signature for functions, variables | |
770 | # and structs | |
771 | # conds preprocessor conditions (array ref) | |
772 | ||
773 | sub parse { | |
774 | my %opts; | |
775 | if (ref($_[$#_]) eq "HASH") { | |
776 | %opts = %{$_[$#_]}; | |
777 | pop @_; | |
778 | } | |
779 | my %state = ( | |
780 | in_extern_C => 0, # An exception to parenthesis processing. | |
781 | cpp_parens => [], # A list of ending parens and braces found in | |
782 | # preprocessor directives | |
783 | c_parens => [], # A list of ending parens and braces found in | |
784 | # C statements | |
785 | in_string => "", # empty string when outside a string, otherwise | |
786 | # "'" or '"' depending on the starting quote. | |
787 | in_comment => "", # empty string when outside a comment, otherwise | |
788 | # "/*" or "//" depending on the type of comment | |
789 | # found. The latter will never be multiline | |
790 | # NOTE: in_string and in_comment will never be | |
791 | # true (in perl semantics) at the same time. | |
792 | current_line => 0, | |
793 | ); | |
794 | my @result = (); | |
795 | my $normalized_line = ""; # $input_line, but normalized. In essence, this | |
796 | # means that ALL whitespace is removed unless | |
797 | # it absolutely has to be present, and in that | |
798 | # case, there's only one space. | |
799 | # The cases where a space needs to stay present | |
800 | # are: | |
801 | # 1. between words | |
802 | # 2. between words and number | |
803 | # 3. after the first word of a preprocessor | |
804 | # directive. | |
805 | # 4. for the #define directive, between the macro | |
806 | # name/args and its value, so we end up with: | |
807 | # #define FOO val | |
808 | # #define BAR(x) something(x) | |
809 | my $collected_stmt = ""; # Where we're building up a C line until it's a | |
810 | # complete definition/declaration, as determined | |
811 | # by any handler being capable of matching it. | |
812 | ||
813 | # We use $_ shamelessly when looking through @lines. | |
814 | # In case we find a \ at the end, we keep filling it up with more lines. | |
815 | $_ = undef; | |
816 | ||
817 | foreach my $line (@_) { | |
818 | # split tries to be smart when a string ends with the thing we split on | |
819 | $line .= "\n" unless $line =~ m|\R$|; | |
820 | $line .= "#"; | |
821 | ||
822 | # We use ¦undef¦ as a marker for a new line from the file. | |
823 | # Since we convert one line to several and unshift that into @lines, | |
824 | # that's the only safe way we have to track the original lines | |
825 | my @lines = map { ( undef, $_ ) } split $/, $line; | |
826 | ||
827 | # Remember that extra # we added above? Now we remove it | |
828 | pop @lines; | |
829 | pop @lines; # Don't forget the undef | |
830 | ||
831 | while (@lines) { | |
832 | if (!defined($lines[0])) { | |
833 | shift @lines; | |
834 | $state{current_line}++; | |
835 | if (!defined($_)) { | |
836 | $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n"; | |
837 | $opts{PLACE2} = $opts{filename}.":".$state{current_line}; | |
838 | } | |
839 | next; | |
840 | } | |
841 | ||
842 | $_ = "" unless defined $_; | |
843 | $_ .= shift @lines; | |
844 | ||
845 | if (m|\\$|) { | |
846 | $_ = $`; | |
847 | next; | |
848 | } | |
849 | ||
850 | if ($opts{debug}) { | |
851 | print STDERR "DEBUG:----------------------------\n"; | |
852 | print STDERR "DEBUG: \$_ = '$_'\n"; | |
853 | } | |
854 | ||
855 | ########################################################## | |
856 | # Now that we have a full line, let's process through it | |
857 | while(1) { | |
858 | unless ($state{in_comment}) { | |
859 | # Begin with checking if the current $normalized_line | |
860 | # contains a preprocessor directive | |
861 | # This is only done if we're not inside a comment and | |
862 | # if it's a preprocessor directive and it's finished. | |
863 | if ($normalized_line =~ m|^#| && $_ eq "") { | |
864 | print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n" | |
865 | if $opts{debug}; | |
866 | $opts{debug_type} = "OPENSSL CPP"; | |
867 | my @r = ( _run_handlers($normalized_line, | |
868 | @opensslcpphandlers, | |
869 | \%opts) ); | |
870 | if (shift @r) { | |
871 | # Checking if there are lines to inject. | |
872 | if (@r) { | |
873 | @r = split $/, (pop @r).$_; | |
874 | print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n" | |
875 | if $opts{debug} && @r; | |
876 | @lines = ( @r, @lines ); | |
877 | ||
878 | $_ = ""; | |
879 | } | |
880 | } else { | |
881 | print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n" | |
882 | if $opts{debug}; | |
883 | $opts{debug_type} = "CPP"; | |
884 | my @r = ( _run_handlers($normalized_line, | |
885 | @cpphandlers, | |
886 | \%opts) ); | |
887 | if (shift @r) { | |
888 | if (ref($r[0]) eq "HASH") { | |
889 | push @result, shift @r; | |
890 | } | |
891 | ||
892 | # Now, check if there are lines to inject. | |
893 | # Really, this should never happen, it IS a | |
894 | # preprocessor directive after all... | |
895 | if (@r) { | |
896 | @r = split $/, pop @r; | |
897 | print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n" | |
898 | if $opts{debug} && @r; | |
899 | @lines = ( @r, @lines ); | |
900 | $_ = ""; | |
901 | } | |
902 | } | |
903 | } | |
904 | ||
905 | # Note: we simply ignore all directives that no | |
906 | # handler matches | |
907 | $normalized_line = ""; | |
908 | } | |
909 | ||
910 | # If the two strings end and start with a character that | |
911 | # shouldn't get concatenated, add a space | |
912 | my $space = | |
913 | ($collected_stmt =~ m/(?:"|')$/ | |
914 | || ($collected_stmt =~ m/(?:\w|\d)$/ | |
915 | && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : ""; | |
916 | ||
917 | # Now, unless we're building up a preprocessor directive or | |
918 | # are in the middle of a string, or the parens et al aren't | |
919 | # balanced up yet, let's try and see if there's a OpenSSL | |
920 | # or C handler that can make sense of what we have so far. | |
921 | if ( $normalized_line !~ m|^#| | |
922 | && ($collected_stmt ne "" || $normalized_line ne "") | |
923 | && ! @{$state{c_parens}} | |
924 | && ! $state{in_string} ) { | |
925 | if ($opts{debug}) { | |
926 | print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n"; | |
927 | print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n"; | |
928 | } | |
929 | $opts{debug_type} = "OPENSSL C"; | |
930 | my @r = ( _run_handlers($collected_stmt | |
931 | .$space | |
932 | .$normalized_line, | |
933 | @opensslchandlers, | |
934 | \%opts) ); | |
935 | if (shift @r) { | |
936 | # Checking if there are lines to inject. | |
937 | if (@r) { | |
938 | @r = split $/, (pop @r).$_; | |
939 | print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n" | |
940 | if $opts{debug} && @r; | |
941 | @lines = ( @r, @lines ); | |
942 | ||
943 | $_ = ""; | |
944 | } | |
945 | $normalized_line = ""; | |
946 | $collected_stmt = ""; | |
947 | } else { | |
948 | if ($opts{debug}) { | |
949 | print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n"; | |
950 | print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n"; | |
951 | } | |
952 | $opts{debug_type} = "C"; | |
953 | my @r = ( _run_handlers($collected_stmt | |
954 | .$space | |
955 | .$normalized_line, | |
956 | @chandlers, | |
957 | \%opts) ); | |
958 | if (shift @r) { | |
959 | if (ref($r[0]) eq "HASH") { | |
960 | push @result, shift @r; | |
961 | } | |
962 | ||
963 | # Checking if there are lines to inject. | |
964 | if (@r) { | |
965 | @r = split $/, (pop @r).$_; | |
966 | print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n" | |
967 | if $opts{debug} && @r; | |
968 | @lines = ( @r, @lines ); | |
969 | ||
970 | $_ = ""; | |
971 | } | |
972 | $normalized_line = ""; | |
973 | $collected_stmt = ""; | |
974 | } | |
975 | } | |
976 | } | |
977 | if ($_ eq "") { | |
978 | $collected_stmt .= $space.$normalized_line; | |
979 | $normalized_line = ""; | |
980 | } | |
981 | } | |
982 | ||
983 | if ($_ eq "") { | |
984 | $_ = undef; | |
985 | last; | |
986 | } | |
987 | ||
988 | # Take care of inside string first. | |
989 | if ($state{in_string}) { | |
990 | if (m/ (?:^|(?<!\\)) # Make sure it's not escaped | |
991 | $state{in_string} # Look for matching quote | |
992 | /x) { | |
993 | $normalized_line .= $`.$&; | |
994 | $state{in_string} = ""; | |
995 | $_ = $'; | |
996 | next; | |
997 | } else { | |
998 | die "Unfinished string without continuation found$opts{PLACE}\n"; | |
999 | } | |
1000 | } | |
1001 | # ... or inside comments, whichever happens to apply | |
1002 | elsif ($state{in_comment}) { | |
1003 | ||
1004 | # This should never happen | |
1005 | die "Something went seriously wrong, multiline //???$opts{PLACE}\n" | |
1006 | if ($state{in_comment} eq "//"); | |
1007 | ||
1008 | # A note: comments are simply discarded. | |
1009 | ||
1010 | if (m/ (?:^|(?<!\\)) # Make sure it's not escaped | |
1011 | \*\/ # Look for C comment end | |
1012 | /x) { | |
1013 | $state{in_comment} = ""; | |
1014 | $_ = $'; | |
1015 | print STDERR "DEBUG: Found end of comment, followed by '$_'\n" | |
1016 | if $opts{debug}; | |
1017 | next; | |
1018 | } else { | |
1019 | $_ = ""; | |
1020 | next; | |
1021 | } | |
1022 | } | |
1023 | ||
1024 | # At this point, it's safe to remove leading whites, but | |
1025 | # we need to be careful with some preprocessor lines | |
1026 | if (m|^\s+|) { | |
1027 | my $rest = $'; | |
1028 | my $space = ""; | |
1029 | $space = " " | |
1030 | if ($normalized_line =~ m/^ | |
1031 | \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)? | |
1032 | | \#[a-z]+ | |
1033 | $/x); | |
1034 | print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n" | |
1035 | if $opts{debug}; | |
1036 | $_ = $space.$rest; | |
1037 | } | |
1038 | ||
1039 | my $parens = | |
1040 | $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens'; | |
1041 | (my $paren_singular = $parens) =~ s|s$||; | |
1042 | ||
1043 | # Now check for specific tokens, and if they are parens, | |
1044 | # check them against $state{$parens}. Note that we surround | |
1045 | # the outermost parens with extra "<<<" and ">>>". Those | |
1046 | # are for the benefit of handlers who to need to detect | |
1047 | # them, and they will be removed from the final output. | |
1048 | if (m|^[\{\[\(]|) { | |
1049 | my $body = $&; | |
1050 | $_ = $'; | |
1051 | if (!@{$state{$parens}}) { | |
1052 | if ("$normalized_line$body" =~ m|^extern "C"\{$|) { | |
1053 | $state{in_extern_C} = 1; | |
1054 | print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n" | |
1055 | if $opts{debug}; | |
1056 | $normalized_line = ""; | |
1057 | } else { | |
1058 | $normalized_line .= "<<<".$body; | |
1059 | } | |
1060 | } else { | |
1061 | $normalized_line .= $body; | |
1062 | } | |
1063 | ||
1064 | if ($normalized_line ne "") { | |
1065 | print STDERR "DEBUG: found $paren_singular start '$body'\n" | |
1066 | if $opts{debug}; | |
1067 | $body =~ tr|\{\[\(|\}\]\)|; | |
1068 | print STDERR "DEBUG: pushing $paren_singular end '$body'\n" | |
1069 | if $opts{debug}; | |
1070 | push @{$state{$parens}}, $body; | |
1071 | } | |
1072 | } elsif (m|^[\}\]\)]|) { | |
1073 | $_ = $'; | |
1074 | ||
1075 | if (!@{$state{$parens}} | |
1076 | && $& eq '}' && $state{in_extern_C}) { | |
1077 | print STDERR "DEBUG: found end of 'extern \"C\"'\n" | |
1078 | if $opts{debug}; | |
1079 | $state{in_extern_C} = 0; | |
1080 | } else { | |
1081 | print STDERR "DEBUG: Trying to match '$&' against '" | |
1082 | ,join("', '", @{$state{$parens}}) | |
1083 | ,"'\n" | |
1084 | if $opts{debug}; | |
1085 | die "Unmatched parentheses$opts{PLACE}\n" | |
1086 | unless (@{$state{$parens}} | |
1087 | && pop @{$state{$parens}} eq $&); | |
1088 | if (!@{$state{$parens}}) { | |
1089 | $normalized_line .= $&.">>>"; | |
1090 | } else { | |
1091 | $normalized_line .= $&; | |
1092 | } | |
1093 | } | |
1094 | } elsif (m|^["']|) { # string start | |
1095 | my $body = $&; | |
1096 | $_ = $'; | |
1097 | ||
1098 | # We want to separate strings from \w and \d with one space. | |
1099 | $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/; | |
1100 | $normalized_line .= $body; | |
1101 | $state{in_string} = $body; | |
1102 | } elsif (m|^\/\*|) { # C style comment | |
1103 | print STDERR "DEBUG: found start of C style comment\n" | |
1104 | if $opts{debug}; | |
1105 | $state{in_comment} = $&; | |
1106 | $_ = $'; | |
1107 | } elsif (m|^\/\/|) { # C++ style comment | |
1108 | print STDERR "DEBUG: found C++ style comment\n" | |
1109 | if $opts{debug}; | |
1110 | $_ = ""; # (just discard it entirely) | |
1111 | } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ ) | |
1112 | (?i: U | L | UL | LL | ULL )? | |
1113 | | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)? | |
1114 | ) /x) { | |
1115 | print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n" | |
1116 | if $opts{debug}; | |
1117 | $normalized_line .= $&; | |
1118 | $_ = $'; | |
1119 | } elsif (m/^[[:alpha:]_]\w*/) { | |
1120 | my $body = $&; | |
1121 | my $rest = $'; | |
1122 | my $space = ""; | |
1123 | ||
1124 | # Now, only add a space if it's needed to separate | |
1125 | # two \w characters, and we also surround strings with | |
1126 | # a space. In this case, that's if $normalized_line ends | |
1127 | # with a \w, \d, " or '. | |
1128 | $space = " " | |
1129 | if ($normalized_line =~ m/("|')$/ | |
1130 | || ($normalized_line =~ m/(\w|\d)$/ | |
1131 | && $body =~ m/^(\w|\d)/)); | |
1132 | ||
1133 | print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n" | |
1134 | if $opts{debug}; | |
1135 | $normalized_line .= $space.$body; | |
1136 | $_ = $rest; | |
1137 | } elsif (m|^(?:\\)?.|) { # Catch-all | |
1138 | $normalized_line .= $&; | |
1139 | $_ = $'; | |
1140 | } | |
1141 | } | |
1142 | } | |
1143 | } | |
1144 | foreach my $handler (@endhandlers) { | |
1145 | if ($handler->{massager}) { | |
1146 | $handler->{massager}->(\%opts); | |
1147 | } | |
1148 | } | |
1149 | return @result; | |
1150 | } | |
1151 | ||
1152 | # arg1: line to check | |
1153 | # arg2...: handlers to check | |
1154 | # return undef when no handler matched | |
1155 | sub _run_handlers { | |
1156 | my %opts; | |
1157 | if (ref($_[$#_]) eq "HASH") { | |
1158 | %opts = %{$_[$#_]}; | |
1159 | pop @_; | |
1160 | } | |
1161 | my $line = shift; | |
1162 | my @handlers = @_; | |
1163 | ||
1164 | foreach my $handler (@handlers) { | |
1165 | if ($handler->{regexp} | |
1166 | && $line =~ m|^$handler->{regexp}$|) { | |
1167 | if ($handler->{massager}) { | |
1168 | if ($opts{debug}) { | |
1169 | print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n"; | |
1170 | print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n"; | |
1171 | } | |
1172 | my $saved_line = $line; | |
1173 | my @massaged = | |
1174 | map { s/(<<<|>>>)//g; $_ } | |
1175 | $handler->{massager}->($saved_line, \%opts); | |
1176 | print STDERR "DEBUG[",$opts{debug_type},"]: Got back '" | |
1177 | , join("', '", @massaged), "'\n" | |
1178 | if $opts{debug}; | |
1179 | ||
1180 | # Because we may get back new lines to be | |
1181 | # injected before whatever else that follows, | |
1182 | # and the injected stuff might include | |
1183 | # preprocessor lines, we need to inject them | |
1184 | # in @lines and set $_ to the empty string to | |
1185 | # break out from the inner loops | |
1186 | my $injected_lines = shift @massaged || ""; | |
1187 | ||
1188 | if (@massaged) { | |
1189 | return (1, | |
1190 | { | |
1191 | name => shift @massaged, | |
1192 | type => shift @massaged, | |
1193 | returntype => shift @massaged, | |
1194 | value => shift @massaged, | |
1195 | conds => [ @massaged ] | |
1196 | }, | |
1197 | $injected_lines | |
1198 | ); | |
1199 | } else { | |
1200 | print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n" | |
1201 | if $opts{debug} && $injected_lines eq ""; | |
1202 | return (1, $injected_lines); | |
1203 | } | |
1204 | } | |
1205 | return (1); | |
1206 | } | |
1207 | } | |
1208 | return (0); | |
1209 | } |