2 # Copyright 2018-2021 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
9 package OpenSSL
::ParseC
;
15 use vars
qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
21 my @preprocessor_conds; # A list of simple preprocessor conditions,
22 # each item being a list of macros defined
27 return map { ( @
$_ ) } @preprocessor_conds;
30 # A list of handlers that will look at a "complete" string and try to
31 # figure out what to make of it.
32 # Each handler is a hash with the following keys:
34 # regexp a regexp to compare the "complete" string with.
35 # checker a function that does a more complex comparison.
36 # Use this instead of regexp if that isn't enough.
37 # massager massages the "complete" string into an array with
38 # the following elements:
40 # [0] String that needs further processing (this
41 # applies to typedefs of structs), or empty.
42 # [1] The name of what was found.
43 # [2] A character that denotes what type of thing
44 # this is: 'F' for function, 'S' for struct,
45 # 'T' for typedef, 'M' for macro, 'V' for
47 # [3] Return type (only for type 'F' and 'V')
48 # [4] Value (for type 'M') or signature (for type 'F',
50 # [5...] The list of preprocessor conditions this is
51 # found in, as in checks for macro definitions
52 # (stored as the macro's name) or the absence
53 # of definition (stored as the macro's name
56 # If the massager returns an empty list, it means the
57 # "complete" string has side effects but should otherwise
59 # If the massager is undefined, the "complete" string
61 my @opensslcpphandlers = (
62 ##################################################################
63 # OpenSSL CPP specials
65 # These are used to convert certain preprocessor expressions into
66 # others that @cpphandlers have a better chance to understand.
68 # This changes any OPENSSL_NO_DEPRECATED_x_y[_z] check to a check of
69 # OPENSSL_NO_DEPRECATEDIN_x_y[_z]. That's due to <openssl/macros.h>
70 # creating OPENSSL_NO_DEPRECATED_x_y[_z], but the ordinals files using
71 # DEPRECATEDIN_x_y[_z].
72 { regexp
=> qr/#if(def|ndef) OPENSSL_NO_DEPRECATED_(\d+_\d+(?:_\d+)?)$/,
75 #if$1 OPENSSL_NO_DEPRECATEDIN_$2
81 ##################################################################
84 { regexp
=> qr/#ifdef ?(.*)/,
87 if (ref($_[$#_]) eq "HASH") {
91 push @preprocessor_conds, [ $1 ];
92 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
97 { regexp
=> qr/#ifndef ?(.*)/,
100 if (ref($_[$#_]) eq "HASH") {
104 push @preprocessor_conds, [ '!'.$1 ];
105 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
110 { regexp
=> qr/#if (0|1)/,
113 if (ref($_[$#_]) eq "HASH") {
118 push @preprocessor_conds, [ "TRUE" ];
120 push @preprocessor_conds, [ "!TRUE" ];
122 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
127 { regexp
=> qr/#if ?(.*)/,
130 if (ref($_[$#_]) eq "HASH") {
136 if ($conds =~ m
|^defined<<<\
(([^\
)]*)\
)>>>(.*)$|) {
137 push @results, $1; # Handle the simple case
139 my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
140 print STDERR
"DEBUG[",$opts{debug_type
},"]: Matching '$rest' with '$re'\n"
142 if ($rest =~ m/$re/) {
143 my @rest = split /\|\|/, $rest;
146 m
|^defined<<<\
(([^\
)]*)\
)>>>$|;
147 die "Something wrong...$opts{PLACE}" if $1 eq "";
151 $conds =~ s/<<<|>>>//g;
152 warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
155 } elsif ($conds =~ m
|^!defined<<<\
(([^\
)]*)\
)>>>(.*)$|) {
156 push @results, '!'.$1; # Handle the simple case
158 my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
159 print STDERR
"DEBUG[",$opts{debug_type
},"]: Matching '$rest' with '$re'\n"
161 if ($rest =~ m/$re/) {
162 my @rest = split /\&\&/, $rest;
165 m
|^!defined<<<\
(([^\
)]*)\
)>>>$|;
166 die "Something wrong...$opts{PLACE}" if $1 eq "";
167 push @results, '!'.$1;
170 $conds =~ s/<<<|>>>//g;
171 warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
175 $conds =~ s/<<<|>>>//g;
176 warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
179 print STDERR
"DEBUG[",$opts{debug_type
},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
181 push @preprocessor_conds, [ @results ];
182 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
187 { regexp
=> qr/#elif (.*)/,
190 if (ref($_[$#_]) eq "HASH") {
194 die "An #elif without corresponding condition$opts{PLACE}"
195 if !@preprocessor_conds;
196 pop @preprocessor_conds;
197 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
204 { regexp
=> qr/#else/,
207 if (ref($_[$#_]) eq "HASH") {
211 die "An #else without corresponding condition$opts{PLACE}"
212 if !@preprocessor_conds;
213 # Invert all conditions on the last level
214 my $stuff = pop @preprocessor_conds;
215 push @preprocessor_conds, [
216 map { m
|^!(.*)$| ?
$1 : '!'.$_ } @
$stuff
218 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
223 { regexp
=> qr/#endif ?/,
226 if (ref($_[$#_]) eq "HASH") {
230 die "An #endif without corresponding condition$opts{PLACE}"
231 if !@preprocessor_conds;
232 pop @preprocessor_conds;
233 print STDERR
"DEBUG[",$opts{debug_type
},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
238 { regexp
=> qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
242 my $spaceval = $3||"";
245 $1, 'M', "", $params ?
"$name$params$spaceval" : $val,
249 massager
=> sub { return (); }
253 my @opensslchandlers = (
254 ##################################################################
257 # They are really preprocessor stuff, but they look like C stuff
258 # to this parser. All of these do replacements, anything else is
262 # Deprecated stuff, by OpenSSL release.
264 # OSSL_DEPRECATEDIN_x_y[_z] is simply ignored. Such declarations are
265 # supposed to be guarded with an '#ifdef OPENSSL_NO_DEPRECATED_x_y[_z]'
266 { regexp
=> qr/OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/,
267 massager
=> sub { return $1; },
269 { regexp
=> qr/(.*?)\s+OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/,
270 massager
=> sub { return "$1 $2"; },
276 # OSSL_CORE_MAKE_FUNC is a macro to create the necessary data and inline
277 # function for the libcrypto<->provider interface
278 { regexp
=> qr/OSSL_CORE_MAKE_FUNC<<<\((.*?),(.*?),(.*?)\)>>>/,
281 typedef $1 OSSL_FUNC_$2_fn$3;
282 static ossl_inline OSSL_FUNC_$2_fn *OSSL_FUNC_$2(const OSSL_DISPATCH *opf);
290 # LHASH_OF(foo) is used as a type, but the chandlers won't take it
291 # gracefully, so we expand it here.
292 { regexp
=> qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
293 massager
=> sub { return ("$1struct lhash_st_$2$3"); }
295 { regexp
=> qr/DEFINE_LHASH_OF(?:_INTERNAL|_EX)?<<<\((.*)\)>>>/,
298 static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
299 int (*cfn)(const $1 *, const $1 *));
300 static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
301 static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
302 static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
303 static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
304 static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
305 static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
306 static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
307 static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
309 static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
310 static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
311 static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
312 static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
321 # STACK_OF(foo) is used as a type, but the chandlers won't take it
322 # gracefully, so we expand it here.
323 { regexp
=> qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
324 massager
=> sub { return ("$1struct stack_st_$2$3"); }
326 # { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
329 # my $stack_of = "struct stack_st_$2";
331 # if ($after =~ m|^\w|) { $after = " ".$after; }
332 # return ("$before$stack_of$after");
335 { regexp
=> qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
339 typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
340 typedef void (*sk_$1_freefunc)($3 *a);
341 typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
342 static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
343 static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
344 static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
345 static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
346 static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
348 static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
349 static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
350 static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
351 static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
352 static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
353 static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
354 static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
355 static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
356 static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
357 static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
358 sk_$1_freefunc freefunc);
359 static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
360 static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
361 static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
362 static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
363 static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
364 static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
365 static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
366 static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
367 sk_$1_copyfunc copyfunc,
368 sk_$1_freefunc freefunc);
369 static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
370 sk_$1_compfunc compare);
374 { regexp
=> qr/SKM_DEFINE_STACK_OF_INTERNAL<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
378 typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
379 typedef void (*sk_$1_freefunc)($3 *a);
380 typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
381 static ossl_unused ossl_inline $2 *ossl_check_$1_type($2 *ptr);
382 static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_$1_sk_type(const STACK_OF($1) *sk);
383 static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_$1_compfunc_type(sk_$1_compfunc cmp);
384 static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_$1_copyfunc_type(sk_$1_copyfunc cpy);
385 static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_$1_freefunc_type(sk_$1_freefunc fr);
389 { regexp
=> qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/,
390 massager
=> sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
392 { regexp
=> qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
393 massager
=> sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
395 { regexp
=> qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/,
396 massager
=> sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
398 { regexp
=> qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
399 massager
=> sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
404 { regexp
=> qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
407 const ASN1_ITEM *$1_it(void);
411 { regexp
=> qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/,
419 { regexp
=> qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
424 DECLARE_ASN1_ITEM($2)
428 { regexp
=> qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
433 DECLARE_ASN1_ITEM($2)
437 { regexp
=> qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
445 { regexp
=> qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
453 { regexp
=> qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
460 DECLARE_ASN1_ITEM($2)
464 { regexp
=> qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/,
465 massager
=> sub { return (<<"EOF");
470 DECLARE_ASN1_ITEM($1)
474 { regexp
=> qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
477 int i2d_$1_NDEF(void);
481 { regexp
=> qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
484 int $1_print_ctx(void);
488 { regexp
=> qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
491 int $2_print_ctx(void);
495 { regexp
=> qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
496 massager
=> sub { return (); }
498 { regexp
=> qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/,
505 { regexp
=> qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
512 # Universal translator of attributed ASN1 declarators
515 (_ENCODE_FUNCTIONS_only
|_ENCODE_FUNCTIONS
|_ENCODE_FUNCTIONS_name
516 |_ALLOC_FUNCTIONS_name
|_ALLOC_FUNCTIONS
|_FUNCTIONS_name
|_FUNCTIONS
517 |_NDEF_FUNCTION
|_PRINT_FUNCTION
|_PRINT_FUNCTION_name
518 |_DUP_FUNCTION
|_DUP_FUNCTION_name
)
520 <<<\
(\s
*OSSL_DEPRECATEDIN_
(.*?
)\s
*,(.*?
)\
)>>>
522 massager
=> sub { return (<<"EOF");
527 { regexp
=> qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
528 massager
=> sub { return (); }
533 { regexp
=> qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
534 massager
=> sub { return (<<"EOF");
535 #ifndef OPENSSL_NO_STDIO
536 int PEM_read_$1(void);
537 int PEM_write_$1(void);
539 int PEM_read_bio_$1(void);
540 int PEM_write_bio_$1(void);
544 { regexp
=> qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)_ex<<<\((.*?),.*\)>>>/,
545 massager
=> sub { return (<<"EOF");
546 #ifndef OPENSSL_NO_STDIO
547 int PEM_read_$1(void);
548 int PEM_write_$1(void);
549 int PEM_read_$1_ex(void);
550 int PEM_write_$1_ex(void);
552 int PEM_read_bio_$1(void);
553 int PEM_write_bio_$1(void);
554 int PEM_read_bio_$1_ex(void);
555 int PEM_write_bio_$1_ex(void);
559 { regexp
=> qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
560 massager
=> sub { return (<<"EOF");
561 #ifndef OPENSSL_NO_STDIO
562 int PEM_write_$1(void);
564 int PEM_write_bio_$1(void);
568 { regexp
=> qr/DECLARE_PEM(?|_write|_write_cb|_write_const)_ex<<<\((.*?),.*\)>>>/,
569 massager
=> sub { return (<<"EOF");
570 #ifndef OPENSSL_NO_STDIO
571 int PEM_write_$1(void);
572 int PEM_write_$1_ex(void);
574 int PEM_write_bio_$1(void);
575 int PEM_write_bio_$1_ex(void);
579 { regexp
=> qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
580 massager
=> sub { return (<<"EOF");
581 #ifndef OPENSSL_NO_STDIO
582 int PEM_read_$1(void);
584 int PEM_read_bio_$1(void);
588 { regexp
=> qr/DECLARE_PEM(?|_read|_read_cb)_ex<<<\((.*?),.*\)>>>/,
589 massager
=> sub { return (<<"EOF");
590 #ifndef OPENSSL_NO_STDIO
591 int PEM_read_$1(void);
592 int PEM_read_$1_ex(void);
594 int PEM_read_bio_$1(void);
595 int PEM_read_bio_$1_ex(void);
599 # Universal translator of attributed PEM declarators
602 ((?
:_rw
|_rw_cb
|_rw_const
|_write
|_write_cb
|_write_const
|_read
|_read_cb
)
605 <<<\
(\s
*OSSL_DEPRECATEDIN_
(.*?
)\s
*,(.*?
)\
)>>>
607 massager
=> sub { return (<<"EOF");
613 # OpenSSL's declaration of externs with possible export linkage
614 # (really only relevant on Windows)
615 { regexp
=> qr/OPENSSL_(?:EXPORT|EXTERN)/,
616 massager
=> sub { return ("extern"); }
619 # Spurious stuff found in the OpenSSL headers
620 # Usually, these are just macros that expand to, well, something
621 { regexp
=> qr/__NDK_FPABI__/,
622 massager
=> sub { return (); }
629 ##################################################################
632 # extern "C" of individual items
633 # Note that the main parse function has a special hack for 'extern "C" {'
634 # which can't be done in handlers
635 # We simply ignore it.
636 { regexp
=> qr/^extern "C" (.*(?:;|>>>))/,
637 massager
=> sub { return ($1); },
639 # any other extern is just ignored
640 { regexp
=> qr
/^\s
* # Any spaces before
641 extern
# The keyword we look for
642 \b # word to non-word boundary
646 massager
=> sub { return (); },
648 # union, struct and enum definitions
649 # Because this one might appear a little everywhere within type
650 # definitions, we take it out and replace it with just
651 # 'union|struct|enum name' while registering it.
652 # This makes use of the parser trick to surround the outer braces
654 { regexp
=> qr
/(.*) # Anything before ($1)
655 \b # word to non-word boundary
656 (union
|struct
|enum
) # The word used ($2)
657 (?
:\s
([[:alpha
:]_
]\w
*))?
# Struct or enum name ($3)
658 <<<(\
{.*?\
})>>> # Struct or enum definition ($4)
659 (.*) # Anything after ($5)
666 || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
669 my $type = $word eq "struct" ?
'S' : 'E';
670 if ($before ne "" || $after ne ";") {
671 if ($after =~ m
|^\w
|) { $after = " ".$after; }
672 return ("$before$word $name$after;",
673 "$word $name", $type, "", "$word$definition", all_conds
());
675 # If there was neither a before nor an after, make the return much simpler
676 return ("", "$word $name", $type, "", "$word$definition", all_conds
());
679 # Named struct and enum forward declarations
680 # We really just ignore them, but we need to parse them or the variable
681 # declaration handler further down will think it's a variable declaration.
682 { regexp
=> qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
683 massager
=> sub { return (); }
685 # Function returning function pointer declaration
686 # This sort of declaration may have a body (inline functions, for example)
687 { regexp
=> qr
/(?
:(typedef
)\s?
)?
# Possible typedef ($1)
688 ((?
:\w
|\
*|\s
)*?
) # Return type ($2)
691 ([[:alpha
:]_
]\w
*) # Function name ($3)
692 (\
(.*\
)) # Parameters ($4)
694 <<<(\
(.*\
))>>> # F.p. parameters ($5)
695 (?
:<<<\
{.*\
}>>>|;) # Body or semicolon
698 return ("", $3, 'T', "", "$2(*$4)$5", all_conds
())
700 return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds
()); }
702 # Function pointer declaration, or typedef thereof
703 # This sort of declaration never has a function body
704 { regexp
=> qr
/(?
:(typedef
)\s?
)?
# Possible typedef ($1)
705 ((?
:\w
|\
*|\s
)*?
) # Return type ($2)
706 <<<\
(\
*([[:alpha
:]_
]\w
*)\
)>>> # T.d. or var name ($3)
707 <<<(\
(.*\
))>>> # F.p. parameters ($4)
711 return ("", $3, 'T', "", "$2(*)$4", all_conds
())
713 return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds
());
716 # Function declaration, or typedef thereof
717 # This sort of declaration may have a body (inline functions, for example)
718 { regexp
=> qr
/(?
:(typedef
)\s?
)?
# Possible typedef ($1)
719 ((?
:\w
|\
*|\s
)*?
) # Return type ($2)
721 ([[:alpha
:]_
]\w
*) # Function name ($3)
722 <<<(\
(.*\
))>>> # Parameters ($4)
723 (?
:<<<\
{.*\
}>>>|;) # Body or semicolon
726 return ("", $3, 'T', "", "$2$4", all_conds
())
728 return ("", $3, 'F', $2, "$2$4", all_conds
());
731 # Variable declaration, including arrays, or typedef thereof
732 { regexp
=> qr
/(?
:(typedef
)\s?
)?
# Possible typedef ($1)
733 ((?
:\w
|\
*|\s
)*?
) # Type ($2)
735 ([[:alpha
:]_
]\w
*) # Variable name ($3)
736 ((?
:<<<\
[[^\
]]*\
]>>>)*) # Possible array declaration ($4)
740 return ("", $3, 'T', "", $2.($4||""), all_conds
())
742 return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds
());
747 # End handlers are almost the same as handlers, except that they are run
748 # exactly ONCE, after all the input has been parsed. They are used to check
749 # for leftovers that the main parser can't detect on its own, such as an
750 # unfinished #ifdef.
755 die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename
} ?
" in file ".$opts{filename
}: ""),$opts{PLACE
}
756 if @preprocessor_conds;
761 # takes a list of strings that can each contain one or several lines of code
762 # also takes a hash of options as last argument.
764 # returns a list of hashes with information:
766 # name name of the thing
767 # type type, see the massage handler function
768 # returntype return type of functions and variables
769 # value value for macros, signature for functions, variables
771 # conds preprocessor conditions (array ref)
775 if (ref($_[$#_]) eq "HASH") {
780 in_extern_C
=> 0, # An exception to parenthesis processing.
781 cpp_parens
=> [], # A list of ending parens and braces found in
782 # preprocessor directives
783 c_parens
=> [], # A list of ending parens and braces found in
785 in_string
=> "", # empty string when outside a string, otherwise
786 # "'" or '"' depending on the starting quote.
787 in_comment
=> "", # empty string when outside a comment, otherwise
788 # "/*" or "//" depending on the type of comment
789 # found. The latter will never be multiline
790 # NOTE: in_string and in_comment will never be
791 # true (in perl semantics) at the same time.
795 my $normalized_line = ""; # $input_line, but normalized. In essence, this
796 # means that ALL whitespace is removed unless
797 # it absolutely has to be present, and in that
798 # case, there's only one space.
799 # The cases where a space needs to stay present
802 # 2. between words and number
803 # 3. after the first word of a preprocessor
805 # 4. for the #define directive, between the macro
806 # name/args and its value, so we end up with:
808 # #define BAR(x) something(x)
809 my $collected_stmt = ""; # Where we're building up a C line until it's a
810 # complete definition/declaration, as determined
811 # by any handler being capable of matching it.
813 # We use $_ shamelessly when looking through @lines.
814 # In case we find a \ at the end, we keep filling it up with more lines.
817 foreach my $line (@_) {
818 # split tries to be smart when a string ends with the thing we split on
819 $line .= "\n" unless $line =~ m
|\R
$|;
822 # We use ¦undef¦ as a marker for a new line from the file.
823 # Since we convert one line to several and unshift that into @lines,
824 # that's the only safe way we have to track the original lines
825 my @lines = map { ( undef, $_ ) } split $/, $line;
827 # Remember that extra # we added above? Now we remove it
829 pop @lines; # Don't forget the undef
832 if (!defined($lines[0])) {
834 $state{current_line
}++;
836 $opts{PLACE
} = " at ".$opts{filename
}." line ".$state{current_line
}."\n";
837 $opts{PLACE2
} = $opts{filename
}.":".$state{current_line
};
842 $_ = "" unless defined $_;
851 print STDERR "DEBUG:----------------------------\n";
852 print STDERR "DEBUG: \$_ = '$_'\n";
855 ##########################################################
856 # Now that we have a full line, let's process through it
858 unless ($state{in_comment}) {
859 # Begin with checking if the current $normalized_line
860 # contains a preprocessor directive
861 # This is only done if we're not inside a comment and
862 # if it's a preprocessor directive and it's finished.
863 if ($normalized_line =~ m|^#| && $_ eq "") {
864 print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
866 $opts{debug_type} = "OPENSSL CPP";
867 my @r = ( _run_handlers($normalized_line,
871 # Checking if there are lines to inject.
873 @r = split $/, (pop @r).$_;
874 print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
875 if $opts{debug} && @r;
876 @lines = ( @r, @lines );
881 print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
883 $opts{debug_type} = "CPP";
884 my @r = ( _run_handlers($normalized_line,
888 if (ref($r[0]) eq "HASH") {
889 push @result, shift @r;
892 # Now, check if there are lines to inject.
893 # Really, this should never happen, it IS a
894 # preprocessor directive after all...
896 @r = split $/, pop @r;
897 print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
898 if $opts{debug} && @r;
899 @lines = ( @r, @lines );
905 # Note: we simply ignore all directives that no
907 $normalized_line = "";
910 # If the two strings end and start with a character that
911 # shouldn't get concatenated, add a space
913 ($collected_stmt =~ m/(?:"|')$/
914 || ($collected_stmt =~ m/(?:\w|\d)$/
915 && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";
917 # Now, unless we're building up a preprocessor directive or
918 # are in the middle of a string, or the parens et al aren't
919 # balanced up yet, let's try and see if there's an OpenSSL
920 # or C handler that can make sense of what we have so far.
921 if ( $normalized_line !~ m|^#|
922 && ($collected_stmt ne "" || $normalized_line ne "")
923 && ! @{$state{c_parens}}
924 && ! $state{in_string} ) {
926 print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n";
927 print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
929 $opts{debug_type} = "OPENSSL C";
930 my @r = ( _run_handlers($collected_stmt
936 # Checking if there are lines to inject.
938 @r = split $/, (pop @r).$_;
939 print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
940 if $opts{debug} && @r;
941 @lines = ( @r, @lines );
945 $normalized_line = "";
946 $collected_stmt = "";
949 print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n";
950 print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
952 $opts{debug_type} = "C";
953 my @r = ( _run_handlers($collected_stmt
959 if (ref($r[0]) eq "HASH") {
960 push @result, shift @r;
963 # Checking if there are lines to inject.
965 @r = split $/, (pop @r).$_;
966 print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
967 if $opts{debug} && @r;
968 @lines = ( @r, @lines );
972 $normalized_line = "";
973 $collected_stmt = "";
978 $collected_stmt .= $space.$normalized_line;
979 $normalized_line = "";
988 # Take care of inside string first.
989 if ($state{in_string}) {
990 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
991 $state{in_string} # Look for matching quote
993 $normalized_line .= $`.$&;
994 $state{in_string
} = "";
998 die "Unfinished string without continuation found$opts{PLACE}\n";
1001 # ... or inside comments, whichever happens to apply
1002 elsif ($state{in_comment}) {
1004 # This should never happen
1005 die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
1006 if ($state{in_comment} eq "//");
1008 # A note: comments are simply discarded.
1010 if (m/ (?:^|(?<!\\)) # Make sure it's
not escaped
1011 \
*\
/ # Look for C comment end
1013 $state{in_comment
} = "";
1015 print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
1024 # At this point, it's safe to remove leading whitespace, but
1025 # we need to be careful with some preprocessor lines
1030 if ($normalized_line =~ m/^
1031 \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
1034 print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
1040 $normalized_line =~ m|^#| ? 'cpp_parens
' : 'c_parens
';
1041 (my $paren_singular = $parens) =~ s|s$||;
1043 # Now check for specific tokens, and if they are parens,
1044 # check them against $state{$parens}. Note that we surround
1045 # the outermost parens with extra "<<<" and ">>>". Those
1046 # are for the benefit of handlers that need to detect
1047 # them, and they will be removed from the final output.
1051 if (!@
{$state{$parens}}) {
1052 if ("$normalized_line$body" =~ m
|^extern
"C"\
{$|) {
1053 $state{in_extern_C
} = 1;
1054 print STDERR
"DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
1056 $normalized_line = "";
1058 $normalized_line .= "<<<".$body;
1061 $normalized_line .= $body;
1064 if ($normalized_line ne "") {
1065 print STDERR
"DEBUG: found $paren_singular start '$body'\n"
1067 $body =~ tr
|\
{\
[\
(|\
}\
]\
)|;
1068 print STDERR
"DEBUG: pushing $paren_singular end '$body'\n"
1070 push @
{$state{$parens}}, $body;
1072 } elsif (m
|^[\
}\
]\
)]|) {
1075 if (!@{$state{$parens}}
1076 && $& eq '}' && $state{in_extern_C}) {
1077 print STDERR "DEBUG: found end of 'extern
\"C
\"'\n"
1079 $state{in_extern_C} = 0;
1081 print STDERR "DEBUG: Trying to match '$&' against '"
1082 ,join("', '", @{$state{$parens}})
1085 die "Unmatched parentheses$opts{PLACE}\n"
1086 unless (@{$state{$parens}}
1087 && pop @{$state{$parens}} eq $&);
1088 if (!@{$state{$parens}}) {
1089 $normalized_line .= $&.">>>";
1091 $normalized_line .= $&;
1094 } elsif (m|^["']|) { # string start
1098 # We want to separate strings from \w and \d with one space.
1099 $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
1100 $normalized_line .= $body;
1101 $state{in_string} = $body;
1102 } elsif (m|^\/\*|) { # C style comment
1103 print STDERR "DEBUG: found start of C style comment\n"
1105 $state{in_comment} = $&;
1107 } elsif (m
|^\
/\/|) { # C++ style comment
1108 print STDERR
"DEBUG: found C++ style comment\n"
1110 $_ = ""; # (just discard it entirely)
1111 } elsif (m
/^ (?
| (?
: 0[xX
][[:xdigit
:]]+ | 0[bB
][01]+ | [0-9]+ )
1112 (?i
: U
| L
| UL
| LL
| ULL
)?
1113 | [0-9]+\
.[0-9]+(?
:[eE
][\
-\
+]\d
+)?
(?i
: F
| L
)?
1115 print STDERR
"DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
1117 $normalized_line .= $&;
1119 } elsif (m/^[[:alpha:]_]\w*/) {
1124 # Now, only add a space if it's needed to separate
1125 # two \w characters, and we also surround strings with
1126 # a space. In this case, that's if $normalized_line ends
1127 # with a \w, \d, " or '.
1129 if ($normalized_line =~ m/("|')$/
1130 || ($normalized_line =~ m/(\w|\d)$/
1131 && $body =~ m/^(\w|\d)/));
1133 print STDERR
"DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
1135 $normalized_line .= $space.$body;
1137 } elsif (m
|^(?
:\\)?
.|) { # Catch-all
1138 $normalized_line .= $&;
1144 foreach my $handler (@endhandlers) {
1145 if ($handler->{massager}) {
1146 $handler->{massager}->(\%opts);
1152 # arg1: line to check
1153 # arg2...: handlers to check
1154 # return undef when no handler matched
1157 if (ref($_[$#_]) eq "HASH") {
1164 foreach my $handler (@handlers) {
1165 if ($handler->{regexp}
1166 && $line =~ m|^$handler->{regexp}$|) {
1167 if ($handler->{massager}) {
1169 print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
1170 print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
1172 my $saved_line = $line;
1174 map { s/(<<<|>>>)//g; $_ }
1175 $handler->{massager}->($saved_line, \%opts);
1176 print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
1177 , join("', '", @massaged), "'\n"
1180 # Because we may get back new lines to be
1181 # injected before whatever else that follows,
1182 # and the injected stuff might include
1183 # preprocessor lines, we need to inject them
1184 # in @lines and set $_ to the empty string to
1185 # break out from the inner loops
1186 my $injected_lines = shift @massaged || "";
1191 name => shift @massaged,
1192 type => shift @massaged,
1193 returntype => shift @massaged,
1194 value => shift @massaged,
1195 conds => [ @massaged ]
1200 print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n"
1201 if $opts{debug} && $injected_lines eq "";
1202 return (1, $injected_lines);