]> git.ipfire.org Git - thirdparty/openssl.git/blame - util/perl/OpenSSL/ParseC.pm
Remove EXPORT_VAR_AS_FUNC
[thirdparty/openssl.git] / util / perl / OpenSSL / ParseC.pm
CommitLineData
d73c4440
RL
1#! /usr/bin/env perl
2# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
3#
9059ab42 4# Licensed under the Apache License 2.0 (the "License"). You may not use
d73c4440
RL
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9package OpenSSL::ParseC;
10
11use strict;
12use warnings;
13
14use Exporter;
15use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
16$VERSION = "0.9";
17@ISA = qw(Exporter);
18@EXPORT = qw(parse);
19
20# Global handler data
21my @preprocessor_conds; # A list of simple preprocessor conditions,
22 # each item being a list of macros defined
23 # or not defined.
24
25# Handler helpers
26sub all_conds {
27 return map { ( @$_ ) } @preprocessor_conds;
28}
29
30# A list of handlers that will look at a "complete" string and try to
31# figure out what to make of it.
32# Each handler is a hash with the following keys:
33#
34# regexp a regexp to compare the "complete" string with.
35# checker a function that does a more complex comparison.
36# Use this instead of regexp if that isn't enough.
37# massager massages the "complete" string into an array with
38# the following elements:
39#
40# [0] String that needs further processing (this
41# applies to typedefs of structs), or empty.
42# [1] The name of what was found.
43# [2] A character that denotes what type of thing
44# this is: 'F' for function, 'S' for struct,
45# 'T' for typedef, 'M' for macro, 'V' for
46# variable.
47# [3] Return type (only for type 'F' and 'V')
48# [4] Value (for type 'M') or signature (for type 'F',
49# 'V', 'T' or 'S')
50# [5...] The list of preprocessor conditions this is
51# found in, as in checks for macro definitions
52# (stored as the macro's name) or the absence
53# of definition (stored as the macro's name
54# prefixed with a '!'
55#
56# If the massager returns an empty list, it means the
57# "complete" string has side effects but should otherwise
58# be ignored.
59# If the massager is undefined, the "complete" string
60# should be ignored.
61my @opensslcpphandlers = (
62 ##################################################################
63 # OpenSSL CPP specials
64 #
65 # These are used to convert certain pre-precessor expressions into
66 # others that @cpphandlers have a better chance to understand.
67
fcd2d5a6 68 { regexp => qr/#if (!?)OPENSSL_API_([0-9_]+)$/,
d73c4440 69 massager => sub {
fcd2d5a6 70 my $cnd = $1 eq '!' ? 'ndef' : 'def';
d73c4440 71 return (<<"EOF");
fcd2d5a6 72#if$cnd DEPRECATEDIN_$2
d73c4440
RL
73EOF
74 }
75 }
76);
77my @cpphandlers = (
78 ##################################################################
79 # CPP stuff
80
81 { regexp => qr/#ifdef ?(.*)/,
82 massager => sub {
83 my %opts;
84 if (ref($_[$#_]) eq "HASH") {
85 %opts = %{$_[$#_]};
86 pop @_;
87 }
88 push @preprocessor_conds, [ $1 ];
89 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
90 if $opts{debug};
91 return ();
92 },
93 },
94 { regexp => qr/#ifndef ?(.*)/,
95 massager => sub {
96 my %opts;
97 if (ref($_[$#_]) eq "HASH") {
98 %opts = %{$_[$#_]};
99 pop @_;
100 }
101 push @preprocessor_conds, [ '!'.$1 ];
102 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
103 if $opts{debug};
104 return ();
105 },
106 },
107 { regexp => qr/#if (0|1)/,
108 massager => sub {
109 my %opts;
110 if (ref($_[$#_]) eq "HASH") {
111 %opts = %{$_[$#_]};
112 pop @_;
113 }
114 if ($1 eq "1") {
115 push @preprocessor_conds, [ "TRUE" ];
116 } else {
117 push @preprocessor_conds, [ "!TRUE" ];
118 }
119 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
120 if $opts{debug};
121 return ();
122 },
123 },
124 { regexp => qr/#if ?(.*)/,
125 massager => sub {
126 my %opts;
127 if (ref($_[$#_]) eq "HASH") {
128 %opts = %{$_[$#_]};
129 pop @_;
130 }
131 my @results = ();
132 my $conds = $1;
133 if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) {
134 push @results, $1; # Handle the simple case
135 my $rest = $2;
136 my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
137 print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
138 if $opts{debug};
139 if ($rest =~ m/$re/) {
140 my @rest = split /\|\|/, $rest;
141 shift @rest;
142 foreach (@rest) {
143 m|^defined<<<\(([^\)]*)\)>>>$|;
144 die "Something wrong...$opts{PLACE}" if $1 eq "";
145 push @results, $1;
146 }
147 } else {
148 $conds =~ s/<<<|>>>//g;
149 warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
150 if $opts{warnings};
151 }
152 } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) {
153 push @results, '!'.$1; # Handle the simple case
154 my $rest = $2;
155 my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
156 print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
157 if $opts{debug};
158 if ($rest =~ m/$re/) {
159 my @rest = split /\&\&/, $rest;
160 shift @rest;
161 foreach (@rest) {
162 m|^!defined<<<\(([^\)]*)\)>>>$|;
163 die "Something wrong...$opts{PLACE}" if $1 eq "";
164 push @results, '!'.$1;
165 }
166 } else {
167 $conds =~ s/<<<|>>>//g;
168 warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
169 if $opts{warnings};
170 }
171 } else {
172 $conds =~ s/<<<|>>>//g;
173 warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
174 if $opts{warnings};
175 }
176 print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
177 if $opts{debug};
178 push @preprocessor_conds, [ @results ];
179 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
180 if $opts{debug};
181 return ();
182 },
183 },
184 { regexp => qr/#elif (.*)/,
185 massager => sub {
186 my %opts;
187 if (ref($_[$#_]) eq "HASH") {
188 %opts = %{$_[$#_]};
189 pop @_;
190 }
191 die "An #elif without corresponding condition$opts{PLACE}"
192 if !@preprocessor_conds;
193 pop @preprocessor_conds;
194 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
195 if $opts{debug};
196 return (<<"EOF");
197#if $1
198EOF
199 },
200 },
201 { regexp => qr/#else/,
202 massager => sub {
203 my %opts;
204 if (ref($_[$#_]) eq "HASH") {
205 %opts = %{$_[$#_]};
206 pop @_;
207 }
208 die "An #else without corresponding condition$opts{PLACE}"
209 if !@preprocessor_conds;
210 # Invert all conditions on the last level
211 my $stuff = pop @preprocessor_conds;
212 push @preprocessor_conds, [
213 map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff
214 ];
215 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
216 if $opts{debug};
217 return ();
218 },
219 },
220 { regexp => qr/#endif ?/,
221 massager => sub {
222 my %opts;
223 if (ref($_[$#_]) eq "HASH") {
224 %opts = %{$_[$#_]};
225 pop @_;
226 }
227 die "An #endif without corresponding condition$opts{PLACE}"
228 if !@preprocessor_conds;
229 pop @preprocessor_conds;
230 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
231 if $opts{debug};
232 return ();
233 },
234 },
235 { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
236 massager => sub {
237 my $name = $1;
238 my $params = $2;
239 my $spaceval = $3||"";
240 my $val = $4||"";
241 return ("",
242 $1, 'M', "", $params ? "$name$params$spaceval" : $val,
243 all_conds()); }
244 },
245 { regexp => qr/#.*/,
246 massager => sub { return (); }
247 },
248 );
249
250my @opensslchandlers = (
251 ##################################################################
252 # OpenSSL C specials
253 #
254 # They are really preprocessor stuff, but they look like C stuff
255 # to this parser. All of these do replacements, anything else is
256 # an error.
257
d73c4440
RL
258 #####
259 # Deprecated stuff, by OpenSSL release.
260
261 # We trick the parser by pretending that the declaration is wrapped in a
262 # check if the DEPRECATEDIN macro is defined or not. Callers of parse()
263 # will have to decide what to do with it.
2284f64c 264 { regexp => qr/(DEPRECATEDIN_\d+(?:_\d+_\d+)?)<<<\((.*)\)>>>/,
d73c4440
RL
265 massager => sub { return (<<"EOF");
266#ifndef $1
267$2;
268#endif
269EOF
270 },
271 },
272
273 #####
274 # LHASH stuff
275
276 # LHASH_OF(foo) is used as a type, but the chandlers won't take it
277 # gracefully, so we expand it here.
278 { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
279 massager => sub { return ("$1struct lhash_st_$2$3"); }
280 },
281 { regexp => qr/DEFINE_LHASH_OF<<<\((.*)\)>>>/,
282 massager => sub {
283 return (<<"EOF");
284static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
285 int (*cfn)(const $1 *, const $1 *));
286static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
287static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
288static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
289static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
290static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
291static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
292static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
293static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
294 BIO *out);
295static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
296static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
297static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
298static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
299LHASH_OF($1)
300EOF
301 }
302 },
303
304 #####
305 # STACK stuff
306
307 # STACK_OF(foo) is used as a type, but the chandlers won't take it
308 # gracefully, so we expand it here.
309 { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
310 massager => sub { return ("$1struct stack_st_$2$3"); }
311 },
312# { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
313# massager => sub {
314# my $before = $1;
315# my $stack_of = "struct stack_st_$2";
316# my $after = $3;
317# if ($after =~ m|^\w|) { $after = " ".$after; }
318# return ("$before$stack_of$after");
319# }
320# },
9fdcc21f 321 { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
d73c4440
RL
322 massager => sub {
323 return (<<"EOF");
324STACK_OF($1);
325typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
326typedef void (*sk_$1_freefunc)($3 *a);
327typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
328static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
329static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
330static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
331static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
332static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
333 int n);
334static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
335static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
336static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
337static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
338static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
339static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
340static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
341static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
342static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
343static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
344 sk_$1_freefunc freefunc);
345static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
346static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
347static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
348static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
349static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
350static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
351static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
352static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
353 sk_$1_copyfunc copyfunc,
354 sk_$1_freefunc freefunc);
355static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
356 sk_$1_compfunc compare);
357EOF
358 }
359 },
9fdcc21f 360 { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/,
d73c4440
RL
361 massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
362 },
363 { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
364 massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
365 },
9fdcc21f 366 { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/,
d73c4440
RL
367 massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
368 },
369 { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
370 massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
371 },
372 { regexp => qr/PREDECLARE_STACK_OF<<<\((.*)\)>>>/,
373 massager => sub { return ("STACK_OF($1);"); }
374 },
375 { regexp => qr/DECLARE_STACK_OF<<<\((.*)\)>>>/,
376 massager => sub { return ("STACK_OF($1);"); }
377 },
9fdcc21f 378 { regexp => qr/DECLARE_SPECIAL_STACK_OF<<<\((.*?),\s*(.*?)\)>>>/,
d73c4440
RL
379 massager => sub { return ("STACK_OF($1);"); }
380 },
381
382 #####
383 # ASN1 stuff
384
385 { regexp => qr/TYPEDEF_D2I_OF<<<\((.*)\)>>>/,
386 massager => sub {
387 return ("typedef $1 *d2i_of_$1($1 **,const unsigned char **,long)");
388 },
389 },
390 { regexp => qr/TYPEDEF_I2D_OF<<<\((.*)\)>>>/,
391 massager => sub {
392 return ("typedef $1 *i2d_of_$1($1 *,unsigned char **)");
393 },
394 },
395 { regexp => qr/TYPEDEF_D2I2D_OF<<<\((.*)\)>>>/,
396 massager => sub {
397 return ("TYPEDEF_D2I_OF($1); TYPEDEF_I2D_OF($1)");
398 },
399 },
400 { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
401 massager => sub {
402 return (<<"EOF");
d73c4440 403const ASN1_ITEM *$1_it(void);
d73c4440
RL
404EOF
405 },
406 },
9fdcc21f
DO
407 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/,
408 massager => sub {
409 return (<<"EOF");
410int d2i_$2(void);
411int i2d_$2(void);
412EOF
413 },
414 },
415 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
d73c4440
RL
416 massager => sub {
417 return (<<"EOF");
418int d2i_$3(void);
419int i2d_$3(void);
420DECLARE_ASN1_ITEM($2)
421EOF
422 },
423 },
9fdcc21f 424 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
d73c4440
RL
425 massager => sub {
426 return (<<"EOF");
427int d2i_$2(void);
428int i2d_$2(void);
429DECLARE_ASN1_ITEM($2)
9fdcc21f
DO
430EOF
431 },
432 },
433 { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
434 massager => sub {
435 return (<<"EOF");
436int $2_free(void);
437int $2_new(void);
d73c4440
RL
438EOF
439 },
440 },
441 { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
442 massager => sub {
443 return (<<"EOF");
444int $1_free(void);
445int $1_new(void);
446EOF
447 },
448 },
9fdcc21f 449 { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
d73c4440
RL
450 massager => sub {
451 return (<<"EOF");
452int d2i_$2(void);
453int i2d_$2(void);
454int $2_free(void);
455int $2_new(void);
456DECLARE_ASN1_ITEM($2)
457EOF
458 },
459 },
9fdcc21f 460 { regexp => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/,
d73c4440
RL
461 massager => sub { return (<<"EOF");
462int d2i_$1(void);
463int i2d_$1(void);
464int $1_free(void);
465int $1_new(void);
466DECLARE_ASN1_ITEM($1)
467EOF
468 }
469 },
470 { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
471 massager => sub {
472 return (<<"EOF");
473int i2d_$1_NDEF(void);
474EOF
475 }
476 },
477 { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
478 massager => sub {
479 return (<<"EOF");
480int $1_print_ctx(void);
481EOF
482 }
483 },
9fdcc21f 484 { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
d73c4440
RL
485 massager => sub {
486 return (<<"EOF");
487int $2_print_ctx(void);
488EOF
489 }
490 },
491 { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
492 massager => sub { return (); }
493 },
9fdcc21f
DO
494 { regexp => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/,
495 massager => sub {
496 return (<<"EOF");
497int $1_dup(void);
498EOF
499 }
500 },
501 { regexp => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
502 massager => sub {
503 return (<<"EOF");
504int $2_dup(void);
505EOF
506 }
507 },
d73c4440
RL
508 { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
509 massager => sub { return (); }
510 },
511 { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
512 massager => sub { return (<<"EOF");
513#ifndef OPENSSL_NO_STDIO
514int PEM_read_$1(void);
515int PEM_write_$1(void);
516#endif
517int PEM_read_bio_$1(void);
518int PEM_write_bio_$1(void);
519EOF
520 },
521 },
522
523 #####
524 # PEM stuff
525 { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
526 massager => sub { return (<<"EOF");
527#ifndef OPENSSL_NO_STDIO
528int PEM_write_$1(void);
529#endif
530int PEM_write_bio_$1(void);
531EOF
532 },
533 },
534 { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
535 massager => sub { return (<<"EOF");
536#ifndef OPENSSL_NO_STDIO
537int PEM_read_$1(void);
538#endif
539int PEM_read_bio_$1(void);
540EOF
541 },
542 },
543
544 # Spurious stuff found in the OpenSSL headers
545 # Usually, these are just macros that expand to, well, something
546 { regexp => qr/__NDK_FPABI__/,
547 massager => sub { return (); }
548 },
549 );
550
551my $anoncnt = 0;
552
553my @chandlers = (
554 ##################################################################
555 # C stuff
556
557 # extern "C" of individual items
558 # Note that the main parse function has a special hack for 'extern "C" {'
559 # which can't be done in handlers
560 # We simply ignore it.
561 { regexp => qr/extern "C" (.*;)/,
562 massager => sub { return ($1); },
563 },
4c2883a9
RL
564 # any other extern is just ignored
565 { regexp => qr/^\s* # Any spaces before
566 extern # The keyword we look for
567 \b # word to non-word boundary
568 .* # Anything after
569 ;
570 /x,
571 massager => sub { return (); },
572 },
d73c4440
RL
573 # union, struct and enum definitions
574 # Because this one might appear a little everywhere within type
575 # definitions, we take it out and replace it with just
576 # 'union|struct|enum name' while registering it.
577 # This makes use of the parser trick to surround the outer braces
578 # with <<< and >>>
579 { regexp => qr/(.*) # Anything before ($1)
580 \b # word to non-word boundary
581 (union|struct|enum) # The word used ($2)
582 (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3)
583 <<<(\{.*?\})>>> # Struct or enum definition ($4)
584 (.*) # Anything after ($5)
585 ;
586 /x,
587 massager => sub {
588 my $before = $1;
589 my $word = $2;
590 my $name = $3
591 || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
592 my $definition = $4;
593 my $after = $5;
594 my $type = $word eq "struct" ? 'S' : 'E';
595 if ($before ne "" || $after ne ";") {
596 if ($after =~ m|^\w|) { $after = " ".$after; }
597 return ("$before$word $name$after;",
598 "$word $name", $type, "", "$word$definition", all_conds());
599 }
600 # If there was no before nor after, make the return much simple
601 return ("", "$word $name", $type, "", "$word$definition", all_conds());
602 }
603 },
604 # Named struct and enum forward declarations
605 # We really just ignore them, but we need to parse them or the variable
606 # declaration handler further down will think it's a variable declaration.
607 { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
608 massager => sub { return (); }
609 },
610 # Function returning function pointer declaration
611 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
612 ((?:\w|\*|\s)*?) # Return type ($2)
613 \s? # Possible space
614 <<<\(\*
615 ([[:alpha:]_]\w*) # Function name ($3)
616 (\(.*\)) # Parameters ($4)
617 \)>>>
618 <<<(\(.*\))>>> # F.p. parameters ($5)
619 ;
620 /x,
621 massager => sub {
622 return ("", $3, 'F', "", "$2(*$4)$5", all_conds())
623 if defined $1;
624 return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); }
625 },
626 # Function pointer declaration, or typedef thereof
627 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
628 ((?:\w|\*|\s)*?) # Return type ($2)
629 <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3)
630 <<<(\(.*\))>>> # F.p. parameters ($4)
631 ;
632 /x,
633 massager => sub {
634 return ("", $3, 'T', "", "$2(*)$4", all_conds())
635 if defined $1;
636 return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds());
637 },
638 },
639 # Function declaration, or typedef thereof
640 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
641 ((?:\w|\*|\s)*?) # Return type ($2)
642 \s? # Possible space
643 ([[:alpha:]_]\w*) # Function name ($3)
644 <<<(\(.*\))>>> # Parameters ($4)
645 ;
646 /x,
647 massager => sub {
648 return ("", $3, 'T', "", "$2$4", all_conds())
649 if defined $1;
650 return ("", $3, 'F', $2, "$2$4", all_conds());
651 },
652 },
653 # Variable declaration, including arrays, or typedef thereof
654 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
655 ((?:\w|\*|\s)*?) # Type ($2)
656 \s? # Possible space
657 ([[:alpha:]_]\w*) # Variable name ($3)
658 ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4)
659 ;
660 /x,
661 massager => sub {
662 return ("", $3, 'T', "", $2.($4||""), all_conds())
663 if defined $1;
664 return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds());
665 },
666 },
667);
668
669# End handlers are almost the same as handlers, except they are run through
670# ONCE when the input has been parsed through. These are used to check for
671# remaining stuff, such as an unfinished #ifdef and stuff like that that the
672# main parser can't check on its own.
673my @endhandlers = (
674 { massager => sub {
675 my %opts = %{$_[0]};
676
677 die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE}
678 if @preprocessor_conds;
679 }
680 }
681 );
682
683# takes a list of strings that can each contain one or several lines of code
684# also takes a hash of options as last argument.
685#
686# returns a list of hashes with information:
687#
688# name name of the thing
689# type type, see the massage handler function
690# returntype return type of functions and variables
691# value value for macros, signature for functions, variables
692# and structs
693# conds preprocessor conditions (array ref)
694
695sub parse {
696 my %opts;
697 if (ref($_[$#_]) eq "HASH") {
698 %opts = %{$_[$#_]};
699 pop @_;
700 }
701 my %state = (
702 in_extern_C => 0, # An exception to parenthesis processing.
703 cpp_parens => [], # A list of ending parens and braces found in
704 # preprocessor directives
705 c_parens => [], # A list of ending parens and braces found in
706 # C statements
707 in_string => "", # empty string when outside a string, otherwise
708 # "'" or '"' depending on the starting quote.
709 in_comment => "", # empty string when outside a comment, otherwise
710 # "/*" or "//" depending on the type of comment
711 # found. The latter will never be multiline
712 # NOTE: in_string and in_comment will never be
713 # true (in perl semantics) at the same time.
714 current_line => 0,
715 );
716 my @result = ();
717 my $normalized_line = ""; # $input_line, but normalized. In essence, this
718 # means that ALL whitespace is removed unless
719 # it absolutely has to be present, and in that
720 # case, there's only one space.
721 # The cases where a space needs to stay present
722 # are:
723 # 1. between words
724 # 2. between words and number
725 # 3. after the first word of a preprocessor
726 # directive.
727 # 4. for the #define directive, between the macro
728 # name/args and its value, so we end up with:
729 # #define FOO val
730 # #define BAR(x) something(x)
731 my $collected_stmt = ""; # Where we're building up a C line until it's a
732 # complete definition/declaration, as determined
733 # by any handler being capable of matching it.
734
735 # We use $_ shamelessly when looking through @lines.
736 # In case we find a \ at the end, we keep filling it up with more lines.
737 $_ = undef;
738
739 foreach my $line (@_) {
740 # split tries to be smart when a string ends with the thing we split on
741 $line .= "\n" unless $line =~ m|\R$|;
742 $line .= "#";
743
744 # We use ¦undef¦ as a marker for a new line from the file.
745 # Since we convert one line to several and unshift that into @lines,
746 # that's the only safe way we have to track the original lines
747 my @lines = map { ( undef, $_ ) } split $/, $line;
748
749 # Remember that extra # we added above? Now we remove it
750 pop @lines;
751 pop @lines; # Don't forget the undef
752
753 while (@lines) {
754 if (!defined($lines[0])) {
755 shift @lines;
756 $state{current_line}++;
757 if (!defined($_)) {
758 $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n";
759 $opts{PLACE2} = $opts{filename}.":".$state{current_line};
760 }
761 next;
762 }
763
764 $_ = "" unless defined $_;
765 $_ .= shift @lines;
766
767 if (m|\\$|) {
768 $_ = $`;
769 next;
770 }
771
772 if ($opts{debug}) {
773 print STDERR "DEBUG:----------------------------\n";
774 print STDERR "DEBUG: \$_ = '$_'\n";
775 }
776
777 ##########################################################
778 # Now that we have a full line, let's process through it
779 while(1) {
780 unless ($state{in_comment}) {
781 # Begin with checking if the current $normalized_line
782 # contains a preprocessor directive
783 # This is only done if we're not inside a comment and
784 # if it's a preprocessor directive and it's finished.
785 if ($normalized_line =~ m|^#| && $_ eq "") {
786 print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
787 if $opts{debug};
788 $opts{debug_type} = "OPENSSL CPP";
789 my @r = ( _run_handlers($normalized_line,
790 @opensslcpphandlers,
791 \%opts) );
792 if (shift @r) {
793 # Checking if there are lines to inject.
794 if (@r) {
795 @r = split $/, (pop @r).$_;
796 print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
797 if $opts{debug} && @r;
798 @lines = ( @r, @lines );
799
800 $_ = "";
801 }
802 } else {
803 print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
804 if $opts{debug};
805 $opts{debug_type} = "CPP";
806 my @r = ( _run_handlers($normalized_line,
807 @cpphandlers,
808 \%opts) );
809 if (shift @r) {
810 if (ref($r[0]) eq "HASH") {
811 push @result, shift @r;
812 }
813
814 # Now, check if there are lines to inject.
815 # Really, this should never happen, it IS a
816 # preprocessor directive after all...
817 if (@r) {
818 @r = split $/, pop @r;
819 print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
820 if $opts{debug} && @r;
821 @lines = ( @r, @lines );
822 $_ = "";
823 }
824 }
825 }
826
827 # Note: we simply ignore all directives that no
828 # handler matches
829 $normalized_line = "";
830 }
831
832 # If the two strings end and start with a character that
833 # shouldn't get concatenated, add a space
834 my $space =
835 ($collected_stmt =~ m/(?:"|')$/
836 || ($collected_stmt =~ m/(?:\w|\d)$/
837 && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";
838
839 # Now, unless we're building up a preprocessor directive or
840 # are in the middle of a string, or the parens et al aren't
841 # balanced up yet, let's try and see if there's a OpenSSL
842 # or C handler that can make sense of what we have so far.
843 if ( $normalized_line !~ m|^#|
844 && ($collected_stmt ne "" || $normalized_line ne "")
845 && ! @{$state{c_parens}}
846 && ! $state{in_string} ) {
847 if ($opts{debug}) {
848 print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n";
849 print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
850 }
851 $opts{debug_type} = "OPENSSL C";
852 my @r = ( _run_handlers($collected_stmt
853 .$space
854 .$normalized_line,
855 @opensslchandlers,
856 \%opts) );
857 if (shift @r) {
858 # Checking if there are lines to inject.
859 if (@r) {
860 @r = split $/, (pop @r).$_;
861 print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
862 if $opts{debug} && @r;
863 @lines = ( @r, @lines );
864
865 $_ = "";
866 }
867 $normalized_line = "";
868 $collected_stmt = "";
869 } else {
870 if ($opts{debug}) {
871 print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n";
872 print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
873 }
874 $opts{debug_type} = "C";
875 my @r = ( _run_handlers($collected_stmt
876 .$space
877 .$normalized_line,
878 @chandlers,
879 \%opts) );
880 if (shift @r) {
881 if (ref($r[0]) eq "HASH") {
882 push @result, shift @r;
883 }
884
885 # Checking if there are lines to inject.
886 if (@r) {
887 @r = split $/, (pop @r).$_;
888 print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
889 if $opts{debug} && @r;
890 @lines = ( @r, @lines );
891
892 $_ = "";
893 }
894 $normalized_line = "";
895 $collected_stmt = "";
896 }
897 }
898 }
899 if ($_ eq "") {
900 $collected_stmt .= $space.$normalized_line;
901 $normalized_line = "";
902 }
903 }
904
905 if ($_ eq "") {
906 $_ = undef;
907 last;
908 }
909
910 # Take care of inside string first.
911 if ($state{in_string}) {
912 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
913 $state{in_string} # Look for matching quote
914 /x) {
915 $normalized_line .= $`.$&;
916 $state{in_string} = "";
917 $_ = $';
918 next;
919 } else {
920 die "Unfinished string without continuation found$opts{PLACE}\n";
921 }
922 }
923 # ... or inside comments, whichever happens to apply
924 elsif ($state{in_comment}) {
925
926 # This should never happen
927 die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
928 if ($state{in_comment} eq "//");
929
930 # A note: comments are simply discarded.
931
932 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
933 \*\/ # Look for C comment end
934 /x) {
935 $state{in_comment} = "";
936 $_ = $';
937 print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
938 if $opts{debug};
939 next;
940 } else {
941 $_ = "";
942 next;
943 }
944 }
945
946 # At this point, it's safe to remove leading whites, but
947 # we need to be careful with some preprocessor lines
948 if (m|^\s+|) {
949 my $rest = $';
950 my $space = "";
951 $space = " "
952 if ($normalized_line =~ m/^
953 \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
954 | \#[a-z]+
955 $/x);
956 print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
957 if $opts{debug};
958 $_ = $space.$rest;
959 }
960
961 my $parens =
962 $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens';
963 (my $paren_singular = $parens) =~ s|s$||;
964
965 # Now check for specific tokens, and if they are parens,
966 # check them against $state{$parens}. Note that we surround
967 # the outermost parens with extra "<<<" and ">>>". Those
968 # are for the benefit of handlers who to need to detect
969 # them, and they will be removed from the final output.
970 if (m|^[\{\[\(]|) {
971 my $body = $&;
972 $_ = $';
973 if (!@{$state{$parens}}) {
974 if ("$normalized_line$body" =~ m|^extern "C"\{$|) {
975 $state{in_extern_C} = 1;
976 print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
977 if $opts{debug};
978 $normalized_line = "";
979 } else {
980 $normalized_line .= "<<<".$body;
981 }
982 } else {
983 $normalized_line .= $body;
984 }
985
986 if ($normalized_line ne "") {
987 print STDERR "DEBUG: found $paren_singular start '$body'\n"
988 if $opts{debug};
989 $body =~ tr|\{\[\(|\}\]\)|;
990 print STDERR "DEBUG: pushing $paren_singular end '$body'\n"
991 if $opts{debug};
992 push @{$state{$parens}}, $body;
993 }
994 } elsif (m|^[\}\]\)]|) {
995 $_ = $';
996
997 if (!@{$state{$parens}}
998 && $& eq '}' && $state{in_extern_C}) {
999 print STDERR "DEBUG: found end of 'extern \"C\"'\n"
1000 if $opts{debug};
1001 $state{in_extern_C} = 0;
1002 } else {
1003 print STDERR "DEBUG: Trying to match '$&' against '"
1004 ,join("', '", @{$state{$parens}})
1005 ,"'\n"
1006 if $opts{debug};
1007 die "Unmatched parentheses$opts{PLACE}\n"
1008 unless (@{$state{$parens}}
1009 && pop @{$state{$parens}} eq $&);
1010 if (!@{$state{$parens}}) {
1011 $normalized_line .= $&.">>>";
1012 } else {
1013 $normalized_line .= $&;
1014 }
1015 }
1016 } elsif (m|^["']|) { # string start
1017 my $body = $&;
1018 $_ = $';
1019
1020 # We want to separate strings from \w and \d with one space.
1021 $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
1022 $normalized_line .= $body;
1023 $state{in_string} = $body;
1024 } elsif (m|^\/\*|) { # C style comment
1025 print STDERR "DEBUG: found start of C style comment\n"
1026 if $opts{debug};
1027 $state{in_comment} = $&;
1028 $_ = $';
1029 } elsif (m|^\/\/|) { # C++ style comment
1030 print STDERR "DEBUG: found C++ style comment\n"
1031 if $opts{debug};
1032 $_ = ""; # (just discard it entirely)
1033 } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ )
1034 (?i: U | L | UL | LL | ULL )?
1035 | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)?
1036 ) /x) {
1037 print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
1038 if $opts{debug};
1039 $normalized_line .= $&;
1040 $_ = $';
1041 } elsif (m/^[[:alpha:]_]\w*/) {
1042 my $body = $&;
1043 my $rest = $';
1044 my $space = "";
1045
1046 # Now, only add a space if it's needed to separate
1047 # two \w characters, and we also surround strings with
1048 # a space. In this case, that's if $normalized_line ends
1049 # with a \w, \d, " or '.
1050 $space = " "
1051 if ($normalized_line =~ m/("|')$/
1052 || ($normalized_line =~ m/(\w|\d)$/
1053 && $body =~ m/^(\w|\d)/));
1054
1055 print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
1056 if $opts{debug};
1057 $normalized_line .= $space.$body;
1058 $_ = $rest;
1059 } elsif (m|^(?:\\)?.|) { # Catch-all
1060 $normalized_line .= $&;
1061 $_ = $';
1062 }
1063 }
1064 }
1065 }
1066 foreach my $handler (@endhandlers) {
1067 if ($handler->{massager}) {
1068 $handler->{massager}->(\%opts);
1069 }
1070 }
1071 return @result;
1072}
1073
1074# arg1: line to check
1075# arg2...: handlers to check
1076# return undef when no handler matched
1077sub _run_handlers {
1078 my %opts;
1079 if (ref($_[$#_]) eq "HASH") {
1080 %opts = %{$_[$#_]};
1081 pop @_;
1082 }
1083 my $line = shift;
1084 my @handlers = @_;
1085
1086 foreach my $handler (@handlers) {
1087 if ($handler->{regexp}
1088 && $line =~ m|^$handler->{regexp}$|) {
1089 if ($handler->{massager}) {
1090 if ($opts{debug}) {
1091 print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
1092 print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
1093 }
1094 my $saved_line = $line;
1095 my @massaged =
1096 map { s/(<<<|>>>)//g; $_ }
1097 $handler->{massager}->($saved_line, \%opts);
1098 print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
1099 , join("', '", @massaged), "'\n"
1100 if $opts{debug};
1101
1102 # Because we may get back new lines to be
1103 # injected before whatever else that follows,
1104 # and the injected stuff might include
1105 # preprocessor lines, we need to inject them
1106 # in @lines and set $_ to the empty string to
1107 # break out from the inner loops
1108 my $injected_lines = shift @massaged || "";
1109
1110 if (@massaged) {
1111 return (1,
1112 {
1113 name => shift @massaged,
1114 type => shift @massaged,
1115 returntype => shift @massaged,
1116 value => shift @massaged,
1117 conds => [ @massaged ]
1118 },
1119 $injected_lines
1120 );
1121 } else {
1122 print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n"
1123 if $opts{debug} && $injected_lines eq "";
1124 return (1, $injected_lines);
1125 }
1126 }
1127 return (1);
1128 }
1129 }
1130 return (0);
1131}